lemurproject.indri
Class IndexEnvironment

java.lang.Object
  extended by lemurproject.indri.IndexEnvironment

public class IndexEnvironment
extends java.lang.Object


Field Summary
protected  boolean swigCMemOwn
           
private  long swigCPtr
           
 
Constructor Summary
  IndexEnvironment()
           
protected IndexEnvironment(long cPtr, boolean cMemoryOwn)
           
 
Method Summary
 void addFile(java.lang.String fileName)
          Add the text in a file to the index and repository.
 void addFile(java.lang.String fileName, java.lang.String fileClass)
          add a file of the specified file class to the index and repository
 void addFileClass(Specification spec)
          Add a file class.
 void addFileClass(java.lang.String name, java.lang.String iterator, java.lang.String parser, java.lang.String tokenizer, java.lang.String startDocTag, java.lang.String endDocTag, java.lang.String endMetadataTag, java.lang.String[] include, java.lang.String[] exclude, java.lang.String[] index, java.lang.String[] metadata, java.util.Map conflations)
          Add parsing information for a file class.
 int addParsedDocument(ParsedDocument document)
          add an already parsed document to the index and repository
 int addString(java.lang.String fileName, java.lang.String fileClass, java.util.Map metadata)
          Adds a string to the index and repository.
 int addString(java.lang.String documentString, java.lang.String fileClass, java.util.Map metadata, TagExtent[] tags)
          add an already parsed document to the index and repository
 void close()
          close the index and repository
 void create(java.lang.String repositoryPath)
          create a new index and repository
 void create(java.lang.String repositoryPath, IndexStatus callback)
          create a new index and repository
 void delete()
           
 void deleteDocument(int documentID)
          Delete an existing document.
 int documentsIndexed()
          Returns the number of documents indexed so far in this session.
 int documentsSeen()
          Returns the number of documents considered for indexing, which is the sum of the documents indexed and the documents skipped.
protected  void finalize()
           
protected static long getCPtr(IndexEnvironment obj)
           
 Specification getFileClassSpec(java.lang.String name)
          Get a named file class.
 void open(java.lang.String repositoryPath)
          open an existing index and repository
 void open(java.lang.String repositoryPath, IndexStatus callback)
          open an existing index and repository
 void setAnchorTextPath(java.lang.String anchorTextRoot)
          Set anchor text root path.
 void setDocumentRoot(java.lang.String documentRoot)
          Set the document root path
 void setIndexedFields(java.lang.String[] fieldNames)
          Set names of fields to be indexed.
 void setMemory(long memory)
          set the amount of memory to use for internal structures
 void setMetadataIndexedFields(java.lang.String[] forward, java.lang.String[] backward)
          Set names of metadata fields to be indexed for fast retrieval.
 void setNormalization(boolean normalize)
          set normalization of case and some punctuation; default is true (normalize during indexing and at query time)
 void setNumericField(java.lang.String fieldName, boolean isNumeric)
          Set the numeric property of a field.
 void setNumericField(java.lang.String fieldName, boolean isNumeric, java.lang.String parserName)
          Set the numeric property of a field.
 void setOffsetAnnotationsPath(java.lang.String offsetAnnotationsRoot)
          Set offset annotations root path.
 void setOffsetMetadataPath(java.lang.String offsetMetadataRoot)
          Set offset metadata root path.
 void setStemmer(java.lang.String stemmer)
          set the stemmer to use
 void setStopwords(java.lang.String[] stopwords)
          set the list of stopwords
 void setStoreDocs(boolean flag)
          set the storeDocs flag
 
Methods inherited from class java.lang.Object
clone, equals, getClass, hashCode, notify, notifyAll, toString, wait, wait, wait
 

Field Detail

swigCPtr

private long swigCPtr

swigCMemOwn

protected boolean swigCMemOwn
Constructor Detail

IndexEnvironment

protected IndexEnvironment(long cPtr,
                           boolean cMemoryOwn)

IndexEnvironment

public IndexEnvironment()
Method Detail

getCPtr

protected static long getCPtr(IndexEnvironment obj)

finalize

protected void finalize()
Overrides:
finalize in class java.lang.Object

delete

public void delete()

setDocumentRoot

public void setDocumentRoot(java.lang.String documentRoot)
                     throws java.lang.Exception
Set the document root path

Parameters:
documentRoot - path to document root.
Throws:
java.lang.Exception - if a lemur::api::Exception was thrown by the JNI library.

setAnchorTextPath

public void setAnchorTextPath(java.lang.String anchorTextRoot)
                       throws java.lang.Exception
Set anchor text root path.

Parameters:
anchorTextRoot - path to anchor text root.
Throws:
java.lang.Exception - if a lemur::api::Exception was thrown by the JNI library.

setOffsetMetadataPath

public void setOffsetMetadataPath(java.lang.String offsetMetadataRoot)
                           throws java.lang.Exception
Set offset metadata root path.

Parameters:
offsetMetadataRoot - path to offset metadata root.
Throws:
java.lang.Exception - if a lemur::api::Exception was thrown by the JNI library.

setOffsetAnnotationsPath

public void setOffsetAnnotationsPath(java.lang.String offsetAnnotationsRoot)
                              throws java.lang.Exception
Set offset annotations root path.

Parameters:
offsetAnnotationsRoot - path to offset annotations root.
Throws:
java.lang.Exception - if a lemur::api::Exception was thrown by the JNI library.

addFileClass

public void addFileClass(java.lang.String name,
                         java.lang.String iterator,
                         java.lang.String parser,
                         java.lang.String tokenizer,
                         java.lang.String startDocTag,
                         java.lang.String endDocTag,
                         java.lang.String endMetadataTag,
                         java.lang.String[] include,
                         java.lang.String[] exclude,
                         java.lang.String[] index,
                         java.lang.String[] metadata,
                         java.util.Map conflations)
                  throws java.lang.Exception
Add parsing information for a file class. Data for these parameters is passed into the FileClassEnvironmentFactory.

Parameters:
name - name of this file class, eg trecweb
iterator - document iterator for this file class
parser - document parser for this file class
tokenizer - document tokenizer for this file class
startDocTag - tag indicating start of a document
endDocTag - tag indicating the end of a document
endMetadataTag - tag indicating the end of the metadata fields
include - default tags whose contents should be included in the index
exclude - tags whose contents should be excluded from the index
index - tags that should be forwarded to the index for tag extents
metadata - tags whose contents should be indexed as metadata
conflations - tags that should be conflated
Throws:
java.lang.Exception - if a lemur::api::Exception was thrown by the JNI library.

getFileClassSpec

public Specification getFileClassSpec(java.lang.String name)
                               throws java.lang.Exception
Get a named file class.

Parameters:
name - The name of the file class to retrieve.
Throws:
java.lang.Exception - if a lemur::api::Exception was thrown by the JNI library.

addFileClass

public void addFileClass(Specification spec)
                  throws java.lang.Exception
Add a file class.

Parameters:
spec - The file class to add.
Throws:
java.lang.Exception - if a lemur::api::Exception was thrown by the JNI library.

deleteDocument

public void deleteDocument(int documentID)
                    throws java.lang.Exception
Delete an existing document.

Parameters:
documentID - The document to delete.
Throws:
java.lang.Exception - if a lemur::api::Exception was thrown by the JNI library.

setIndexedFields

public void setIndexedFields(java.lang.String[] fieldNames)
                      throws java.lang.Exception
Set names of fields to be indexed. This call indicates to the index that information about these fields should be stored in the index so they can be used in queries. This does not affect whether or not the text in a particular field is stored in an index.

Parameters:
fieldNames - the list of fields.
Throws:
java.lang.Exception - if a lemur::api::Exception was thrown by the JNI library.

setNumericField

public void setNumericField(java.lang.String fieldName,
                            boolean isNumeric,
                            java.lang.String parserName)
                     throws java.lang.Exception
Set the numeric property of a field.

Parameters:
fieldName - the field.
isNumeric - true if the field is a numeric field, false if not.
parserName - The name of the Transformation to use to compute the numeric value of the field. Repository currently recognizes the name NumericFieldAnnotator.
Throws:
java.lang.Exception - if a lemur::api::Exception was thrown by the JNI library.

setNumericField

public void setNumericField(java.lang.String fieldName,
                            boolean isNumeric)
                     throws java.lang.Exception
Set the numeric property of a field.

Parameters:
fieldName - the field.
isNumeric - true if the field is a numeric field, false if not.
Note: this overload takes no parserName argument. To name the Transformation used to compute the numeric value of the field (Repository currently recognizes the name NumericFieldAnnotator), use setNumericField(fieldName, isNumeric, parserName).
Throws:
java.lang.Exception - if a lemur::api::Exception was thrown by the JNI library.

setMetadataIndexedFields

public void setMetadataIndexedFields(java.lang.String[] forward,
                                     java.lang.String[] backward)
                              throws java.lang.Exception
Set names of metadata fields to be indexed for fast retrieval. The forward fields are indexed in a B-Tree mapping (documentID, metadataValue). If a field is not forward indexed, the documentMetadata calls will still work, but they will be slower (the document has to be retrieved, decompressed and parsed to get the metadata back, instead of just a B-Tree lookup). The backward indexed fields store a mapping of (metadataValue, documentID). If a field is not backward indexed, the documentIDsFromMetadata and documentFromMetadata calls will not work.

Parameters:
forward - the list of fields to forward index.
backward - the list of fields to backward index.
Throws:
java.lang.Exception - if a lemur::api::Exception was thrown by the JNI library.

setStopwords

public void setStopwords(java.lang.String[] stopwords)
                  throws java.lang.Exception
set the list of stopwords

Parameters:
stopwords - the list of stopwords
Throws:
java.lang.Exception - if a lemur::api::Exception was thrown by the JNI library.

setStemmer

public void setStemmer(java.lang.String stemmer)
                throws java.lang.Exception
set the stemmer to use

Parameters:
stemmer - the stemmer to use. One of krovetz, porter
Throws:
java.lang.Exception - if a lemur::api::Exception was thrown by the JNI library.

setMemory

public void setMemory(long memory)
               throws java.lang.Exception
set the amount of memory to use for internal structures

Parameters:
memory - the number of bytes to use.
Throws:
java.lang.Exception - if a lemur::api::Exception was thrown by the JNI library.

setNormalization

public void setNormalization(boolean normalize)
                      throws java.lang.Exception
set normalization of case and some punctuation; default is true (normalize during indexing and at query time)

Parameters:
normalize - True, if text should be normalized, false otherwise.
Throws:
java.lang.Exception - if a lemur::api::Exception was thrown by the JNI library.

setStoreDocs

public void setStoreDocs(boolean flag)
                  throws java.lang.Exception
set the storeDocs flag

Parameters:
flag - false to not store documents in the compressed collection, true to do so (default)
Throws:
java.lang.Exception - if a lemur::api::Exception was thrown by the JNI library.

create

public void create(java.lang.String repositoryPath,
                   IndexStatus callback)
            throws java.lang.Exception
create a new index and repository

Parameters:
repositoryPath - the path to the repository
callback - IndexStatus object to be notified of indexing progress.
Throws:
java.lang.Exception - if a lemur::api::Exception was thrown by the JNI library.

create

public void create(java.lang.String repositoryPath)
            throws java.lang.Exception
create a new index and repository

Parameters:
repositoryPath - the path to the repository
Note: this overload takes no callback argument. To have an IndexStatus object notified of indexing progress, use create(repositoryPath, callback).
Throws:
java.lang.Exception - if a lemur::api::Exception was thrown by the JNI library.

open

public void open(java.lang.String repositoryPath,
                 IndexStatus callback)
          throws java.lang.Exception
open an existing index and repository

Parameters:
repositoryPath - the path to the repository
callback - IndexStatus object to be notified of indexing progress.
Throws:
java.lang.Exception - if a lemur::api::Exception was thrown by the JNI library.

open

public void open(java.lang.String repositoryPath)
          throws java.lang.Exception
open an existing index and repository

Parameters:
repositoryPath - the path to the repository
Note: this overload takes no callback argument. To have an IndexStatus object notified of indexing progress, use open(repositoryPath, callback).
Throws:
java.lang.Exception - if a lemur::api::Exception was thrown by the JNI library.

close

public void close()
           throws java.lang.Exception
close the index and repository

Throws:
java.lang.Exception - if a lemur::api::Exception was thrown by the JNI library.

addFile

public void addFile(java.lang.String fileName)
             throws java.lang.Exception
Add the text in a file to the index and repository. The fileClass of this file will be chosen based on the file extension. If the file has no extension, it will be skipped. Information about indexing progress will be passed to the callback.

Parameters:
fileName - the file to add
Throws:
java.lang.Exception - if a lemur::api::Exception was thrown by the JNI library.

addFile

public void addFile(java.lang.String fileName,
                    java.lang.String fileClass)
             throws java.lang.Exception
add a file of the specified file class to the index and repository

Parameters:
fileName - the file to add
fileClass - the file class to add (eg trecweb).
Throws:
java.lang.Exception - if a lemur::api::Exception was thrown by the JNI library.

addString

public int addString(java.lang.String fileName,
                     java.lang.String fileClass,
                     java.util.Map metadata)
              throws java.lang.Exception
Adds a string to the index and repository. The documentString is assumed to contain the kind of text that would be found in a file of type fileClass.

Parameters:
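fileName - the document string to add (the "documentString" referred to in the description above; it holds the document text itself).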
fileClass - the file class to add (eg trecweb).
metadata - the metadata pairs associated with the string.
Throws:
java.lang.Exception - if a lemur::api::Exception was thrown by the JNI library.

addString

public int addString(java.lang.String documentString,
                     java.lang.String fileClass,
                     java.util.Map metadata,
                     TagExtent[] tags)
              throws java.lang.Exception
add an already parsed document to the index and repository

Parameters:
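documentString - the document string to add
fileClass - the file class of the document string (eg trecweb).
metadata - the metadata pairs associated with the string.
tags - the tag extents associated with the document string.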
Throws:
java.lang.Exception - if a lemur::api::Exception was thrown by the JNI library.

addParsedDocument

public int addParsedDocument(ParsedDocument document)
                      throws java.lang.Exception
add an already parsed document to the index and repository

Parameters:
document - the document to add
Throws:
java.lang.Exception - if a lemur::api::Exception was thrown by the JNI library.

documentsIndexed

public int documentsIndexed()
                     throws java.lang.Exception
Returns the number of documents indexed so far in this session.

Throws:
java.lang.Exception - if a lemur::api::Exception was thrown by the JNI library.

documentsSeen

public int documentsSeen()
                  throws java.lang.Exception
Returns the number of documents considered for indexing, which is the sum of the documents indexed and the documents skipped.

Throws:
java.lang.Exception - if a lemur::api::Exception was thrown by the JNI library.