|
|||||||||
PREV CLASS NEXT CLASS | FRAMES NO FRAMES | ||||||||
SUMMARY: NESTED | FIELD | CONSTR | METHOD | DETAIL: FIELD | CONSTR | METHOD |
java.lang.Object
  uk.ac.shef.dcs.oak.jate.core.feature.indexer.GlobalIndex
public abstract class GlobalIndex
Indexes terms (canonical forms and their variants), documents, and the contains/occurs-in relations between terms and documents.
Field Summary | |
---|---|
protected int |
_docCounter
|
protected int |
_termCounter
|
protected int |
_variantCounter
|
Constructor Summary | |
---|---|
GlobalIndex()
|
Method Summary | |
---|---|
abstract java.util.Set<java.lang.Integer> |
getDocumentIds()
|
abstract java.util.Set<Document> |
getDocuments()
|
abstract java.util.Set<java.lang.Integer> |
getTermCanonicalIds()
|
abstract java.util.Set<java.lang.String> |
getTermsCanonical()
|
abstract java.util.Set<java.lang.Integer> |
getTermVariantIds()
|
abstract java.util.Set<java.lang.String> |
getTermVariants()
|
protected abstract int |
indexDocument(Document d)
Given a document, index it and return its id. |
protected abstract void |
indexDocWithTermsCanonical(Document d,
java.util.Set<java.lang.String> terms)
Given a document d which contains a set of terms (canonical form), index the binary relation "document contains term canonical" |
protected abstract void |
indexDocWithTermsCanonical(int d,
java.util.Set<java.lang.Integer> terms)
Given a document with id d which contains a set of terms (canonical form), index the binary relation "document contains term canonical" |
protected abstract int |
indexTermCanonical(java.lang.String term)
Given a candidate term's canonical form, index it and return its id. |
protected abstract void |
indexTermCanonicalInDoc(int t,
int d)
Given a candidate term's canonical form id t found in document with id d, index the binary relation "t found_in d" |
protected abstract void |
indexTermCanonicalInDoc(java.lang.String t,
Document d)
Given a candidate term's canonical form t found in document d, index the binary relation "t found_in d" |
protected abstract int |
indexTermVariant(java.lang.String termV)
Given a candidate term variant, index it and return its id. |
protected abstract void |
indexTermWithVariant(java.util.Map<java.lang.String,java.util.Set<java.lang.String>> map)
Given a map containing [term canonical form - term variant forms], index the mapping |
abstract int |
retrieveCanonicalOfTermVariant(java.lang.String termVar)
Given a term variant form, retrieve its canonical form |
abstract java.util.Set<java.lang.Integer> |
retrieveDocIdsContainingTermCanonical(int id)
|
abstract java.util.Set<java.lang.Integer> |
retrieveDocIdsContainingTermCanonical(java.lang.String t)
|
abstract java.util.Set<Document> |
retrieveDocsContainingTermCanonical(int t)
|
abstract java.util.Set<Document> |
retrieveDocsContainingTermCanonical(java.lang.String t)
|
abstract int |
retrieveDocument(Document d)
Given a document, return its id. |
abstract Document |
retrieveDocument(int id)
Given a document id return the document |
abstract java.lang.String |
retrieveTermCanonical(int id)
Given an id, retrieve the candidate term's canonical form |
abstract int |
retrieveTermCanonical(java.lang.String term)
Given a candidate term's canonical form, return its id. |
abstract java.util.Set<java.lang.Integer> |
retrieveTermCanonicalIdsInDoc(Document d)
|
abstract java.util.Set<java.lang.Integer> |
retrieveTermCanonicalIdsInDoc(int d)
|
abstract java.util.Set<java.lang.String> |
retrieveTermCanonicalInDoc(int d)
|
abstract java.util.Set<java.lang.String> |
retrieveTermsCanonicalInDoc(Document d)
|
protected abstract java.lang.String |
retrieveTermVariant(int id)
Given an id of a candidate term variant, retrieve the text |
abstract java.util.Set<java.lang.String> |
retrieveVariantsOfTermCanonical(java.lang.String term)
Given a term canonical form, retrieve its variant forms found in the corpus |
abstract int |
sizeDocHasTerms(Document d)
|
abstract int |
sizeDocHasTerms(int d)
|
abstract int |
sizeTermInDocs(int t)
|
abstract int |
sizeTermInDocs(java.lang.String t)
|
Methods inherited from class java.lang.Object |
---|
clone, equals, finalize, getClass, hashCode, notify, notifyAll, toString, wait, wait, wait |
Field Detail |
---|
protected int _termCounter
protected int _variantCounter
protected int _docCounter
Constructor Detail |
---|
public GlobalIndex()
Method Detail |
---|
protected abstract int indexTermCanonical(java.lang.String term)
term
-
public abstract java.lang.String retrieveTermCanonical(int id)
id
-
public abstract int retrieveTermCanonical(java.lang.String term)
term
-
public abstract java.util.Set<java.lang.Integer> getTermCanonicalIds()
public abstract java.util.Set<java.lang.String> getTermsCanonical()
protected abstract int indexTermVariant(java.lang.String termV)
termV
-
protected abstract java.lang.String retrieveTermVariant(int id)
id
-
public abstract int retrieveCanonicalOfTermVariant(java.lang.String termVar)
termVar
-
public abstract java.util.Set<java.lang.Integer> getTermVariantIds()
public abstract java.util.Set<java.lang.String> getTermVariants()
protected abstract int indexDocument(Document d)
d
-
public abstract Document retrieveDocument(int id)
id
-
public abstract int retrieveDocument(Document d)
d
-
public abstract java.util.Set<Document> getDocuments()
public abstract java.util.Set<java.lang.Integer> getDocumentIds()
protected abstract void indexTermWithVariant(java.util.Map<java.lang.String,java.util.Set<java.lang.String>> map)
map
-
public abstract java.util.Set<java.lang.String> retrieveVariantsOfTermCanonical(java.lang.String term)
term
-
protected abstract void indexTermCanonicalInDoc(java.lang.String t, Document d)
t
-
d
-
protected abstract void indexTermCanonicalInDoc(int t, int d)
t
-
d
-
public abstract java.util.Set<java.lang.Integer> retrieveDocIdsContainingTermCanonical(java.lang.String t)
t
- the candidate term's canonical form in question
public abstract java.util.Set<java.lang.Integer> retrieveDocIdsContainingTermCanonical(int id)
id
- the candidate term's canonical form id in question
public abstract java.util.Set<Document> retrieveDocsContainingTermCanonical(java.lang.String t)
t
- the candidate term's canonical form in question
public abstract java.util.Set<Document> retrieveDocsContainingTermCanonical(int t)
t
- the candidate term's canonical form id in question
public abstract int sizeTermInDocs(java.lang.String t)
t
- the candidate term's canonical form
public abstract int sizeTermInDocs(int t)
t
- the id of candidate term's canonical form
protected abstract void indexDocWithTermsCanonical(Document d, java.util.Set<java.lang.String> terms)
d
-
terms
- canonical forms of candidate terms found in document d
protected abstract void indexDocWithTermsCanonical(int d, java.util.Set<java.lang.Integer> terms)
d
- id of document
terms
- canonical forms of candidate terms found in document d
public abstract java.util.Set<java.lang.Integer> retrieveTermCanonicalIdsInDoc(Document d)
d
-
public abstract java.util.Set<java.lang.Integer> retrieveTermCanonicalIdsInDoc(int d)
d
-
public abstract java.util.Set<java.lang.String> retrieveTermsCanonicalInDoc(Document d)
d
-
public abstract java.util.Set<java.lang.String> retrieveTermCanonicalInDoc(int d)
d
-
public abstract int sizeDocHasTerms(Document d)
d
-
public abstract int sizeDocHasTerms(int d)
d
-
|
|||||||||
PREV CLASS NEXT CLASS | FRAMES NO FRAMES | ||||||||
SUMMARY: NESTED | FIELD | CONSTR | METHOD | DETAIL: FIELD | CONSTR | METHOD |