|
|||||||||
PREV CLASS NEXT CLASS | FRAMES NO FRAMES | ||||||||
SUMMARY: NESTED | FIELD | CONSTR | METHOD | DETAIL: FIELD | CONSTR | METHOD |
java.lang.Object
uk.ac.shef.dcs.oak.jate.core.feature.indexer.GlobalIndex
uk.ac.shef.dcs.oak.jate.core.feature.indexer.GlobalIndexMem
public class GlobalIndexMem
GlobalIndexMem stores, in memory, information about binary relations between candidate terms (words or phrases) and the corpus. These
include:
- candidate term canonical forms and their int ids
- candidate term variant forms and their int ids
- mapping from candidate term canonical form to variant forms
- corpus elements (document) and their int ids
- candidate terms and their containing documents (id - ids)
- document ids and their contained candidate terms (id - ids)
Field Summary | |
---|---|
protected java.util.Map<java.lang.Integer,java.util.Set<java.lang.Integer>> |
_doc2Terms
|
protected java.util.HashMap<Document,java.lang.Integer> |
_docIdMap
|
protected java.util.Map<java.lang.Integer,java.util.Set<java.lang.Integer>> |
_term2Docs
|
protected java.util.Map<java.lang.Integer,java.util.Set<java.lang.Integer>> |
_term2Variants
|
protected java.util.HashMap<java.lang.String,java.lang.Integer> |
_termIdMap
|
protected java.util.Map<java.lang.Integer,java.lang.Integer> |
_variant2term
|
protected java.util.HashMap<java.lang.String,java.lang.Integer> |
_variantIdMap
|
Fields inherited from class uk.ac.shef.dcs.oak.jate.core.feature.indexer.GlobalIndex |
---|
_docCounter, _termCounter, _variantCounter |
Constructor Summary | |
---|---|
protected |
GlobalIndexMem()
|
Method Summary | |
---|---|
java.util.Map<java.lang.Integer,java.util.Set<java.lang.Integer>> |
getDoc2Terms()
|
java.util.Map<Document,java.lang.Integer> |
getDocIdMap()
|
java.util.Set<java.lang.Integer> |
getDocumentIds()
|
java.util.Set<Document> |
getDocuments()
|
java.util.Map<java.lang.Integer,java.util.Set<java.lang.Integer>> |
getTerm2Docs()
|
java.util.Map<java.lang.Integer,java.util.Set<java.lang.Integer>> |
getTerm2Variants()
|
java.util.Set<java.lang.Integer> |
getTermCanonicalIds()
|
java.util.Map<java.lang.String,java.lang.Integer> |
getTermIdMap()
|
java.util.Set<java.lang.String> |
getTermsCanonical()
|
java.util.Set<java.lang.Integer> |
getTermVariantIds()
|
java.util.Set<java.lang.String> |
getTermVariants()
|
java.util.Map<java.lang.Integer,java.lang.Integer> |
getVariant2Term()
|
java.util.Map<java.lang.String,java.lang.Integer> |
getVariantIdMap()
|
protected int |
indexDocument(Document d)
Given a document, return its id. |
protected void |
indexDocWithTermsCanonical(Document d,
java.util.Set<java.lang.String> terms)
Given a document d which contains a set of terms (canonical form), index the binary relation "document contains term canonical" |
protected void |
indexDocWithTermsCanonical(int d,
java.util.Set<java.lang.Integer> terms)
Given a document with id d which contains a set of terms (canonical form), index the binary relation "document contains term canonical" |
protected int |
indexTermCanonical(java.lang.String term)
Given a candidate term's canonical form, return its id. |
protected void |
indexTermCanonicalInDoc(int t,
int d)
Given a candidate term's canonical form id t found in document with id d, index the binary relation "t found_in d" |
protected void |
indexTermCanonicalInDoc(java.lang.String t,
Document d)
Given a candidate term's canonical form t found in document d, index the binary relation "t found_in d" |
protected int |
indexTermVariant(java.lang.String termV)
Given a candidate term variant, index it and return its id. |
protected void |
indexTermWithVariant(java.util.Map<java.lang.String,java.util.Set<java.lang.String>> map)
Given a map containing [term canonical form - term variant forms], index the mapping, plus the mapping from term variant to term canonical |
int |
retrieveCanonicalOfTermVariant(java.lang.String termVar)
Given a term variant form, retrieve its canonical form |
java.util.Set<java.lang.Integer> |
retrieveDocIdsContainingTermCanonical(int id)
|
java.util.Set<java.lang.Integer> |
retrieveDocIdsContainingTermCanonical(java.lang.String t)
|
java.util.Set<Document> |
retrieveDocsContainingTermCanonical(int t)
|
java.util.Set<Document> |
retrieveDocsContainingTermCanonical(java.lang.String t)
|
int |
retrieveDocument(Document d)
Given a document, return its id. |
Document |
retrieveDocument(int id)
Given a document id, return the document |
java.lang.String |
retrieveTermCanonical(int id)
Given an id, retrieve the candidate term's canonical form |
int |
retrieveTermCanonical(java.lang.String term)
Given a candidate term's canonical form, return its id. |
java.util.Set<java.lang.Integer> |
retrieveTermCanonicalIdsInDoc(Document d)
|
java.util.Set<java.lang.Integer> |
retrieveTermCanonicalIdsInDoc(int d)
|
java.util.Set<java.lang.String> |
retrieveTermCanonicalInDoc(int d)
|
java.util.Set<java.lang.String> |
retrieveTermsCanonicalInDoc(Document d)
|
protected java.lang.String |
retrieveTermVariant(int id)
Given an id of a candidate term variant, retrieve the text |
java.util.Set<java.lang.String> |
retrieveVariantsOfTermCanonical(java.lang.String term)
Given a term canonical form, retrieve its variant forms found in the corpus |
int |
sizeDocHasTerms(Document d)
|
int |
sizeDocHasTerms(int d)
|
int |
sizeTermInDocs(int t)
|
int |
sizeTermInDocs(java.lang.String t)
|
Methods inherited from class java.lang.Object |
---|
clone, equals, finalize, getClass, hashCode, notify, notifyAll, toString, wait, wait, wait |
Field Detail |
---|
protected java.util.HashMap<java.lang.String,java.lang.Integer> _termIdMap
protected java.util.HashMap<java.lang.String,java.lang.Integer> _variantIdMap
protected java.util.HashMap<Document,java.lang.Integer> _docIdMap
protected java.util.Map<java.lang.Integer,java.util.Set<java.lang.Integer>> _term2Docs
protected java.util.Map<java.lang.Integer,java.util.Set<java.lang.Integer>> _doc2Terms
protected java.util.Map<java.lang.Integer,java.util.Set<java.lang.Integer>> _term2Variants
protected java.util.Map<java.lang.Integer,java.lang.Integer> _variant2term
Constructor Detail |
---|
protected GlobalIndexMem()
Method Detail |
---|
public java.util.Map<java.lang.String,java.lang.Integer> getTermIdMap()
public java.util.Map<java.lang.String,java.lang.Integer> getVariantIdMap()
public java.util.Map<Document,java.lang.Integer> getDocIdMap()
public java.util.Map<java.lang.Integer,java.util.Set<java.lang.Integer>> getTerm2Docs()
public java.util.Map<java.lang.Integer,java.util.Set<java.lang.Integer>> getDoc2Terms()
public java.util.Map<java.lang.Integer,java.util.Set<java.lang.Integer>> getTerm2Variants()
public java.util.Map<java.lang.Integer,java.lang.Integer> getVariant2Term()
protected int indexTermCanonical(java.lang.String term)
indexTermCanonical
in class GlobalIndex
term
-
public int retrieveTermCanonical(java.lang.String term)
retrieveTermCanonical
in class GlobalIndex
term
-
public java.lang.String retrieveTermCanonical(int id)
retrieveTermCanonical
in class GlobalIndex
id
-
public java.util.Set<java.lang.Integer> getTermCanonicalIds()
getTermCanonicalIds
in class GlobalIndex
public java.util.Set<java.lang.String> getTermsCanonical()
getTermsCanonical
in class GlobalIndex
protected int indexTermVariant(java.lang.String termV)
indexTermVariant
in class GlobalIndex
termV
-
protected java.lang.String retrieveTermVariant(int id)
retrieveTermVariant
in class GlobalIndex
id
-
public java.util.Set<java.lang.Integer> getTermVariantIds()
getTermVariantIds
in class GlobalIndex
public java.util.Set<java.lang.String> getTermVariants()
getTermVariants
in class GlobalIndex
protected int indexDocument(Document d)
indexDocument
in class GlobalIndex
d
-
public int retrieveDocument(Document d)
retrieveDocument
in class GlobalIndex
d
-
public Document retrieveDocument(int id)
retrieveDocument
in class GlobalIndex
id
-
public java.util.Set<Document> getDocuments()
getDocuments
in class GlobalIndex
public java.util.Set<java.lang.Integer> getDocumentIds()
getDocumentIds
in class GlobalIndex
protected void indexTermWithVariant(java.util.Map<java.lang.String,java.util.Set<java.lang.String>> map)
indexTermWithVariant
in class GlobalIndex
map
-
public java.util.Set<java.lang.String> retrieveVariantsOfTermCanonical(java.lang.String term)
retrieveVariantsOfTermCanonical
in class GlobalIndex
term
-
public int retrieveCanonicalOfTermVariant(java.lang.String termVar)
retrieveCanonicalOfTermVariant
in class GlobalIndex
termVar
-
protected void indexTermCanonicalInDoc(java.lang.String t, Document d)
indexTermCanonicalInDoc
in class GlobalIndex
t
-
d
-
protected void indexTermCanonicalInDoc(int t, int d)
indexTermCanonicalInDoc
in class GlobalIndex
t
-
d
-
public java.util.Set<java.lang.Integer> retrieveDocIdsContainingTermCanonical(java.lang.String t)
retrieveDocIdsContainingTermCanonical
in class GlobalIndex
t
- the candidate term's canonical form in question
public java.util.Set<java.lang.Integer> retrieveDocIdsContainingTermCanonical(int id)
retrieveDocIdsContainingTermCanonical
in class GlobalIndex
id
- the candidate term's canonical form in question
public java.util.Set<Document> retrieveDocsContainingTermCanonical(java.lang.String t)
retrieveDocsContainingTermCanonical
in class GlobalIndex
t
- the candidate term's canonical form in question
public java.util.Set<Document> retrieveDocsContainingTermCanonical(int t)
retrieveDocsContainingTermCanonical
in class GlobalIndex
t
- the candidate term's canonical form id in question
public int sizeTermInDocs(java.lang.String t)
sizeTermInDocs
in class GlobalIndex
t
- the candidate term's canonical form
public int sizeTermInDocs(int t)
sizeTermInDocs
in class GlobalIndex
t
- the id of candidate term's canonical form
protected void indexDocWithTermsCanonical(Document d, java.util.Set<java.lang.String> terms)
indexDocWithTermsCanonical
in class GlobalIndex
d
-
terms
- canonical forms of candidate terms found in document d
protected void indexDocWithTermsCanonical(int d, java.util.Set<java.lang.Integer> terms)
indexDocWithTermsCanonical
in class GlobalIndex
d
- id of document
terms
- canonical forms of candidate terms found in document d
public java.util.Set<java.lang.Integer> retrieveTermCanonicalIdsInDoc(Document d)
retrieveTermCanonicalIdsInDoc
in class GlobalIndex
d
-
public java.util.Set<java.lang.Integer> retrieveTermCanonicalIdsInDoc(int d)
retrieveTermCanonicalIdsInDoc
in class GlobalIndex
d
-
public java.util.Set<java.lang.String> retrieveTermsCanonicalInDoc(Document d)
retrieveTermsCanonicalInDoc
in class GlobalIndex
d
-
public java.util.Set<java.lang.String> retrieveTermCanonicalInDoc(int d)
retrieveTermCanonicalInDoc
in class GlobalIndex
d
-
public int sizeDocHasTerms(Document d)
sizeDocHasTerms
in class GlobalIndex
d
-
public int sizeDocHasTerms(int d)
sizeDocHasTerms
in class GlobalIndex
d
-
|
|||||||||
PREV CLASS NEXT CLASS | FRAMES NO FRAMES | ||||||||
SUMMARY: NESTED | FIELD | CONSTR | METHOD | DETAIL: FIELD | CONSTR | METHOD |