public class AceToken
extends java.lang.Object
Modifier and Type | Field and Description |
---|---|
static int |
CASE_ALLCAPS |
static int |
CASE_ALLCAPSORDOTS |
static int |
CASE_ALLDIGITS |
static int |
CASE_ALLDIGITSORDOTS |
static int |
CASE_CAPINI |
static int |
CASE_INCAP |
static int |
CASE_OTHER |
static StringDictionary |
LEMMAS
Dictionary for all lemmas in the corpus
|
static StringDictionary |
OTHERS
Dictionary for all other strings in the corpus
|
static java.util.Map<java.lang.Integer,java.util.ArrayList<java.lang.Integer>> |
PROX_CLASSES
Map of all proximity classes
|
static StringDictionary |
WORDS
Dictionary for all words in the corpus
|
Constructor and Description |
---|
AceToken(java.lang.String word,
java.lang.String lemma,
java.lang.String pos,
java.lang.String chunk,
java.lang.String nerc,
java.lang.String start,
java.lang.String end,
int sentence)
Constructs an AceToken from a tokenized line generated by Tokey
|
Modifier and Type | Method and Description |
---|---|
int |
adjustPhrasePositions(int offsetToSubtract,
java.lang.String word)
Recomputes start/end phrase positions by removing SGML tag strings. This is
required because ACE annotations skip over SGML tags when computing
positions in the stream; hence the annotations do not match our preprocessing
positions, which count everything.
|
java.lang.String |
display()
Pretty display
|
static boolean |
exists(java.util.Map<java.lang.String,java.lang.String> dict,
java.lang.String elem)
Verifies if the given string exists in the given dictionary
|
int |
getByteEnd() |
Span |
getByteOffset() |
int |
getByteStart() |
int |
getCase() |
int |
getChunk() |
int |
getLemma() |
java.lang.String |
getLiteral() |
java.lang.String |
getMassiBbn() |
java.lang.String |
getMassiClass() |
java.lang.String |
getMassiWnss() |
int |
getNerc() |
int |
getPos() |
int |
getRawByteEnd() |
Span |
getRawByteOffset() |
int |
getRawByteStart() |
int |
getSentence() |
int[] |
getSuffixes() |
int |
getWord() |
static boolean |
isFirstName(java.lang.String lower) |
static boolean |
isLastName(java.lang.String lower) |
static boolean |
isLocation(java.lang.String lower) |
static boolean |
isSgml(java.lang.String s) |
static java.lang.String |
isTriggerWord(java.lang.String lower) |
static void |
loadGazetteers(java.lang.String dataPath) |
static void |
loadProximityClasses(java.lang.String proxFileName)
Loads all proximity classes from the hard disk. The WORDS map must be
created beforehand!
|
static java.lang.String |
removeSpaces(java.lang.String s) |
void |
setMassiBbn(java.lang.String i) |
void |
setMassiClass(java.lang.String i) |
void |
setMassiWnss(java.lang.String i) |
java.lang.String |
toString() |
public static final StringDictionary WORDS
public static final StringDictionary LEMMAS
public static final StringDictionary OTHERS
public static final java.util.Map<java.lang.Integer,java.util.ArrayList<java.lang.Integer>> PROX_CLASSES
public static final int CASE_OTHER
public static final int CASE_ALLCAPS
public static final int CASE_ALLCAPSORDOTS
public static final int CASE_CAPINI
public static final int CASE_INCAP
public static final int CASE_ALLDIGITS
public static final int CASE_ALLDIGITSORDOTS
public AceToken(java.lang.String word, java.lang.String lemma, java.lang.String pos, java.lang.String chunk, java.lang.String nerc, java.lang.String start, java.lang.String end, int sentence)
public static void loadGazetteers(java.lang.String dataPath) throws java.io.FileNotFoundException, java.io.IOException
Throws:
java.io.FileNotFoundException
java.io.IOException
public static boolean isLocation(java.lang.String lower)
public static boolean isFirstName(java.lang.String lower)
public static boolean isLastName(java.lang.String lower)
public static java.lang.String isTriggerWord(java.lang.String lower)
public static boolean exists(java.util.Map<java.lang.String,java.lang.String> dict, java.lang.String elem)
public static void loadProximityClasses(java.lang.String proxFileName) throws java.io.IOException
Throws:
java.io.IOException
public java.lang.String getLiteral()
public int getWord()
public int getCase()
public int[] getSuffixes()
public int getLemma()
public int getPos()
public int getChunk()
public int getNerc()
public Span getByteOffset()
public int getByteStart()
public int getByteEnd()
public int getSentence()
public Span getRawByteOffset()
public int getRawByteStart()
public int getRawByteEnd()
public void setMassiClass(java.lang.String i)
public java.lang.String getMassiClass()
public void setMassiBbn(java.lang.String i)
public java.lang.String getMassiBbn()
public void setMassiWnss(java.lang.String i)
public java.lang.String getMassiWnss()
public static boolean isSgml(java.lang.String s)
public static java.lang.String removeSpaces(java.lang.String s)
public int adjustPhrasePositions(int offsetToSubtract, java.lang.String word)
public java.lang.String display()
public java.lang.String toString()
Overrides:
toString in class java.lang.Object