public class ArabicSegmenter extends java.lang.Object implements WordSegmenter, ThreadsafeProcessor<java.lang.String,java.lang.String>
Constructor and Description |
---|
ArabicSegmenter(ArabicSegmenter other)
Copy constructor.
|
ArabicSegmenter(java.util.Properties props)
Make an Arabic Segmenter.
|
Modifier and Type | Method and Description |
---|---|
void |
finishTraining() |
static ArabicSegmenter |
getSegmenter(java.util.Properties options)
Train a new segmenter or load a trained model from file.
|
void |
initializeTraining(double numTrees) |
void |
loadSegmenter(java.lang.String filename) |
void |
loadSegmenter(java.lang.String filename,
java.util.Properties p) |
static void |
main(java.lang.String[] args) |
ThreadsafeProcessor<java.lang.String,java.lang.String> |
newInstance()
Return a new threadsafe instance.
|
java.lang.String |
process(java.lang.String nextInput)
Set the input item that will be processed when a thread is allocated to
this processor.
|
long |
segment(java.io.BufferedReader br,
java.io.PrintWriter pwOut)
Segment all strings from an input.
|
java.util.List<HasWord> |
segment(java.lang.String line) |
java.lang.String |
segmentString(java.lang.String line) |
java.util.List<CoreLabel> |
segmentStringToTokenList(java.lang.String line) |
void |
serializeSegmenter(java.lang.String filename) |
void |
train()
Train a segmenter from raw text.
|
void |
train(java.util.Collection<Tree> trees) |
void |
train(java.util.List<TaggedWord> sentence) |
void |
train(Tree tree) |
public ArabicSegmenter(java.util.Properties props)
props
- Options for how to tokenize. See the main method of ArabicTokenizer
for details
public ArabicSegmenter(ArabicSegmenter other)
other
- the segmenter to copy
public void initializeTraining(double numTrees)
initializeTraining
in interface WordSegmenter
public void train(java.util.Collection<Tree> trees)
train
in interface WordSegmenter
public void train(Tree tree)
train
in interface WordSegmenter
public void train(java.util.List<TaggedWord> sentence)
train
in interface WordSegmenter
public void finishTraining()
finishTraining
in interface WordSegmenter
public java.lang.String process(java.lang.String nextInput)
ThreadsafeProcessor
process
in interface ThreadsafeProcessor<java.lang.String,java.lang.String>
nextInput
- the object to be processed
public ThreadsafeProcessor<java.lang.String,java.lang.String> newInstance()
ThreadsafeProcessor
newInstance
in interface ThreadsafeProcessor<java.lang.String,java.lang.String>
public java.util.List<HasWord> segment(java.lang.String line)
segment
in interface WordSegmenter
public java.util.List<CoreLabel> segmentStringToTokenList(java.lang.String line)
public java.lang.String segmentString(java.lang.String line)
public long segment(java.io.BufferedReader br, java.io.PrintWriter pwOut)
br
- input stream to segment
pwOut
- output stream to write the segmented text
public void train()
public void serializeSegmenter(java.lang.String filename)
public void loadSegmenter(java.lang.String filename, java.util.Properties p)
public void loadSegmenter(java.lang.String filename)
loadSegmenter
in interface WordSegmenter
public static void main(java.lang.String[] args)
args
- command-line arguments
public static ArabicSegmenter getSegmenter(java.util.Properties options)
options
- Properties to specify segmenter behavior