public class IOBUtils extends Object
Modifier and Type | Field and Description |
---|---|
static String |
BeginSymbol |
static String |
ContinuationSymbol |
static String |
NosegSymbol |
static String |
RewriteSymbol |
static String |
RewriteTahSymbol
Deprecated. Use RewriteSymbol instead.
|
static String |
RewriteTareefSymbol
Deprecated. Use RewriteSymbol instead.
|
Modifier and Type | Method and Description |
---|---|
static String |
getBoundaryCharacter() |
static String |
IOBToString(List<CoreLabel> labeledSequence)
Convert a list of labeled characters to a String.
|
static String |
IOBToString(List<CoreLabel> labeledSequence,
String segmentationMarker)
Convert a list of labeled characters to a String.
|
static String |
IOBToString(List<CoreLabel> labeledSequence,
String prefixMarker,
String suffixMarker)
Convert a list of labeled characters to a String.
|
static void |
labelDomain(List<CoreLabel> tokenList,
String domain) |
static List<CoreLabel> |
StringToIOB(List<CoreLabel> tokenList,
Character segMarker,
boolean applyRewriteRules)
Convert a String to a list of characters suitable for labeling in an IOB
segmentation model.
|
static List<CoreLabel> |
StringToIOB(List<CoreLabel> tokenList,
Character segMarker,
boolean applyRewriteRules,
boolean stripRewrites)
Convert a String to a list of characters suitable for labeling in an IOB
segmentation model.
|
static List<CoreLabel> |
StringToIOB(String string)
This version is for turning an unsegmented string to an IOB input, i.e.,
for processing raw text.
|
static List<CoreLabel> |
StringToIOB(String str,
Character segMarker) |
public static final String BeginSymbol
public static final String ContinuationSymbol
public static final String NosegSymbol
public static final String RewriteSymbol
public static final String RewriteTahSymbol
public static final String RewriteTareefSymbol
public static String getBoundaryCharacter()
public static List<CoreLabel> StringToIOB(List<CoreLabel> tokenList, Character segMarker, boolean applyRewriteRules)
Parameters:
tokenList -
segMarker -
applyRewriteRules - add rewrite labels (for training data)
public static List<CoreLabel> StringToIOB(List<CoreLabel> tokenList, Character segMarker, boolean applyRewriteRules, boolean stripRewrites)
Parameters:
tokenList -
segMarker -
applyRewriteRules - add rewrite labels (for training data)
stripRewrites - revert training data to old Green & DeNero model (remove rewrite labels but still rewrite to try to preserve raw text)
public static List<CoreLabel> StringToIOB(String string)
public static String IOBToString(List<CoreLabel> labeledSequence, String prefixMarker, String suffixMarker)
public static String IOBToString(List<CoreLabel> labeledSequence, String segmentationMarker)
public static String IOBToString(List<CoreLabel> labeledSequence)