public class MultiWordStringMatcher
extends java.lang.Object
Modifier and Type | Class and Description |
---|---|
static class |
MultiWordStringMatcher.LongestStringComparator |
static class |
MultiWordStringMatcher.MatchType
if matchType is EXCT : match exact string
if matchType is EXCTWS : match exact string, except whitespace can match multiple whitespaces
if matchType is LWS : match case insensitive string, except whitespace can match multiple whitespaces
if matchType is LNRM : disregards punctuation, does case insensitive match
if matchType is REGEX : interprets string as regex already |
Modifier and Type | Field and Description |
---|---|
static java.util.Comparator<java.lang.String> |
LONGEST_STRING_COMPARATOR |
Constructor and Description |
---|
MultiWordStringMatcher(MultiWordStringMatcher.MatchType matchType) |
MultiWordStringMatcher(java.lang.String matchTypeStr) |
Modifier and Type | Method and Description |
---|---|
java.util.regex.Pattern |
createPattern(java.lang.String targetString) |
static java.util.List<IntPair> |
findOffsets(java.util.regex.Pattern pattern,
java.lang.String text)
Finds pattern in text and returns offsets.
|
static java.util.List<IntPair> |
findOffsets(java.util.regex.Pattern pattern,
java.lang.String text,
int start,
int end)
Finds pattern in text span from character start to end (exclusive) and returns offsets.
|
java.util.List<IntPair> |
findTargetStringOffsets(java.lang.String text,
java.lang.String targetString)
Finds target string in text and returns offsets
(matches based on set matchType).
|
java.util.List<IntPair> |
findTargetStringOffsets(java.lang.String text,
java.lang.String targetString,
int start,
int end)
Finds target string in text span from character start to end (exclusive) and returns offsets
(matches based on set matchType).
|
protected static java.util.List<IntPair> |
findTargetStringOffsetsExct(java.lang.String text,
java.lang.String targetString,
int start,
int end)
Finds target string in text span from character start to end (exclusive) and returns offsets
(does EXCT string matching).
|
protected java.util.List<IntPair> |
findTargetStringOffsetsRegex(java.lang.String text,
java.lang.String targetString,
int start,
int end)
Finds target string in text and returns offsets using regular expressions
(matches based on set matchType).
|
static java.lang.String |
getExctWsRegex(java.lang.String targetString) |
static java.lang.String |
getLnrmRegex(java.lang.String targetString) |
static java.lang.String |
getLWsRegex(java.lang.String targetString) |
MultiWordStringMatcher.MatchType |
getMatchType() |
java.util.regex.Pattern |
getPattern(java.lang.String targetString) |
java.util.regex.Pattern |
getPattern(java.lang.String[] targetStrings) |
java.lang.String |
getRegex(java.lang.String targetString) |
java.lang.String |
getRegex(java.lang.String[] targetStrings) |
protected static java.lang.String |
markTargetString(java.lang.String text,
java.lang.String targetString,
java.lang.String beginMark,
java.lang.String endMark,
boolean markOnlyIfSpace) |
static java.lang.String |
putSpacesAroundTargetString(java.lang.String text,
java.lang.String targetString)
Finds target string in text and puts spaces around it so it will be matched when we match against tokens.
|
void |
setMatchType(MultiWordStringMatcher.MatchType matchType) |
public static final java.util.Comparator<java.lang.String> LONGEST_STRING_COMPARATOR
public MultiWordStringMatcher(MultiWordStringMatcher.MatchType matchType)
public MultiWordStringMatcher(java.lang.String matchTypeStr)
public MultiWordStringMatcher.MatchType getMatchType()
public void setMatchType(MultiWordStringMatcher.MatchType matchType)
public static java.lang.String putSpacesAroundTargetString(java.lang.String text, java.lang.String targetString)
text - String in which to look for the target string
targetString - Target string to look for
protected static java.lang.String markTargetString(java.lang.String text, java.lang.String targetString, java.lang.String beginMark, java.lang.String endMark, boolean markOnlyIfSpace)
protected static java.util.List<IntPair> findTargetStringOffsetsExct(java.lang.String text, java.lang.String targetString, int start, int end)
text - String in which to look for the target string
targetString - Target string to look for
start - position to start search
end - position to end search
public java.util.regex.Pattern getPattern(java.lang.String[] targetStrings)
public java.lang.String getRegex(java.lang.String[] targetStrings)
public java.util.regex.Pattern getPattern(java.lang.String targetString)
public java.util.regex.Pattern createPattern(java.lang.String targetString)
public java.lang.String getRegex(java.lang.String targetString)
public static java.lang.String getExctWsRegex(java.lang.String targetString)
public static java.lang.String getLWsRegex(java.lang.String targetString)
public static java.lang.String getLnrmRegex(java.lang.String targetString)
protected java.util.List<IntPair> findTargetStringOffsetsRegex(java.lang.String text, java.lang.String targetString, int start, int end)
text - String in which to find target string
targetString - Target string to look for
start - position to start search
end - position to end search
public static java.util.List<IntPair> findOffsets(java.util.regex.Pattern pattern, java.lang.String text)
pattern - pattern to look for
text - String in which to look for the pattern
public static java.util.List<IntPair> findOffsets(java.util.regex.Pattern pattern, java.lang.String text, int start, int end)
pattern - pattern to look for
text - String in which to look for the pattern
start - position to start search
end - position to end search
public java.util.List<IntPair> findTargetStringOffsets(java.lang.String text, java.lang.String targetString)
text - String in which to look for the target string
targetString - Target string to look for
public java.util.List<IntPair> findTargetStringOffsets(java.lang.String text, java.lang.String targetString, int start, int end)
text - String in which to look for the target string
targetString - Target string to look for
start - position to start search
end - position to end search