public class MultiWordStringMatcher extends Object
Modifier and Type | Class and Description |
---|---|
static class |
MultiWordStringMatcher.LongestStringComparator |
static class |
MultiWordStringMatcher.MatchType
if matchType is EXCT : match exact string
if matchType is EXCTWS : match exact string, except whitespace can match multiple whitespaces
if matchType is LWS : match case insensitive string, except whitespace can match multiple whitespaces
if matchType is LNRM : disregards punctuation, does case insensitive match
if matchType is REGEX : interprets string as regex already |
Modifier and Type | Field and Description |
---|---|
static Comparator<String> |
LONGEST_STRING_COMPARATOR |
Constructor and Description |
---|
MultiWordStringMatcher(MultiWordStringMatcher.MatchType matchType) |
MultiWordStringMatcher(String matchTypeStr) |
Modifier and Type | Method and Description |
---|---|
Pattern |
createPattern(String targetString) |
static List<IntPair> |
findOffsets(Pattern pattern,
String text)
Finds pattern in text and returns offsets
|
static List<IntPair> |
findOffsets(Pattern pattern,
String text,
int start,
int end)
Finds pattern in text span from character start to end (exclusive) and returns offsets
|
List<IntPair> |
findTargetStringOffsets(String text,
String targetString)
Finds target string in text and returns offsets
(matches based on set matchType)
|
List<IntPair> |
findTargetStringOffsets(String text,
String targetString,
int start,
int end)
Finds target string in text span from character start to end (exclusive) and returns offsets
(matches based on set matchType)
|
protected List<IntPair> |
findTargetStringOffsetsExct(String text,
String targetString,
int start,
int end)
Finds target string in text span from character start to end (exclusive) and returns offsets
(does EXCT string matching)
|
protected List<IntPair> |
findTargetStringOffsetsRegex(String text,
String targetString,
int start,
int end)
Finds target string in text and returns offsets using regular expressions
(matches based on set matchType)
|
String |
getExctWsRegex(String targetString) |
String |
getLnrmRegex(String targetString) |
String |
getLWsRegex(String targetString) |
MultiWordStringMatcher.MatchType |
getMatchType() |
Pattern |
getPattern(String targetString) |
Pattern |
getPattern(String[] targetStrings) |
String |
getRegex(String targetString) |
String |
getRegex(String[] targetStrings) |
protected String |
markTargetString(String text,
String targetString,
String beginMark,
String endMark,
boolean markOnlyIfSpace) |
String |
putSpacesAroundTargetString(String text,
String targetString)
Finds target string in text and puts spaces around it so that it will be matched when we match against tokens
|
void |
setMatchType(MultiWordStringMatcher.MatchType matchType) |
public static final Comparator<String> LONGEST_STRING_COMPARATOR
public MultiWordStringMatcher(MultiWordStringMatcher.MatchType matchType)
public MultiWordStringMatcher(String matchTypeStr)
public MultiWordStringMatcher.MatchType getMatchType()
public void setMatchType(MultiWordStringMatcher.MatchType matchType)
public String putSpacesAroundTargetString(String text, String targetString)
text - String in which to look for the target string
targetString - Target string to look for
protected String markTargetString(String text, String targetString, String beginMark, String endMark, boolean markOnlyIfSpace)
protected List<IntPair> findTargetStringOffsetsExct(String text, String targetString, int start, int end)
text - String in which to look for the target string
targetString - Target string to look for
start - position to start search
end - position to end search
protected List<IntPair> findTargetStringOffsetsRegex(String text, String targetString, int start, int end)
text - String in which to find target string
targetString - Target string to look for
start - position to start search
end - position to end search
public static List<IntPair> findOffsets(Pattern pattern, String text)
pattern - pattern to look for
text - String in which to look for the pattern
public static List<IntPair> findOffsets(Pattern pattern, String text, int start, int end)
pattern - pattern to look for
text - String in which to look for the pattern
start - position to start search
end - position to end search
public List<IntPair> findTargetStringOffsets(String text, String targetString)
text - String in which to look for the target string
targetString - Target string to look for
public List<IntPair> findTargetStringOffsets(String text, String targetString, int start, int end)
text - String in which to look for the target string
targetString - Target string to look for
start - position to start search
end - position to end search