public class QuoteAnnotator extends java.lang.Object implements Annotator
Considers regular ASCII ("", '', ``'', and `') as well as "smart" and international quotation marks as follows: “”, ‘’, «», ‹›, 「」, 『』, „”, and ‚’.
Note: extracts everything within these pairs as a whole quote segment, which may or may not be the desired behaviour for texts that use different formatting styles than standard English ones.
There are a number of options that can be passed to the quote annotator to customize its behaviour:
Modifier and Type | Field and Description |
---|---|
boolean |
ALLOW_EMBEDDED_SAME |
boolean |
ASCII_QUOTES |
boolean |
ATTRIBUTE_QUOTES |
static java.util.Map<java.lang.String,java.lang.String> |
DIRECTED_QUOTES |
boolean |
EXTRACT_UNCLOSED |
int |
MAX_LENGTH |
QuoteAttributionAnnotator |
quoteAttributionAnnotator |
boolean |
SMART_QUOTES |
boolean |
USE_SINGLE |
DEFAULT_REQUIREMENTS, STANFORD_CDC_TOKENIZE, STANFORD_CLEAN_XML, STANFORD_COLUMN_DATA_CLASSIFIER, STANFORD_COREF, STANFORD_COREF_MENTION, STANFORD_DEPENDENCIES, STANFORD_DETERMINISTIC_COREF, STANFORD_DOCDATE, STANFORD_ENTITY_MENTIONS, STANFORD_GENDER, STANFORD_KBP, STANFORD_LEMMA, STANFORD_LINK, STANFORD_MWT, STANFORD_NATLOG, STANFORD_NER, STANFORD_OPENIE, STANFORD_PARSE, STANFORD_POS, STANFORD_QUOTE, STANFORD_QUOTE_ATTRIBUTION, STANFORD_REGEXNER, STANFORD_RELATION, STANFORD_SENTIMENT, STANFORD_SSPLIT, STANFORD_TOKENIZE, STANFORD_TOKENSREGEX, STANFORD_TRUECASE, STANFORD_UD_FEATURES
Constructor and Description |
---|
QuoteAnnotator(java.util.Properties props)
Return a QuoteAnnotator that isolates quotes denoted by the
ASCII characters " and ' as well as a variety of smart and international quotes.
|
QuoteAnnotator(java.lang.String name,
java.util.Properties props)
Return a QuoteAnnotator that isolates quotes denoted by the
ASCII characters " and '.
|
QuoteAnnotator(java.lang.String name,
java.util.Properties props,
boolean verbose)
Return a QuoteAnnotator that isolates quotes denoted by the
ASCII characters " and '.
|
Modifier and Type | Method and Description |
---|---|
void |
annotate(Annotation annotation)
Given an Annotation, perform a task on this Annotation.
|
static java.util.List<CoreMap> |
gatherQuotes(CoreMap curr)
Helper method to recursively gather all embedded quotes.
|
static java.util.List<CoreMap> |
getCoreMapQuotes(java.util.List<Pair<java.lang.Integer,java.lang.Integer>> quotes,
java.util.List<CoreLabel> tokens,
java.util.List<CoreMap> sentences,
java.lang.String text,
java.lang.String docID,
boolean unclosed) |
static java.util.Comparator<CoreMap> |
getQuoteComparator() |
Pair<java.util.List<Pair<java.lang.Integer,java.lang.Integer>>,java.util.List<Pair<java.lang.Integer,java.lang.Integer>>> |
getQuotes(java.lang.String text) |
static boolean |
isSingleQuote(java.lang.String c) |
static boolean |
isWhitespaceOrPunct(java.lang.String c) |
static Annotation |
makeQuote(java.lang.String surfaceForm,
int begin,
int end,
java.util.List<CoreLabel> quoteTokens,
int tokenOffset,
int sentenceBeginIndex,
int sentenceEndIndex,
java.lang.String docID) |
Pair<java.util.List<Pair<java.lang.Integer,java.lang.Integer>>,java.util.List<Pair<java.lang.Integer,java.lang.Integer>>> |
recursiveQuotes(java.lang.String text,
int offset,
java.lang.String prevQuote) |
static java.lang.String |
replaceUnicode(java.lang.String text) |
java.util.Set<java.lang.Class<? extends CoreAnnotation>> |
requirementsSatisfied()
Returns a set of requirements for which tasks this annotator can
provide.
|
java.util.Set<java.lang.Class<? extends CoreAnnotation>> |
requires()
Returns the set of tasks which this annotator requires in order
to perform.
|
static java.lang.String |
xmlFreeText(java.lang.String documentText,
Annotation annotation)
helper method for creating version of document text without xml.
|
clone, equals, finalize, getClass, hashCode, notify, notifyAll, toString, wait, wait, wait
exactRequirements, unmount
public boolean USE_SINGLE
public int MAX_LENGTH
public boolean ASCII_QUOTES
public boolean ALLOW_EMBEDDED_SAME
public boolean SMART_QUOTES
public boolean EXTRACT_UNCLOSED
public boolean ATTRIBUTE_QUOTES
public QuoteAttributionAnnotator quoteAttributionAnnotator
public static final java.util.Map<java.lang.String,java.lang.String> DIRECTED_QUOTES
public QuoteAnnotator(java.lang.String name, java.util.Properties props)
name
- String that is ignored but allows for creation of the
QuoteAnnotator via a customAnnotatorClass
props
- Properties object that contains the customizable properties
attributes.
public QuoteAnnotator(java.util.Properties props)
props
- Properties object that contains the customizable properties
attributes.
public QuoteAnnotator(java.lang.String name, java.util.Properties props, boolean verbose)
props
- Properties object that contains the customizable properties
attributes.
verbose
- whether or not to output verbose information.
public static java.lang.String xmlFreeText(java.lang.String documentText, Annotation annotation)
public void annotate(Annotation annotation)
Annotator
public static java.lang.String replaceUnicode(java.lang.String text)
public static java.util.Comparator<CoreMap> getQuoteComparator()
public static java.util.List<CoreMap> getCoreMapQuotes(java.util.List<Pair<java.lang.Integer,java.lang.Integer>> quotes, java.util.List<CoreLabel> tokens, java.util.List<CoreMap> sentences, java.lang.String text, java.lang.String docID, boolean unclosed)
public static Annotation makeQuote(java.lang.String surfaceForm, int begin, int end, java.util.List<CoreLabel> quoteTokens, int tokenOffset, int sentenceBeginIndex, int sentenceEndIndex, java.lang.String docID)
public Pair<java.util.List<Pair<java.lang.Integer,java.lang.Integer>>,java.util.List<Pair<java.lang.Integer,java.lang.Integer>>> getQuotes(java.lang.String text)
public Pair<java.util.List<Pair<java.lang.Integer,java.lang.Integer>>,java.util.List<Pair<java.lang.Integer,java.lang.Integer>>> recursiveQuotes(java.lang.String text, int offset, java.lang.String prevQuote)
public static boolean isWhitespaceOrPunct(java.lang.String c)
public static boolean isSingleQuote(java.lang.String c)
public java.util.Set<java.lang.Class<? extends CoreAnnotation>> requires()
Annotator
public java.util.Set<java.lang.Class<? extends CoreAnnotation>> requirementsSatisfied()
Annotator
requirementsSatisfied
in interface Annotator