edu.stanford.nlp.tagger.maxent
Class ExtractorFramesRare

java.lang.Object
  extended by edu.stanford.nlp.tagger.maxent.ExtractorFramesRare

public class ExtractorFramesRare
extends java.lang.Object

Maintains an array of ExtractorFrames for rare words. This file also defines all the rare word extractors as non-public classes.


Field Summary
static Extractor cAllCap
          "1" if token has no lower case letters
static Extractor cAllCapitalized
          "1" if token has only upper case letters
static Extractor cCapDist
          Distance to lowercase word
static Extractor cCompany
          "1" if capitalized and one of the following 3 words is Inc., Co., or Corp.
static Extractor cLetterDigitDash
          "1" if word contains letter, digit, and dash, in any position and case
static Extractor cMidSentence
          "1" if not first word of sentence and _some_ letter is uppercase
static int[] countFeats
           
static Extractor cPluralAcronym
          "1" if a plural acronym: capital letters followed by 's'
static Extractor cUCaseLCase
          "0" if first letter isn't uppercase or if lowercase version isn't in dictionary.
static Extractor cUpperDigitDash
          "1" if word contains uppercase letter, digit, and dash
static Extractor cWordDash
          "1" iff word contains 1 or more dash characters (somewhere)
static Extractor cWordMidUCase
          "0" if first word of sentence or not first letter uppercase or if lowercase version isn't in dictionary.
static Extractor cWordNumber
          "1" iff word contains 1 or more digit characters (somewhere)
static Extractor cWordPref1
          First 1-4 characters of word
static Extractor cWordPref2
           
static Extractor cWordPref3
           
static Extractor cWordPref4
           
static Extractor cWordStartUCase
          "0" if not 1st word of sentence or not upper case, or lowercased version not in dictionary.
static Extractor cWordSuff1
          Last 1-4 characters of word
static Extractor cWordSuff2
           
static Extractor cWordSuff3
           
static Extractor cWordSuff4
           
static Extractor cWordUppCase
          "1" iff word contains 1 or more upper case characters (somewhere)
static Extractor[] eFrames
           
static int[] nums
           
static int size
           
static java.lang.String zeroSt
           
 
Constructor Summary
ExtractorFramesRare()
           
 
Method Summary
static void addASBCprefeatures(int n)
           
static void addASBCsuffeatures(int n)
           
static void addASBCunkfeatures(int n)
           
static void addConjunctions()
          Adds the conjunctions of various features in the rare-word feature set.
static void addCTBprefeatures(int n)
           
static void addCTBsuffeatures(int n)
           
static void addCTBunkDictfeatures(int n)
           
static void addFeatures()
          Expands the extractors list.
static void addLCTagFeatures()
           
static void addSighanChineseFeatures()
           
static void clearEmpty()
           
 
Methods inherited from class java.lang.Object
clone, equals, finalize, getClass, hashCode, notify, notifyAll, toString, wait, wait, wait
 

Field Detail

cWordSuff1

public static Extractor cWordSuff1
Last 1-4 characters of word


cWordSuff2

public static Extractor cWordSuff2

cWordSuff3

public static Extractor cWordSuff3

cWordSuff4

public static Extractor cWordSuff4

cWordPref1

public static Extractor cWordPref1
First 1-4 characters of word


cWordPref2

public static Extractor cWordPref2

cWordPref3

public static Extractor cWordPref3

cWordPref4

public static Extractor cWordPref4

cWordUppCase

public static Extractor cWordUppCase
"1" iff word contains 1 or more upper case characters (somewhere)


cWordNumber

public static Extractor cWordNumber
"1" iff word contains 1 or more digit characters (somewhere)


cWordDash

public static Extractor cWordDash
"1" iff word contains 1 or more dash characters (somewhere)


cWordStartUCase

public static Extractor cWordStartUCase
"0" if not 1st word of sentence or not upper case, or lowercased version not in dictionary. Else first tag of word lowercased.


cWordMidUCase

public static Extractor cWordMidUCase
"0" if first word of sentence or not first letter uppercase or if lowercase version isn't in dictionary. Otherwise first tag of lowercase equivalent.


cUCaseLCase

public static Extractor cUCaseLCase
"0" if first letter isn't uppercase or if lowercase version isn't in dictionary. Otherwise first tag of lowercase equivalent.


cMidSentence

public static Extractor cMidSentence
"1" if not first word of sentence and _some_ letter is uppercase


cAllCap

public static Extractor cAllCap
"1" if token has no lower case letters


cAllCapitalized

public static Extractor cAllCapitalized
"1" if token has only upper case letters


cCompany

public static Extractor cCompany
"1" if capitalized and one of the following 3 words is Inc., Co., or Corp.


cPluralAcronym

public static Extractor cPluralAcronym
"1" if a plural acronym: capital letters followed by 's'


cLetterDigitDash

public static Extractor cLetterDigitDash
"1" if word contains letter, digit, and dash, in any position and case


cUpperDigitDash

public static Extractor cUpperDigitDash
"1" if word contains uppercase letter, digit, and dash


cCapDist

public static Extractor cCapDist
Distance to lowercase word


eFrames

public static Extractor[] eFrames

size

public static int size

countFeats

public static int[] countFeats

nums

public static int[] nums

zeroSt

public static java.lang.String zeroSt
Constructor Detail

ExtractorFramesRare

public ExtractorFramesRare()
Method Detail

addFeatures

public static void addFeatures()
Expands the extractors list.


addConjunctions

public static void addConjunctions()
Adds the conjunctions of various features in the rare-word feature set.


addLCTagFeatures

public static void addLCTagFeatures()

clearEmpty

public static void clearEmpty()

addSighanChineseFeatures

public static void addSighanChineseFeatures()

addCTBprefeatures

public static void addCTBprefeatures(int n)

addCTBsuffeatures

public static void addCTBsuffeatures(int n)

addASBCprefeatures

public static void addASBCprefeatures(int n)

addASBCsuffeatures

public static void addASBCsuffeatures(int n)

addASBCunkfeatures

public static void addASBCunkfeatures(int n)

addCTBunkDictfeatures

public static void addCTBunkDictfeatures(int n)