# Training configuration for a sequence classifier (NER model).
# Format: Java properties -- one key = value pair per line; a comment must
# start with '#' on its own line (text after a value would become part of it).

# Location of the training file.
trainFile = jane-austen-emma-ch1.tsv

# Location where the trained classifier is saved (serialized to).
# Adding .gz at the end automatically gzips the file, making it
# faster and smaller.
serializeTo = ner-model.gaz.ser.gz

# Structure of the training file: this tells the classifier that the
# word is in column 0 and the correct answer is in column 1.
map = word=0,answer=1

# These are the features we'd like to train with. Some are discussed
# below; the rest can be understood by looking at NERFeatureFactory.
useClassFeature = true
useWord = true
useNGrams = true
# No n-grams will be included that do not contain either the
# beginning or the end of the word.
noMidNGrams = true
useDisjunctive = true
maxNGramLeng = 6
usePrev = true
useNext = true
useSequences = true
usePrevSequences = true
maxLeft = 1

# The next 4 deal with word shape features.
useTypeSeqs = true
useTypeSeqs2 = true
useTypeySequences = true
wordShape = chris2useLC

# Gazette (entity word-list) settings.
# NOTE(review): the exact matching semantics of cleanGazette are not shown
# here -- confirm against NERFeatureFactory before changing it.
cleanGazette = true
gazette = austen.gaz.txt