# Stanford Classifier (ColumnDataClassifier) settings for the 20 Newsgroups
# "bydate" dev split.
#
# Keep exactly one property per line: the Java properties loader treats
# everything after the first '=' on a line as that key's value, so several
# settings on one line would all be swallowed into the first key.
#
# Backslashes in the regular expressions are doubled because the properties
# loader unescapes each pair to a single backslash before the pattern is used.

# Training and evaluation data files.
trainFile=20news-bydate-devtrain-stanford-classifier.txt
testFile=20news-bydate-devtest-stanford-classifier.txt

# Feature options for column 2 (the "2." prefix scopes an option to a column).
# NOTE(review): presumably column 2 holds the message text - confirm against
# the layout of the data files.

# Build features from the tokens of the column, and from their lowercased forms.
2.useLowercaseSplitWords=true
2.useSplitWords=true

# Tokenizer pattern. Alternatives, in order:
#   1. "M", then 60 characters in the range space..backtick, then a space
#      (presumably uuencoded attachment lines - confirm)
#   2. a letter followed by any number of letters or ASCII digits
#   3. a number with optional leading "$" (and optional space), optional
#      two-digit decimal part, and optional trailing "%"
#   4. a run of whitespace
#   5. a single character in the range U+0080..U+FFFD
#   6. any other single character
2.splitWordsTokenizerRegexp=M[ -`]{60} |[\\p{L}][\\p{L}0-9]*|(?:\\$ ?)?[0-9]+(?:\\.[0-9]{2})?%?|\\s+|[\\x80-\\uFFFD]|.

# Tokens matching this pattern are discarded: the 62-character "M..." lines
# from alternative 1 above, and whitespace runs.
2.splitWordsIgnoreRegexp=M[ -`]{60} |\\s+

# Character n-gram features, lengths 1 through 4 inclusive.
2.useNGrams=true
2.maxNGramLeng=4
2.minNGramLeng=1

# Also add word-shape features for the split words, using the shape function
# named "chris4".
2.splitWordShape=chris4