# Classifier configuration for the 20 Newsgroups "bydate" dev split.
# NOTE(review): the property names match Stanford Classifier's
# ColumnDataClassifier options -- confirm against the version in use.
#
# Each setting must sit on its own line: the Java properties format ends an
# entry at the line terminator, so entries sharing a line are read as one
# key whose value is the remainder of the line.

# Data files used for training and for evaluation.
trainFile=20news-bydate-devtrain-stanford-classifier.txt
testFile=20news-bydate-devtest-stanford-classifier.txt

# Options prefixed with "2." apply to column 2 of the data files
# (presumably the document text; columns are assumed 0-indexed -- TODO confirm).

# Build features from the tokens ("split words") found in this column.
2.useSplitWords=true

# Tokenizer pattern. Backslashes are doubled because the properties loader
# consumes one level of escaping, so "\\s" reaches the regex engine as "\s".
# Alternatives, tried in order:
#   1. a letter followed by any letters or ASCII digits (a word)
#   2. digits, optionally preceded by "$" (and an optional space), optionally
#      followed by a two-digit decimal part and/or a trailing "%"
#   3. a run of whitespace
#   4. a single character in the range U+0080..U+FFFD
#   5. any other single character
2.splitWordsTokenizerRegexp=[\\p{L}][\\p{L}0-9]*|(?:\\$ ?)?[0-9]+(?:\\.[0-9]{2})?%?|\\s+|[\\x80-\\uFFFD]|.

# Tokens matching this pattern (whitespace runs) are not emitted as features.
2.splitWordsIgnoreRegexp=\\s+