# Classifier properties for the 20 Newsgroups "bydate" dev split (per the file names below).
# NOTE(review): presumably loaded by Stanford Classifier's ColumnDataClassifier through a
# java.util.Properties-style reader -- confirm. Such readers are line-oriented, so each
# property must sit on its own line and comments must be full lines starting with '#'.

# Training and test data files.
trainFile=20news-bydate-devtrain-stanford-classifier.txt
testFile=20news-bydate-devtest-stanford-classifier.txt

# NOTE(review): the "2." prefix presumably scopes these options to input column 2 -- verify
# against the consumer's documentation.
2.useSplitWords=true

# Tokenizer alternatives, in order: a (full) uuencoded line ("M" followed by 60 characters
# in the range space..backquote, then a space); a letter-initial alphanumeric word; a number
# with optional leading "$", optional two-digit decimal part and optional trailing "%";
# a whitespace run; a single character in the range x80..uFFFD; any other single character.
# The first alternative exists only so that a uuencoded line becomes one token, which the
# ignore regexp below then discards.
# NOTE(review): the doubled backslashes assume the loader unescapes them to single ones -- confirm.
2.splitWordsTokenizerRegexp=M[ -`]{60} |[\\p{L}][\\p{L}0-9]*|(?:\\$ ?)?[0-9]+(?:\\.[0-9]{2})?%?|\\s+|[\\x80-\\uFFFD]|.

# Tokens matching this pattern are ignored: full uuencoded lines and whitespace runs.
2.splitWordsIgnoreRegexp=M[ -`]{60} |\\s+