edu.stanford.nlp.tagger.maxent
Class TestSentence

java.lang.Object
  extended by edu.stanford.nlp.tagger.maxent.TestSentence
All Implemented Interfaces:
SequenceModel

public class TestSentence
extends java.lang.Object
implements SequenceModel


Field Summary
static int[] hPos
           
static boolean[] isTag
           
 int mikeKnown
           
 int numRight
           
 int numWrong
           
static LambdaSolve prob
           
 
Constructor Summary
TestSentence()
           
TestSentence(LambdaSolve prob, java.lang.String s)
           
TestSentence(LambdaSolve prob, java.lang.String[] s, java.lang.String[] correctTags, PrintFile pf, Dictionary wrongWords)
           
TestSentence(LambdaSolve prob, java.lang.String[] s, java.lang.String[] tags, java.lang.String[] correctTags, PrintFile pf, Dictionary wrongWords)
           
TestSentence(LambdaSolve prob, java.lang.String s, PrintFile pf)
           
 
Method Summary
 void addUnknown(Dictionary uDict)
          This method should be called after the sentence has been tagged.
 java.lang.String[] append(java.lang.String[] tags, java.lang.String word)
           
 java.lang.String[] appendOld(java.lang.String[] tags, java.lang.String word)
          Deterministically adds other possible tags for words given observed tags.
 void dumpActivations(java.lang.String s)
          Tokenizes s into words and dumps the unknown-word activations.
 double[] getHistories(History h)
           
 int[] getPossibleValues(int pos)
          Positions 0...leftWindow-1 are null, positions leftWindow...length+leftWindow-1 are words, and positions length+leftWindow...length+leftWindow+rightWindow-1 are null.
 double[][][] getProbs()
           
 double[] getScores(History h)
          Scores the current assignment in PairsHolder at the current position, h.current.
 java.lang.String getTaggedNice()
           
 void init()
           
 void init1()
           
static int[] intersect(int[] arr1, int[] arr2)
           
 boolean known(java.lang.String w)
           
 int leftWindow()
          How many positions to the left a position is dependent on.
 int length()
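          Returns the length of the sequences modeled by this SequenceModel; the implementation of the TagScorer interface (apparently the interface now declared as SequenceModel) follows.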
static void main(java.lang.String[] args)
          Tags a test sentence.
 void printActivations(History h)
          Prints out the unknown-word feature values of the features in ExtractorFramesRare.
 void printProbs()
           
 void printTop(PrintFile pfu)
          This method should be called after the sentence has been tagged.
 void printUnknown(int numSent, PrintFile pfu)
          This method should be called after the sentence has been tagged.
 boolean reliable(int current)
           
 void revert(int prevSize, int afterSize)
           
 int rightWindow()
          How many positions to the right a position is dependent on.
 double scoreOf(int[] sequence)
          Computes the score assigned by this model to the whole sequence.
 double scoreOf(int[] tags, int pos)
          Computes the unnormalized log conditional score of the value at position pos in the sequence, conditioned on the values of the elements in all other positions of the provided sequence.
 double[] scoresOf(int[] tags, int pos)
          Computes the unnormalized log conditional distribution over values of the element at position pos in the sequence, conditioned on the values of the elements in all other positions of the provided sequence.
 java.lang.String[] stringTagsAt(int pos)
           
 Sentence tagSentence(LambdaSolve prob, Sentence s)
           
static void tagSentenceTagScorer(java.lang.String[] sent)
          Tags a sentence using Dan's TagScorer interface.
 java.lang.String[] test(java.lang.String outFile)
           
 void test1(PrintFile pf, Dictionary wrongWords)
           
 void testTagInference(PrintFile pf, Dictionary wrongWords)
          Tests using TagInference.
static java.lang.String toNice(java.lang.String s)
           
static java.lang.String toSt(java.lang.String s)
           
static int[] unite(int[] arr1, int[] arr2)
           
 void writeProbs()
           
 
Methods inherited from class java.lang.Object
clone, equals, finalize, getClass, hashCode, notify, notifyAll, toString, wait, wait, wait
 

Field Detail

prob

public static LambdaSolve prob

hPos

public static int[] hPos

isTag

public static boolean[] isTag

numRight

public int numRight

numWrong

public int numWrong

mikeKnown

public int mikeKnown
Constructor Detail

TestSentence

public TestSentence()

TestSentence

public TestSentence(LambdaSolve prob,
                    java.lang.String s,
                    PrintFile pf)

TestSentence

public TestSentence(LambdaSolve prob,
                    java.lang.String s)

TestSentence

public TestSentence(LambdaSolve prob,
                    java.lang.String[] s,
                    java.lang.String[] correctTags,
                    PrintFile pf,
                    Dictionary wrongWords)

TestSentence

public TestSentence(LambdaSolve prob,
                    java.lang.String[] s,
                    java.lang.String[] tags,
                    java.lang.String[] correctTags,
                    PrintFile pf,
                    Dictionary wrongWords)
Method Detail

tagSentence

public Sentence tagSentence(LambdaSolve prob,
                            Sentence s)

revert

public void revert(int prevSize,
                   int afterSize)

init

public void init()

getTaggedNice

public java.lang.String getTaggedNice()

toNice

public static java.lang.String toNice(java.lang.String s)

tagSentenceTagScorer

public static void tagSentenceTagScorer(java.lang.String[] sent)
Tags a sentence using Dan's TagScorer interface.


init1

public void init1()

writeProbs

public void writeProbs()

dumpActivations

public void dumpActivations(java.lang.String s)
Tokenizes s into words and dumps the unknown-word activations.


toSt

public static java.lang.String toSt(java.lang.String s)

test

public java.lang.String[] test(java.lang.String outFile)

test1

public void test1(PrintFile pf,
                  Dictionary wrongWords)

testTagInference

public void testTagInference(PrintFile pf,
                             Dictionary wrongWords)
Tests using TagInference.


known

public boolean known(java.lang.String w)

reliable

public boolean reliable(int current)

append

public java.lang.String[] append(java.lang.String[] tags,
                                 java.lang.String word)

appendOld

public java.lang.String[] appendOld(java.lang.String[] tags,
                                    java.lang.String word)
Deterministically adds other possible tags for words given observed tags.


getScores

public double[] getScores(History h)
Scores the current assignment in PairsHolder at the current position, h.current.


printActivations

public void printActivations(History h)
Prints out the unknown-word feature values of the features in ExtractorFramesRare.


getHistories

public double[] getHistories(History h)

intersect

public static int[] intersect(int[] arr1,
                              int[] arr2)

unite

public static int[] unite(int[] arr1,
                          int[] arr2)

getProbs

public double[][][] getProbs()

printProbs

public void printProbs()

addUnknown

public void addUnknown(Dictionary uDict)
This method should be called after the sentence has been tagged. For every unknown word, this method adds the 3 most probable tags to the dictionary uDict.


printUnknown

public void printUnknown(int numSent,
                         PrintFile pfu)
This method should be called after the sentence has been tagged. For every unknown word, this method prints the 3 most probable tags to the file pfu.


printTop

public void printTop(PrintFile pfu)
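This method should be called after the sentence has been tagged. For every word token, this method prints the 3 most probable tags to the file pfu, with some exceptions (the original comment breaks off after "except for", so the excluded cases are not documented here).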


length

public int length()
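Returns the length of the sequences modeled by this SequenceModel. The implementation of the TagScorer interface (apparently the interface now declared as SequenceModel) follows from this method onward.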

Specified by:
length in interface SequenceModel
Returns:
the length of the sequences modeled by this SequenceModel

leftWindow

public int leftWindow()
Description copied from interface: SequenceModel
How many positions to the left a position is dependent on.

Specified by:
leftWindow in interface SequenceModel
Returns:
the size of the left window used by this sequence model

rightWindow

public int rightWindow()
Description copied from interface: SequenceModel
How many positions to the right a position is dependent on.

Specified by:
rightWindow in interface SequenceModel
Returns:
the size of the right window used by this sequence model

getPossibleValues

public int[] getPossibleValues(int pos)
Description copied from interface: SequenceModel
Positions 0...leftWindow-1 are null, positions leftWindow...length+leftWindow-1 are words, and positions length+leftWindow...length+leftWindow+rightWindow-1 are null.

Specified by:
getPossibleValues in interface SequenceModel
Parameters:
pos - the position
Returns:
the set of possible int values at this position, as an int array

stringTagsAt

public java.lang.String[] stringTagsAt(int pos)

scoreOf

public double scoreOf(int[] tags,
                      int pos)
Description copied from interface: SequenceModel
Computes the unnormalized log conditional score of the value at position pos in the sequence, conditioned on the values of the elements in all other positions of the provided sequence.

Specified by:
scoreOf in interface SequenceModel
Parameters:
tags - the sequence containing the rest of the values to condition on
pos - the position of the element to give a distribution for
Returns:
the log score of the token at the specified position in the sequence

scoreOf

public double scoreOf(int[] sequence)
Description copied from interface: SequenceModel
Computes the score assigned by this model to the whole sequence. Typically this will be an unnormalized probability in log space (since the probabilities are small).

Specified by:
scoreOf in interface SequenceModel
Parameters:
sequence - the sequence to compute a score for
Returns:
the score for the sequence

scoresOf

public double[] scoresOf(int[] tags,
                         int pos)
Description copied from interface: SequenceModel
Computes the unnormalized log conditional distribution over values of the element at position pos in the sequence, conditioned on the values of the elements in all other positions of the provided sequence.

Specified by:
scoresOf in interface SequenceModel
Parameters:
tags - the sequence containing the rest of the values to condition on
pos - the position of the element to give a distribution for
Returns:
the scores of the possible tokens at the specified position in the sequence

main

public static void main(java.lang.String[] args)
Tags a test sentence.

Parameters:
args - A single argument giving the filename of the parameter files. This should be the complete filename of the holder file, for example, ~/mine/wsj0-20.holder; the program will also use other files whose names are formed by adding a second extension onto this filename. If no argument is provided, a default tagger in /u/nlp/data is used.