Chart Parser

by Jim Chapman
Home| Where we live | Jim| Isabel| James| Edward]

//
// Chart.cpp
//
// Who             When       What
// ---             ----       ----
// Jim Chapman     13 May 97  Released.
//
// This file defines the behaviour of the core classes of the Chart Parser program.
// Refer to "Chart.h" for an overview of the program, and of its constituent classes.
// 

#include "Chart.h"

#include <fstream.h>
#include <afxwin.h>

//
// Grammar ctor
//
// Who             When       What
// ---             ----       ----
// Jim Chapman     13 May 97  Released.
//
// Constructs a Grammar object by reading it from the file whose name is passed as
// an argument.  If the file cannot be opened or read a message box is displayed, and
// the program halts.
Grammar::Grammar(const CString & fileName)
{
    try {
        CStdioFile grammarFile(fileName, CFile::modeRead);

        CString ruleLine;

        while(grammarFile.ReadString(ruleLine))
        {
            // skip blank lines and  comments
            if(ruleLine.IsEmpty())
                continue;
            else if(ruleLine[0]==';')
                continue;

            Rule *rule=new Rule(ruleLine,*this);
        }
    }
    catch (...) {
        AfxMessageBox(CString("There was a problem reading the grammar file, ")
                            + fileName);
        exit(1);
    }
    return;
};

//
// Grammar::m_addRule
//
// Who             When       What
// ---             ----       ----
// Jim Chapman     13 May 97  Released.
//
// This method adds a rule to the grammar.  The method's parameter is a pointer to a
// rule.  Do not delete the pointer after adding it to the grammar; the Grammar object
// deletes all its rules in its dtor.
void Grammar::m_addRule(Rule *rule)
{
    if(rule->m_getSubcats().IsEmpty())
        m_emptyRules.AddHead(rule);
    else
    {
        POSITION pos=rule->m_getSubcats().GetHeadPosition();

        int cat=rule->m_getSubcats().GetNext(pos);
        m_rulesByFirstCat[cat]->AddHead(rule);
    };
}

//
// Grammar::m_lookUpCatNumber
//
// Who             When       What
// ---             ----       ----
// Jim Chapman     13 May 97  Released.
//
// Given a category name ("NP", for example), this method returns its category number
// (i.e. its index in this->m_catNames).  If the category has not been seen before,
// the method creates a new array entry for it.
int Grammar::m_lookUpCatNumber(const CString &catName)
{
    POSITION pos = m_catNames.GetHeadPosition();

    int count=0;
    while( pos != NULL )
    {
        CatDescription *d=m_catNames.GetNext(pos);
        if(d->m_name==catName)
            return d->m_number;
        count ++;
    }

    // If we got to this point, it means that the cat is a new one, and must be
    // added to the grammar.  This needs doing in two places:
    // 1) Its name must be added to m_catNames
    // 2) We should create a new, empty rule-list for this category in m_rulesByFirstCat.
    m_catNames.AddTail(new CatDescription(count,catName));
    m_rulesByFirstCat.SetAtGrow(count,new RuleList);

    return count;
};

//
// Grammar::m_lookUpCatName
//
// Who             When       What
// ---             ----       ----
// Jim Chapman     13 May 97  Released.
//
// Given a category's number, this method returns its name.  The method does not check
// the validity of the number (though the CArray does, if built for _DEBUG).
CString Grammar::m_lookUpCatName(int cat) const
{
    return m_catNames.GetAt(m_catNames.FindIndex(cat))->m_name;
}

//
// Grammar::m_getEmptyRules
//
// Who             When       What
// ---             ----       ----
// Jim Chapman     13 May 97  Released.
//
// Returns the list of empty rules for the grammar (i.e. all those rules that have
// no categories on their right hand side.
const RuleList & Grammar::m_getEmptyRules() const
{
    return m_emptyRules;
};

//
// Grammar::m_getRulesStartingWith
//
// Who             When       What
// ---             ----       ----
// Jim Chapman     13 May 97  Released.
//
// This method gives you all the rules in the grammar that start with a particular
// category (i.e. such category is the first category on the rule's right hand side).
const RuleList & Grammar::m_getRulesStartingWith(int cat) const
{
    return *m_rulesByFirstCat[cat];
};

//
// Grammar dtor
//
// Who             When       What
// ---             ----       ----
// Jim Chapman     13 May 97  Released.
//
// Destroys the grammar and all its rules and categories.
Grammar::~Grammar()
{
    // Delete the array of category descriptions.
    POSITION pos=m_catNames.GetHeadPosition();
    while(pos!=NULL)
        delete m_catNames.GetNext(pos);

    // Delete the empty rules.
    pos=m_emptyRules.GetHeadPosition();
    while(pos!=NULL)
        delete m_emptyRules.GetNext(pos);

    // For each category, delete all the rules that start with that category. 
    int numCats=m_rulesByFirstCat.GetSize();
    for(int i=0; i<numCats; i++)
    {
        RuleList *rules=m_rulesByFirstCat[i];
        pos=rules->GetHeadPosition();
        while(pos!=NULL)
            delete rules->GetNext(pos);
        delete rules;
    }
};

//
// Rule ctor
//
// Who             When       What
// ---             ----       ----
// Jim Chapman     13 May 97  Released.
//
// The Rule constructor reads in the rule and inserts it into the 'inversion'
// data structures in the grammar.
// NB: A pointer to the rule will be stored in the grammar and will be deleted
//     when the grammar is destroyed.  Therefore (1) you must only create rules
//     on the heap (2) you must not delete them - leave it up to the grammar to
//     do this.
Rule::Rule(CString ruleLine, Grammar &g)
{
    ruleLine.TrimLeft(); // Remove leading white-space from rule text.

    CString catName=ruleLine.SpanExcluding(" \t"); // Read up to delimiter of cat.
    if(catName.IsEmpty())
    {
        AfxMessageBox("The grammar is badly formatted");
        exit(1);
    }
    m_cat=g.m_lookUpCatNumber(catName); // Get a cat number for the cat.
    ruleLine=ruleLine.Mid(catName.GetLength());
    ruleLine.TrimLeft();

    // Read in the cats on the right hand side of the rule and add them to
    // the m_subcats list.
    CString nextCatName;
    while(!(nextCatName=ruleLine.SpanExcluding(" \t")).IsEmpty())
    {
        int nextCatNumber=g.m_lookUpCatNumber(nextCatName);
        m_subcats.AddTail(nextCatNumber);
        ruleLine=ruleLine.Mid(nextCatName.GetLength());
        ruleLine.TrimLeft();
    }

    // Add self to the grammar.
    g.m_addRule(this);
};

//
// Lexicon ctor
//
// Who             When       What
// ---             ----       ----
// Jim Chapman     13 May 97  Released.
//
// Constructs a Lexicon object by reading it from the file whose name is passed as
// an argument.  If the file cannot be opened or read a message box is displayed, and
// the program halts.
// The method must be passed a grammar - it will use this grammar to look up category
// numbers for the categories it finds in the grammar (and to add any necessary new
// categories to the grammar).
Lexicon::Lexicon(Grammar &g, const CString &fileName)
{
    try {
        CStdioFile lexiconFile(fileName, CFile::modeRead);

        CString line;

        while(lexiconFile.ReadString(line))
        {
            line.TrimLeft(); // Remove leading white-space from lexicon entry.

            // skip blank lines and  comments
            if(line.IsEmpty())
                continue;
            else if(line[0]==';')
                continue;

            CString wordName=line.SpanExcluding(" \t"); // Read up to delimiter of word.
            if(wordName.IsEmpty())
            {
                AfxMessageBox("The lexicon is badly formatted");
                exit(1);
            }

            // If we have no cats listed for this word in the lexicon, create an
            // empty list to hold entries for the word.  Otherwise, we just modify
            // the existing list of cats for the word.
            CList<int,int> *entriesForWord;
            if(!m_entries.Lookup(wordName,(CObject *&)entriesForWord))
                entriesForWord=new CList<int,int>;

            line=line.Mid(wordName.GetLength());
            line.TrimLeft();

            // For every cat listed on this line of the lexicon file,
            // add that cat's number onto the list of cats for the word.
            CString nextCatName;
            while(!(nextCatName=line.SpanExcluding(" \t")).IsEmpty())
            {
                int nextCatNumber=g.m_lookUpCatNumber(nextCatName);
                entriesForWord->AddTail(nextCatNumber);
                line=line.Mid(nextCatName.GetLength());
                line.TrimLeft();
            }

            // Put the new (or changed) cat list into the lexicon's map member.
            m_entries.SetAt(wordName,entriesForWord);
        }
    }
    catch (...) {
        AfxMessageBox(CString("There was a problem reading the lexicon file, ")
                              +fileName);
        exit(1);
    }
    return;
};

//
// Lexicon dtor
//
// Who             When       What
// ---             ----       ----
// Jim Chapman     13 May 97  Released.
//
// The Lexicon destructor deletes all the lists of category-numbers in the map.
Lexicon::~Lexicon()
{
    CString dummy;
    POSITION pos=m_entries.GetStartPosition();
    while(pos!=NULL)
    {
        CList<int, int> *lexEntries;

        // Set lexEntries to the next pointer in the map.
        m_entries.GetNextAssoc(pos,dummy,(CObject*&)lexEntries);
        delete lexEntries; // Delete the list of cat-numbers.
    };

};
//
// Lexicon::m_getCatsForWord
//
// Who             When       What
// ---             ----       ----
// Jim Chapman     13 May 97  Released.
//
// This method returns a const pointer to the list of categories for some word.
// It is an error if the word is not in the lexicon (i.e. it has no categories); such
// an error is signalled by the method returning 0.
// Do not modify or delete the pointer that this method returns - it is the
// actual list object from the Lexicon, and remains the property of the lexicon
// object.
const CList<int,int> * Lexicon::m_getCatsForWord(const CString &word) const
{
    CList<int,int> *cats=0;

    if(!m_entries.Lookup( word,(CObject*&) cats ))
        AfxMessageBox(CString("Word is not in lexicon: ")+word);
    return cats;
};

//
// NonTerminalEdge ctor
//
// Who             When       What
// ---             ----       ----
// Jim Chapman     13 May 97  Released.
//
// This constructor takes an incomplete edge and a complete edge, and constructs a new
// edge by extending the incomplete edge over the complete edge.  Do not call this
// method except with a suitable combination of edges.
NonTerminalEdge::NonTerminalEdge(NonTerminalEdge *incompleteEdge, Edge *completeEdge)
    : m_rule(incompleteEdge->m_rule),
      m_leftVertex(incompleteEdge->m_leftVertex),
      m_rightVertex(completeEdge->m_getRightVertex()),
      m_depth(-1)
{
    assert(incompleteEdge->m_rightVertex==completeEdge->m_getLeftVertex());

    // Advance the new edge's position over one category of its rule, checking that
    // the rule-category advanced over does match the category of the complete edge.
    m_positionInRule=incompleteEdge->m_positionInRule;
    VERIFY(m_rule->m_getSubcats().GetNext(m_positionInRule)==completeEdge->m_getCat());

    // Build up the new edge's list of subedges, from the incomplete edge's subedges
    // plus the complete edge.
    POSITION pos=incompleteEdge->m_subedges.GetHeadPosition();
    while(pos!=NULL)
        m_subedges.AddTail(incompleteEdge->m_subedges.GetNext(pos));
    m_subedges.AddTail(completeEdge);
};

//
// NonTerminalEdge ctor
//
// Who             When       What
// ---             ----       ----
// Jim Chapman     13 May 97  Released.
//
// This constructor builds a new non-terminal edge out of a rule, by suggesting that
// this rule might generate a category sequence starting at the given vertex.
NonTerminalEdge::NonTerminalEdge(Rule *r, int vertex)
    : m_leftVertex(vertex),
      m_rightVertex(vertex),
      m_depth(-1)
{
    m_rule=r;
    m_positionInRule = m_rule->m_getSubcats().GetHeadPosition();
};

//
// NonTerminalEdge::m_isComplete
//
// Who             When       What
// ---             ----       ----
// Jim Chapman     13 May 97  Released.
//
// An edge is complete if it isn't looking for any more categories - i.e. it has
// found enough categories to substantiate the hypothesis that m_rule does generate
// the lexical sequence running from m_leftVertex to m_rightVertex.
BOOL NonTerminalEdge::m_isComplete() const
{
    return m_positionInRule==NULL;
};

//
// NonTerminalEdge::m_getCat
//
// Who             When       What
// ---             ----       ----
// Jim Chapman     13 May 97  Released.
//
// The category of an edge is equal to the category named on the left hand side of
// its rule.
int NonTerminalEdge::m_getCat() const
{
    return m_rule->m_getCat();
};

//
// NonTerminalEdge::m_getNeededCat
//
// Who             When       What
// ---             ----       ----
// Jim Chapman     13 May 97  Released.
//
// This method tells you what category an incomplete edge is looking for.  Do not call it
// on a complete edge.
int NonTerminalEdge::m_getNeededCat() const
{
    POSITION p=m_positionInRule;
    assert(p!=NULL); // Do not call m_getNeededCat on a complete edge!
    return m_rule->m_getSubcats().GetNext(p);
};

//
// NonTerminalEdge::m_getDepth
//
// Who             When       What
// ---             ----       ----
// Jim Chapman     13 May 97  Released.
//
// This method determines (recursively) how far above the 'lexical layer' of the tree
// this edge lies - i.e. the length of the longest path that leads from this edge
// down to a lexical edge.
// The method caches its result in the m_depth member of the edge, so that if it is
// called again, it won't need to recurse in order to find the answer.
int NonTerminalEdge::m_getDepth()
{
    if(m_depth!=-1)
        return m_depth;
    else
    {
        int maxChildDepth=0;
        POSITION pos=m_subedges.GetHeadPosition();
        while(pos!=NULL)
        {
            int d=m_subedges.GetNext(pos)->m_getDepth();
            if(d>maxChildDepth)
                maxChildDepth=d;
        }
        m_depth=maxChildDepth+1;
        return m_depth;
    }
}

//
// gpFont
//
// This is the font for graphical display of the parse tree, defined and initialised in
// ChartApp.cpp.  It is declared here because Edge::m_display needs to see it in order
// to position graphical items on the screen.
extern CFont *gpFont;

//
// NonTerminalEdge::m_display
//
// Who             When       What
// ---             ----       ----
// Jim Chapman     13 May 97  Released.
//
// This method draws the edge and all its subedges (except empty ones) on a device
// context.  It returns a CPoint to say where the middle of the top of the edge is
// positioned, and adjusts boundingRect to include the edge that it drew.
// Use this method only on complete edges - it is intended only for displaying the
// final result(s) of the parse.
// The reason for returning the CPoint is that we want the parent edge, if any, to
// draw a line to this edge.
// The reason for adjusting boundingRect is that we want to know the total area taken up
// by drawing the edge and its subedges, so we can tell whether some of it extended
// outside the area available to draw it - in which case we will want to make scroll bars
// available.
// We pass parentDepth to the method so that we know how far down the screen to draw
// non-terminal child eges.
// We pass sentenceDepth to it so that we know how far down the screen to draw
// the words for lexical edges.
// We pass Sentence so that we can look up the names of the words in lexical edges.
// We pass Grammar so that we can look up category names.
CPoint NonTerminalEdge::m_display(CDC *pDC, int sentenceDepth, int parentDepth,
                                  Sentence &s, Grammar &g, CRect &boundingRect)
{
    if(m_rule->m_getSubcats().IsEmpty())
        return CPoint(0,0); // Don't draw empty edges, and pass back (0,0) to tell
                                // the calling function that it's an empty edge.

    // Look up size, etc. for our display font
    LOGFONT fontData;
    VERIFY(gpFont->GetLogFont(&fontData));

    // Figure out where to draw the edge.
    int left=s.m_vertexToCharNum(m_getLeftVertex());
    int right=s.m_vertexToCharNum(m_getRightVertex());
    CPoint location((left+right)/2,parentDepth);
    location.x*=fontData.lfWidth;
    location.y*=2*fontData.lfHeight;

    // Look up the category name.
    CString catName=g.m_lookUpCatName(m_getCat());
    int offset=(catName.GetLength()*fontData.lfWidth)/2;

    // Draw the category name on the device context.
    VERIFY(pDC->TextOut(location.x-offset,location.y, catName));

    // Expand boundingRect to include the area covered by this output text.
    CRect textRect(location.x-offset,location.y,location.x-offset,location.y);
    VERIFY(pDC->DrawText(catName, textRect, DT_LEFT | DT_CALCRECT));
    boundingRect.UnionRect(boundingRect,textRect);

    // Draw all the subedges.
    POSITION pos=m_subedges.GetHeadPosition();
    while(pos!=NULL)
    {
        Edge *e=m_subedges.GetNext(pos);
        assert(e->m_isComplete()); // Incomplete edges should not appear in the
                                   // parse tree for the final result.
        CPoint childPos=e->m_display(pDC,sentenceDepth,parentDepth+1,s,g,boundingRect);

        if(childPos!=CPoint(0,0)) // magic value to say "empty edge: draw nothing"
        {
            pDC->MoveTo(location+CPoint(0,fontData.lfHeight));
            pDC->LineTo(childPos);
        }
    }

    return location;
};

//
// LexicalEdge::m_display
//
// Who             When       What
// ---             ----       ----
// Jim Chapman     13 May 97  Released.
//
// This method draws a lexical edge and its associated word on the device context
// passed as its first argument.  It returns a CPoint to say where the middle of
// the top of the edge is positioned, and adjusts boundingRect to include the
// edge that it drew.
// Refer to the comments for CNonTerminalEdge::m_display for an explanation
// of the other parameters.
CPoint LexicalEdge::m_display(CDC *pDC, int sentenceDepth, int parentDepth, Sentence &s,
                              Grammar &g, CRect &boundingRect)
{
    // Calculate the position of the lexical edge on the screen 
    LOGFONT fontData;
    VERIFY(gpFont->GetLogFont(&fontData));
    int left=s.m_vertexToCharNum(m_getLeftVertex());
    int right=s.m_vertexToCharNum(m_getRightVertex());
    CPoint location((left+right)/2,parentDepth);
    location.x*=fontData.lfWidth;
    location.y*=2*fontData.lfHeight;

    // Draw the name of the lexical category in the appropriate position on the
    // screen.
    CString catName=g.m_lookUpCatName(m_getCat());
    int offset=(catName.GetLength()*fontData.lfWidth)/2;
    VERIFY(pDC->TextOut(location.x-offset,location.y, catName));

    // Expand boundingRect to include the area covered by this output text.
    CRect textRect(location.x-offset,location.y,location.x-offset,location.y);
    VERIFY(pDC->DrawText(catName, textRect, DT_LEFT | DT_CALCRECT));
    boundingRect.UnionRect(boundingRect,textRect);

    // Calculate the position of the word text on the screen.
    CPoint location2=CPoint((left+right)/2,sentenceDepth);
    location2.x*=fontData.lfWidth;
    location2.y*=2*fontData.lfHeight;

    // Draw a line from the lexical edge to the word text.
    pDC->MoveTo(location+CPoint(0,fontData.lfHeight));
    pDC->LineTo(location2+CPoint(0,fontData.lfHeight));

    // Look up the text of the word and display it on the screen.
    CString word=s.m_getWord(m_wordNumber);
    offset=(word.GetLength()*fontData.lfWidth)/2;
    VERIFY(pDC->TextOut(location2.x-offset,location2.y+fontData.lfHeight, word));

    // Expand boundingRect to include the area covered by this output text.
    textRect=CRect(location2.x-offset,location2.y+fontData.lfHeight,
                   location2.x-offset,location2.y+fontData.lfHeight);
    VERIFY(pDC->DrawText(word, textRect, DT_LEFT | DT_CALCRECT));
    boundingRect.UnionRect(boundingRect,textRect);

    // Return the position of the lexical edge.
    return location;
};

//
// Chart ctor
//
// Who             When       What
// ---             ----       ----
// Jim Chapman     13 May 97  Released.
//
// The chart ctor has to be told how many vertices there are (one plus the number
// of words in the sentence) and how many categories there are.
// The constructor builds the internal data structures of the chart - these are
// two arrays[vertex][category], each element of which is a list of Edges.
// One of the arrays lists all the complete edges for each category starting at
// each vertex.  The other array lists all the incomplete edges needing each
// category at each vertex.
// At each element of the two arrays, it creates an empty edge list, which will
// subsequently be added to by Chart::m_addEdge().
Chart::Chart(int maxVertex, int maxCat)
{
    m_completeEdges.SetSize(maxVertex+1);
    m_incompleteEdges.SetSize(maxVertex+1);
    for(int v=0; v<=maxVertex; v++)
    {
        m_completeEdges[v].SetSize(maxCat+1);
        m_incompleteEdges[v].SetSize(maxCat+1);
        for(int c=0; c<=maxCat; c++)
        {
            m_completeEdges[v][c]=new EdgeList;
            m_incompleteEdges[v][c]=new EdgeList;
        };
    }
};

//
// Chart dtor
//
// Who             When       What
// ---             ----       ----
// Jim Chapman     13 May 97  Released.
//
// The chart destructor deletes all the edges in the chart, and deletes the
// lists that were created to hold them.
Chart::~Chart()
{
    int maxVertex=m_completeEdges.GetSize()-1;
    int maxCat=m_completeEdges[0].GetSize()-1;
    for(int v=0; v<=maxVertex; v++)
    {
        for(int c=0; c<=maxCat; c++)
        {
            // Iterate down the list of complete edges for category c
            // at vertex v, deleting every edge.
            EdgeList * completeEdges=m_completeEdges[v][c];
            POSITION pos=completeEdges->GetHeadPosition();
            while(pos!=NULL)
                delete completeEdges->GetNext(pos);
            // Delete the list object.
            delete completeEdges;

            // Iterate down the list of incomplete edges for category c
            // at vertex v, deleting every edge.
            EdgeList * incompleteEdges=m_incompleteEdges[v][c];
            pos=incompleteEdges->GetHeadPosition();
            while(pos!=NULL)
                delete incompleteEdges->GetNext(pos);
            // Delete the list object.
            delete incompleteEdges;
        }
    }
}

//
// Chart::m_completeEdgesForCatStartingAt
//
// Who             When       What
// ---             ----       ----
// Jim Chapman     13 May 97  Released.
//
// This method accesses the chart to find the list of all complete edges that start
// at a given vertex and represent a particular category.
// The chart data structure stores these edges in a list, a pointer to which
// is kept in a two-dimensional array indexed by vertex and category - so this method
// need only return the relevant element of that array.
EdgeList * Chart::m_completeEdgesForCatStartingAt(int cat, int vertex)
{
    return m_completeEdges[vertex][cat];
};

//
// Chart::m_incompleteEdgesNeedingCatAt
//
// Who             When       What
// ---             ----       ----
// Jim Chapman     13 May 97  Released.
//
// This method accesses the chart to find the list of all incomplete edges that end
// at a given vertex and need a particular category for their next symbol.
// The chart data structure stores these edges in a list, a pointer to which
// is kept in a two-dimensional array indexed by vertex and category - so this method
// need only return the relevant element of that array.
EdgeList * Chart::m_incompleteEdgesNeedingCatAt(int cat, int vertex)
{
    return m_incompleteEdges[vertex][cat];
};

//
// Chart::m_addEdge
//
// Who             When       What
// ---             ----       ----
// Jim Chapman     13 May 97  Released.
//
// This method adds a new edge to the chart.  Once you have added the edge to the
// chart, do not modify it (the chart has indexed it, based on category, position 
// in rule, start/end vertices), and do not delete it (the chart will delete the
// edge when it is finished with it).
// An incomplete edge gets stored in m_incompleteEdges, indexed by right vertex
// and needed category.
// A complete edge gets stored in m_completeEdges, indexed by left vertex and
// category.
void Chart::m_addEdge(Edge *e)
{
    if(e->m_isComplete())
        m_completeEdges[e->m_getLeftVertex()][e->m_getCat()]->AddHead(e);
    else // e is incomplete (so it must be a nonterminal edge)
        m_incompleteEdges[e->m_getRightVertex()]
                         [((NonTerminalEdge *)e)->m_getNeededCat()]->AddHead(e);
};

//
// Sentence ctor
//
// Who             When       What
// ---             ----       ----
// Jim Chapman     13 May 97  Released.
//
// The sentence ctor builds a sentence object out of the string.  The string must
// not be empty.
// The sentence ctor sets up the array 'm_words'; m_words[i] holds the string text of the
// (i-1)th word.  It also sets up m_verticesArray[j].  m_verticesArray[j] holds the
//  character position of vertex #j - vertex #0 is the start of the sentence, and
// vertex #<number of words> is the end of the sentence.  m_verticesArray is used for
// working out how to lay out edges when they are displayed.
// The sentence ctor sets m_agenda and m_chart to zero; Sentence::m_parse creates these
// members as needed (this allows for the - theoretical - possibility of parsing a
// sentence several times using different grammars and or lexica).
Sentence::Sentence(CString text)
{
    assert(!text.IsEmpty()); // The user interface shouldn't let you create an empty
                             // sentence string

    int vertexNum=0;
    int charNum=0;
    CString nextWord;

    text.TrimLeft(); // Remove leading white-space from sentence text.

    // For each word in the sentence ...
    while(!(nextWord=text.SpanExcluding(" \t")).IsEmpty())
    {
        // Add the character position to m_verticesArray, expanding the array to include
        // the new element.
        // The word starts at character position charNum+2*vertexNum - i.e. we allow two
        // spaces between words.
        m_verticesArray.SetAtGrow(vertexNum,charNum+2*vertexNum);

        // Add nextWord to m_words, expanding the array to include it.
        m_words.SetAtGrow(vertexNum,nextWord);

        vertexNum++;
        charNum+=nextWord.GetLength();

        text=text.Mid(nextWord.GetLength());
        text.TrimLeft();
    }
    // Add a vertex to repesent the end of the sentence (i.e. the end of the last word
    // of the sentence.
    m_verticesArray.SetAtGrow(vertexNum,charNum+2*vertexNum);

    // Initially, the sentence contains no chart and no agenda.  Sentence::m_parse will
    // create them.
    m_agenda=0;
    m_chart=0;
};


//
// Sentence dtor
//
// Who             When       What
// ---             ----       ----
// Jim Chapman     13 May 97  Released.
//
// The sentence dtor  deletes the agenda and chart, if they have been created (the
// sentence ctor sets these members to zero, and Sentence::m_parse sets them to point
// to actual chart/agenda objects.
Sentence::~Sentence()
{
    if(m_agenda)
        delete m_agenda;
    if(m_chart)
        delete m_chart;
};

//
// Sentence::m_parse
//
// Who             When       What
// ---             ----       ----
// Jim Chapman     13 May 97  Released.
//
// This method parses the sentence, using the given grammar and lexicon.  It creates a
// chart and an agenda, and stores them as member variables of the sentence.
// The method returns all the parses of the sentence - i.e. all the edges of category S
// that start of the first vertex and end on the last.
// The return value of the m_parse method is a list of edges.  When you have finished
// with this list, you should delete it - but do not delete its elements (they are edges,
// which are contained inside Sentence::m_chart).  Equally, do not attempt to refer to
// the elements of the list after you have deleted the sentence, or after you have
// called Sentence::m_parse a second time.
//
// In pseudo-code, Sentence::m_parse is as follows:
//
//     for each word, w, in the sentence
//         for each category c, that is in the lexicon for w
//             add to the agenda a new lexical edge indicating the presence of
//             category c at w's position
//
//     for each vertex, v
//         for each grammar rule, r, that has {} on its right hand side
//             add to the agenda a new non-terminal edge that hypothesises the
//             presence of r's category at vertex v.
//
//     until(the agenda is empty)
//         e=next edge on the agenda
//         if e is complete
//             for each incomplete edge, e2, that needs e's category as its next
//             symbol, and is looking for it at e's left vertex
//                 add to the agenda a new non-terminal edge formed by extending e2
//                 over e.
//
//             if we have not already 'proposed' e's category at e's left vertex
//             then propose it, viz:
//                 for each rule, r, that has e's category as its first symbol on the
//                 right hand side
//                     add to the agenda a new edge non-terminal edge that hypothesises
//                     the presence of r's category starting at e's left vertex.
//
//         else (e is incomplete)
//             for each complete edge, e2, whose category equals e's first needed
//             category, and which starts at e's right vertex
//                 add to the agenda a new non-terminal edge formed by extending e
//                 over e2.
//
//         add e to the chart
//
// When the above pseudo-code terminates, the chart will contain all the edges that were
// found in the sentence.  Potential parses for the sentence will be complete edges that
// run from the sentence's first vertex to its last, and have category S (strictly,
// the sentence category is the head category of the first rule in the grammar - it
// doesn't have to be S).
EdgeList * Sentence::m_parse(Grammar &grammar, Lexicon &lexicon)
{
    int maxVertex=m_verticesArray.GetUpperBound();
    int maxCat=grammar.m_getNumCats()-1;

    // Create, and initialise to FALSE, an array that we'll use to tell us whether any
    // particular category has already been proposed at a particular vertex.
    CArray<CArray<BOOL,BOOL>,CArray<BOOL,BOOL>&> proposedCats;
    proposedCats.SetSize(maxVertex+1);
    for(int v=0; v<=maxVertex; v++)
    {
        proposedCats[v].SetSize(maxCat+1);
        for(int c=0; c<=maxCat; c++)
            proposedCats[v][c]=FALSE;
    }

    // If m_parse has been called before, it will have left a chart and agenda behind
    // it.  Delete them.
    if(m_chart)
        delete m_chart;
    if(m_agenda)
        delete m_agenda;

    // Create a chart of suitable size to hold entries for all the categories at all the
    // vertices, and create an empty agenda.
    m_chart=new Chart(maxVertex,maxCat);
    m_agenda=new Agenda;

    // For each word in the sentence, create lexical edges for every category that the
    // word can have.  Add these edges to the agenda.
    for(int w=0; w<m_words.GetSize(); w++)
    {
        const CList<int,int> * catsForWord=lexicon.m_getCatsForWord(m_words[w]);
        if(catsForWord) // Will be 0 if the word hasn't been seen before - in which case
                        // Lexicon::m_getCatsForWord will have displayed an error.
        {
            POSITION pos=catsForWord->GetHeadPosition();
            while(pos!=NULL)
            {
                int cat=catsForWord->GetNext(pos);
                m_agenda->m_addEdge(new LexicalEdge(cat,w));
            }
        }
    }

    // For each rule that can be satisfied by NULL, create complete edges for that rule
    // at every vertex.
    POSITION pos=grammar.m_getEmptyRules().GetHeadPosition();
    while(pos!=NULL)
    {
        Rule *r=grammar.m_getEmptyRules().GetNext(pos);
        for(v=0; v<=maxVertex; v++)
            m_agenda->m_addEdge(new NonTerminalEdge(r,v));
    }

    // Repeatedly remove an edge from the agenda and process it.
    Edge *e;
    while(e=m_agenda->m_nextEdge())
    {
        if(e->m_isComplete())
        {
            // Find all the incomplete edges that can extend over e, and create new
            // edges corresponding to such extension.
            int cat=e->m_getCat();
            int vertex=e->m_getLeftVertex();
            EdgeList *extendableEdges=
             m_chart->m_incompleteEdgesNeedingCatAt(cat, vertex);
            POSITION pos=extendableEdges->GetHeadPosition();
            while(pos!=NULL)
            {
                NonTerminalEdge *incompleteEdge
                 =(NonTerminalEdge *) extendableEdges->GetNext(pos); // Incomplete edges
                                                                     // must be 
                                                                     // non-terminal, so
                                                                                     // this is OK.
                m_agenda->m_addEdge(new NonTerminalEdge(incompleteEdge, e));
            }

            // If we haven't already 'proposed' the category of this complete edge at
            // this vertex, do so now.
            if(!proposedCats[vertex][cat])
            {
                proposedCats[vertex][cat]=TRUE;
                const RuleList &rules=grammar.m_getRulesStartingWith(cat);
                POSITION pos=rules.GetHeadPosition();
                while(pos!=NULL)
                {
                    Rule *r=rules.GetNext(pos);
                    m_agenda->m_addEdge(new NonTerminalEdge(r,vertex));
                }
            }
        }
        else // e is incomplete.
        {
            NonTerminalEdge *nte=(NonTerminalEdge *)e;// Incomplete edges must be
                                                      // non-terminal, so this is OK.

            // Find all the complete edges that e can extend over, and create new
            // edges corresponding to such extension.
            int cat=nte->m_getNeededCat();
            int vertex=nte->m_getRightVertex();
            EdgeList *usableEdges=
             m_chart->m_completeEdgesForCatStartingAt(cat, vertex);
            POSITION pos=usableEdges->GetHeadPosition();
            while(pos!=NULL)
            {
                Edge *completeEdge=usableEdges->GetNext(pos);
                m_agenda->m_addEdge(new NonTerminalEdge(nte,completeEdge));
            }
        }
        // Add e to the chart, so that other edges will know about it.
        m_chart->m_addEdge(e);
    }

    // When we reach this point, the agenda is empty, which means that we have done all
    // the parsing we can.  Build a list of all sentence edges that go from vertex 0 to
    // the vertex number maxVertex and return it.
    EdgeList * result=new EdgeList;

     // We assume that the sentence symbol is the LHS of the first rule in the grammar.
    EdgeList * sentenceEdges=m_chart->m_completeEdgesForCatStartingAt(0,0);
    pos=sentenceEdges->GetHeadPosition();
    while(pos!=NULL)
    {
        Edge *sentenceEdge=sentenceEdges->GetNext(pos);
        if(sentenceEdge->m_getRightVertex()==maxVertex)
            result->AddHead(sentenceEdge);
    }

    return result;
}

//
// Sentence::m_vertexToCharNum
//
// Who             When       What
// ---             ----       ----
// Jim Chapman     13 May 97  Released.
//
// Given a vertex number, this method tells you what character position to display it at.
// The information is stored in an array by the sentence ctor.
int Sentence::m_vertexToCharNum(int v) const
{
    return m_verticesArray[v];
};

//
// Sentence::m_getWord
//
// Who             When       What
// ---             ----       ----
// Jim Chapman     13 May 97  Released.
//
// This method gives you a copy of the (i+1)th word of the text: i is a zero-based index,
// so s.m_getWord[0] would give you the first word of the sentence.
CString Sentence::m_getWord(int w) const
{
    return m_words[w];
};

#ifdef _DEBUG
//
// Rule::m_traceOut
//
// Who             When       What
// ---             ----       ----
// Jim Chapman     13 May 97  Released.
//
// Prints a rule out to the debug output.  The method needs a Grammar parameter so it
// can look up category names.
void Rule::m_traceOut(Grammar &g)
{
    CString ruleTail;
    POSITION pos=m_subcats.GetHeadPosition();
    while(pos!=NULL)
    {
        int cat=m_subcats.GetNext(pos);
        ruleTail+=CString(" ");
        ruleTail+=g.m_lookUpCatName(cat);
    }
    if(ruleTail.IsEmpty())
        TRACE1("Rule %s <- {}\n",(const char *)g.m_lookUpCatName(m_cat));
    else
        TRACE2("Rule %s <- %s\n",(const char *)g.m_lookUpCatName(m_cat),
                                 (const char *)ruleTail);
};

//
// Grammar::m_traceOut
//
// Who             When       What
// ---             ----       ----
// Jim Chapman     13 May 97  Released.
//
// Prints a grammar object to the debug output.
void Grammar::m_traceOut()
{
    TRACE0("Grammar:\n");
    TRACE0("Category Names:\n");
    POSITION pos=m_catNames.GetHeadPosition();
    while(pos!=NULL)
    {
        CatDescription *c=m_catNames.GetNext(pos);
        TRACE2(" %d : %s\n",c->m_number,(const char *)c->m_name);
    }
    TRACE0("Empty Rules\n");
    pos=m_emptyRules.GetHeadPosition();
    while(pos!=NULL)
    {
        Rule *r=m_emptyRules.GetNext(pos);
        r->m_traceOut(*this);
    }
    TRACE0("Rules by category:\n");
    for(int c=0; c<m_rulesByFirstCat.GetSize(); c++)
    {
        TRACE1("Rules starting with cat %s\n",(const char *)m_lookUpCatName(c));
        pos=m_rulesByFirstCat[c]->GetHeadPosition();
        while(pos!=NULL)
        {
            Rule *r=m_rulesByFirstCat[c]->GetNext(pos);
            r->m_traceOut(*this);
        }
    }
};

//
// Lexicon::m_traceOut
//
// Who             When       What
// ---             ----       ----
// Jim Chapman     13 May 97  Released.
//
// Prints a lexicon object to the debug output.  It needs a Grammar parameter so it can
// look up category names.
void Lexicon::m_traceOut(Grammar &g)
{
    TRACE0("Lexicon:\n");
    CString word;
    POSITION pos=m_entries.GetStartPosition();
    while(pos!=NULL)
    {
        CList<int, int> *lexEntries;

        CString cats;
        m_entries.GetNextAssoc(pos,word,(CObject*&)lexEntries); // Set lexEntries to next
                                                                // pointer in map
        POSITION pos2=lexEntries->GetHeadPosition();
        while(pos2!=NULL)
        {
            int cat=lexEntries->GetNext(pos2);
            cats+=CString(" ");
            cats+=g.m_lookUpCatName(cat);
        };
        TRACE2(" word %s : %s\n",(const char *)word, (const char *)cats);
    };

};

//
// LexicalEdge::m_traceOut
//
// Who             When       What
// ---             ----       ----
// Jim Chapman     13 May 97  Released.
//
// Prints a lexical edge to the debug output.  It needs a Grammar parameter so it can
// look up category names.
void LexicalEdge::m_traceOut(Grammar &g)
{
    TRACE3("LexicalEdge: <%s,%d,%d>\n",
    (const char *)g.m_lookUpCatName(m_cat),m_wordNumber,m_wordNumber+1);
};

//
// NonTerminalEdge::m_traceOut
//
// Who             When       What
// ---             ----       ----
// Jim Chapman     13 May 97  Released.
//
// Prints a non-terminal edge to the debug output.  It needs a Grammar parameter so it
// can look up category names.
void NonTerminalEdge::m_traceOut(Grammar &g)
{
    int cat;
    POSITION pos=m_positionInRule;
    if(pos==NULL)
    {
        cat=m_rule->m_getCat();
        TRACE3("Complete NonTerminalEdge: <%s,%d,%d,{}>\n",
            (const char *)g.m_lookUpCatName(cat),m_leftVertex,m_rightVertex);
    }
    else
    {
        CString cats;
        while(pos!=NULL)
        {
            cat=m_rule->m_getSubcats().GetNext(pos);
            cats+=CString(" ");
            cats+=g.m_lookUpCatName(cat);
        }
        cat=m_rule->m_getCat();
        CString traceMsg;
        traceMsg.Format("Incomplete NonTerminalEdge: <%s,%d,%d,{%s}>\n",
                        (const char *)g.m_lookUpCatName(cat),m_leftVertex,m_rightVertex,
                        (const char *) cats);
        TRACE0(traceMsg);
    }
};
#endif // #ifdef _DEBUG



home@jim-chapman.net