Chart Parser
by Jim Chapman
Home| Where we live | Jim| Isabel| James| Edward]
//
// Chart.cpp
//
// Who When What
// --- ---- ----
// Jim Chapman 13 May 97 Released.
//
// This file defines the behaviour of the core classes of the Chart Parser program.
// Refer to "Chart.h" for an overview of the program, and of its constituent classes.
//
#include "Chart.h"
#include <fstream.h>
#include <afxwin.h>
//
// Grammar ctor
//
// Who When What
// --- ---- ----
// Jim Chapman 13 May 97 Released.
//
// Constructs a Grammar object by reading it from the file whose name is passed as
// an argument. If the file cannot be opened or read a message box is displayed, and
// the program halts.
Grammar::Grammar(const CString & fileName)
{
try {
CStdioFile grammarFile(fileName, CFile::modeRead);
CString ruleLine;
while(grammarFile.ReadString(ruleLine))
{
// skip blank lines and comments
if(ruleLine.IsEmpty())
continue;
else if(ruleLine[0]==';')
continue;
Rule *rule=new Rule(ruleLine,*this);
}
}
catch (...) {
AfxMessageBox(CString("There was a problem reading the grammar file, ")
+ fileName);
exit(1);
}
return;
};
//
// Grammar::m_addRule
//
// Who When What
// --- ---- ----
// Jim Chapman 13 May 97 Released.
//
// This method adds a rule to the grammar. The method's parameter is a pointer to a
// rule. Do not delete the pointer after adding it to the grammar; the Grammar object
// deletes all its rules in its dtor.
void Grammar::m_addRule(Rule *rule)
{
if(rule->m_getSubcats().IsEmpty())
m_emptyRules.AddHead(rule);
else
{
POSITION pos=rule->m_getSubcats().GetHeadPosition();
int cat=rule->m_getSubcats().GetNext(pos);
m_rulesByFirstCat[cat]->AddHead(rule);
};
}
//
// Grammar::m_lookUpCatNumber
//
// Who When What
// --- ---- ----
// Jim Chapman 13 May 97 Released.
//
// Given a category name ("NP", for example), this method returns its category number
// (i.e. its index in this->m_catNames). If the category has not been seen before,
// the method creates a new array entry for it.
int Grammar::m_lookUpCatNumber(const CString &catName)
{
POSITION pos = m_catNames.GetHeadPosition();
int count=0;
while( pos != NULL )
{
CatDescription *d=m_catNames.GetNext(pos);
if(d->m_name==catName)
return d->m_number;
count ++;
}
// If we got to this point, it means that the cat is a new one, and must be
// added to the grammar. This needs doing in two places:
// 1) Its name must be added to m_catNames
// 2) We should create a new, empty rule-list for this category in m_rulesByFirstCat.
m_catNames.AddTail(new CatDescription(count,catName));
m_rulesByFirstCat.SetAtGrow(count,new RuleList);
return count;
};
//
// Grammar::m_lookUpCatName
//
// Who When What
// --- ---- ----
// Jim Chapman 13 May 97 Released.
//
// Given a category's number, this method returns its name. The method does not check
// the validity of the number (though the CArray does, if built for _DEBUG).
CString Grammar::m_lookUpCatName(int cat) const
{
return m_catNames.GetAt(m_catNames.FindIndex(cat))->m_name;
}
//
// Grammar::m_getEmptyRules
//
// Who When What
// --- ---- ----
// Jim Chapman 13 May 97 Released.
//
// Returns the list of empty rules for the grammar (i.e. all those rules that have
// no categories on their right hand side.
const RuleList & Grammar::m_getEmptyRules() const
{
return m_emptyRules;
};
//
// Grammar::m_getRulesStartingWith
//
// Who When What
// --- ---- ----
// Jim Chapman 13 May 97 Released.
//
// This method gives you all the rules in the grammar that start with a particular
// category (i.e. such category is the first category on the rule's right hand side).
const RuleList & Grammar::m_getRulesStartingWith(int cat) const
{
return *m_rulesByFirstCat[cat];
};
//
// Grammar dtor
//
// Who When What
// --- ---- ----
// Jim Chapman 13 May 97 Released.
//
// Destroys the grammar and all its rules and categories.
Grammar::~Grammar()
{
// Delete the array of category descriptions.
POSITION pos=m_catNames.GetHeadPosition();
while(pos!=NULL)
delete m_catNames.GetNext(pos);
// Delete the empty rules.
pos=m_emptyRules.GetHeadPosition();
while(pos!=NULL)
delete m_emptyRules.GetNext(pos);
// For each category, delete all the rules that start with that category.
int numCats=m_rulesByFirstCat.GetSize();
for(int i=0; i<numCats; i++)
{
RuleList *rules=m_rulesByFirstCat[i];
pos=rules->GetHeadPosition();
while(pos!=NULL)
delete rules->GetNext(pos);
delete rules;
}
};
//
// Rule ctor
//
// Who When What
// --- ---- ----
// Jim Chapman 13 May 97 Released.
//
// The Rule constructor reads in the rule and inserts it into the 'inversion'
// data structures in the grammar.
// NB: A pointer to the rule will be stored in the grammar and will be deleted
// when the grammar is destroyed. Therefore (1) you must only create rules
// on the heap (2) you must not delete them - leave it up to the grammar to
// do this.
Rule::Rule(CString ruleLine, Grammar &g)
{
ruleLine.TrimLeft(); // Remove leading white-space from rule text.
CString catName=ruleLine.SpanExcluding(" \t"); // Read up to delimiter of cat.
if(catName.IsEmpty())
{
AfxMessageBox("The grammar is badly formatted");
exit(1);
}
m_cat=g.m_lookUpCatNumber(catName); // Get a cat number for the cat.
ruleLine=ruleLine.Mid(catName.GetLength());
ruleLine.TrimLeft();
// Read in the cats on the right hand side of the rule and add them to
// the m_subcats list.
CString nextCatName;
while(!(nextCatName=ruleLine.SpanExcluding(" \t")).IsEmpty())
{
int nextCatNumber=g.m_lookUpCatNumber(nextCatName);
m_subcats.AddTail(nextCatNumber);
ruleLine=ruleLine.Mid(nextCatName.GetLength());
ruleLine.TrimLeft();
}
// Add self to the grammar.
g.m_addRule(this);
};
//
// Lexicon ctor
//
// Who When What
// --- ---- ----
// Jim Chapman 13 May 97 Released.
//
// Constructs a Lexicon object by reading it from the file whose name is passed as
// an argument. If the file cannot be opened or read a message box is displayed, and
// the program halts.
// The method must be passed a grammar - it will use this grammar to look up category
// numbers for the categories it finds in the grammar (and to add any necessary new
// categories to the grammar).
Lexicon::Lexicon(Grammar &g, const CString &fileName)
{
try {
CStdioFile lexiconFile(fileName, CFile::modeRead);
CString line;
while(lexiconFile.ReadString(line))
{
line.TrimLeft(); // Remove leading white-space from lexicon entry.
// skip blank lines and comments
if(line.IsEmpty())
continue;
else if(line[0]==';')
continue;
CString wordName=line.SpanExcluding(" \t"); // Read up to delimiter of word.
if(wordName.IsEmpty())
{
AfxMessageBox("The lexicon is badly formatted");
exit(1);
}
// If we have no cats listed for this word in the lexicon, create an
// empty list to hold entries for the word. Otherwise, we just modify
// the existing list of cats for the word.
CList<int,int> *entriesForWord;
if(!m_entries.Lookup(wordName,(CObject *&)entriesForWord))
entriesForWord=new CList<int,int>;
line=line.Mid(wordName.GetLength());
line.TrimLeft();
// For every cat listed on this line of the lexicon file,
// add that cat's number onto the list of cats for the word.
CString nextCatName;
while(!(nextCatName=line.SpanExcluding(" \t")).IsEmpty())
{
int nextCatNumber=g.m_lookUpCatNumber(nextCatName);
entriesForWord->AddTail(nextCatNumber);
line=line.Mid(nextCatName.GetLength());
line.TrimLeft();
}
// Put the new (or changed) cat list into the lexicon's map member.
m_entries.SetAt(wordName,entriesForWord);
}
}
catch (...) {
AfxMessageBox(CString("There was a problem reading the lexicon file, ")
+fileName);
exit(1);
}
return;
};
//
// Lexicon dtor
//
// Who When What
// --- ---- ----
// Jim Chapman 13 May 97 Released.
//
// The Lexicon destructor deletes all the lists of category-numbers in the map.
Lexicon::~Lexicon()
{
CString dummy;
POSITION pos=m_entries.GetStartPosition();
while(pos!=NULL)
{
CList<int, int> *lexEntries;
// Set lexEntries to the next pointer in the map.
m_entries.GetNextAssoc(pos,dummy,(CObject*&)lexEntries);
delete lexEntries; // Delete the list of cat-numbers.
};
};
//
// Lexicon::m_getCatsForWord
//
// Who When What
// --- ---- ----
// Jim Chapman 13 May 97 Released.
//
// This method returns a const pointer to the list of categories for some word.
// It is an error if the word is not in the lexicon (i.e. it has no categories); such
// an error is signalled by the method returning 0.
// Do not modify or delete the pointer that this method returns - it is the
// actual list object from the Lexicon, and remains the property of the lexicon
// object.
const CList<int,int> * Lexicon::m_getCatsForWord(const CString &word) const
{
CList<int,int> *cats=0;
if(!m_entries.Lookup( word,(CObject*&) cats ))
AfxMessageBox(CString("Word is not in lexicon: ")+word);
return cats;
};
//
// NonTerminalEdge ctor
//
// Who When What
// --- ---- ----
// Jim Chapman 13 May 97 Released.
//
// This constructor takes an incomplete edge and a complete edge, and constructs a new
// edge by extending the incomplete edge over the complete edge. Do not call this
// method except with a suitable combination of edges.
NonTerminalEdge::NonTerminalEdge(NonTerminalEdge *incompleteEdge, Edge *completeEdge)
: m_rule(incompleteEdge->m_rule),
m_leftVertex(incompleteEdge->m_leftVertex),
m_rightVertex(completeEdge->m_getRightVertex()),
m_depth(-1)
{
assert(incompleteEdge->m_rightVertex==completeEdge->m_getLeftVertex());
// Advance the new edge's position over one category of its rule, checking that
// the rule-category advanced over does match the category of the complete edge.
m_positionInRule=incompleteEdge->m_positionInRule;
VERIFY(m_rule->m_getSubcats().GetNext(m_positionInRule)==completeEdge->m_getCat());
// Build up the new edge's list of subedges, from the incomplete edge's subedges
// plus the complete edge.
POSITION pos=incompleteEdge->m_subedges.GetHeadPosition();
while(pos!=NULL)
m_subedges.AddTail(incompleteEdge->m_subedges.GetNext(pos));
m_subedges.AddTail(completeEdge);
};
//
// NonTerminalEdge ctor
//
// Who When What
// --- ---- ----
// Jim Chapman 13 May 97 Released.
//
// This constructor builds a new non-terminal edge out of a rule, by suggesting that
// this rule might generate a category sequence starting at the given vertex.
NonTerminalEdge::NonTerminalEdge(Rule *r, int vertex)
: m_leftVertex(vertex),
m_rightVertex(vertex),
m_depth(-1)
{
m_rule=r;
m_positionInRule = m_rule->m_getSubcats().GetHeadPosition();
};
//
// NonTerminalEdge::m_isComplete
//
// Who When What
// --- ---- ----
// Jim Chapman 13 May 97 Released.
//
// An edge is complete if it isn't looking for any more categories - i.e. it has
// found enough categories to substantiate the hypothesis that m_rule does generate
// the lexical sequence running from m_leftVertex to m_rightVertex.
BOOL NonTerminalEdge::m_isComplete() const
{
return m_positionInRule==NULL;
};
//
// NonTerminalEdge::m_getCat
//
// Who When What
// --- ---- ----
// Jim Chapman 13 May 97 Released.
//
// The category of an edge is equal to the category named on the left hand side of
// its rule.
int NonTerminalEdge::m_getCat() const
{
return m_rule->m_getCat();
};
//
// NonTerminalEdge::m_getNeededCat
//
// Who When What
// --- ---- ----
// Jim Chapman 13 May 97 Released.
//
// This method tells you what category an incomplete edge is looking for. Do not call it
// on a complete edge.
int NonTerminalEdge::m_getNeededCat() const
{
POSITION p=m_positionInRule;
assert(p!=NULL); // Do not call m_getNeededCat on a complete edge!
return m_rule->m_getSubcats().GetNext(p);
};
//
// NonTerminalEdge::m_getDepth
//
// Who When What
// --- ---- ----
// Jim Chapman 13 May 97 Released.
//
// This method determines (recursively) how far above the 'lexical layer' of the tree
// this edge lies - i.e. the length of the longest path that leads from this edge
// down to a lexical edge.
// The method caches its result in the m_depth member of the edge, so that if it is
// called again, it won't need to recurse in order to find the answer.
int NonTerminalEdge::m_getDepth()
{
if(m_depth!=-1)
return m_depth;
else
{
int maxChildDepth=0;
POSITION pos=m_subedges.GetHeadPosition();
while(pos!=NULL)
{
int d=m_subedges.GetNext(pos)->m_getDepth();
if(d>maxChildDepth)
maxChildDepth=d;
}
m_depth=maxChildDepth+1;
return m_depth;
}
}
//
// gpFont
//
// This is the font for graphical display of the parse tree, defined and initialised in
// ChartApp.cpp. It is declared here because Edge::m_display needs to see it in order
// to position graphical items on the screen.
extern CFont *gpFont;
//
// NonTerminalEdge::m_display
//
// Who When What
// --- ---- ----
// Jim Chapman 13 May 97 Released.
//
// This method draws the edge and all its subedges (except empty ones) on a device
// context. It returns a CPoint to say where the middle of the top of the edge is
// positioned, and adjusts boundingRect to include the edge that it drew.
// Use this method only on complete edges - it is intended only for displaying the
// final result(s) of the parse.
// The reason for returning the CPoint is that we want the parent edge, if any, to
// draw a line to this edge.
// The reason for adjusting boundingRect is that we want to know the total area taken up
// by drawing the edge and its subedges, so we can tell whether some of it extended
// outside the area available to draw it - in which case we will want to make scroll bars
// available.
// We pass parentDepth to the method so that we know how far down the screen to draw
// non-terminal child eges.
// We pass sentenceDepth to it so that we know how far down the screen to draw
// the words for lexical edges.
// We pass Sentence so that we can look up the names of the words in lexical edges.
// We pass Grammar so that we can look up category names.
CPoint NonTerminalEdge::m_display(CDC *pDC, int sentenceDepth, int parentDepth,
Sentence &s, Grammar &g, CRect &boundingRect)
{
if(m_rule->m_getSubcats().IsEmpty())
return CPoint(0,0); // Don't draw empty edges, and pass back (0,0) to tell
// the calling function that it's an empty edge.
// Look up size, etc. for our display font
LOGFONT fontData;
VERIFY(gpFont->GetLogFont(&fontData));
// Figure out where to draw the edge.
int left=s.m_vertexToCharNum(m_getLeftVertex());
int right=s.m_vertexToCharNum(m_getRightVertex());
CPoint location((left+right)/2,parentDepth);
location.x*=fontData.lfWidth;
location.y*=2*fontData.lfHeight;
// Look up the category name.
CString catName=g.m_lookUpCatName(m_getCat());
int offset=(catName.GetLength()*fontData.lfWidth)/2;
// Draw the category name on the device context.
VERIFY(pDC->TextOut(location.x-offset,location.y, catName));
// Expand boundingRect to include the area covered by this output text.
CRect textRect(location.x-offset,location.y,location.x-offset,location.y);
VERIFY(pDC->DrawText(catName, textRect, DT_LEFT | DT_CALCRECT));
boundingRect.UnionRect(boundingRect,textRect);
// Draw all the subedges.
POSITION pos=m_subedges.GetHeadPosition();
while(pos!=NULL)
{
Edge *e=m_subedges.GetNext(pos);
assert(e->m_isComplete()); // Incomplete edges should not appear in the
// parse tree for the final result.
CPoint childPos=e->m_display(pDC,sentenceDepth,parentDepth+1,s,g,boundingRect);
if(childPos!=CPoint(0,0)) // magic value to say "empty edge: draw nothing"
{
pDC->MoveTo(location+CPoint(0,fontData.lfHeight));
pDC->LineTo(childPos);
}
}
return location;
};
//
// LexicalEdge::m_display
//
// Who When What
// --- ---- ----
// Jim Chapman 13 May 97 Released.
//
// This method draws a lexical edge and its associated word on the device context
// passed as its first argument. It returns a CPoint to say where the middle of
// the top of the edge is positioned, and adjusts boundingRect to include the
// edge that it drew.
// Refer to the comments for CNonTerminalEdge::m_display for an explanation
// of the other parameters.
CPoint LexicalEdge::m_display(CDC *pDC, int sentenceDepth, int parentDepth, Sentence &s,
Grammar &g, CRect &boundingRect)
{
// Calculate the position of the lexical edge on the screen
LOGFONT fontData;
VERIFY(gpFont->GetLogFont(&fontData));
int left=s.m_vertexToCharNum(m_getLeftVertex());
int right=s.m_vertexToCharNum(m_getRightVertex());
CPoint location((left+right)/2,parentDepth);
location.x*=fontData.lfWidth;
location.y*=2*fontData.lfHeight;
// Draw the name of the lexical category in the appropriate position on the
// screen.
CString catName=g.m_lookUpCatName(m_getCat());
int offset=(catName.GetLength()*fontData.lfWidth)/2;
VERIFY(pDC->TextOut(location.x-offset,location.y, catName));
// Expand boundingRect to include the area covered by this output text.
CRect textRect(location.x-offset,location.y,location.x-offset,location.y);
VERIFY(pDC->DrawText(catName, textRect, DT_LEFT | DT_CALCRECT));
boundingRect.UnionRect(boundingRect,textRect);
// Calculate the position of the word text on the screen.
CPoint location2=CPoint((left+right)/2,sentenceDepth);
location2.x*=fontData.lfWidth;
location2.y*=2*fontData.lfHeight;
// Draw a line from the lexical edge to the word text.
pDC->MoveTo(location+CPoint(0,fontData.lfHeight));
pDC->LineTo(location2+CPoint(0,fontData.lfHeight));
// Look up the text of the word and display it on the screen.
CString word=s.m_getWord(m_wordNumber);
offset=(word.GetLength()*fontData.lfWidth)/2;
VERIFY(pDC->TextOut(location2.x-offset,location2.y+fontData.lfHeight, word));
// Expand boundingRect to include the area covered by this output text.
textRect=CRect(location2.x-offset,location2.y+fontData.lfHeight,
location2.x-offset,location2.y+fontData.lfHeight);
VERIFY(pDC->DrawText(word, textRect, DT_LEFT | DT_CALCRECT));
boundingRect.UnionRect(boundingRect,textRect);
// Return the position of the lexical edge.
return location;
};
//
// Chart ctor
//
// Who When What
// --- ---- ----
// Jim Chapman 13 May 97 Released.
//
// The chart ctor has to be told how many vertices there are (one plus the number
// of words in the sentence) and how many categories there are.
// The constructor builds the internal data structures of the chart - these are
// two arrays[vertex][category], each element of which is a list of Edges.
// One of the arrays lists all the complete edges for each category starting at
// each vertex. The other array lists all the incomplete edges needing each
// category at each vertex.
// At each element of the two arrays, it creates an empty edge list, which will
// subsequently be added to by Chart::m_addEdge().
Chart::Chart(int maxVertex, int maxCat)
{
m_completeEdges.SetSize(maxVertex+1);
m_incompleteEdges.SetSize(maxVertex+1);
for(int v=0; v<=maxVertex; v++)
{
m_completeEdges[v].SetSize(maxCat+1);
m_incompleteEdges[v].SetSize(maxCat+1);
for(int c=0; c<=maxCat; c++)
{
m_completeEdges[v][c]=new EdgeList;
m_incompleteEdges[v][c]=new EdgeList;
};
}
};
//
// Chart dtor
//
// Who When What
// --- ---- ----
// Jim Chapman 13 May 97 Released.
//
// The chart destructor deletes all the edges in the chart, and deletes the
// lists that were created to hold them.
Chart::~Chart()
{
int maxVertex=m_completeEdges.GetSize()-1;
int maxCat=m_completeEdges[0].GetSize()-1;
for(int v=0; v<=maxVertex; v++)
{
for(int c=0; c<=maxCat; c++)
{
// Iterate down the list of complete edges for category c
// at vertex v, deleting every edge.
EdgeList * completeEdges=m_completeEdges[v][c];
POSITION pos=completeEdges->GetHeadPosition();
while(pos!=NULL)
delete completeEdges->GetNext(pos);
// Delete the list object.
delete completeEdges;
// Iterate down the list of incomplete edges for category c
// at vertex v, deleting every edge.
EdgeList * incompleteEdges=m_incompleteEdges[v][c];
pos=incompleteEdges->GetHeadPosition();
while(pos!=NULL)
delete incompleteEdges->GetNext(pos);
// Delete the list object.
delete incompleteEdges;
}
}
}
//
// Chart::m_completeEdgesForCatStartingAt
//
// Who When What
// --- ---- ----
// Jim Chapman 13 May 97 Released.
//
// This method accesses the chart to find the list of all complete edges that start
// at a given vertex and represent a particular category.
// The chart data structure stores these edges in a list, a pointer to which
// is kept in a two-dimensional array indexed by vertex and category - so this method
// need only return the relevant element of that array.
EdgeList * Chart::m_completeEdgesForCatStartingAt(int cat, int vertex)
{
return m_completeEdges[vertex][cat];
};
//
// Chart::m_incompleteEdgesNeedingCatAt
//
// Who When What
// --- ---- ----
// Jim Chapman 13 May 97 Released.
//
// This method accesses the chart to find the list of all incomplete edges that end
// at a given vertex and need a particular category for their next symbol.
// The chart data structure stores these edges in a list, a pointer to which
// is kept in a two-dimensional array indexed by vertex and category - so this method
// need only return the relevant element of that array.
EdgeList * Chart::m_incompleteEdgesNeedingCatAt(int cat, int vertex)
{
return m_incompleteEdges[vertex][cat];
};
//
// Chart::m_addEdge
//
// Who When What
// --- ---- ----
// Jim Chapman 13 May 97 Released.
//
// This method adds a new edge to the chart. Once you have added the edge to the
// chart, do not modify it (the chart has indexed it, based on category, position
// in rule, start/end vertices), and do not delete it (the chart will delete the
// edge when it is finished with it).
// An incomplete edge gets stored in m_incompleteEdges, indexed by right vertex
// and needed category.
// A complete edge gets stored in m_completeEdges, indexed by left vertex and
// category.
void Chart::m_addEdge(Edge *e)
{
if(e->m_isComplete())
m_completeEdges[e->m_getLeftVertex()][e->m_getCat()]->AddHead(e);
else // e is incomplete (so it must be a nonterminal edge)
m_incompleteEdges[e->m_getRightVertex()]
[((NonTerminalEdge *)e)->m_getNeededCat()]->AddHead(e);
};
//
// Sentence ctor
//
// Who When What
// --- ---- ----
// Jim Chapman 13 May 97 Released.
//
// The sentence ctor builds a sentence object out of the string. The string must
// not be empty.
// The sentence ctor sets up the array 'm_words'; m_words[i] holds the string text of the
// (i-1)th word. It also sets up m_verticesArray[j]. m_verticesArray[j] holds the
// character position of vertex #j - vertex #0 is the start of the sentence, and
// vertex #<number of words> is the end of the sentence. m_verticesArray is used for
// working out how to lay out edges when they are displayed.
// The sentence ctor sets m_agenda and m_chart to zero; Sentence::m_parse creates these
// members as needed (this allows for the - theoretical - possibility of parsing a
// sentence several times using different grammars and or lexica).
Sentence::Sentence(CString text)
{
assert(!text.IsEmpty()); // The user interface shouldn't let you create an empty
// sentence string
int vertexNum=0;
int charNum=0;
CString nextWord;
text.TrimLeft(); // Remove leading white-space from sentence text.
// For each word in the sentence ...
while(!(nextWord=text.SpanExcluding(" \t")).IsEmpty())
{
// Add the character position to m_verticesArray, expanding the array to include
// the new element.
// The word starts at character position charNum+2*vertexNum - i.e. we allow two
// spaces between words.
m_verticesArray.SetAtGrow(vertexNum,charNum+2*vertexNum);
// Add nextWord to m_words, expanding the array to include it.
m_words.SetAtGrow(vertexNum,nextWord);
vertexNum++;
charNum+=nextWord.GetLength();
text=text.Mid(nextWord.GetLength());
text.TrimLeft();
}
// Add a vertex to repesent the end of the sentence (i.e. the end of the last word
// of the sentence.
m_verticesArray.SetAtGrow(vertexNum,charNum+2*vertexNum);
// Initially, the sentence contains no chart and no agenda. Sentence::m_parse will
// create them.
m_agenda=0;
m_chart=0;
};
//
// Sentence dtor
//
// Who When What
// --- ---- ----
// Jim Chapman 13 May 97 Released.
//
// The sentence dtor deletes the agenda and chart, if they have been created (the
// sentence ctor sets these members to zero, and Sentence::m_parse sets them to point
// to actual chart/agenda objects.
Sentence::~Sentence()
{
if(m_agenda)
delete m_agenda;
if(m_chart)
delete m_chart;
};
//
// Sentence::m_parse
//
// Who When What
// --- ---- ----
// Jim Chapman 13 May 97 Released.
//
// This method parses the sentence, using the given grammar and lexicon. It creates a
// chart and an agenda, and stores them as member variables of the sentence.
// The method returns all the parses of the sentence - i.e. all the edges of category S
// that start of the first vertex and end on the last.
// The return value of the m_parse method is a list of edges. When you have finished
// with this list, you should delete it - but do not delete its elements (they are edges,
// which are contained inside Sentence::m_chart). Equally, do not attempt to refer to
// the elements of the list after you have deleted the sentence, or after you have
// called Sentence::m_parse a second time.
//
// In pseudo-code, Sentence::m_parse is as follows:
//
// for each word, w, in the sentence
// for each category c, that is in the lexicon for w
// add to the agenda a new lexical edge indicating the presence of
// category c at w's position
//
// for each vertex, v
// for each grammar rule, r, that has {} on its right hand side
// add to the agenda a new non-terminal edge that hypothesises the
// presence of r's category at vertex v.
//
// until(the agenda is empty)
// e=next edge on the agenda
// if e is complete
// for each incomplete edge, e2, that needs e's category as its next
// symbol, and is looking for it at e's left vertex
// add to the agenda a new non-terminal edge formed by extending e2
// over e.
//
// if we have not already 'proposed' e's category at e's left vertex
// then propose it, viz:
// for each rule, r, that has e's category as its first symbol on the
// right hand side
// add to the agenda a new edge non-terminal edge that hypothesises
// the presence of r's category starting at e's left vertex.
//
// else (e is incomplete)
// for each complete edge, e2, whose category equals e's first needed
// category, and which starts at e's right vertex
// add to the agenda a new non-terminal edge formed by extending e
// over e2.
//
// add e to the chart
//
// When the above pseudo-code terminates, the chart will contain all the edges that were
// found in the sentence. Potential parses for the sentence will be complete edges that
// run from the sentence's first vertex to its last, and have category S (strictly,
// the sentence category is the head category of the first rule in the grammar - it
// doesn't have to be S).
EdgeList * Sentence::m_parse(Grammar &grammar, Lexicon &lexicon)
{
int maxVertex=m_verticesArray.GetUpperBound();
int maxCat=grammar.m_getNumCats()-1;
// Create, and initialise to FALSE, an array that we'll use to tell us whether any
// particular category has already been proposed at a particular vertex.
CArray<CArray<BOOL,BOOL>,CArray<BOOL,BOOL>&> proposedCats;
proposedCats.SetSize(maxVertex+1);
for(int v=0; v<=maxVertex; v++)
{
proposedCats[v].SetSize(maxCat+1);
for(int c=0; c<=maxCat; c++)
proposedCats[v][c]=FALSE;
}
// If m_parse has been called before, it will have left a chart and agenda behind
// it. Delete them.
if(m_chart)
delete m_chart;
if(m_agenda)
delete m_agenda;
// Create a chart of suitable size to hold entries for all the categories at all the
// vertices, and create an empty agenda.
m_chart=new Chart(maxVertex,maxCat);
m_agenda=new Agenda;
// For each word in the sentence, create lexical edges for every category that the
// word can have. Add these edges to the agenda.
for(int w=0; w<m_words.GetSize(); w++)
{
const CList<int,int> * catsForWord=lexicon.m_getCatsForWord(m_words[w]);
if(catsForWord) // Will be 0 if the word hasn't been seen before - in which case
// Lexicon::m_getCatsForWord will have displayed an error.
{
POSITION pos=catsForWord->GetHeadPosition();
while(pos!=NULL)
{
int cat=catsForWord->GetNext(pos);
m_agenda->m_addEdge(new LexicalEdge(cat,w));
}
}
}
// For each rule that can be satisfied by NULL, create complete edges for that rule
// at every vertex.
POSITION pos=grammar.m_getEmptyRules().GetHeadPosition();
while(pos!=NULL)
{
Rule *r=grammar.m_getEmptyRules().GetNext(pos);
for(v=0; v<=maxVertex; v++)
m_agenda->m_addEdge(new NonTerminalEdge(r,v));
}
// Repeatedly remove an edge from the agenda and process it.
Edge *e;
while(e=m_agenda->m_nextEdge())
{
if(e->m_isComplete())
{
// Find all the incomplete edges that can extend over e, and create new
// edges corresponding to such extension.
int cat=e->m_getCat();
int vertex=e->m_getLeftVertex();
EdgeList *extendableEdges=
m_chart->m_incompleteEdgesNeedingCatAt(cat, vertex);
POSITION pos=extendableEdges->GetHeadPosition();
while(pos!=NULL)
{
NonTerminalEdge *incompleteEdge
=(NonTerminalEdge *) extendableEdges->GetNext(pos); // Incomplete edges
// must be
// non-terminal, so
// this is OK.
m_agenda->m_addEdge(new NonTerminalEdge(incompleteEdge, e));
}
// If we haven't already 'proposed' the category of this complete edge at
// this vertex, do so now.
if(!proposedCats[vertex][cat])
{
proposedCats[vertex][cat]=TRUE;
const RuleList &rules=grammar.m_getRulesStartingWith(cat);
POSITION pos=rules.GetHeadPosition();
while(pos!=NULL)
{
Rule *r=rules.GetNext(pos);
m_agenda->m_addEdge(new NonTerminalEdge(r,vertex));
}
}
}
else // e is incomplete.
{
NonTerminalEdge *nte=(NonTerminalEdge *)e;// Incomplete edges must be
// non-terminal, so this is OK.
// Find all the complete edges that e can extend over, and create new
// edges corresponding to such extension.
int cat=nte->m_getNeededCat();
int vertex=nte->m_getRightVertex();
EdgeList *usableEdges=
m_chart->m_completeEdgesForCatStartingAt(cat, vertex);
POSITION pos=usableEdges->GetHeadPosition();
while(pos!=NULL)
{
Edge *completeEdge=usableEdges->GetNext(pos);
m_agenda->m_addEdge(new NonTerminalEdge(nte,completeEdge));
}
}
// Add e to the chart, so that other edges will know about it.
m_chart->m_addEdge(e);
}
// When we reach this point, the agenda is empty, which means that we have done all
// the parsing we can. Build a list of all sentence edges that go from vertex 0 to
// the vertex number maxVertex and return it.
EdgeList * result=new EdgeList;
// We assume that the sentence symbol is the LHS of the first rule in the grammar.
EdgeList * sentenceEdges=m_chart->m_completeEdgesForCatStartingAt(0,0);
pos=sentenceEdges->GetHeadPosition();
while(pos!=NULL)
{
Edge *sentenceEdge=sentenceEdges->GetNext(pos);
if(sentenceEdge->m_getRightVertex()==maxVertex)
result->AddHead(sentenceEdge);
}
return result;
}
//
// Sentence::m_vertexToCharNum
//
// Who When What
// --- ---- ----
// Jim Chapman 13 May 97 Released.
//
// Given a vertex number, this method tells you what character position to display it at.
// The information is stored in an array by the sentence ctor.
int Sentence::m_vertexToCharNum(int v) const
{
return m_verticesArray[v];
};
//
// Sentence::m_getWord
//
// Who When What
// --- ---- ----
// Jim Chapman 13 May 97 Released.
//
// This method gives you a copy of the (i+1)th word of the text: i is a zero-based index,
// so s.m_getWord[0] would give you the first word of the sentence.
CString Sentence::m_getWord(int w) const
{
return m_words[w];
};
#ifdef _DEBUG
//
// Rule::m_traceOut
//
// Who When What
// --- ---- ----
// Jim Chapman 13 May 97 Released.
//
// Prints a rule out to the debug output. The method needs a Grammar parameter so it
// can look up category names.
void Rule::m_traceOut(Grammar &g)
{
CString ruleTail;
POSITION pos=m_subcats.GetHeadPosition();
while(pos!=NULL)
{
int cat=m_subcats.GetNext(pos);
ruleTail+=CString(" ");
ruleTail+=g.m_lookUpCatName(cat);
}
if(ruleTail.IsEmpty())
TRACE1("Rule %s <- {}\n",(const char *)g.m_lookUpCatName(m_cat));
else
TRACE2("Rule %s <- %s\n",(const char *)g.m_lookUpCatName(m_cat),
(const char *)ruleTail);
};
//
// Grammar::m_traceOut
//
// Who When What
// --- ---- ----
// Jim Chapman 13 May 97 Released.
//
// Prints a grammar object to the debug output.
void Grammar::m_traceOut()
{
TRACE0("Grammar:\n");
TRACE0("Category Names:\n");
POSITION pos=m_catNames.GetHeadPosition();
while(pos!=NULL)
{
CatDescription *c=m_catNames.GetNext(pos);
TRACE2(" %d : %s\n",c->m_number,(const char *)c->m_name);
}
TRACE0("Empty Rules\n");
pos=m_emptyRules.GetHeadPosition();
while(pos!=NULL)
{
Rule *r=m_emptyRules.GetNext(pos);
r->m_traceOut(*this);
}
TRACE0("Rules by category:\n");
for(int c=0; c<m_rulesByFirstCat.GetSize(); c++)
{
TRACE1("Rules starting with cat %s\n",(const char *)m_lookUpCatName(c));
pos=m_rulesByFirstCat[c]->GetHeadPosition();
while(pos!=NULL)
{
Rule *r=m_rulesByFirstCat[c]->GetNext(pos);
r->m_traceOut(*this);
}
}
};
//
// Lexicon::m_traceOut
//
// Who When What
// --- ---- ----
// Jim Chapman 13 May 97 Released.
//
// Prints a lexicon object to the debug output. It needs a Grammar parameter so it can
// look up category names.
void Lexicon::m_traceOut(Grammar &g)
{
TRACE0("Lexicon:\n");
CString word;
POSITION pos=m_entries.GetStartPosition();
while(pos!=NULL)
{
CList<int, int> *lexEntries;
CString cats;
m_entries.GetNextAssoc(pos,word,(CObject*&)lexEntries); // Set lexEntries to next
// pointer in map
POSITION pos2=lexEntries->GetHeadPosition();
while(pos2!=NULL)
{
int cat=lexEntries->GetNext(pos2);
cats+=CString(" ");
cats+=g.m_lookUpCatName(cat);
};
TRACE2(" word %s : %s\n",(const char *)word, (const char *)cats);
};
};
//
// LexicalEdge::m_traceOut
//
// Who When What
// --- ---- ----
// Jim Chapman 13 May 97 Released.
//
// Prints a lexical edge to the debug output. It needs a Grammar parameter so it can
// look up category names.
void LexicalEdge::m_traceOut(Grammar &g)
{
TRACE3("LexicalEdge: <%s,%d,%d>\n",
(const char *)g.m_lookUpCatName(m_cat),m_wordNumber,m_wordNumber+1);
};
//
// NonTerminalEdge::m_traceOut
//
// Who When What
// --- ---- ----
// Jim Chapman 13 May 97 Released.
//
// Prints a non-terminal edge to the debug output. It needs a Grammar parameter so it
// can look up category names.
void NonTerminalEdge::m_traceOut(Grammar &g)
{
int cat;
POSITION pos=m_positionInRule;
if(pos==NULL)
{
cat=m_rule->m_getCat();
TRACE3("Complete NonTerminalEdge: <%s,%d,%d,{}>\n",
(const char *)g.m_lookUpCatName(cat),m_leftVertex,m_rightVertex);
}
else
{
CString cats;
while(pos!=NULL)
{
cat=m_rule->m_getSubcats().GetNext(pos);
cats+=CString(" ");
cats+=g.m_lookUpCatName(cat);
}
cat=m_rule->m_getCat();
CString traceMsg;
traceMsg.Format("Incomplete NonTerminalEdge: <%s,%d,%d,{%s}>\n",
(const char *)g.m_lookUpCatName(cat),m_leftVertex,m_rightVertex,
(const char *) cats);
TRACE0(traceMsg);
}
};
#endif // #ifdef _DEBUG
home@jim-chapman.net