|
(*
Eddy C. Vasile
PO BOX 71313
L.A. CA 90071
[[Email Removed]]
This program was my entry for the Winter/83 Linguistics 145
(Intro to Computational Linguistics) final exam at UCLA
with Dr. Eric Wehrli.
The program is a context free, top down, left to right parser.
It uses recursion, binary trees, linked lists
and sorts and all sorts of other tools.
The rules are linked lists of final symbols of the format:
S=>NP VP Sentence => Noun Phrase + Verb Phrase
S=>NP VP PP Sentence => Noun Pharse + Verb Phrase + Prepositional Phrase
NP=>DET N Noun Phrase => Determinant + Noun
NP=>DET ADJ N
VP=>V
VP=>V NP
VP=>V NP PP
PP=>P NP Prepositional Phrase=>Prep + Noun Phrase
The parser will allow for ambiguities in the grammar.
Since in the case above S=>NP VP PP or S=>NP VP and VP=>V PP
you will see 2 versions for an ambiguous sentence like:
The man sees the woman with a telescope
Version 1 S=>NP VP
NP [The man]
VP [sees the woman with a telescope]
Here the prepositional phrase is part of the verb phrase and the
meaning implies that the woman was carying a telescope.
Version 2 S=>NP VP PP
NP [The man]
VP [sees the woman]
PP [with a telescope]
Here the PP hangs right off the top and the meaning implies that the man
used a telescope to see the woman.
Originally written with an obscure mainframe Pascal. This version
will run with Turbo Pascal of any version higher than 3.0 (3.xx, 4.xx, 5.xx, 6.xx)
*)
program parser;
const
line_len = 80; (*maximum line length *)
word_len = 20; (*maximum word length *)
max_trace= 150;(*the number of traces*)
type
line_type= string[line_len];
word_type= string[word_len];
symbol = (S,NP,VP,PP,Det,Adj,N,V,P,X,Clause);
ptr_exp = ^expansion;
vn = s..pp;
vt = det..Clause;
ptr_el = ^entry;
ptr_tree = ^tree;
tree = record
data:word_type;
category:symbol;
left:ptr_tree;
right:ptr_tree
end;
VNanVT = ^symb_map;
symb_map= record
key:symbol;
link:VNanVT
end;
ptr_node =^node;
node =record
category:symbol;
word :word_type;
daughter, sister:ptr_node
end;
expansion= record
first_sy: VNanVT;
next_exp: ptr_exp
end;
traces = record
nod : ptr_node;
loc:integer;
trace_rem:ptr_el;
end;
entry = record
key : ptr_node;
link: ptr_el
end;
gram_type = array[symbol] of ptr_exp;
array_Type = array[0..max_trace] of traces;
conver_type = array[symbol] of word_type;
var
newword : boolean;
gramfile,
dict : text;
grammar : gram_type;
Alt_arr : array_Type;
conver : conver_type;
root : ptr_tree;
line : line_type;
c : char;
procedure preorder(tree:ptr_tree);
begin
if tree<>nil
then
begin
writeln(Dict,tree^.data);
writeln(Dict,conver[tree^.category]);
preorder(tree^.left);
preorder(tree^.right)
end
end; (*preorder*)
function compress(line:line_type):line_type;
var
temp:line_type;
p :integer;
begin
p:=pos(' ',line);
if p<>0
then temp:=compress(copy(line,1,p)+copy(line,p+2,length(line)-p-1))
else temp:=line;
compress:=temp
end;
function ltrim(instring:line_type):line_type;
var
i:integer;
begin
i:=1;
while instring[i]=' ' do
begin
if instring[i]=' ' then delete(instring,i,1);
i:=i+1
end;
ltrim:=instring
end;
function rtrim(instring:line_type):line_type;
var
i:integer;
begin
i:=length(instring);
while instring[i]=' ' do
begin
if instring[i]=' ' then delete(instring,i,1);
i:=i-1
end;
rtrim:=instring
end;
function Uppercase(InputStr : LIne_Type): Line_Type;
var
i: integer;
begin
for i := 1 to length(InputStr) do
InputStr[i] := upcase(InputStr[i]);
Uppercase := InputStr
end;
procedure clean(var line:line_type);
var
i:integer;
begin
line:=uppercase(line);
for i:=1 to length(line) do
if line[i] in ['>','-','=']
then line[i]:=' ';
line:=rtrim(ltrim(compress(line)))
end;
procedure initialize(var conver:conver_type;var grammar:gram_type;
var root :ptr_tree);
var
category : symbol;
word : word_type;
the_cat : char;
procedure make_tree(var root :ptr_tree; word:word_type;
category: symbol);
begin
if root=nil
then
begin
new(root);
root^.data:=word;
root^.category:=category;
root^.left:=nil;
root^.right:=nil;
end
else
if word<root^.data
then make_tree(root^.left,word,category)
else if word>root^.data
then make_tree(root^.right,word,category)
end; (*make_tree*)
begin(*initialize*)
writeln('1) First you will be asked for you grammar rules.');
writeln('Enter your rules in the following manner:');
writeln('S=> NP VP');
writeln('NP=> DET N');
writeln('NP=>DET ADJ N');
writeln('VP=> V');
writeln('VP=>V NP');
writeln('VP=>V NP PP');
writeln('PP=>P NP');
writeln('..etc. You may omit "=>" and you may enter rules in upper or lower case.');
writeln('These are the symbols to be used:');
writeln('S NP VP PP DET ADJ N V P');
writeln('(Sentence, Noun Phrase, Verb Phrase, Prepositional Phrase,');
writeln('Determinant, Adjective, Noun, Verb, Preposition)');
writeln('When finished entering rules, hit return.');
writeln('You will be able to retrive, save and append rules from a file.');
writeln('2) Next enter sentences (upper or lower case).');
writeln('If the words you use are not in the lexicon,');
writeln('you will be asked to enter their value (a terminal symbol)');
writeln('To see the grammar rules type ? instead of a sentence.');
newword:=false;
line:='xxx';
root:=nil;
{$I-}
assign(dict,'dict.txt');
reset(dict);
{$I+}
if ioresult<>0
then
begin
word:='THE';
category:=Det;
make_tree(root,word,category)
end
else
begin
while not eof(dict) do
begin
readln(dict,word);
readln(dict,the_cat);
case the_cat of
'A': category:=Adj;
'D': category:=Det;
'N': category:=N;
'V': category:=V;
'P': category:=P;
'C': category:=Clause;
else category:=X
end; (*case*)
make_tree(root,word,category)
end; (*while*)
close(dict)
end;
for category:=S to PP do
grammar[category]:=nil;
conver[s] :='S';
conver[np] :='NP';
conver[vp] :='VP';
conver[pp] :='PP';
conver[det]:='DET';
conver[adj]:='ADJ';
conver[n] :='N';
conver[p] :='P';
conver[v] :='V';
conver[Clause]:='CLAUSE';
end;(*initialize*)
function rule_ok(line:line_type; var lhs:symbol;
var rhs:ptr_exp):boolean;
var
idx : integer;
wrd : word_type;
el : VNanVT;
first: VNanVT;
last : VNanVT;
ok : boolean;
procedure symb_ok(str:word_type; var cat:symbol;
var found:boolean);
var
over : boolean;
begin
cat:=s;
found:=false;
over:=false;
while not over and not found do
if conver[cat]=str
then found:=true
else if cat<P
then cat:=succ(cat)
else over:=true
end;(*symb ok*)
begin(*ok*)
clean(line);line:=line+' ';
idx :=pos(' ',line);
wrd :=copy(line,1,idx-1);
symb_ok(wrd,lhs,ok);
first:=nil;
while (idx<length(line)) and ok do
begin
delete(line,1,idx);
idx :=pos(' ',line);
wrd :=copy(line,1,idx-1);
new(el);
symb_ok(wrd,el^.key,ok);
el^.link:=nil;
if ok
then
if first=nil
then
begin
first:=el;
last:=el
end
else
begin
last^.link:=el;
last:=el
end
end;
if ok
then
begin
new(rhs);
with rhs^ do
begin
first_sy:=first;
next_exp:=nil
end
end;
rule_ok:=ok
end;
procedure get_rules(var grammar: gram_type);
var
rhs :ptr_exp;
lhs :symbol; (*vn*)
line:line_type;
c :char;
fn :line_type;
procedure get_rule_line;
begin
Clean(line);
if rule_ok(line,lhs,rhs)
then
begin
rhs^.next_exp:=grammar[lhs];
grammar[lhs]:=rhs
end
else writeln('Rule ',line,' not accepted, symbols not in V+')
end;
begin
write('Would you like to use the rules from a file (Y/N) > ');
readln(c);
c:=upcase(c);
if c='Y'
then
begin
{$I-}
write('Enter the name of the file with the rules > ');
readln(fn);
assign(gramfile,fn);
reset(gramfile);
{$I+}
if ioresult=0
then
while not eof(gramfile) do
begin
readln(gramfile,line);
get_rule_line
end
else writeln('Can''t open the file ',fn)
end;
writeln('Enter the rules in LHS => RHS manner or Enter to quit:');
line:='XXX';
while line<>'' do
begin
write('Rule: ');
readln(line);
if line='' then writeln('The rule entry task completed.')
else get_rule_line
end;
close(gramfile)
end;
procedure display_rules(grammar : gram_type);
var
non_term : vn;
item : VNanVT;
expan : ptr_exp;
begin
for non_term:=S to PP do
begin
expan:=grammar[non_term];
while expan <> nil do (*classical que dump*)
begin
write((conver[non_term]):-2,' => ');
item:=expan^.first_sy;
while item<>nil do
begin
write((conver[item^.key]):-2,' ');
item:=item^.link
end;
writeln;
expan:=expan^.next_exp
end
end
end;
procedure get_gramfile;
var
fn : line_type;
begin
write('Please enter the name of the file to save the rules > ');
readln(fn);
{$I-} (*check for existing files*)
assign(gramfile,fn);
reset(gramfile);
{I+}
if ioresult=0
then writeln('File ',fn,' already exists. You will overwrite it.');
rewrite(gramfile);
end;
procedure save_rules(grammar : gram_type);
var
non_term : vn;
item : VNanVT;
expan : ptr_exp;
begin
for non_term:=S to PP do
begin
expan:=grammar[non_term];
while expan <> nil do (*classical que dump*)
begin
write(gramfile,(conver[non_term]):-2,' => ');
item:=expan^.first_sy;
while item<>nil do
begin
write(gramfile,(conver[item^.key]):-2,' ');
item:=item^.link
end;
writeln(gramfile);
expan:=expan^.next_exp
end
end
end;
procedure Parse_All(line:line_type);
var
c_pos :integer;
c_rem :ptr_el;
stk_ptr :integer;
all_done :boolean;
break :boolean;
cat :symbol;
ptr,
temp_p :ptr_exp;
exit :boolean;
sent_root,
sent_left :ptr_node;
word :word_type;
procedure get_word(var i:integer; var word:word_type);
var
j:integer;
begin
j:=i;
while line[i] in ['a'..'z','A'..'Z'] do
i:=i+1;
if i<>j then
begin
word:=copy(line,j,i-j);
while (i<length(line)) and (line[i]=' ') do
i:=i+1;
if i=length(line)
then exit:=true else exit:=false
end
else word:=''
end;
procedure look_up(var root:ptr_tree; word:word_type;
var cat:symbol);
var
answ: line_type;
begin
if root=nil
then
begin
new(root);
with root^ do
begin
newword:=true;
data:=word;
left:=nil;
right:=nil;
writeln('Sorry but I do not know ',word);
writeln('Enter the symbol for the gramatical category:');
writeln('D)eterminant, A)djective, N)oun, V)erb, P)reposition, X)tra, C)lause');
write('Category: ');
readln(answ); clean(answ);
case answ[1] of
'A': cat:=Adj;
'D': cat:=Det;
'N': cat:=N;
'V': cat:=V;
'P': cat:=P;
'C': cat:=Clause
else cat:=X
end;
category:=cat
end
end
else with root^ do
begin
if word<data
then look_up(left,word,cat)
else if word>data
then look_up(right,word,cat)
else cat:=category
end
end;
procedure get_cat(var idx:integer; var cat:symbol;var word:word_type);
begin
get_word(idx,word);
if word=''
then
begin
exit:=true;
cat:=X
end
else look_up(root,word,cat)
end;
procedure Draw_tree(root: ptr_node);
begin
if root<>nil
then
begin
if root^.category in [S..PP]
then
begin
write('[ ',conver[root^.category]);
Draw_tree(root^.daughter);
write(' ]')
end
else
begin
write(' ',root^.word)
end;
Draw_tree(root^.sister)
end
end; (* Draw_tree *)
procedure copy(p:VNanVT; q:ptr_el; var list:ptr_el);
var
temp1 : ptr_el;
temp2 : ptr_el;
begin
if p <> nil
then
begin
new(temp1);
with temp1^ do
begin
link:=nil;
new(key);
with key^ do
begin
daughter:=nil;
sister :=nil;
category:=p^.key
end
end;
list:=temp1;
p:=p^.link;
while p<>nil do
begin
new(temp2);
with temp2^ do
begin
link:=nil;
new(key);
with key^ do
begin
category:=p^.key;
daughter:=nil;
sister:=nil
end;
temp1^.key^.sister:=key
end;
temp1^.link:=temp2;
temp1:=temp2;
p:=p^.link
end;
temp1^.link:=q
end
end;
begin
stk_ptr:=0;
exit:=false;
ptr:=grammar[S];
new(sent_root);
with sent_root^ do
begin
category:=s;
sister:=nil;
daughter:=nil;
end;
while ptr<>nil do
begin
stk_ptr:=stk_ptr+1;
with Alt_arr[stk_ptr] do
begin
loc:=1;
nod:=sent_root;
copy(ptr^.first_sy,nil,trace_rem)
end;
ptr:=ptr^.next_exp
end;
all_done:=false;
while stk_ptr<>0 do
begin
break:=false;
sent_left:=Alt_arr[stk_ptr].nod;
sent_left^.daughter:=Alt_arr[stk_ptr].trace_rem^.key;
c_rem:=Alt_arr[stk_ptr].trace_rem;
c_pos:=Alt_arr[stk_ptr].loc;
stk_ptr:=stk_ptr-1;
while not break do
begin
break:=true;
if c_rem^.key^.category in [Det..Clause] then
begin
get_cat(c_pos,cat,word);
if c_rem^.key^.category=cat then
begin
c_rem^.key^.word:=word;
c_rem:=c_rem^.link;
if c_rem=nil then break:=true
else break:=false
end
end
end;
if c_rem=nil
then
begin
if exit
then
begin
Draw_tree(sent_root);
writeln;
all_done:=true
end
end
else
if c_rem^.key^.category in [S..PP]
then
begin
temp_p:=grammar[c_rem^.key^.category];
while temp_p<>nil do
with temp_p^ do
begin
stk_ptr:=stk_ptr+1;
with Alt_arr[stk_ptr] do
begin
loc:=c_pos;
nod:=c_rem^.key;
copy(first_sy,c_rem^.link,trace_rem);
end;
temp_p:=temp_p^.next_exp
end
end
end;
if all_done
then writeln('This sentence IS grammatical')
else writeln('This sentence is **NOT** grammatical')
end;
begin
initialize(conver,grammar,root);
get_rules(grammar);
display_rules(grammar);
writeln('Hit Enter to quit');
while line<>'' do
begin
write('Sentence: ');
readln(line);
if line<>''
then
begin
if line='?' then display_rules(grammar)
else
begin
clean(line);
line:=line+' ';
Parse_All(line)
end
end
end;
if newword then
begin
write('Shall I save the new words you have just taught me (Y/N) > ');
readln(c);
c:=upcase(c);
writeln(c);
if c='Y'
then
begin
writeln('Wait while I''m saving the new words');
rewrite(dict);
preorder(root);
end
end;
close(dict);
write('Shall I save the grammar rules you have just taught me (Y/N) > ');
readln(c);
c:=upcase(c);
if c='Y'
then
begin
get_gramfile;
writeln('Wait while I''m saving the new rules');
save_rules(grammar);
close(gramfile)
end
end.
|