PROGRAM pascalformatter;
{
| ** Pascal Program Formatter **
| ** **
| ** by J. E. Crider, Shell Oil Company, Houston, Texas 77025 **
| ** **
| ** Copyright (c) 1980 by Shell Oil Company. Permission to **
| ** copy, modify, and distribute, but not for profit, is **
| ** hereby granted, provided that this note is included. **
|
| Changes:
| The program has been updated to replace keywords according to
| the TURBO Pascal conventions.
|
| This portable program formats Pascal programs and acceptable
| program fragments according to structured formatting principles
| [SIGPLAN Notices, Vol. 13, No. 11, Nov. 1978, pp. 15-22].
| The actions of the program are as follows:
|
| PREPARATION: For each structured statement that controls a
| structured statement, the program converts the controlled
| statement into a compound statement. The inserted BEGIN/END
| pair are in capital letters. A null statement (with semicolon)
| is inserted before the last END symbol of each program/
| procedure/function, if needed. The semicolon forces the END
| symbol to appear on a line by itself.
|
| FORMATTING: Each structured statement that controls a simple
| statement is placed on a single line, as if it were a simple
| statement. Otherwise, each structured statement is formatted
| in the following pattern (with indentation "indent"):
|
| XXXXXX header XXXXXXXX
| XXXXXXXXXXXXXXXXXX
| XXXXX body XXXXXX
| XXXXXXXXXXXXXXXXXX
|
| where the header is one of:
|
| while <expression> do begin
| for <control variable> := <for list> do begin
| with <record variable list> do begin
| repeat
| if <expression> then begin
| else if <expression> then begin
| else begin
| case <expression> of
| <case label list>: begin
|
| and the last line either begins with UNTIL or ends with END.
| Other program parts are formatted similarly. The headers are:
|
| <program/procedure/function heading>;
| label
| const
| type
| var
| begin
| (various for records and record variants)
|
| COMMENTS: Each comment that starts before or on a specified
| column on an input line (program constant "commthresh") is
| copied without shifting or reformatting. Each comment that
| starts after "commthresh" is reformatted and left-justified
| following the aligned comment base column ("alcommbase").
|
| LABELS: Each statement label is justified to the left margin and
| is placed on a line by itself.
|
| SPACES AND BLANK LINES: Spaces not at line breaks are copied from
| the input. Blank lines are copied from the input if they appear
| between statements (or appropriate declaration units). A blank
| line is inserted above each significant part of each program/
| procedure/function if one is not already there.
|
| CONTINUATION: Lines that are too long for an output line are
| continued with additional indentation ("contindent").
|
| INPUT FORM: The program expects as input a program or program
| fragment in Standard Pascal. A program fragment is acceptable
| if it consists of a sequence of (one or more) properly ordered
| program parts; examples are: a statement part (that is, a
| compound statement), or a TYPE part and a VAR part followed by
| procedure declarations. If the program fragment is in serious
| error, then the program may copy the remainder of the input file
| to the output file without significant modification. Error
| messages may be inserted into the output file as comments.
|}
CONST
maxrwlen = 10; { size of reserved word strings }
ordminchar = 32; { ord of lowest char in char set }
ordmaxchar = 126; { ord of highest char in char set }
{ Although this program uses the ASCII
character set, conversion to most other
character sets should be straightforward.
}
{ The following parameters may be adjusted for the installation: }
maxinlen = 255; { maximum width of input line + 1 }
maxoutlen = 80; { maximum width of output line }
initmargin = 1; { initial value of output margin }
commthresh = 4; { column threshhold in input for comments to
be aligned }
alcommbase = 35; { aligned comments in output start AFTER this
column }
indent = 3; { RECOMMENDED indentation increment }
contindent = 5; { continuation indentation, >indent }
endspaces = 3; { number of spaces to precede 'END' }
commindent = 3; { comment continuation indentation }
line_number : INTEGER = 0;
TYPE
natural = 0..MaxInt;
inrange = 0..maxinlen;
outrange = 0..maxoutlen;
errortype = (longline, noendcomm, notquote, longword, notdo, notof,
notend, notthen, notbegin, notuntil, notsemicolon, notcolon,
notparen, noeof);
chartype = (illegal, special, chapostrophe, chleftparen, chrightparen,
chperiod, digit, chcolon, chsemicolon, chlessthan, chgreaterthan,
letter, chleftbrace);
{ for reserved word recognition }
resword = ( { reserved words ordered by length }
rwif, rwdo, rwof, rwto, rwin, rwor,
{ length: 2 }
rwend, rwfor, rwvar, rwdiv, rwmod, rwset, rwand, rwnot, rwnil,
{ length: 3 }
rwthen, rwelse, rwwith, rwgoto, rwcase, rwtype, rwfile, rwuses,
rwunit, { length: 4 }
rwbegin, rwuntil, rwwhile, rwarray, rwconst, rwlabel, rwvalue,
{ length: 5 }
rwrepeat, rwrecord, rwdownto, rwpacked,rwmodule,
{ length: 6 }
rwprogram, { length: 7 }
rwfunction, { length: 8 }
rwotherwise,rwprocedure,
{ length: 9 }
rwx); { length: 10 for table sentinel }
rwstring = PACKED ARRAY [1..maxrwlen] OF CHAR;
firstclass = ( { class of word if on new line }
newclause, { start of new clause }
continue, { continuation of clause }
alcomm, { start of aligned comment }
contalcomm, { continuation of aligned comment }
uncomm, { start of unaligned comment }
contuncomm, { continuation of unaligned comment }
stmtlabel); { statement label }
wordtype = RECORD { data record for word }
whenfirst: firstclass; { class of word if on new line }
puncfollows: BOOLEAN; { to reduce dangling punctuation }
blanklncount: natural; { number of preceding blank lines }
spaces: INTEGER; { number of spaces preceding word }
base: -9..maxinlen; { inlinexx.buf[base] precedes word }
size: inrange END; { length of word in inlinexx.buf }
symboltype = ( { symbols for syntax analysis }
semicolon, sybegin, syend,
{ three insertable symbols first }
syif, sydo, syof, sythen, syelse, sygoto, sycase, syuntil, syrepeat,
syrecord, forwhilewith, progprocfunc, declarator, otherword,
othersym, leftparen, rightparen, period, syotherwise, sysubrange,
intconst, colon, ident, comment, syeof);
inserttype = semicolon..syend;
symbolset = SET OF symboltype;
{ *** NOTE: set size of 0..26 REQUIRED for
symbolset! }
VAR
Input,Output : TEXT[$800];
response : STRING[10];
no_error_output : BOOLEAN;
infilename,outfilename : STRING[80];
inlinexx: RECORD { input line data }
endoffile: BOOLEAN; { end of file on input? }
ch: CHAR; { current char, buf[index] }
index: inrange; { subscript of current char }
len: natural; { length of input line in buf }
{ string ';BEGINEND' in buf[-8..0] }
buf: ARRAY [-8..maxinlen] OF CHAR END;
outline: RECORD { output line data }
blanklns: natural; { number of preceding blank lines }
len: outrange; { number of chars in buf }
buf: ARRAY [1..maxoutlen] OF CHAR END;
WORD: wordtype; { current word }
margin: outrange; { left margin }
lnpending: BOOLEAN; { new line before next symbol? }
symbol: symboltype; { current symbol }
{ Structured Constants }
headersyms: symbolset; { headers for program parts }
strucsyms: symbolset; { symbols that begin structured statements }
stmtbeginsyms: symbolset; { symbols that begin statements }
stmtendsyms: symbolset; { symbols that follow statements }
stopsyms: symbolset; { symbols that stop expression scan }
recendsyms: symbolset; { symbols that stop record scan }
datawords: symbolset; { to reduce dangling punctuation }
newword: ARRAY [inserttype] OF wordtype;
instring: PACKED ARRAY [1..9] OF CHAR;
firstrw: ARRAY [1..maxrwlen] OF resword;
rwword: ARRAY [rwif..rwprocedure] OF rwstring;
rwsy: ARRAY [rwif..rwprocedure] OF symboltype;
charclass: ARRAY [CHAR] OF chartype;
{ above is portable form; possible ASCII form
is: }
{ charclass: array [' '..'~'] of chartype;
}
symbolclass: ARRAY [chartype] OF symboltype;
PROCEDURE strucconsts; { establish values of structured constants }
VAR
i: ordminchar..ordmaxchar;
{ loop index }
ch: CHAR; { loop index }
PROCEDURE buildinsert (symbol: inserttype;
inclass: firstclass;
inpuncfollows: BOOLEAN;
inspaces, inbase: INTEGER;
insize: inrange);
BEGIN
WITH newword[symbol] DO BEGIN
whenfirst := inclass;
puncfollows := inpuncfollows;
blanklncount := 0;
spaces := inspaces;
base := inbase;
size := insize END;
END; { buildinsert }
PROCEDURE buildrw (rw: resword;
symword: rwstring;
symbol: symboltype);
BEGIN
rwword[rw] := symword;{ reserved word string }
rwsy[rw] := symbol; { map to symbol }
END; { buildrw }
BEGIN { strucconsts }
{ symbol sets for syntax analysis }
headersyms := [progprocfunc, declarator, sybegin, syeof];
strucsyms := [sycase, syrepeat, syif, forwhilewith];
stmtbeginsyms := strucsyms + [sybegin, ident, sygoto, syotherwise];
stmtendsyms := [semicolon, syend, syuntil, syelse, syeof];
stopsyms := headersyms + strucsyms + stmtendsyms + [sygoto];
recendsyms := [rightparen, syend, syeof];
datawords := [otherword, intconst, ident, syend];
{ words for insertable symbols }
buildinsert (semicolon, continue, FALSE, 0, -9, 1);
buildinsert (sybegin, continue, FALSE, 1, -8, 5);
buildinsert (syend, newclause, TRUE, endspaces, -3, 3);
instring := '; '; {';BEGINEND'}
{ constants for recognizing reserved words }
firstrw[1] := rwif; { length: 1 }
firstrw[2] := rwif; { length: 2 }
buildrw (rwif, 'IF ', syif);
buildrw (rwdo, 'DO ', sydo);
buildrw (rwof, 'OF ', syof);
buildrw (rwto, 'TO ', othersym);
buildrw (rwin, 'IN ', othersym);
buildrw (rwor, 'OR ', othersym);
firstrw[3] := rwend; { length: 3 }
buildrw (rwend, 'END ', syend);
buildrw (rwfor, 'FOR ', forwhilewith);
buildrw (rwvar, 'VAR ', declarator);
buildrw (rwdiv, 'DIV ', othersym);
buildrw (rwmod, 'MOD ', othersym);
buildrw (rwset, 'SET ', othersym);
buildrw (rwand, 'AND ', othersym);
buildrw (rwnot, 'NOT ', othersym);
buildrw (rwnil, 'NIL ', otherword);
firstrw[4] := rwthen; { length: 4 }
buildrw (rwthen, 'THEN ', sythen);
buildrw (rwelse, 'ELSE ', syelse);
buildrw (rwwith, 'WITH ', forwhilewith);
buildrw (rwgoto, 'GOTO ', sygoto);
buildrw (rwcase, 'CASE ', sycase);
buildrw (rwtype, 'TYPE ', declarator);
buildrw (rwfile, 'FILE ', othersym);
buildrw (rwuses, 'USES ', declarator);
buildrw (rwunit, 'UNIT ', declarator);
firstrw[5] := rwbegin; { length: 5 }
buildrw (rwbegin, 'BEGIN ', sybegin);
buildrw (rwuntil, 'UNTIL ', syuntil);
buildrw (rwwhile, 'WHILE ', forwhilewith);
buildrw (rwarray, 'ARRAY ', othersym);
buildrw (rwconst, 'CONST ', declarator);
buildrw (rwlabel, 'LABEL ', declarator);
buildrw (rwvalue, 'VALUE ', declarator);
firstrw[6] := rwrepeat; { length: 6 }
buildrw (rwrepeat, 'REPEAT ', syrepeat);
buildrw (rwrecord, 'RECORD ', syrecord);
buildrw (rwdownto, 'DOWNTO ', othersym);
buildrw (rwpacked, 'PACKED ', othersym);
buildrw (rwmodule, 'MODULE ',progprocfunc);
firstrw[7] := rwprogram; { length: 7 }
buildrw (rwprogram, 'PROGRAM ', progprocfunc);
firstrw[8] := rwfunction;{ length: 8 }
buildrw (rwfunction, 'FUNCTION ', progprocfunc);
firstrw[9] := rwotherwise;
{ length: 9 }
buildrw (rwotherwise, 'OTHERWISE ', syotherwise);
buildrw (rwprocedure, 'PROCEDURE ', progprocfunc);
firstrw[10] := rwx; { length: 10 for table sentinel }
{ constants for lexical scan }
FOR i := ordminchar TO ordmaxchar DO BEGIN
charclass[Chr (i)] := illegal END;
FOR ch := 'a' TO 'z' DO BEGIN
{ !!! implementation-dependent! (but can be
replaced with 52 explicit assignments) }
charclass[ch] := letter;
charclass[UpCase(ch)] := letter END;
charclass['_'] := letter;
charclass['#'] := letter;
FOR ch := '0' TO '9' DO charclass[ch] := digit;
charclass[' '] := special;
charclass['$'] := special;
charclass[''''] := chapostrophe;
charclass['('] := chleftparen;
charclass[')'] := chrightparen;
charclass['*'] := special;
charclass['+'] := special;
charclass['-'] := special;
charclass['.'] := chperiod;
charclass['/'] := special;
charclass[':'] := chcolon;
charclass[';'] := chsemicolon;
charclass['<'] := chlessthan;
charclass['='] := special;
charclass['>'] := chgreaterthan;
charclass['@'] := special;
charclass['['] := special;
charclass[']'] := special;
charclass['^'] := special;
charclass['{'] := chleftbrace;
symbolclass[illegal] := othersym;
symbolclass[special] := othersym;
symbolclass[chapostrophe] := otherword;
symbolclass[chleftparen] := leftparen;
symbolclass[chrightparen] := rightparen;
symbolclass[chperiod] := period;
symbolclass[digit] := intconst;
symbolclass[chcolon] := colon;
symbolclass[chsemicolon] := semicolon;
symbolclass[chlessthan] := othersym;
symbolclass[chgreaterthan] := othersym;
symbolclass[letter] := ident;
symbolclass[chleftbrace] := comment;
END; { strucconsts }
{ writeline/writeerror/readline convert between files and lines. }
PROCEDURE writeline; { write buffer into output file }
VAR
i: outrange; { loop index }
BEGIN
WITH outline DO BEGIN
WHILE blanklns > 0 DO BEGIN
Writeln (Output);
blanklns := blanklns - 1 END;
IF len > 0 THEN BEGIN
FOR i := 1 TO len DO Write (Output, buf[i]);
Writeln (Output);
len := 0 END END;
END; { writeline }
PROCEDURE writeerror (error: errortype);
{ report error to output }
VAR
i, ix: inrange; { loop index, limit }
BEGIN
IF NOT no_error_output THEN BEGIN
writeline;
Write (Output, ' (* !!! error, ');
CASE error OF
longline: Write (Output, 'shorter line');
noendcomm: Write (Output, 'end of comment');
notquote: Write (Output, 'final "''" on line');
longword: Write (Output, 'shorter word');
notdo: Write (Output, '"do"');
notof: Write (Output, '"of"');
notend: Write (Output, '"end"');
notthen: Write (Output, '"then"');
notbegin: Write (Output, '"begin"');
notuntil: Write (Output, '"until"');
notsemicolon: Write (Output, '";"');
notcolon: Write (Output, '":"');
notparen: Write (Output, '")"');
noeof: Write (Output, 'end of file') END;
Write (Output, ' expected');
IF error >= longword THEN BEGIN
Write (Output, ', not "');
WITH inlinexx, WORD DO BEGIN
IF size > maxrwlen THEN ix := maxrwlen
ELSE ix := size;
FOR i := 1 TO ix DO Write (Output, buf[base + i]) END;
Write (Output, '"') END;
IF error = noeof THEN Write (Output, ', FORMATTING STOPS');
Writeln (Output, ' !!! *)');
END
ELSE BEGIN
Write (Con,line_number, ' (* !!! error, ');
CASE error OF
longline: Write (Con, 'shorter line');
noendcomm: Write (Con, 'end of comment');
notquote: Write (Con, 'final "''" on line');
longword: Write (Con, 'shorter word');
notdo: Write (Con, '"do"');
notof: Write (Con, '"of"');
notend: Write (Con, '"end"');
notthen: Write (Con, '"then"');
notbegin: Write (Con, '"begin"');
notuntil: Write (Con, '"until"');
notsemicolon: Write (Con, '";"');
notcolon: Write (Con, '":"');
notparen: Write (Con, '")"');
noeof: Write (Con, 'end of file') END;
Write (Con, ' expected');
IF error >= longword THEN BEGIN
Write (Con, ', not "');
WITH inlinexx, WORD DO BEGIN
IF size > maxrwlen THEN ix := maxrwlen
ELSE ix := size;
FOR i := 1 TO ix DO Write (Con, buf[base + i]) END;
Write (Con, '"') END;
IF error = noeof THEN Write (Con, ', FORMATTING STOPS');
Writeln (Con, ' !!! *)');
END;
END; { writeerror }
PROCEDURE readline; { read line into input buffer }
VAR
c: CHAR; { input character }
nonblank: BOOLEAN; { is char other than space? }
BEGIN
WITH inlinexx DO BEGIN
len := 0;
IF Eof (Input) THEN endoffile := TRUE
ELSE BEGIN { get next line }
WHILE NOT Eoln (Input) DO BEGIN
Read (Input, c);
IF c < ' ' THEN BEGIN
{ convert ASCII control chars (except leading
form feed) to spaces }
IF c = Chr (9) THEN BEGIN
{ ASCII tab char }
c := ' '; { add last space at end }
WHILE len MOD 8 <> 7 DO BEGIN
len := len + 1;
IF len < maxinlen THEN buf[len] := c END;
END { end tab handling }
ELSE IF (c <> Chr (12)) OR (len > 0) THEN c := ' ';
END; { end ASCII control char conversion }
len := len + 1;
IF len < maxinlen THEN buf[len] := c END;
Readln (Input);
line_number := line_number+1;
IF len >= maxinlen THEN BEGIN
{ input line too long }
writeerror (longline);
len := maxinlen - 1 END;
nonblank := FALSE;
REPEAT { trim line }
IF len = 0 THEN nonblank := TRUE
ELSE IF buf[len] <> ' ' THEN nonblank := TRUE
ELSE len := len - 1
UNTIL nonblank END;
len := len + 1; { add exactly ONE trailing blank }
buf[len] := ' ';
index := 0 END;
END; { readline }
{ startword/finishword/copyword convert between lines and words.
auxiliary procedures getchar/nextchar precede. }
PROCEDURE getchar; { get next char from input buffer }
BEGIN
WITH inlinexx DO BEGIN
index := index + 1;
ch := buf[index] END;
END; { getchar }
FUNCTION nextchar: CHAR; { look at next char in input buffer }
BEGIN
WITH inlinexx DO nextchar := buf[index + 1];
END; { nextchar }
PROCEDURE startword (startclass: firstclass);
{ note beginning of word, and count preceding
lines and spaces }
VAR
first: BOOLEAN; { is word the first on input line? }
BEGIN
first := FALSE;
WITH inlinexx, WORD DO BEGIN
whenfirst := startclass;
blanklncount := 0;
WHILE (index >= len) AND NOT endoffile DO BEGIN
IF len = 1 THEN blanklncount := blanklncount + 1;
IF startclass = contuncomm THEN writeline
ELSE first := TRUE;
readline; { with exactly ONE trailing blank }
getchar; { ASCII: if ch = chr (12) then begin [
ASCII form feed char ] writeline; writeln
(output, chr (12)); blanklncount := 0;
getchar end; [ end ASCII form feed
handling }
END;
spaces := 0; { count leading spaces }
IF NOT endoffile THEN BEGIN
WHILE ch = ' ' DO BEGIN
spaces := spaces + 1;
getchar END END;
IF first THEN spaces := 1;
base := index - 1 END;
END; { startword }
PROCEDURE finishword; { note end of word }
BEGIN
WITH inlinexx, WORD DO BEGIN
puncfollows := (symbol IN datawords) AND (ch <> ' ');
size := index - base - 1 END;
END; { finishword }
PROCEDURE copyword (newline: BOOLEAN;
WORD: wordtype); { copy word from input buffer into output
buffer }
VAR
i: INTEGER; { outline.len excess, loop index }
BEGIN
WITH WORD, outline DO BEGIN
i := maxoutlen - len - spaces - size;
IF newline OR (i < 0) OR ((i = 0) AND puncfollows) THEN writeline;
IF len = 0 THEN BEGIN { first word on output line }
blanklns := blanklncount;
CASE whenfirst OF { update LOCAL word.spaces }
newclause: spaces := margin;
continue: spaces := margin + contindent;
alcomm: spaces := alcommbase;
contalcomm: spaces := alcommbase + commindent;
uncomm: spaces := base;
contuncomm: ; { spaces := spaces }
stmtlabel: spaces := initmargin END;
IF spaces + size > maxoutlen THEN BEGIN
spaces := maxoutlen - size;
{ reduce spaces }
IF spaces < 0 THEN BEGIN
writeerror (longword);
size := maxoutlen;
spaces := 0 END END END;
FOR i := 1 TO spaces DO BEGIN
{ put out spaces }
len := len + 1;
buf[len] := ' ' END;
FOR i := 1 TO size DO BEGIN
{ copy actual word }
len := len + 1;
buf[len] := inlinexx.buf[base + i] END END;
END; { copyword }
{ docomment/copysymbol/insert/getsymbol/findsymbol convert between
words and symbols. }
PROCEDURE docomment; { copy aligned or unaligned comment }
PROCEDURE copycomment (commclass: firstclass;
commbase: inrange); { copy words of comment }
VAR
endcomment: BOOLEAN; { end of comment? }
BEGIN
WITH WORD DO BEGIN { copy comment begin symbol }
whenfirst := commclass;
spaces := commbase - outline.len;
copyword ((spaces < 0) OR (blanklncount > 0), WORD) END;
commclass := Succ (commclass);
WITH inlinexx DO BEGIN
REPEAT { loop for successive words }
startword (commclass);
endcomment := endoffile;
{ premature end? }
IF endcomment THEN writeerror (noendcomm)
ELSE BEGIN
REPEAT
IF ch = '*' THEN BEGIN
getchar;
IF ch = ')' THEN BEGIN
endcomment := TRUE;
getchar END END
ELSE IF ch = '}' THEN BEGIN
endcomment := TRUE;
getchar END
ELSE getchar
UNTIL (ch = ' ') OR endcomment END;
finishword;
copyword (FALSE, WORD)
UNTIL endcomment END;
END; { copycomment }
BEGIN { docomment }
IF WORD.base < commthresh THEN BEGIN
{ copy comment without alignment }
copycomment (uncomm, WORD.base) END
ELSE BEGIN { align and format comment }
copycomment (alcomm, alcommbase) END;
END; { docomment }
PROCEDURE copysymbol (symbol: symboltype;
WORD: wordtype); { copy word(s) of symbol }
BEGIN
IF symbol = comment