Do you receive the Programmer's Heaven newsletter? If not, why not subscribe?
*/
*/

View \WORDS.PAS

SPELCHEK Version 1.2 - A *FAST* spelling checker

Submitted By: WEBMASTER
Rating: Not rated (Rate It)


{$A+,B-,D-,E-,F-,G-,I-,L-,N-,O-,R-,S-,V+,X+}
{$M 6144,8192,655360}
Program Words;
{ WORDS - A word extracter program.  Copyright 1990,91 by Edwin T. Floyd. }
Uses Dos, Crt, Token, PairHeap;

Const
  WordChar = ['a'..'z','A'..'Z']; { Default WordSet }
  DefaultOutput = '';             { Default output filename (''=stdout) }
  BufSize = 4096;                 { I/O buffer size }

Type
  SetOpType = (Union, Intersection, Complement);
  SetOfChar = Set Of Char;
  SortEntryType = Object(HeapEntry)
  { Data structure used for sorting }
    Token : Word;
  End;
  SortHeapType = Object(Heap)
  { PairHeap compare function override }
    Function Less(Var x, y : HeapEntry) : Boolean; Virtual;
  End;
  FileEntryPtr = ^FileEntry;
  FileEntry = Record
  { Input file name list entry }
    NextFile : FileEntryPtr;
    FileName : PathStr;
  End;

Const
  FileList : FileEntryPtr = Nil;       { File name list (head) }
  LastFile : FileEntryPtr = Nil;       { File name list (tail) }
  HashTab : PToken = Nil;              { Hash table pointer }
  TestTab : PToken = Nil;              { Test hash table pointer }
  WordCount : LongInt = 0;             { Total number of words examined }
  ReturnCode : Word = 0;               { Return code for Halt }
  WordSet : SetOfChar = WordChar;      { Words are made of these }
  SetOp : SetOpType = Union;           { Set operation }
  Alphabetize : Boolean = False;       { If true, sort output words }
  LowerCase : Boolean = False;         { If true, case is significant }
  HighOrder : Boolean = False;         { If true, clear high-order bits }
  SuppressOutput : Boolean = False;    { If true, do not write output file }
  OutOfMemory : Boolean = False;       { Set true by HandleHeapError }
  Aborted : Boolean = False;           { True if operator aborted }
  OutName : PathStr = DefaultOutput;   { Output file name }

Var
  OldMem : LongInt;                    { Original value of MemAvail }
  SortHeap : SortHeapType;             { Sorter object }
  TextFile : File;                     { Input/Output file }
  TextBuf : Array[1..BufSize] Of Char; { I/O buffer for TextFile }

{$S+}

Function ProcessParameter(s : String) : Boolean; Forward;

Function ParseParamString(s : String) : Boolean;
{ Extract parameters from a string and process them; return True if all OK. }
Var
  i, j : Word;
  ParamsOk : Boolean;
Begin
  ParamsOk := True;
  While (s <> '') And (s[Length(s)] = ' ') Do Dec(s[0]);
  While s <> '' Do Begin
    i := 1;
    While (i <= Length(s)) And (s[i] = ' ') Do Inc(i);
    j := Succ(i);
    While (j <= Length(s)) And (s[j] <> ' ') Do Inc(j);
    If Not ProcessParameter(Copy(s, i, j - i)) Then ParamsOk := False;
    Delete(s, 1, Pred(j));
  End;
  ParseParamString := ParamsOk;
End;

Function ProcessParameter(s : String) : Boolean;
{ Process command line parameter or file name; return True if OK. }
Var
  ThisFile : FileEntryPtr;
  IncludeFile : Text;
  ParamOk : Boolean;
  i, j : Word;
  IoRes : Integer;

  Procedure GetFiles(Var s : String);
  Var
    Path : PathStr;
    Dir : DirStr;
    Name : NameStr;
    Ext : ExtStr;
    Search : SearchRec;
  Begin
    Path := FExpand(s);
    FSplit(Path, Dir, Name, Ext);
    FindFirst(Path, Archive, Search);
    If DosError <> 0 Then Begin
      WriteLn('No files match ', s);
      ParamOk := False;
    End;
    While DosError = 0 Do Begin
      Path := Dir + Search.Name;
      ThisFile := FileList;
      While (ThisFile <> Nil) And (ThisFile^.FileName <> Path) Do
        ThisFile := ThisFile^.NextFile;
      If ThisFile = Nil Then Begin
        New(ThisFile);
        If ThisFile <> Nil Then Begin
          With ThisFile^ Do Begin
            NextFile := Nil;
            FileName := Path;
          End;
          If LastFile = Nil Then FileList := ThisFile
          Else LastFile^.NextFile := ThisFile;
          LastFile := ThisFile;
        End;
      End Else WriteLn('Already in list: ', Path);
      FindNext(Search);
    End;
  End;

Begin
  ParamOk := True;
  If (s[1] = '-') Or (s[1] = '/') Then Case UpCase(s[2]) Of
    'U' : SetOp := Union;
    'I' : SetOp := Intersection;
    'C' : SetOp := Complement;
    'A' : If s[3] = '-' Then Alphabetize := False Else Alphabetize := True;
    'L' : If s[3] = '-' Then LowerCase := False Else LowerCase := True;
    'H' : If s[3] = '-' Then HighOrder := False Else HighOrder := True;
    'O' : Begin
      Delete(s, 1, 2);
      For i := 1 To Length(s) Do s[i] := UpCase(s[i]);
      If (s <> '') And ((s[1] = '-') Or (s = 'NUL')) Then Begin
        SuppressOutput := True;
        OutName := '-';
      End Else Begin
        SuppressOutput := False;
        If s = '' Then OutName := s Else OutName := FExpand(s);
      End;
    End;
    'W' : Begin
      Delete(s, 1, 2);
      Case s[1] Of
        '+' : ;
        '-' : WordSet := [];
        Else Begin
          WriteLn('WordSet (-W) option must be followed by + or -.');
          ParamOk := False;
        End;
      End;
      Delete(s, 1, 1);
      For i := 1 To Length(s) Do
        WordSet := WordSet + [s[i]];
    End;
    Else Begin
      WriteLn('Unrecognized option: ', s);
      ParamOk := False;
    End;
  End Else If s[1] = '@' Then Begin
    Delete(s, 1, 1);
    For i := 1 To Length(s) Do s[i] := UpCase(s[i]);
    Assign(IncludeFile, s);
    Reset(IncludeFile);
    IoRes := IoResult;
    If IoRes = 0 Then Begin
      WriteLn('Processing include file ', s);
      Repeat
        ReadLn(IncludeFile, s);
        IoRes := IoResult;
        If IoRes = 0 Then If Not ParseParamString(s) Then ParamOk := False;
      Until Eof(IncludeFile) Or (IoRes <> 0);
      If IoRes <> 0 Then Begin
        WriteLn('Error ', IoRes, ' reading include file');
        ParamOk := False;
      End;
      Close(IncludeFile);
      IoRes := IoResult;
    End Else Begin
      WriteLn('Error ', IoRes, ' opening include file ', s);
      ParamOk := False;
    End;
  End Else GetFiles(s);
  ProcessParameter := ParamOk;
End;

Procedure ParseParams;
{ Interpret environment and command line parameters; display Help info. }
Var
  i, j : Word;
  ParamsOk : Boolean;
  Ch : Char;
  s : String;
Begin
  WriteLn('WORDS v1.2 - A word extractor program.  Copyright (c) 1990,91 by Edwin T. Floyd.');
  ParamsOk := True;
  If Not ParseParamString(GetEnv('WORDS')) Then Begin
    WriteLn('Error found in SET WORDS=.. environment string');
    ParamsOk := False;
  End;
  For i := 1 To ParamCount Do Begin
    FillChar(s[1], 255, ' ');
    s := ParamStr(i);
    If Not ProcessParameter(s) Then ParamsOk := False;
  End;
  If Not ParamsOk Then Begin
    WriteLn('At least one parameter was in error.  Run WORDS with no parameters');
    WriteLn('to see documentation.');
    Halt(1);
  End Else If FileList = Nil Then Begin
    WriteLn;
    WriteLn('  WORDS filenames.. [-U/-I/-C] [-A] [-L] [-H] [-W[+/-]abc..] [-Oname] [@name]' );
    WriteLn;
    WriteLn('All command line parameters are separated by spaces.  Input text filenames');
    WriteLn('and options may be intermixed; options are distinguished by a leading hyphen:');
    WriteLn;
    WriteLn('  -U, -I or -C specifies the set operation to be performed on the extracted');
    WriteLn('  words from the files.  The operations are:');
    WriteLn('    -U Union:        Keep all unique words from any input file (default);');
    WriteLn('    -I Intersection: Keep unique words common to all files;');
    WriteLn('    -C Complement:   Keep unique words from second and subsequent files only');
    WriteLn('                     if they are not contained in the first file.');
    WriteLn('  -A[-] Sort output words alphabetically (default off).');
    WriteLn('  -H[-] Clear high-order bits on input file (i.e. WordStar, default off).');
    WriteLn('  -L[-] Lower case is significant (default off).');
    WriteLn('  -W-abc.. Replace the word character set with the indicated characters');
    WriteLn('     (default is all alphabetic characters, upper and lower case).');
    WriteLn('  -W+abc.. Add additional characters to the word character set.');
    WriteLn('  -O[name] Name the output file (default is name omitted => stdout).');
    WriteLn('  -O- Suppress output (counts are still displayed on screen).');
    WriteLn;
    WriteLn('The "@" prefixes the name of an ASCII include file which may contain');
    Write('filenames, options, and nested include files, in any order.    ');
    Ch := ReadKey;
    WriteLn;
    WriteLn;
    WriteLn('You may use the DOS "SET" command to specify default parameters.  Examples:');
    WriteLn;
    WriteLn('  SET WORDS=-U -A+ -L+ -Owords.out -W-ABCDEFGHIJKLMNOPQRSTUVWXYZ');
    WriteLn('  SET WORDS=@defaults.wrd -O');
    WriteLn;
    WriteLn('Command line parameters override "SET" parameters.  WORDS examples:');
    WriteLn;
    WriteLn('  WORDS oldwords.lst document.txt -W+-'' -C -Onewwords.lst');
    WriteLn('  WORDS @filename.lst -I -Oallwords.txt');
    WriteLn('  WORDS file1.txt -A+ -U -L- -O | nextprog');
    WriteLn;
    WriteLn('WORDS was written by:');
    WriteLn;
    WriteLn('  Edwin T. Floyd         [76067,747]  (CompuServe)');
    WriteLn('  #9 Adams Park Court    404/576-3305 (work)');
    WriteLn('  Columbus, GA 31909     404/322-0076 (home)');
    Halt(0);
  End Else Begin
    Case SetOp Of
      Union : s := '-U';
      Intersection : s := '-I';
      Complement : s := '-C';
    End;
    If Alphabetize Then ch := '+' Else ch := '-';
    s := s + ' -A' + ch;
    If LowerCase Then ch := '+' Else ch := '-';
    s := s + ' -L' + ch;
    If HighOrder Then ch := '+' Else ch := '-';
    s := s + ' -H' + ch;
    OldMem := MemAvail;
    WriteLn('Options: ', s, ' -O', OutName, ', ',
      OldMem Shr 10, 'k free.');
    WriteLn('Press <Esc> to stop.');
  End;
End;

{$S-}

Function SortHeapType.Less(Var x, y : HeapEntry) : Boolean;
{ Sort compare function override }
Var
  xx : SortEntryType Absolute x;
  yy : SortEntryType Absolute y;
Begin
  Less := HashTab^.TokenAddress(xx.Token)^ < HashTab^.TokenAddress(yy.Token)^;
End;

Function ParseInputBlock(Len : Word) : Word;
{ Insert words from input block into hash table }
Var
  Words : Word;
  t : TokenString;
  i, Toss : Word;
Begin
  i := 1;
  Words := 0;
  While i <= Len Do Begin
    t := '';
    While (i <= Len) And Not (TextBuf[i] In WordSet) Do Inc(i);
    If i <= Len Then Begin
      While (i <= Len) And (Length(t) < TokenStringSize)
      And (TextBuf[i] In WordSet) Do Begin
        Inc(t[0]);
        If LowerCase Then t[Ord(t[0])] := TextBuf[i]
        Else t[Ord(t[0])] := UpCase(TextBuf[i]);
        Inc(i);
      End;
      Inc(Words);
      Case SetOp Of
        Union : Toss := HashTab^.TokenInsertText(t);
        Intersection : If (TestTab <> Nil) And (TestTab^.TextToken(t) <> 0) Then
          Toss := HashTab^.TokenInsertText(t);
        Complement : If (TestTab <> Nil) And (TestTab^.TextToken(t) = 0) Then
          Toss := HashTab^.TokenInsertText(t);
      End;
    End;
  End;
  ParseInputBlock := Words;
End;

Procedure ProcessNextFile;
{ Open and process the next input file pointed to by FileList. }
Var
  ThisFile : FileEntryPtr;
  TempTab : PToken;
  FileWords : LongInt;
  i, MaxLen, Len : Word;
  FileResult : Integer;
Begin
  ThisFile := FileList;
  With ThisFile^ Do Begin
    Write(FileName, ': ');
    Assign(TextFile, FileName);
    Reset(TextFile, 1);
    FileResult := IoResult;
    If FileResult = 0 Then Begin
      If HashTab = Nil Then New(HashTab, Init);
      Len := 0;
      FileWords := 0;
      Repeat
        BlockRead(TextFile, TextBuf[Succ(Len)], BufSize-Len, i);
        FileResult := IoResult;
        If FileResult = 0 Then Begin
          MaxLen := Len + i;
          If HighOrder Then For i := Succ(Len) To MaxLen Do
            TextBuf[i] := Chr(Ord(TextBuf[i]) And $7F);
          Len := MaxLen;
          If Not Eof(TextFile) Then Begin
            While (Len > 0) And (TextBuf[Len] In WordSet) Do Dec(Len);
            If (Len = 0) Then Len := MaxLen;
          End;
          FileWords := FileWords + ParseInputBlock(Len);
          MaxLen := MaxLen - Len;
          If MaxLen > 0 Then
            Move(TextBuf[Succ(Len)], TextBuf[1], MaxLen);
          Len := MaxLen;
          Write(^M, FileName, ': ', FileWords, ' words, ',
            HashTab^.TokMaxToken, ' kept, ', (OldMem-MemAvail) Shr 10, 'k');
          While KeyPressed Do If ReadKey = ^[ Then Aborted := True;
        End;
      Until Eof(TextFile) Or (FileResult <> 0) Or OutOfMemory Or Aborted;
      Close(TextFile);
      WriteLn(^M, FileName, ': ', FileWords, ' words, ',
        HashTab^.TokMaxToken, ' kept, ', (OldMem-MemAvail) Shr 10, 'k');
      WordCount := WordCount + FileWords;
    End Else WriteLn('Unable to open input file ', FileName);
    If FileResult <> 0 Then Begin
      WriteLn('Error ', FileResult);
      Inc(ReturnCode);
    End;
    FileList := NextFile;
    If SetOp = Intersection Then Begin
      TempTab := TestTab;
      TestTab := HashTab;
      HashTab := TempTab;
      If HashTab <> Nil Then Begin
        Dispose(HashTab, Done);
        HashTab := Nil;
      End;
    End;
  End;
  Dispose(ThisFile);
End;

Procedure ProcessFirstFile;
{ Process the first input file. }
Var
  TempTab : PToken;
  Op : SetOpType;
Begin
  Op := SetOp;
  SetOp := Union;
  ProcessNextFile;
  SetOp := Op;
  If SetOp In [Intersection, Complement] Then Begin
    TempTab := TestTab;
    TestTab := HashTab;
    HashTab := TempTab;
  End;
End;

Procedure SortWords;
{ Write words to output file, optionally sorted. }
Var
  SortEntry : ^SortEntryType;
  FileResult : Integer;
  i : Word;
  OutFile : Text;
Begin
  If SuppressOutput Then WriteLn('Output suppressed') Else Begin
    Assign(OutFile, OutName);
    SetTextBuf(OutFile, TextBuf);
    ReWrite(OutFile);
    FileResult := IoResult;
    If FileResult = 0 Then Begin
      If Alphabetize Then With SortHeap Do Begin
        Init;
        For i := 1 To HashTab^.TokMaxToken Do Begin
          New(SortEntry);
          If SortEntry <> Nil Then Begin
            SortEntry^.Token := i;
            Insert(SortEntry^);
          End;
        End;
        If OutOfMemory Then Begin
          WriteLn('Sort suppressed due to insufficient memory');
          Alphabetize := False;
          Inc(ReturnCode);
        End;
      End;
      If Alphabetize Then With SortHeap Do Begin
        Write('Sorting and writing ', EntryCount, ' words to ');
        If OutName = '' Then Write('<stdout>') Else Write(OutName);
        WriteLn(', ', (OldMem-MemAvail) Shr 10, 'k');
        For i := 1 To EntryCount Do Begin
          SortEntry := DeleteLowEntry;
          If FileResult = 0 Then Begin
            WriteLn(OutFile, HashTab^.TokenAddress(SortEntry^.Token)^);
            FileResult := IoResult;
          End;
        End;
      End Else Begin
        Write('Writing ', HashTab^.TokMaxToken, ' words to ');
        If OutName = '' Then WriteLn('<stdout>') Else WriteLn(OutName);
        For i := 1 To HashTab^.TokMaxToken Do If FileResult = 0 Then Begin
          WriteLn(OutFile, HashTab^.TokenAddress(i)^);
          FileResult := IoResult
        End;
      End;
      If FileResult <> 0 Then Begin
        WriteLn('Error ', FileResult, ' writing file ', OutName);
        Inc(ReturnCode);
      End;
      Close(OutFile);
      FileResult := IoResult;
      If FileResult <> 0 Then Begin
        WriteLn('Error ', FileResult, ' closing file ', OutName);
        Inc(ReturnCode);
      End;
    End Else WriteLn('Error ', FileResult, ' opening file ', OutName);
  End;
End;

{$F+}
Function HandleHeapError(Size : Word) : Integer;
Begin
  If Size > 0 Then Begin
    HandleHeapError := 1;
    OutOfMemory := True;
  End;
End;
{$F-}

Begin
  FileMode := $40;
  HeapError := @HandleHeapError;
  OldMem := MemAvail;
  ParseParams;
  ProcessFirstFile;
  While (FileList <> Nil) And Not (OutOfMemory Or Aborted) Do ProcessNextFile;
  If OutOfMemory Then Begin
    WriteLn('Input file processing terminated due to insufficient memory');
    WriteLn('Words collected so far will be written to output file');
    Inc(ReturnCode);
  End;
  If Aborted Then Begin
    WriteLn('File processing aborted by operator');
    SuppressOutput := True;
    Inc(ReturnCode);
  End;
  If SetOp = Intersection Then Begin
    HashTab := TestTab;
    TestTab := Nil;
  End Else If Alphabetize And Not SuppressOutput Then Begin
    WriteLn('Maximizing free memory for sort');
    If TestTab <> Nil Then Dispose(TestTab, Done);
    TestTab := Nil;
  End;
  WriteLn('Final Counts: ', WordCount, ' words examined, ',
    HashTab^.TokMaxToken, ' kept, ', (OldMem-MemAvail) Shr 10, 'k in use');
  OutOfMemory := False;
  SortWords;
  WriteLn('Done!');
  Halt(ReturnCode);
End.

corner
© 1996-2008 CommunityHeaven LLC. All rights reserved. Reproduction in whole or in part, in any form or medium without express written permission is prohibited.
Violators of this policy may be subject to legal action. Please read our Terms Of Use and Privacy Statement for more information.
North American business development: Nicolai Wadstrom. Publisher: Lars Hagelin.
Resource Listings