*/
Do you receive the Programmer's Heaven newsletter? If not, why not subscribe?
*/

View \LZW.PAS

Lempel-Ziv compression in Pascal

Submitted By: WEBMASTER
Rating: (Not rated) (Rate It)


=================================== Cut Here ==================================
PROGRAM LZcompress;
{$D+ }

(*
Lempel-Ziv compression.  Mostly based on Tom Pfau's assembly language
code.

The contents of this file are hereby released to the public domain.

                                    -- Rahul Dhesi  1986/12/31
*)

(*
Converted from C to Pascal August 1988, by Tom Cattrall. The Pascal
version is of course in the public domain as well.

This is as much generic Pascal as I could make it. As a result, it
runs slower because I didn't do any nonstandard bit twiddling.

You may have to adjust the I/O procedures to suit your system.
This code is set up as a filter, it reads from standard input and
writes to standard output. Many will want to add O.S. specific file
manipulation to it.

*)



LABEL
        1;

CONST

  maxBits      =  13;              { max nb code bits }
  maxMax       =  8192;            { max Code + 1 }
  maxMax10     =  8202;            { maxMax + 10 }

  clearCode    =  256;             { code to clear table }
  eofCode      =  257;             { marks end of compressed data}
  firstFreeCode=  258;             { first free code }
 
  checkGap     =  4000;            (* interval at which to check ratio *)

  bufferSize   = 512;              (* size of buffer *)

  inName       = 'testIn';
  outName      = 'testOut';



TYPE

 aTableIndex = 0..maxMax10;

 aTableEntry =  RECORD
                   first,
                   next    : -1..maxMax10;
                   zCh     : 0..255;
                 END;
 
 aStatus = (found, nextUse, firstUse);

 aBuffer = PACKED ARRAY [ 1..bufferSize] OF CHAR;


VAR

  freeCode    : 0..maxMax;
  nBits       : INTEGER;
  maxCode     : INTEGER;
  bitsOut     : INTEGER;
  bitInterval : INTEGER;
 
  bytesIn,
  ratio       : INTEGER;
  ratFlag     : BOOLEAN;

  inOffset    : INTEGER;
  bitOffset   : INTEGER;
  inputLine   : string[255];


  inFile       : TEXT;
  outFile      : FILE OF aBuffer;
  outPtr       : INTEGER;

  outByte1,
  outByte2,
  outByte3     : INTEGER;

  table  : ARRAY[aTableIndex] OF aTableEntry;

   nextCh,
   prefixCode,
   k             : INTEGER;
   where         : INTEGER;

   status        : aStatus;



PROCEDURE  PutByte(b : INTEGER);

BEGIN
   outFile^[outPtr]:= CHR(b);
   IF outPtr >= bufferSize THEN
      BEGIN
        Put(outFile);
        outPtr:= 1;
      END
   ELSE
     outPtr:= outPtr + 1;
END;


PROCEDURE OpenFiles;

BEGIN
  Reset( inFile, inName);
  ReadLn(inFile, inputLine);
  inOffset:= 0;

  Rewrite( outFile, outName);
  outPtr:= 1;
END;


PROCEDURE CloseFiles;

BEGIN
  Close ( inFile );
  PutByte(outByte1);
  PutByte(outByte2);
  IF outPtr > 1 THEN
    Put(outFile);
  Close ( outFile, lock);
END;


PROCEDURE WrCode (code : INTEGER);
VAR
   t,
   i,
   ofsInbyte,
   hiBits,
   byteOffset    : INTEGER;

BEGIN
   bitsOut := bitsOut + nbits;                (* total number of bits written *)
   bitInterval := bitInterval - nbits;
   IF (bitInterval < 0) THEN
      ratFlag := TRUE;                  (* time to check ratio *)

   byteOffset := bitOffset DIV 8;
   ofsInbyte := bitOffset MOD 8;           (* offset within byte *)
   bitOffset := bitOffset + nbits;         (* allowing for new code *)

   IF (ofsInbyte = 0) THEN
      outByte1  := code MOD 256
   ELSE
     BEGIN
       t:= code;
       FOR i:= 1 TO ofsInByte DO
         t:= t + t;
       outByte1 := outByte1 + t MOD 256;
     END;

   hiBits := code;
   FOR i:= 1 TO 8 - ofsInByte DO
     hiBits:= hiBits DIV 2;
   outByte2 := hiBits MOD 256;
   outByte3 := hiBits DIV 256;

   WHILE bitOffset >= 8 DO
     BEGIN
       PutByte(outByte1);
       outByte1:= outByte2;
       outByte2:= outByte3;
       outByte3:= 0;
       bitOffset:= bitOffset - 8;
     END;

END

PROCEDURE InitCtab;
VAR
    i : INTEGER;
   
BEGIN
   bytesIn := 0;
   bitsOut := 0;
   ratio := 0;
   ratFlag := FALSE;
   bitInterval := checkGap;
   nbits := 9;
   maxCode := 512;
   FOR i := 0 TO maxMax + 1 DO
     BEGIN
      table[i].zCh := 0;
      table[i].first := -1;
      table[i].next := -1;
     END;
   freeCode := firstFreeCode;
END;

FUNCTION  RdCh(VAR c : INTEGER) : BOOLEAN;
VAR
    count  : INTEGER;

BEGIN
   bytesIn:= bytesIn + 1;
   RdCh:= TRUE;
   IF (inOffset > length(inputLine) ) THEN
     BEGIN
      IF eof(inFile) THEN
        RdCh:= FALSE
      ELSE
        BEGIN
          ReadLn(inFile, inputLine);
          inOffset:= 1;
          if length(inputLine) > 0 THEN
            c:= ORD( inputLine[inOffset] )
          ELSE
            c:= 10;
        END;
     END
   ELSE
     BEGIN
       inOffset:= inOffset + 1;
       IF inOffset > length(inputLine) THEN
         c:= 10
       else
         c:= ORD( inputLine[inOffset] );
     END;
END;

PROCEDURE  CheckRatio;

BEGIN
   bitInterval := CHECKGAP;
   bitsOut := 0;
   bytesIn := 0;
   ratFlag := FALSE;
END;

PROCEDURE AddCode (status: aStatus;
                   ch, index : INTEGER);
VAR
      t   : INTEGER;

BEGIN
   IF freeCode >= maxMax THEN
     t:= -1
   ELSE
     t:= freeCode;
   IF (status = nextUse) THEN
      table[index].next := t
   else                 (* else must be firstUse *)
      table[index].first := t;

   IF (freeCode <= maxMax) THEN
     BEGIN
      table[freeCode].first := -1;
      table[freeCode].next := -1;
      table[freeCode].zCh := ch MOD 256;
      freeCode:= freeCode + 1;
     END;
END;

FUNCTION LookupCode (index, ch : INTEGER;
                    VAR where : INTEGER) : aStatus;

(* index     where to start looking *)
(* ch        char to look for *)
(* where     last entry looked at *)
VAR
    t  : aStatus;

BEGIN
   where := index;
   index := table[index].first;
   t:= firstUse;
   IF (index <> -1) THEN
     BEGIN
      WHILE t = firstUse DO
        BEGIN
         IF ((table[index].zCh MOD 256) = (ch MOD 256)) THEN
           BEGIN
             where := index;
             t:= found;
           END
         ELSE
           BEGIN
             where := index;
             index := table[index].next;
             IF (index = -1) THEN
                t:= nextUse;
           END;
        END; (* end WHILE *)
     END(* end IF *)
  LookupCode:= t;
END;



BEGIN  { LZcompress }

   OpenFiles;

   bitOffset := 0;

   outByte1:= 0;
   outByte2:= 0;
   outByte3:= 0;


   InitCtab;
   WrCode(clearCode);
   IF NOT RdCh(nextCh) THEN
     BEGIN
       WrCode (eofCode);
       GOTO 1;                   (* normal return from compress *)
     END;

   (* compression loop begins here with nextCh holding the next input char *)

   WHILE TRUE DO
     BEGIN
       IF ratFlag THEN
          CheckRatio;
       nextCh := nextCh MOD 256;                       (* turn character to code *)
       REPEAT
         prefixCode := nextCh;
         IF NOT RdCh(nextCh) THEN
           BEGIN
             WrCode (prefixCode);
             WrCode (eofCode);
             GOTO 1;                (* normal return from compress *)
           END;
         nextCh := nextCh MOD 256;                        (* force to 8 bits *)
     
         k := nextCh;
         status := LookupCode (prefixCode, nextCh, where);
         IF (status = FOUND) THEN
            nextCh := where;                     (* where found *)
       UNTIL status <> found;
   
   
       (* reach here with status := firstUse or nextUse *)
       AddCode (status, nextCh, where);
   
       WrCode (prefixCode);
       nextCh := k;
   
       IF (freeCode > maxCode) THEN
         BEGIN
           IF (nbits >= MAXBITS) THEN
             BEGIN   (* To continue using table after it is full, remove next two lines *)
               WrCode (clearCode);
               InitCtab;
             END
           ELSE
             BEGIN
               nbits := nbits + 1;
               maxCode := maxCode + maxCode;
             END;
         END{ IF }
     END{ WHILE }
1:  CloseFiles;
END.
=============================== Cut here ======================================
program LZdeCompress;
{$D+}

(*
Lempel-Ziv decompression.  Mostly based on Tom Pfau's assembly language
code.  The contents of this file are hereby released to the public domain.
                                 -- Rahul Dhesi 1986/11/14
*)

(*
Converted from C to Pascal August 1988, by Tom Cattrall. The Pascal
version is of course in the public domain as well.

This is as much generic Pascal as I could make it. As a result, it
runs slower because I didn't do any nonstandard bit twiddling.

You may have to adjust the I/O procedures to suit your system.
This code is set up as a filter, it reads from standard input and
writes to standard output. Many will want to add O.S. specific file
manipulation to it.

*)


LABEL
         1;

CONST

  stackSize    =  4000;       (* size of decode stack  *)
  maxBits      =  13;              { max nb code bits }
  maxMax       =  8192;            { max Code + 1 }
  maxMax10     =  8202;            { maxMax + 10 }

  clearCode    =  256;             { code to clear table }
  eofCode      =  257;             { marks end of compressed data}
  firstFreeCode=  258;             { first free code }

  inSize       = 512;              (* size of input buffer *)
  outSize      = 519;              (* size of output buffer *)

  inName       = 'testOut';
  outName      = 'testOutD';

 

TYPE

     aTableIndex = 0..maxMax10;

     aTableEntry = RECORD
                     next  : INTEGER;
                     zCh   : INTEGER;
                   END;

     anInputBuffer = PACKED ARRAY [1..inSize] OF 0..255;



VAR


   i,
   t    : INTEGER;

   table : ARRAY[aTableIndex] OF aTableEntry;

   curCode,
   oldCode,
   inCode      : INTEGER;

   freeCode,
   nbits,
   maxCode     : INTEGER;

   finChar,
   k           : INTEGER;

   inByte1,
   inByte2,
   inByte3      : INTEGER;

   bitOffset,
   outputOffset : INTEGER;

   stackPointer : 0..stackSize;
   stack        : ARRAY[0..stackSize] OF INTEGER;

   inFile       : FILE OF anInputBuffer;
   outFile      : TEXT;

   inPtr        : INTEGER;
   inBuffer     : anInputBuffer;

   mask         : ARRAY[1..maxBits] OF INTEGER;

PROCEDURE OpenFiles;

BEGIN
  Reset(inFile, inName);
  inPtr:= inSize + 1;
  Rewrite(outFile, outName);
END;


PROCEDURE CloseFiles;

BEGIN
  Close(inFile);
  Close(outFile, LOCK);
END;


PROCEDURE Push(x : INTEGER);

BEGIN
  stack[stackPointer]:= x;
  IF stackPointer <= stackSize THEN
    stackPointer:= stackPointer + 1
  ELSE
    BEGIN
      WriteLn('Stack Overflow');
      GOTO 1;
    END;
END;



FUNCTION Pop : INTEGER;

BEGIN
  IF stackPointer >= 1 THEN
    BEGIN
      stackPointer:= stackPointer - 1;
      Pop:= stack[stackPointer];
    END
  ELSE
    BEGIN
      WriteLn('Stack Underflow');
      GOTO 1;
    END;
END;


FUNCTION GetByte : INTEGER;

BEGIN
  IF inPtr > inSize THEN
    IF NOT EOF(inFile) THEN
      BEGIN
        READ(inFile, inBuffer);
        inPtr:= 1;
      END;
  IF inPtr <= inSize THEN
    BEGIN
      GetByte:= inBuffer[inPtr];
      inPtr:= inPtr + 1;
    END
  ELSE
    GetByte:= -1;
END;



(* RdDecode reads a code from the input (compressed) file and returns its value. *)


FUNCTION RdDecode : INTEGER;
VAR
   t,
   i,
   hiBits,
   byteOffset  : INTEGER;
   ofsInbyte   : INTEGER;               (* offset within byte *)

BEGIN
   WHILE bitOffset >= 8 DO
     BEGIN
       inByte1:= inByte2;
       inByte2:= inByte3;
       inByte3:= GetByte;
       bitOffset:= bitOffset - 8;
     END;

   ofsInbyte := bitOffset MOD 8;
   byteOffset := bitOffset DIV 8;
   bitOffset := bitOffset + nbits;

   IF inByte1 < 0 THEN
     BEGIN
       WriteLn('Unexpected EOF on input, corrupt compressed file');
       GOTO 1;
     END;

   t:= inByte1;
   FOR i:= 1 TO ofsInByte DO
     t:= t DIV 2;
   hiBits:= inByte2 + 256 * inByte3;
   FOR i:= 1 TO 8 - ofsInByte DO
     hiBits:= hiBits + hiBits;
   t:= (hiBits AND mask[nBits]) + t;
   RdDecode:= t;
END; (* RdDecode *)

PROCEDURE InitDTab;

BEGIN
   nbits := 9;
   maxCode := 512;
   freeCode := firstFreeCode;
END;

PROCEDURE WrDChar (ch : INTEGER);

BEGIN
  IF ch <> 10 THEN
    Write(outFile, CHR(ch))
  ELSE
    WriteLn(outFile);
END(* WrDChar *)



(* adds a code to table *)
PROCEDURE AddDcode;

BEGIN
   table[freeCode].zCh := k;                (* save suffix char *)
   table[freeCode].next := oldCode;         (* save prefix code *)
   freeCode:= freeCode + 1;
   IF (freeCode >= maxCode) THEN
     BEGIN
      IF (nbits < MAXBITS) THEN
        BEGIN
         nbits:= nbits + 1;
         maxCode := maxCode + maxCode;
        END;
     END;
END;


BEGIN   { LZdCompress }
   nbits := 9;
   maxCode := 512;
   freeCode := firstFreeCode;
   stackPointer := 0;
   bitOffset := 24;
   outputOffset := 0;

   t:= 1;
   FOR i:= 1 TO maxBits DO  (* Prepare bit mask array *)
     BEGIN
       mask[i]:= t;
       t:= t + t + 1;
     END;
   
   OpenFiles;

   InitDTab;             (* initialize table *)

   WHILE TRUE DO
     BEGIN
       curCode := RdDecode;
       IF (curCode = eofCode) THEN
         GOTO 1;
   
       IF (curCode = clearCode) THEN
         BEGIN
           InitDTab;
           curCode := RdDecode;
           finChar := curCode;
           k := curCode;
           oldCode := curCode;
           WrDChar(k);
         END
       ELSE
         BEGIN
           inCode := curCode;
           IF (curCode >= freeCode) THEN     (* IF code not in table (k<w>k<w>k) *)
            BEGIN
              curCode := oldCode;             (* previous code becomes current *)
              Push(finChar);
            END;
       
           WHILE (curCode > 255) DO              (* IF code, not character *)
            BEGIN
              Push(table[curCode].zCh);         (* push suffix char *)
              curCode := table[curCode].next;    (* <w> := <w>.code *)
            END;
       
           finChar := curCode;
           k := finChar;
           Push(k);
           WHILE (stackPointer <> 0) DO
             WrDChar(Pop);
           AddDCode;
           oldCode := inCode;
         END{ ELSE }
       
     END;   { WHILE }

1 : CloseFiles;
END.
====================================== Cut Here ===============================
As an aid to getting this running on your system, here are some dumps of
a file before and after compression. If you can't get a file to return
to its original form after doing a compress followed by an uncompress, it
often isn'
t clear which half to blame. These dumps should help you place
the blame. The text consists of the first 10 lines of this file. Linefeeds
separate the lines.

============== Input file (uncompressed) 258 bytes long ===========

00000000 3d3d 3d3d 3d3d 3d3d 3d3d 3d3d 3d3d 3d3d
00000020 3d3d 3d3d 3d3d 3d3d 3d3d 3d3d 3d3d 3d3d
00000040 3d3d 3d20 4375 7420 4865 7265 203d 3d3d
00000060 3d3d 3d3d 3d3d 3d3d 3d3d 3d3d 3d3d 3d3d
00000100 3d3d 3d3d 3d3d 3d3d 3d3d 3d3d 3d3d 3d0a
00000120 5052 4f47 5241 4d20 4c5a 636f 6d70 7265
00000140 7373 3b0a 7b24 442b 207d 0a0a 282a 0a4c
00000160 656d 7065 6c2d 5a69 7620 636f 6d70 7265
00000200 7373 696f 6e2e 2020 4d6f 7374 6c79 2062
00000220 6173 6564 206f 6e20 546f 6d20 5066 6175
00000240 2773 2061 7373 656d 626c 7920 6c61 6e67
00000260 7561 6765 0a63 6f64 652e 200a 0a54 6865
00000300 2063 6f6e 7465 6e74 7320 6f66 2074 6869
00000320 7320 6669 6c65 2061 7265 2068 6572 6562
00000340 7920 7265 6c65 6173 6564 2074 6f20 7468
00000360 6520 7075 626c 6963 2064 6f6d 6169 6e2e
00000400 0a0a
============== Output file (compressed)  38 bytes long ==========

00000000 494f 5245 5355 4c54 2045 5252 4f52 0a61
00000020 7420 6c6f 6361 7469 6f6e 2030 7830 3030
00000040 3030 3131 450a

corner
© 1996-2008 CommunityHeaven LLC. All rights reserved. Reproduction in whole or in part, in any form or medium without express written permission is prohibited.
Violators of this policy may be subject to legal action. Please read our Terms Of Use and Privacy Statement for more information.
North American business development: Nicolai Wadstrom. Publisher: Lars Hagelin.