*/
If you have a PH account, you can customize your PH profile.
*/

View \LZH.PAS

This Pascal unit allows the user to commpress data using a

Submitted By: WEBMASTER
Rating: starstarstarstarstar (Rate It)


unit lzh;
{$A+,B-,D+,E-,F-,I+,L+,N-,O+,R-,S-,V-}
(*
 * LZHUF.C English version 1.0
 * Based on Japanese version 29-NOV-1988
 * LZSS coded by Haruhiko OKUMURA
 * Adaptive Huffman Coding coded by Haruyasu YOSHIZAKI
 * Edited and translated to English by Kenji RIKITAKE
 * Translated from C to Turbo Pascal by Douglas Webb   2/18/91
 *)


{
     This unit allows the user to commpress data using a combination of
   LZSS compression and adaptive Huffman coding, or conversely to decompress
   data that was previously compressed by this unit.

     There are a number of options as to where the data being compressed/
   decompressed is coming from/going to.

    In fact it requires that you pass the "LZHPack" procedure 2 procedural
  parameter of type 'GetProcType' and 'PutProcType' (declared below) which
  will accept 3 parameters and act in every way like a 'BlockRead'/'BlockWrite'
  procedure call. Your 'GetProcType' procedure should return the data
  to be compressed, and Your 'PutProcType' procedure should do something with
  the compressed data (ie., put it in a file).  In case you need to know (and
  you do if you want to decompress this data again) the number of bytes in the
  compressed data (original, not compressed size) is returned in 'Bytes_Written'.

  GetBytesProc = PROCEDURE(VAR DTA; NBytes:WORD; VAR Bytes_Got : WORD);
 
  DTA is the start of a memory location where the information returned should
  be.  NBytes is the number of bytes requested.  The actual number of bytes
  returned must be passed in Bytes_Got (if there is no more data then 0
  should be returned).

  PutBytesProc = PROCEDURE(VAR DTA; NBytes:WORD; VAR Bytes_Got : WORD);

  As above except instead of asking for data the procedure is dumping out
  compressed data, do somthing with it.


    "LZHUnPack" is basically the same thing in reverse.  It requires
  procedural parameters of type 'PutProcType'/'GetProcType' which
  will act as above.  'GetProcType' must retrieve data compressed using
  "LZHPack" (above) and feed it to the unpacking routine as requested.
  'PutProcType' must accept the decompressed data and do something
  withit.  You must also pass in the original size of the decompressed data,
  failure to do so will have adverse results.


     Don't forget that as procedural parameters the 'GetProcType'/'PutProcType'
  procedures must be compiled in the 'F+' state to avoid a catastrophe.



}


{ Note: All the large data structures for these routines are allocated when
  needed from the heap, and deallocated when finished.  So when not in use
  memory requirements are minimal.  However, this unit uses about 34K of
  heap space, and 400 bytes of stack when in use. }



interface

TYPE


  PutBytesProc = PROCEDURE(VAR DTA; NBytes:WORD; VAR Bytes_Put : WORD);
  GetBytesProc = PROCEDURE(VAR DTA; NBytes:WORD; VAR Bytes_Got : WORD);



Procedure LZHPack(VAR Bytes_Written:LongInt; GetBytes:GetBytesProc; PutBytes:PutBytesProc);
Procedure LZHUnpack(TextSize : Longint; GetBytes:GetBytesProc; PutBytes: PutBytesProc);




implementation

CONST
  EXIT_OK = 0;
  EXIT_FAILED = 1;
{ LZSS Parameters }
  N          = 4096;      { Size of string buffer }
  F          = 60;        { Size of look-ahead buffer }
  THRESHOLD     = 2;
  NUL      = N;       { End of tree's node  }
  N_CHAR   =    (256 - THRESHOLD + F);
                { character code (:= 0..N_CHAR-1) }
  T        =        (N_CHAR * 2 - 1);       { Size of table }
  R        =        (T - 1);                { root position }
  MAX_FREQ =    $8000;
                                        { update when cumulative frequency }
                                        { reaches to this value }
{
 * Tables FOR encoding/decoding upper 6 bits of
 * sliding dictionary pointer
 }

{ encoder table }
  p_len : Array[0..63] of BYTE =
       ($03, $04, $04, $04, $05, $05, $05, $05,
        $05, $05, $05, $05, $06, $06, $06, $06,
        $06, $06, $06, $06, $06, $06, $06, $06,
        $07, $07, $07, $07, $07, $07, $07, $07,
        $07, $07, $07, $07, $07, $07, $07, $07,
        $07, $07, $07, $07, $07, $07, $07, $07,
        $08, $08, $08, $08, $08, $08, $08, $08,
        $08, $08, $08, $08, $08, $08, $08, $08);

  p_code : Array [0..63] OF BYTE =
       ($00, $20, $30, $40, $50, $58, $60, $68,
        $70, $78, $80, $88, $90, $94, $98, $9C,
        $A0, $A4, $A8, $AC, $B0, $B4, $B8, $BC,
        $C0, $C2, $C4, $C6, $C8, $CA, $CC, $CE,
        $D0, $D2, $D4, $D6, $D8, $DA, $DC, $DE,
        $E0, $E2, $E4, $E6, $E8, $EA, $EC, $EE,
        $F0, $F1, $F2, $F3, $F4, $F5, $F6, $F7,
        $F8, $F9, $FA, $FB, $FC, $FD, $FE, $FF);

{ decoder table }
  d_code: Array [0..255] OF BYTE =
       ($00, $00, $00, $00, $00, $00, $00, $00,
        $00, $00, $00, $00, $00, $00, $00, $00,
        $00, $00, $00, $00, $00, $00, $00, $00,
        $00, $00, $00, $00, $00, $00, $00, $00,
        $01, $01, $01, $01, $01, $01, $01, $01,
        $01, $01, $01, $01, $01, $01, $01, $01,
        $02, $02, $02, $02, $02, $02, $02, $02,
        $02, $02, $02, $02, $02, $02, $02, $02,
        $03, $03, $03, $03, $03, $03, $03, $03,
        $03, $03, $03, $03, $03, $03, $03, $03,
        $04, $04, $04, $04, $04, $04, $04, $04,
        $05, $05, $05, $05, $05, $05, $05, $05,
        $06, $06, $06, $06, $06, $06, $06, $06,
        $07, $07, $07, $07, $07, $07, $07, $07,
        $08, $08, $08, $08, $08, $08, $08, $08,
        $09, $09, $09, $09, $09, $09, $09, $09,
        $0A, $0A, $0A, $0A, $0A, $0A, $0A, $0A,
        $0B, $0B, $0B, $0B, $0B, $0B, $0B, $0B,
        $0C, $0C, $0C, $0C, $0D, $0D, $0D, $0D,
        $0E, $0E, $0E, $0E, $0F, $0F, $0F, $0F,
        $10, $10, $10, $10, $11, $11, $11, $11,
        $12, $12, $12, $12, $13, $13, $13, $13,
        $14, $14, $14, $14, $15, $15, $15, $15,
        $16, $16, $16, $16, $17, $17, $17, $17,
        $18, $18, $19, $19, $1A, $1A, $1B, $1B,
        $1C, $1C, $1D, $1D, $1E, $1E, $1F, $1F,
        $20, $20, $21, $21, $22, $22, $23, $23,
        $24, $24, $25, $25, $26, $26, $27, $27,
        $28, $28, $29, $29, $2A, $2A, $2B, $2B,
        $2C, $2C, $2D, $2D, $2E, $2E, $2F, $2F,
        $30, $31, $32, $33, $34, $35, $36, $37,
        $38, $39, $3A, $3B, $3C, $3D, $3E, $3F);

 d_len: Array[0..255] of BYTE =
       ($03, $03, $03, $03, $03, $03, $03, $03,
        $03, $03, $03, $03, $03, $03, $03, $03,
        $03, $03, $03, $03, $03, $03, $03, $03,
        $03, $03, $03, $03, $03, $03, $03, $03,
        $04, $04, $04, $04, $04, $04, $04, $04,
        $04, $04, $04, $04, $04, $04, $04, $04,
        $04, $04, $04, $04, $04, $04, $04, $04,
        $04, $04, $04, $04, $04, $04, $04, $04,
        $04, $04, $04, $04, $04, $04, $04, $04,
        $04, $04, $04, $04, $04, $04, $04, $04,
        $05, $05, $05, $05, $05, $05, $05, $05,
        $05, $05, $05, $05, $05, $05, $05, $05,
        $05, $05, $05, $05, $05, $05, $05, $05,
        $05, $05, $05, $05, $05, $05, $05, $05,
        $05, $05, $05, $05, $05, $05, $05, $05,
        $05, $05, $05, $05, $05, $05, $05, $05,
        $05, $05, $05, $05, $05, $05, $05, $05,
        $05, $05, $05, $05, $05, $05, $05, $05,
        $06, $06, $06, $06, $06, $06, $06, $06,
        $06, $06, $06, $06, $06, $06, $06, $06,
        $06, $06, $06, $06, $06, $06, $06, $06,
        $06, $06, $06, $06, $06, $06, $06, $06,
        $06, $06, $06, $06, $06, $06, $06, $06,
        $06, $06, $06, $06, $06, $06, $06, $06,
        $07, $07, $07, $07, $07, $07, $07, $07,
        $07, $07, $07, $07, $07, $07, $07, $07,
        $07, $07, $07, $07, $07, $07, $07, $07,
        $07, $07, $07, $07, $07, $07, $07, $07,
        $07, $07, $07, $07, $07, $07, $07, $07,
        $07, $07, $07, $07, $07, $07, $07, $07,
        $08, $08, $08, $08, $08, $08, $08, $08,
        $08, $08, $08, $08, $08, $08, $08, $08);

  getbuf : WORD = 0;
  getlen : BYTE = 0;
  putlen : BYTE = 0;
  putbuf : WORD = 0;
  textsize : longint = 0;
  codesize : longINT = 0;
  printcount : longint = 0;
  match_position : Integer = 0;
  match_length : Integer = 0;


TYPE
  Freqtype = Array[0..T] OF WORD;
  FreqPtr = ^freqtype;
  PntrType = Array[0..T+N_Char] OF Integer;
  pntrPtr = ^pntrType;
  SonType = Array[0..T] OF Integer;
  SonPtr = ^SonType;


  TextBufType = Array[0..N+F-1] OF BYTE;
  TBufPtr = ^TextBufType;
  WordRay = Array[0..N+1] OF Integer;
  WordRayPtr = ^WordRay;
  BWordRay = Array[0..N+257] OF Integer;
  BWordRayPtr = ^BWordRay;

VAR
  text_buf : TBufPtr;
  lson,dad : WordRayPtr;
  rson : BWordRayPtr;
  freq : FreqPtr;       { cumulative freq table }

{
 * pointing parent nodes.
 * area [T..(T + N_CHAR - 1)] are pointers FOR leaves
 }

  prnt : PntrPtr;

{ pointing children nodes (son[], son[] + 1)}
  son : SonPtr;



Procedure InitTree;  { Initializing tree }

VAR
  i : integer;
BEGIN
  FOR i := N + 1 TO N + 256 DO
        rson^[i] := NUL;                        { root }
  FOR i := 0 TO N DO
        dad^[i] := NUL;   { node }
END;





Procedure InsertNode(r : Integer){ Inserting node to the tree }

VAR
  tmp,i, p, cmp : Integer;
  key : TBufPtr;
  c : WORD;

BEGIN
     cmp := 1;
     key := @text_buf^[r];
     p := SUCC(N) + key^[0];
     rson^[r] := NUL;
     lson^[r] := NUL;
     match_length := 0;
     WHILE match_length < F DO
       BEGIN
         IF (cmp >= 0) THEN
           BEGIN
             IF (rson^[p] <> NUL) THEN
               p := rson^[p]
             ELSE
               BEGIN
                 rson^[p] := r;
                 dad^[r] := p;
                 exit;
               END;
           END
         ELSE
           BEGIN
             IF (lson^[p] <> NUL) THEN
               p := lson^[p]
             ELSE
               BEGIN
                 lson^[p] := r;
                 dad^[r] := p;
                 exit;
               END;
           END;
         i := 0;
         cmp := 0;
         While (i < F) AND (cmp = 0) DO
           BEGIN
             inc(i);
             cmp := key^[i] - text_buf^[p + i];
           END;
         IF (i > THRESHOLD) THEN
           BEGIN
             tmp := PRED((r - p) AND PRED(N));
             IF (i > match_length) THEN
               BEGIN
                 match_position := tmp;
                 match_length := i;
               END;
             IF (match_length < F) AND (i = match_length) THEN
               BEGIN
                 c := tmp;
                 IF (c < match_position) THEN
                   match_position := c;
               END;
           END;
       END;                { WHILE TRUE DO }
     dad^[r] := dad^[p];
     lson^[r] := lson^[p];
     rson^[r] := rson^[p];
     dad^[lson^[p]] := r;
     dad^[rson^[p]] := r;
     IF (rson^[dad^[p]] = p) THEN
       rson^[dad^[p]] := r
     ELSE
       lson^[dad^[p]] := r;
     dad^[p] := NUL;  { remove p }
END;




Procedure DeleteNode(p: Integer){ Deleting node from the tree }

VAR
  q : Integer;

BEGIN
  IF (dad^[p] = NUL) THEN
    exit;                     { unregistered }
  IF (rson^[p] = NUL) THEN
    q := lson^[p]
  ELSE IF (lson^[p] = NUL) THEN
    q := rson^[p]
  ELSE
    BEGIN
      q := lson^[p];
      IF (rson^[q] <> NUL) THEN
        BEGIN
          REPEAT
            q := rson^[q];
          UNTIL (rson^[q] = NUL);
          rson^[dad^[q]] := lson^[q];
          dad^[lson^[q]] := dad^[q];
          lson^[q] := lson^[p];
          dad^[lson^[p]] := q;
        END;
      rson^[q] := rson^[p];
      dad^[rson^[p]] := q;
    END;
  dad^[q] := dad^[p];
  IF (rson^[dad^[p]] = p) THEN
    rson^[dad^[p]] := q
  ELSE
    lson^[dad^[p]] := q;
  dad^[p] := NUL;
END;


{ Huffman coding parameters }


Function GetBit(GetBytes:GetBytesProc): Integer;        { get one bit }
VAR
  i: BYTE;
  i2 : Integer;
  result : Word;

BEGIN
  WHILE (getlen <= 8) DO
    BEGIN
      GetBytes(i,1,Result);
      If Result = 1 THEN
        i2 := i
      ELSE i2 := 0;
      getbuf := getbuf OR (i2 SHL (8 - getlen));
      INC(getlen,8);
    END;
  i2 := getbuf;
  getbuf := getbuf SHL 1;
  DEC(getlen);
  getbit := INTEGER((i2 < 0));
END;




Function GetByte(GetBytes:GetBytesProc): Integer;       { get a byte }

VAR
  j : BYTE;
  i,result : WORD;
BEGIN
  WHILE (getlen <= 8) DO
    BEGIN
      GetBytes(j,1,result);
      If Result = 1 THEN
        i := j
      ELSE
        i := 0;
      getbuf := getbuf OR (i SHL (8 - getlen));
      INC(getlen,8);
    END;
  i := getbuf;
  getbuf := getbuf SHL 8;
  DEC(getlen,8);
  getbyte := integer(i SHR 8);
END;





PROCEDURE Putcode(l : Integer; c: WORD;PutBytes:PutBytesProc);    { output c bits }
VAR
  Temp : BYTE;
  Got : WORD;
BEGIN
  putbuf := putbuf OR (c SHR putlen);
  inc(putlen,l);
  IF (putlen >= 8) THEN
    BEGIN
      Temp := putbuf SHR 8;
      PutBytes(Temp,1,Got);
      DEC(putlen,8);
      IF (putlen  >= 8) THEN
        BEGIN
          Temp := Lo(PutBuf);
          PutBytes(Temp,1,Got);
          INC(codesize,2);
          DEC(putlen,8);
          putbuf := c SHL (l - putlen);
        END
      ELSE
        BEGIN
          putbuf := putbuf SHL 8;
          INC(codesize);
        END;
    END;
END;



{ initialize freq tree }

Procedure StartHuff;
VAR
  i, j : Integer;
BEGIN
  FOR i := 0 to PRED(N_CHAR) DO
    BEGIN
      freq^[i] := 1;
      son^[i] := i + T;
      prnt^[i + T] := i;
    END;
  i := 0;
  j := N_CHAR;
  WHILE (j <= R) DO
    BEGIN
      freq^[j] := freq^[i] + freq^[i + 1];
      son^[j] := i;
      prnt^[i] := j;
      prnt^[i + 1] := j;
      INC(i,2);
      INC(j);
    END;
  freq^[T] := $ffff;
  prnt^[R] := 0;
END;




{ reconstruct freq tree }

PROCEDURE reconst;
VAR
 i, j, k, tmp : Integer;
 f, l : WORD;
BEGIN
 { halven cumulative freq FOR leaf nodes }
  j := 0;
  FOR i := 0 to PRED(T) DO
    BEGIN
      IF (son^[i] >= T) THEN
        BEGIN
          freq^[j] := SUCC(freq^[i]) MOD 2;
          son^[j] := son^[i];
          INC(j);
        END;
    END;
  { make a tree : first, connect children nodes }
  i := 0;
  j := N_CHAR;
  WHILE (j < T) DO
    BEGIN
      k := SUCC(i);
      f := freq^[i] + freq^[k];
      freq^[j] := f;
      k := PRED(j);
      WHILE f < freq^[k] DO
        DEC(K);
      INC(k);
      l := (j - k) SHL 1;
      tmp := SUCC(k);
      move(freq^[k], freq^[tmp], l);
      freq^[k] := f;
      move(son^[k], son^[tmp], l);
      son^[k] := i;
      INC(i,2);
      INC(j);
    END;
        { connect parent nodes }
  FOR i := 0 to PRED(T) DO
    BEGIN
      k := son^[i];
      IF (k >= T) THEN
        BEGIN
          prnt^[k] := i;
        END
      ELSE
        BEGIN
          prnt^[k] := i;
          prnt^[SUCC(k)] := i;
        END;
    END;
END;


{ update freq tree }

Procedure update(c : Integer);
VAR
  i, j, k, l : Integer;
BEGIN
  IF (freq^[R] = MAX_FREQ) THEN
    BEGIN
      reconst;
    END;
  c := prnt^[c + T];
  REPEAT
    INC(freq^[c]);
    k := freq^[c];

        { swap nodes to keep the tree freq-ordered }
   l := SUCC(C);
   IF (k > freq^[l]) THEN
     BEGIN
       WHILE (k > freq^[l]) DO
         INC(l);
       DEC(l);
       freq^[c] := freq^[l];
       freq^[l] := k;

       i := son^[c];
       prnt^[i] := l;
       IF (i < T) THEN prnt^[SUCC(i)] := l;

       j := son^[l];
       son^[l] := i;

       prnt^[j] := c;
       IF (j < T) THEN prnt^[SUCC(j)] := c;
       son^[c] := j;

       c := l;
     END;
   c := prnt^[c];
 UNTIL (c = 0){ REPEAT it until reaching the root }
END;


VAR
  code, len : WORD;




PROCEDURE EncodeChar(c: WORD;PutBytes:PutBytesProc);
VAR
  i : WORD;
  j, k : Integer;
BEGIN
  i := 0;
  j := 0;
  k := prnt^[c + T];

        { search connections from leaf node to the root }
  REPEAT
    i := i SHR 1;

        {
        IF node's address is odd, output 1
        ELSE output 0
        }

    IF BOOLEAN(k AND 1) THEN INC(i,$8000);
    INC(j);
    k := prnt^[k];
  UNTIL (k = R);
  Putcode(j, i,PutBytes);
  code := i;
  len := j;
  update(c);
END;



Procedure EncodePosition(c : WORD;PutBytes:PutBytesProc);
VAR
  i,j : WORD;
BEGIN
        { output upper 6 bits with encoding }
  i := c SHR 6;
  j := p_code[i];
  Putcode(p_len[i],j SHL 8,PutBytes);

        { output lower 6 bits directly }
  Putcode(6, (c AND $3f) SHL 10,PutBytes);
END;



Procedure EncodeEnd(PutBytes:PutBytesProc);
VAR
  Temp : BYTE;
  Got : WORD;
BEGIN
  IF BOOLEAN(putlen) THEN
    BEGIN
      Temp := Lo(putbuf SHR 8);
      PutBytes(Temp,1,Got);
      INC(codesize);
    END;
END;





FUNCTION DecodeChar(GetBytes:GetBytesProc)