Lempel-Ziv-Welch compression method (pascal)
Submitted By:
Unknown
Rating:
(Not rated) (
Rate It)
(*$R-,V-,S-,I-*)
PROGRAM PibCompr;
(*--------------------------------------------------------------------------*)
(* *)
(* Program: PibCompr *)
(* *)
(* Purpose: Compresses a file using the Lempel-Ziv-Welch approach. *)
(* *)
(* Author: Philip R. Burns. April 30, 1988. *)
(* *)
(* Use: PIBCOMPR inputfile outputfile *)
(* *)
(* inputfile --- the input file to be compressed *)
(* outputfile --- the output compressed file *)
(* *)
(* Remarks: *)
(* *)
(* PibCompr implements the Lempel-Ziv file compression algorithm. *)
(* (Files compressed by PibCommpr are uncompressed by PibDComp.) *)
(* It operates by finding common substrings and replaces them *)
(* with a fixed-length 12-bit code. This is deterministic, and *)
(* can be done with a single pass over the file. Thus, *)
(* the decompression procedure needs no input table, but *)
(* can track the way the table was built. *)
(* *)
(* Algorithm: *)
(* *)
(* *)
(* This section is abstracted from Terry Welch's article *)
(* referenced below. The algorithm builds a string *)
(* translation table that maps substrings in the input *)
(* into fixed-length codes. The compress algorithm may *)
(* be described as follows: *)
(* *)
(* 1. Initialize table to contain single-character strings. *)
(* 2. Read the first character. Set <w> (the prefix string) *)
(* to that character. *)
(* 3. (step): Read next input character, C. *)
(* 4. If at end of file, output code(<w>); exit. *)
(* 5. If <w>C is in the string table: *)
(* Set <w> to <w>C; goto step 3. *)
(* 6. Else <w>C is not in the string table. *)
(* Output code(<w>); *)
(* Put <w>C into the string table; *)
(* Set <w> to C; Goto step 3. *)
(* *)
(* "At each execution of the basic step an acceptable input *)
(* string <w> has been parsed off. The next character C is *)
(* read and the extended string <w>C is tested to see if it *)
(* exists in the string table. If it is there, then the *)
(* extended string becomes the parsed string <w> and the *)
(* step is repeated. If <w>C is not in the string table, *)
(* then it is entered, the code for the successfully *)
(* parsed string <w> is put out as comprssed data, the *)
(* character K becomes the beginning of the next string, *)
(* and the step is repeated." *)
(* *)
(* Reference: *)
(* *)
(* "A Technique for High Performance Data Compression", *)
(* Terry A. Welch, IEEE Computer, *)
(* vol. 17, no. 6 (June 1984), pp. 8-19. *)
(* *)
(* Note: The hashing algorithm used here isn't very good, and *)
(* should be replaced by a better one. *)
(* *)
(* Usage note: *)
(* *)
(* You may use this program in any way you see fit without *)
(* restriction. I'd appreciate a citation if you do use this *)
(* code in a program you distribute. *)
(* *)
(*--------------------------------------------------------------------------*)
(*$I PIBLZW.DEF *)
(*$I PIBLZW.INC *)
(*--------------------------------------------------------------------------*)
(* Put_Code --- Write hash code to output file. *)
(*--------------------------------------------------------------------------*)
PROCEDURE Put_Code( Hash_Code : INTEGER );
BEGIN (* Put_Code *)
(* Output code word is empty. *)
(* Put out 1st 8 bits of compression *)
(* code and save last 4 bit for next *)
(* time through. *)
IF ( Output_Code = Empty ) THEN
BEGIN
Put_Char( ( Hash_Code SHR 4 ) AND $FF );
Output_Code := Hash_Code AND $0F;
END
ELSE
(* Output code word not empty. *)
(* Put out last 4 bits of previous *)
(* code appended to 1st 4 bits of this *)
(* code. Then put out last 8 bits of *)
(* this code. *)
BEGIN
Put_Char( ( ( Output_Code SHL 4 ) AND $FF0 ) +
( ( Hash_Code SHR 8 ) AND $00F ) ) ;
Put_Char( Hash_Code AND $FF );
Output_Code := Empty;
END;
END (* Put_Code *);
(*--------------------------------------------------------------------------*)
(* Do_Compression --- Perform Lempel-Ziv-Welch compression *)
(*--------------------------------------------------------------------------*)
PROCEDURE Do_Compression;
VAR
C : INTEGER (* Current input character = C *);
WC : INTEGER (* Hash code value for <w>C *);
W : INTEGER (* Hash code value for <w> *);
BEGIN (* Do_Compression *)
(* Read first character ==> Step 2 *)
Get_Char( C );
(* Initial hash code -- first character *)
(* has no previous string (<w> is null) *)
W := Lookup_String( No_Prev , C );
(* Get next character ==> Step 3 *)
Get_Char( C );
(* Loop over input characters until *)
(* end of file reached ==> Step 4. *)
WHILE( C <> EOF_Char ) DO
BEGIN
(* See if <w>C is in table. *)
WC := Lookup_String( W , C );
(* If <w>C is not in the table, *)
(* enter it into the table and *)
(* output <w>. Reset <w> to *)
(* be the code for C ==> Step 6 *)
IF ( WC = End_List ) THEN
BEGIN
Make_Table_Entry( W , C );
Put_Code( W );
W := Lookup_String( No_Prev , C );
END
ELSE (* If <w>C is in table, keep looking *)
(* for longer strings == Step 5 *)
W := WC;
(* Get next input character ==> Step 3 *)
Get_Char( C );
END;
(* Make sure last code is *)
(* written out ==> Step 4. *)
Put_Code( W );
END (* Do_Compression *);
(*--------------------------------------------------------------------------*)
(* PibCompr --- Main program *)
(*--------------------------------------------------------------------------*)
BEGIN (* PibCompr *)
(* We are doing compression *)
If_Compressing := TRUE;
(* Initialize compression *)
Initialize;
(* Perform compression *)
Do_Compression;
(* Clean up and exit *)
Terminate;
END (* PibCompr *).