Main Page | Class List | Directories | File List | Class Members | File Members

unlzw.c

Go to the documentation of this file.
00001 /* unlzw.c -- decompress files in LZW format.
00002  * The code in this file is directly derived from the public domain 'compress'
00003  * written by Spencer Thomas, Joe Orost, James Woods, Jim McKie, Steve Davies,
00004  * Ken Turkowski, Dave Mack and Peter Jannesen.
00005  *
00006  * This is a temporary version which will be rewritten in some future version
00007  * to accommodate in-memory decompression.
00008  */
00009 
/* Revision-control identification string, compiled in only when RCSID
 * is defined by the build.
 */
#if defined(RCSID)
static char rcsid[] = "$Id: unlzw.c,v 1.5 2006/12/11 18:54:39 eggert Exp $";
#endif
00013 
00014 #include <config.h>
00015 #include "tailor.h"
00016 
00017 #ifdef HAVE_UNISTD_H
00018 #  include <unistd.h>
00019 #endif
00020 #ifdef HAVE_FCNTL_H
00021 #  include <fcntl.h>
00022 #endif
00023 
00024 #include "gzip.h"
00025 #include "lzw.h"
00026 
/* Integer types used by the LZW decoder. */
typedef unsigned char       char_type;     /* one byte of input or output */
typedef long int            code_int;      /* LZW code; signed, since -1 marks "no previous code" */
typedef unsigned long int   count_int;
typedef unsigned short int  count_short;
typedef unsigned long int   cmp_code_int;  /* unsigned counterpart of code_int, used for comparisons */

/* MAXCODE(n) yields 2 to the power n as a long. */
#define MAXCODE(n)  (1L << (n))
00034 
/* REGISTERS is the number of local variables that should carry the
 * `register` storage class (default 2).  REGn expands to `register`
 * when n <= REGISTERS and to nothing otherwise.
 */
#if !defined(REGISTERS)
#  define REGISTERS 2
#endif

#if REGISTERS >= 1
#  define REG1  register
#else
#  define REG1
#endif
#if REGISTERS >= 2
#  define REG2  register
#else
#  define REG2
#endif
#if REGISTERS >= 3
#  define REG3  register
#else
#  define REG3
#endif
#if REGISTERS >= 4
#  define REG4  register
#else
#  define REG4
#endif
#if REGISTERS >= 5
#  define REG5  register
#else
#  define REG5
#endif
#if REGISTERS >= 6
#  define REG6  register
#else
#  define REG6
#endif
#if REGISTERS >= 7
#  define REG7  register
#else
#  define REG7
#endif
#if REGISTERS >= 8
#  define REG8  register
#else
#  define REG8
#endif
#if REGISTERS >= 9
#  define REG9  register
#else
#  define REG9
#endif
#if REGISTERS >= 10
#  define REG10 register
#else
#  define REG10
#endif
#if REGISTERS >= 11
#  define REG11 register
#else
#  define REG11
#endif
#if REGISTERS >= 12
#  define REG12 register
#else
#  define REG12
#endif
#if REGISTERS >= 13
#  define REG13 register
#else
#  define REG13
#endif
#if REGISTERS >= 14
#  define REG14 register
#else
#  define REG14
#endif
#if REGISTERS >= 15
#  define REG15 register
#else
#  define REG15
#endif
#if REGISTERS >= 16
#  define REG16 register
#else
#  define REG16
#endif
00118 
/* Defaults for the platform knobs consulted below: with BYTEORDER 0000
 * and NOALLIGN 0 the byte-wise version of input() is selected.
 */
#if !defined(BYTEORDER)
#  define BYTEORDER 0000
#endif

#if !defined(NOALLIGN)
#  define NOALLIGN 0
#endif
00126 
00127 
/* A long overlaid with its individual bytes.  The order of b1..b4 depends
 * on BYTEORDER; for any value other than 4321 or 1234 the macro is
 * undefined again and the struct holds only a placeholder member.
 */
union bytes {
    long word;
    struct {
#if BYTEORDER == 4321
        char_type b1;
        char_type b2;
        char_type b3;
        char_type b4;
#elif BYTEORDER == 1234
        char_type b4;
        char_type b3;
        char_type b2;
        char_type b1;
#else
#  undef BYTEORDER
        int dummy;
#endif
    } bytes;
};
00149 
/* input(b,o,c,n,m): fetch the next code from a bit stream.
 *   b  byte buffer holding the compressed data
 *   o  bit offset of the code inside b; advanced by n afterwards
 *   c  lvalue that receives the extracted code
 *   n  width of the code in bits
 *   m  mask applied to the extracted value
 * The arguments are evaluated more than once, so they must be free of
 * side effects.  The body is wrapped in do { } while (0) so that
 * `input(...);` behaves as one statement, including in an unbraced
 * if/else.
 */
#if BYTEORDER == 4321 && NOALLIGN == 1
   /* Reads a long directly from the byte address of the code. */
#  define input(b,o,c,n,m) do { \
     (c) = (*(long *)(&(b)[(o)>>3])>>((o)&0x7))&(m); \
     (o) += (n); \
   } while (0)
#else
   /* Assembles three consecutive bytes, least significant byte first. */
#  define input(b,o,c,n,m) do { \
     REG1 char_type *p = &(b)[(o)>>3]; \
     (c) = ((((long)(p[0]))|((long)(p[1])<<8)| \
     ((long)(p[2])<<16))>>((o)&0x7))&(m); \
     (o) += (n); \
   } while (0)
#endif
00163 
/* Accessors for the LZW string table.
 *   tab_prefixof(i)       prefix code stored for entry i
 *   tab_suffixof(i)       final byte stored for entry i
 *   clear_tab_prefixof()  zero the first 256 bytes of prefix storage
 *                         (NOTE(review): a byte count; with ush entries
 *                         that would be 128 entries -- confirm intent)
 *   de_stack              one-past-the-top address of the stack on which
 *                         strings are built back to front (last element
 *                         of d_buf)
 * clear_tab_prefixof() expands to an expression without a trailing
 * semicolon, so the caller's own `;` ends the statement and the macro is
 * usable in an unbraced if/else.
 */
#ifndef MAXSEG_64K
   /* DECLARE(ush, tab_prefix, (1<<BITS)); -- prefix code */
#  define tab_prefixof(i) tab_prefix[i]
#  define clear_tab_prefixof()  memzero(tab_prefix, 256)
#else
   /* DECLARE(ush, tab_prefix0, (1<<(BITS-1)); -- prefix for even codes */
   /* DECLARE(ush, tab_prefix1, (1<<(BITS-1)); -- prefix for odd  codes */
   ush *tab_prefix[2];
#  define tab_prefixof(i) tab_prefix[(i)&1][(i)>>1]
#  define clear_tab_prefixof()  \
      (memzero(tab_prefix0, 128), \
       memzero(tab_prefix1, 128))
#endif
#define de_stack        ((char_type *)(&d_buf[DIST_BUFSIZE-1]))
#define tab_suffixof(i) tab_suffix[i]
00179 
00180 int block_mode = BLOCK_MODE; /* block compress mode -C compatible with 2.0 */
00181 
00182 /* ============================================================================
00183  * Decompress in to out.  This routine adapts to the codes in the
00184  * file building the "string" table on-the-fly; requiring no table to
00185  * be stored in the compressed file.
00186  * IN assertions: the buffer inbuf contains already the beginning of
00187  *   the compressed data, from offsets iptr to insize-1 included.
00188  *   The magic header has already been checked and skipped.
00189  *   bytes_in and bytes_out have been initialized.
00190  */
00191 int unlzw(in, out)
00192     int in, out;    /* input and output file descriptors */
00193 {
00194     REG2   char_type  *stackp;
00195     REG3   code_int   code;
00196     REG4   int        finchar;
00197     REG5   code_int   oldcode;
00198     REG6   code_int   incode;
00199     REG7   long       inbits;
00200     REG8   long       posbits;
00201     REG9   int        outpos;
00202 /*  REG10  int        insize; (global) */
00203     REG11  unsigned   bitmask;
00204     REG12  code_int   free_ent;
00205     REG13  code_int   maxcode;
00206     REG14  code_int   maxmaxcode;
00207     REG15  int        n_bits;
00208     REG16  int        rsize;
00209 
00210 #ifdef MAXSEG_64K
00211     tab_prefix[0] = tab_prefix0;
00212     tab_prefix[1] = tab_prefix1;
00213 #endif
00214     maxbits = get_byte();
00215     block_mode = maxbits & BLOCK_MODE;
00216     if ((maxbits & LZW_RESERVED) != 0) {
00217         WARN((stderr, "\n%s: %s: warning, unknown flags 0x%x\n",
00218               program_name, ifname, maxbits & LZW_RESERVED));
00219     }
00220     maxbits &= BIT_MASK;
00221     maxmaxcode = MAXCODE(maxbits);
00222 
00223     if (maxbits > BITS) {
00224         fprintf(stderr,
00225                 "\n%s: %s: compressed with %d bits, can only handle %d bits\n",
00226                 program_name, ifname, maxbits, BITS);
00227         exit_code = ERROR;
00228         return ERROR;
00229     }
00230     rsize = insize;
00231     maxcode = MAXCODE(n_bits = INIT_BITS)-1;
00232     bitmask = (1<<n_bits)-1;
00233     oldcode = -1;
00234     finchar = 0;
00235     outpos = 0;
00236     posbits = inptr<<3;
00237 
00238     free_ent = ((block_mode) ? FIRST : 256);
00239 
00240     clear_tab_prefixof(); /* Initialize the first 256 entries in the table. */
00241 
00242     for (code = 255 ; code >= 0 ; --code) {
00243         tab_suffixof(code) = (char_type)code;
00244     }
00245     do {
00246         REG1 int i;
00247         int  e;
00248         int  o;
00249 
00250     resetbuf:
00251         e = insize-(o = (posbits>>3));
00252 
00253         for (i = 0 ; i < e ; ++i) {
00254             inbuf[i] = inbuf[i+o];
00255         }
00256         insize = e;
00257         posbits = 0;
00258 
00259         if (insize < INBUF_EXTRA) {
00260             rsize = read_buffer (in, (char *) inbuf + insize, INBUFSIZ);
00261             if (rsize == -1) {
00262                 read_error();
00263             }
00264             insize += rsize;
00265             bytes_in += (off_t)rsize;
00266         }
00267         inbits = ((rsize != 0) ? ((long)insize - insize%n_bits)<<3 :
00268                   ((long)insize<<3)-(n_bits-1));
00269 
00270         while (inbits > posbits) {
00271             if (free_ent > maxcode) {
00272                 posbits = ((posbits-1) +
00273                            ((n_bits<<3)-(posbits-1+(n_bits<<3))%(n_bits<<3)));
00274                 ++n_bits;
00275                 if (n_bits == maxbits) {
00276                     maxcode = maxmaxcode;
00277                 } else {
00278                     maxcode = MAXCODE(n_bits)-1;
00279                 }
00280                 bitmask = (1<<n_bits)-1;
00281                 goto resetbuf;
00282             }
00283             input(inbuf,posbits,code,n_bits,bitmask);
00284             Tracev((stderr, "%d ", code));
00285 
00286             if (oldcode == -1) {
00287                 if (256 <= code)
00288                   gzip_error ("corrupt input.");
00289                 outbuf[outpos++] = (char_type)(finchar = (int)(oldcode=code));
00290                 continue;
00291             }
00292             if (code == CLEAR && block_mode) {
00293                 clear_tab_prefixof();
00294                 free_ent = FIRST - 1;
00295                 posbits = ((posbits-1) +
00296                            ((n_bits<<3)-(posbits-1+(n_bits<<3))%(n_bits<<3)));
00297                 maxcode = MAXCODE(n_bits = INIT_BITS)-1;
00298                 bitmask = (1<<n_bits)-1;
00299                 goto resetbuf;
00300             }
00301             incode = code;
00302             stackp = de_stack;
00303 
00304             if (code >= free_ent) { /* Special case for KwKwK string. */
00305                 if (code > free_ent) {
00306 #ifdef DEBUG
00307                     char_type *p;
00308 
00309                     posbits -= n_bits;
00310                     p = &inbuf[posbits>>3];
00311                     fprintf(stderr,
00312                             "code:%ld free_ent:%ld n_bits:%d insize:%u\n",
00313                             code, free_ent, n_bits, insize);
00314                     fprintf(stderr,
00315                             "posbits:%ld inbuf:%02X %02X %02X %02X %02X\n",
00316                             posbits, p[-1],p[0],p[1],p[2],p[3]);
00317 #endif
00318                     if (!test && outpos > 0) {
00319                         write_buf(out, (char*)outbuf, outpos);
00320                         bytes_out += (off_t)outpos;
00321                     }
00322                     gzip_error (to_stdout
00323                                 ? "corrupt input."
00324                                 : "corrupt input. Use zcat to recover some data.");
00325                 }
00326                 *--stackp = (char_type)finchar;
00327                 code = oldcode;
00328             }
00329 
00330             while ((cmp_code_int)code >= (cmp_code_int)256) {
00331                 /* Generate output characters in reverse order */
00332                 *--stackp = tab_suffixof(code);
00333                 code = tab_prefixof(code);
00334             }
00335             *--stackp = (char_type)(finchar = tab_suffixof(code));
00336 
00337             /* And put them out in forward order */
00338             {
00339                 REG1 int        i;
00340 
00341                 if (outpos+(i = (de_stack-stackp)) >= OUTBUFSIZ) {
00342                     do {
00343                         if (i > OUTBUFSIZ-outpos) i = OUTBUFSIZ-outpos;
00344 
00345                         if (i > 0) {
00346                             memcpy(outbuf+outpos, stackp, i);
00347                             outpos += i;
00348                         }
00349                         if (outpos >= OUTBUFSIZ) {
00350                             if (!test) {
00351                                 write_buf(out, (char*)outbuf, outpos);
00352                                 bytes_out += (off_t)outpos;
00353                             }
00354                             outpos = 0;
00355                         }
00356                         stackp+= i;
00357                     } while ((i = (de_stack-stackp)) > 0);
00358                 } else {
00359                     memcpy(outbuf+outpos, stackp, i);
00360                     outpos += i;
00361                 }
00362             }
00363 
00364             if ((code = free_ent) < maxmaxcode) { /* Generate the new entry. */
00365 
00366                 tab_prefixof(code) = (unsigned short)oldcode;
00367                 tab_suffixof(code) = (char_type)finchar;
00368                 free_ent = code+1;
00369             }
00370             oldcode = incode;   /* Remember previous code.      */
00371         }
00372     } while (rsize != 0);
00373 
00374     if (!test && outpos > 0) {
00375         write_buf(out, (char*)outbuf, outpos);
00376         bytes_out += (off_t)outpos;
00377     }
00378     return OK;
00379 }

© sourcejam.com 2005-2008