Main Page | Class List | Directories | File List | Class Members | File Members

regex.c

Go to the documentation of this file.
00001 /* Extended regular expression matching and search library,
00002    version 0.12.
00003    (Implements POSIX draft P10003.2/D11.2, except for
00004    internationalization features.)
00005 
00006    Copyright (C) 1993 Free Software Foundation, Inc.
00007 
00008    This program is free software; you can redistribute it and/or modify
00009    it under the terms of the GNU General Public License as published by
00010    the Free Software Foundation; either version 2, or (at your option)
00011    any later version.
00012 
00013    This program is distributed in the hope that it will be useful,
00014    but WITHOUT ANY WARRANTY; without even the implied warranty of
00015    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
00016    GNU General Public License for more details.
00017 
00018    You should have received a copy of the GNU General Public License
00019    along with this program; if not, write to the Free Software
00020    Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.  */
00021 
00022 /* AIX requires this to be the first thing in the file. */
00023 
00024 
00025 #include "../src/conf.h"
00026 
00027 #if defined (_AIX) && !defined (REGEX_MALLOC)
00028   #pragma alloca
00029 #endif
00030 
00031 
00032 #if defined(HAVE_REGEX_H) || defined(HAVE_LIBRX)
00033 /* Nothing */
00034 #else
00035 
00036 #define _GNU_SOURCE
00037 
00038 #define HAVE_STRING_H  1
00039 #define HAVE_ALLOCA_H  0
00040 
00041 /* We need this for `regex.h', and perhaps for the Emacs include files.  */
00042 #include <sys/types.h>
00043 #include "gnuregex.h"
00044 
00045 /* The `emacs' switch turns on certain matching commands
00046    that make sense only in Emacs. */
00047 #ifdef emacs
00048 
00049 #include "lisp.h"
00050 #include "buffer.h"
00051 #include "syntax.h"
00052 
00053 /* Emacs uses `NULL' as a predicate.  */
00054 #undef NULL
00055 
00056 #else  /* not emacs */
00057 
00058 /* We used to test for `BSTRING' here, but only GCC and Emacs define
00059    `BSTRING', as far as I know, and neither of them use this code.  */
00060 
00061 #ifndef bcmp
00062 #define bcmp(s1, s2, n) memcmp ((s1), (s2), (n))
00063 #endif
00064 #ifndef bcopy
00065 #define bcopy(s, d, n)  memcpy ((d), (s), (n))
00066 #endif
00067 #ifndef bzero
00068 #define bzero(s, n)     memset ((s), 0, (n))
00069 #endif
00070 
00071 #ifdef STDC_HEADERS
00072 #include <stdlib.h>
00073 #else
00074 char *malloc ();
00075 char *realloc ();
00076 #endif
00077 
00078 
00079 /* Define the syntax stuff for <, >, etc.  */
00080 
00081 /* This must be nonzero for the wordchar and notwordchar pattern
00082    commands in re_match_2.  */
00083 #ifndef Sword 
00084 #define Sword 1
00085 #endif
00086 
00087 #ifdef SYNTAX_TABLE
00088 
00089 extern char *re_syntax_table;
00090 
00091 #else /* not SYNTAX_TABLE */
00092 
00093 /* How many characters in the character set.  */
00094 #define CHAR_SET_SIZE 256
00095 
00096 static char re_syntax_table[CHAR_SET_SIZE];
00097 
00098 static void
00099 init_syntax_once ()
00100 {
00101    register int c;
00102    static int done = 0;
00103 
00104    if (done)
00105      return;
00106 
00107    bzero (re_syntax_table, sizeof re_syntax_table);
00108 
00109    for (c = 'a'; c <= 'z'; c++)
00110      re_syntax_table[c] = Sword;
00111 
00112    for (c = 'A'; c <= 'Z'; c++)
00113      re_syntax_table[c] = Sword;
00114 
00115    for (c = '0'; c <= '9'; c++)
00116      re_syntax_table[c] = Sword;
00117 
00118    re_syntax_table['_'] = Sword;
00119 
00120    done = 1;
00121 }
00122 
00123 #endif /* not SYNTAX_TABLE */
00124 
00125 #define SYNTAX(c) re_syntax_table[c]
00126 
00127 #endif /* not emacs */
00128 
00129 /* Get the interface, including the syntax bits.  */
00130 /* #include "regex.h" */
00131 
00132 /* isalpha etc. are used for the character classes.  */
00133 #include <ctype.h>
00134 
00135 #ifndef isascii
00136 #define isascii(c) 1
00137 #endif
00138 
00139 #ifdef isblank
00140 #define ISBLANK(c) (isascii (c) && isblank (c))
00141 #else
00142 #define ISBLANK(c) ((c) == ' ' || (c) == '\t')
00143 #endif
00144 #ifdef isgraph
00145 #define ISGRAPH(c) (isascii (c) && isgraph (c))
00146 #else
00147 #define ISGRAPH(c) (isascii (c) && isprint (c) && !isspace (c))
00148 #endif
00149 
00150 #define ISPRINT(c) (isascii (c) && isprint (c))
00151 #define ISDIGIT(c) (isascii (c) && isdigit (c))
00152 #define ISALNUM(c) (isascii (c) && isalnum (c))
00153 #define ISALPHA(c) (isascii (c) && isalpha (c))
00154 #define ISCNTRL(c) (isascii (c) && iscntrl (c))
00155 #define ISLOWER(c) (isascii (c) && islower (c))
00156 #define ISPUNCT(c) (isascii (c) && ispunct (c))
00157 #define ISSPACE(c) (isascii (c) && isspace (c))
00158 #define ISUPPER(c) (isascii (c) && isupper (c))
00159 #define ISXDIGIT(c) (isascii (c) && isxdigit (c))
00160 
/* Fallback for when no header has defined NULL (the emacs branch above
   explicitly #undefs it).  The expansion is fully parenthesized so that
   it remains a single operand in every expression context, e.g.
   `sizeof NULL'.  */
#ifndef NULL
#define NULL ((void *) 0)
#endif
00164 
00165 /* We remove any previous definition of `SIGN_EXTEND_CHAR',
00166    since ours (we hope) works properly with all combinations of
00167    machines, compilers, `char' and `unsigned char' argument types.
00168    (Per Bothner suggested the basic approach.)  */
00169 #undef SIGN_EXTEND_CHAR
00170 #if __STDC__
00171 #define SIGN_EXTEND_CHAR(c) ((signed char) (c))
00172 #else  /* not __STDC__ */
00173 /* As in Harbison and Steele.  */
00174 #define SIGN_EXTEND_CHAR(c) ((((unsigned char) (c)) ^ 128) - 128)
00175 #endif
00176 
00177 /* Should we use malloc or alloca?  If REGEX_MALLOC is not defined, we
00178    use `alloca' instead of `malloc'.  This is because using malloc in
00179    re_search* or re_match* could cause memory leaks when C-g is used in
00180    Emacs; also, malloc is slower and causes storage fragmentation.  On
00181    the other hand, malloc is more portable, and easier to debug.  
00182    
00183    Because we sometimes use alloca, some routines have to be macros,
00184    not functions -- `alloca'-allocated space disappears at the end of the
00185    function it is called in.  */
00186 
00187 #ifdef REGEX_MALLOC
00188 
00189 #define REGEX_ALLOCATE malloc
00190 #define REGEX_REALLOCATE(source, osize, nsize) realloc (source, nsize)
00191 
00192 #else /* not REGEX_MALLOC  */
00193 
00194 /* Emacs already defines alloca, sometimes.  */
00195 #ifndef alloca
00196 
00197 /* Make alloca work the best possible way.  */
00198 #ifdef __GNUC__
00199 #define alloca __builtin_alloca
00200 #else /* not __GNUC__ */
00201 #if HAVE_ALLOCA_H
00202 #include <alloca.h>
00203 #else /* not __GNUC__ or HAVE_ALLOCA_H */
00204 #ifndef _AIX /* Already did AIX, up at the top.  */
00205 char *alloca ();
00206 #endif /* not _AIX */
00207 #endif /* not HAVE_ALLOCA_H */ 
00208 #endif /* not __GNUC__ */
00209 
00210 #endif /* not alloca */
00211 
00212 #define REGEX_ALLOCATE alloca
00213 
00214 /* Assumes a `char *destination' variable.  */
00215 #define REGEX_REALLOCATE(source, osize, nsize)                          \
00216   (destination = (char *) alloca (nsize),                               \
00217    bcopy (source, destination, osize),                                  \
00218    destination)
00219 
00220 #endif /* not REGEX_MALLOC */
00221 
00222 
00223 /* True if `size1' is nonzero and PTR is pointing anywhere inside
00224    `string1' or just past its end.  This works if PTR is NULL, which is
00225    a good thing.  */
00226 #define FIRST_STRING_P(ptr)                                     \
00227   (size1 && string1 <= (ptr) && (ptr) <= string1 + size1)
00228 
00229 /* (Re)Allocate N items of type T using malloc, or fail.  */
00230 #define TALLOC(n, t) ((t *) malloc ((n) * sizeof (t)))
00231 #define RETALLOC(addr, n, t) ((addr) = (t *) realloc (addr, (n) * sizeof (t)))
00232 #define REGEX_TALLOC(n, t) ((t *) REGEX_ALLOCATE ((n) * sizeof (t)))
00233 
00234 #define BYTEWIDTH 8 /* In bits.  */
00235 
00236 #define STREQ(s1, s2) ((strcmp (s1, s2) == 0))
00237 
00238 #define MAX(a, b) ((a) > (b) ? (a) : (b))
00239 #define MIN(a, b) ((a) < (b) ? (a) : (b))
00240 
00241 typedef char boolean;
00242 #define false 0
00243 #define true 1
00244 
00245 /* These are the command codes that appear in compiled regular
00246    expressions.  Some opcodes are followed by argument bytes.  A
00247    command code can specify any interpretation whatsoever for its
00248    arguments.  Zero bytes may appear in the compiled regular expression.
00249 
00250    The value of `exactn' is needed in search.c (search_buffer) in Emacs.
00251    So regex.h defines a symbol `RE_EXACTN_VALUE' to be 1; the value of
00252    `exactn' we use here must also be 1.  */
00253 
typedef enum
{
  /* Does nothing.  */
  no_op = 0,

  /* Literal text: one count byte N, then N literal bytes.  */
  exactn = 1,

  /* Matches (more or less) any single character.  */
  anychar,

  /* Character set.  The first operand byte is the number of bitmap
     bytes that follow; the bitmap says which characters are members.
     Within each byte the bits are ordered low bit first, and a 1 bit
     means the character is in the set.  A character too large to have
     a bit in the map is automatically not a member.  */
  charset,

  /* Takes the same operands as charset, but matches any character that
     is NOT one of those specified.  */
  charset_not,

  /* Begin remembering matched text so it can be stored in a register.
     Operands: one byte with the register number (0 up to one less than
     the pattern buffer's re_nsub field), then one byte with the number
     of groups nested inside this one.  (The nested-group count lives
     here only because the on_failure_jump of re_match_2 needs it.)  */
  start_memory,

  /* Stop remembering matched text and store it in a memory register.
     Operands mirror start_memory: the register number (0 up to one
     less than `re_nsub'), then the number of inner groups.  (The count
     is repeated here because there is no easy way to find the
     corresponding start_memory from a stop_memory.)  */
  stop_memory,

  /* Match a duplicate of something remembered earlier.  Operand: one
     byte holding the register number.  */
  duplicate,

  /* Fail unless at the beginning of a line.  */
  begline,

  /* Fail unless at the end of a line.  */
  endline,

  /* Succeed at the beginning of the buffer (emacs) or at the beginning
     of the string being matched (otherwise).  */
  begbuf,

  /* The same, for the end of the buffer/string.  */
  endbuf,

  /* Unconditional jump.  Operand: two-byte relative address.  */
  jump,

  /* Like jump, but also marks the end of an alternative.  */
  jump_past_alt,

  /* Operand: two-byte relative address of the place to resume at in
     case of failure.  */
  on_failure_jump,

  /* Like on_failure_jump, except that a placeholder is pushed instead
     of the current string position when it is executed.  */
  on_failure_keep_string_jump,

  /* Discard the latest failure point, then jump to the two-byte
     relative address that follows.  */
  pop_failure_jump,

  /* Used to jump back to the beginning of a repeat.  It is changed to
     pop_failure_jump if we know we will not have to backtrack in order
     to match, and to jump otherwise.  That is: if what follows this
     jump clearly cannot match what the repeat matches, so that
     backtracking out of repetitions already matched would be
     pointless, it becomes a pop_failure_jump.  Operand: two-byte
     address.  */
  maybe_pop_jump,

  /* Jump to the two-byte address that follows and push a dummy failure
     point, which is thrown away if an attempt is made to use it for a
     failure.  A `+' construct emits this before the first repeat; it
     is also an intermediary kind of jump when compiling an
     alternative.  */
  dummy_failure_jump,

  /* Push a dummy failure point and continue.  Used at the end of
     alternatives.  */
  push_dummy_failure,

  /* Operands: two-byte relative address, then two-byte number N.
     After matching N times, jump to the address upon failure.  */
  succeed_n,

  /* Operands: two-byte relative address, then two-byte number N.
     Jump to the address N times, then fail.  */
  jump_n,

  /* Set the location at the following two-byte relative address to the
     two-byte number that comes after it.  The address *includes* the
     two bytes of number.  */
  set_number_at,

  /* Matches any word-constituent character.  */
  wordchar,

  /* Matches any character that is not a word constituent.  */
  notwordchar,

  /* Succeeds if at the beginning of a word.  */
  wordbeg,

  /* Succeeds if at the end of a word.  */
  wordend,

  /* Succeeds if at a word boundary.  */
  wordbound,

  /* Succeeds if not at a word boundary.  */
  notwordbound

#ifdef emacs
  /* Succeeds if before point.  */
  ,before_dot,

  /* Succeeds if at point.  */
  at_dot,

  /* Succeeds if after point.  */
  after_dot,

  /* Matches any character whose syntax is the one specified.  Operand:
     one byte containing a syntax code, e.g., Sword.  */
  syntaxspec,

  /* Matches any character whose syntax is not the one specified.  */
  notsyntaxspec
#endif /* emacs */
} re_opcode_t;
00384 
00385 /* Common operations on the compiled pattern.  */
00386 
/* Write NUMBER into the two contiguous bytes starting at DESTINATION,
   low-order byte first.  */

#define STORE_NUMBER(destination, number)                               \
  do                                                                    \
    {                                                                   \
      (destination)[0] = (number) & 0xff;                               \
      (destination)[1] = (number) >> 8;                                 \
    }                                                                   \
  while (0)

/* Like STORE_NUMBER, but afterwards advance DESTINATION to the byte
   following the stored number.  DESTINATION must therefore be an
   lvalue.  */

#define STORE_NUMBER_AND_INCR(destination, number)                      \
  do                                                                    \
    {                                                                   \
      (destination)[0] = (number) & 0xff;                               \
      (destination)[1] = (number) >> 8;                                 \
      (destination) += 2;                                               \
    }                                                                   \
  while (0)
00404 
/* Put into DESTINATION the number stored in the two contiguous bytes
   starting at SOURCE.  The first byte supplies the low-order eight
   bits; the second byte is sign-extended and supplies the high-order
   part, so the result may be negative.

   The high byte is scaled by multiplication rather than by `<< 8':
   it is negative whenever the stored number is negative, and
   left-shifting a negative value is undefined behavior in ISO C.  */

#define EXTRACT_NUMBER(destination, source)                             \
  do {                                                                  \
    (destination) = *(source) & 0377;                                   \
    (destination) += SIGN_EXTEND_CHAR (*((source) + 1)) * (1 << 8);     \
  } while (0)
00413 
#ifdef DEBUG
/* Function counterpart of the EXTRACT_NUMBER macro: store in *DEST the
   number held in the two bytes at SOURCE (low-order byte first, second
   byte sign-extended).

   The high byte is scaled by multiplication instead of `<< 8' because
   it is negative for negative numbers, and left-shifting a negative
   value is undefined behavior in ISO C.  */
static void
extract_number (dest, source)
    int *dest;
    unsigned char *source;
{
  int temp = SIGN_EXTEND_CHAR (*(source + 1));
  *dest = *source & 0377;
  *dest += temp * (1 << 8);
}

/* Unless EXTRACT_MACROS is defined, route EXTRACT_NUMBER through the
   function above.  */
#ifndef EXTRACT_MACROS /* To debug the macros.  */
#undef EXTRACT_NUMBER
#define EXTRACT_NUMBER(dest, src) extract_number (&dest, src)
#endif /* not EXTRACT_MACROS */

#endif /* DEBUG */
00431 
/* Like EXTRACT_NUMBER, but afterwards advance SOURCE past the two bytes
   that were read.  SOURCE must be an lvalue.  */

#define EXTRACT_NUMBER_AND_INCR(destination, source)                    \
  do                                                                    \
    {                                                                   \
      EXTRACT_NUMBER (destination, source);                             \
      (source) += 2;                                                    \
    }                                                                   \
  while (0)
00440 
#ifdef DEBUG
/* Function counterpart of EXTRACT_NUMBER_AND_INCR: read the two-byte
   number at *SOURCE into *DESTINATION via extract_number, and leave
   *SOURCE pointing just past those two bytes.  */
static void
extract_number_and_incr (destination, source)
    int *destination;
    unsigned char **source;
{
  unsigned char *number_start = *source;

  *source = number_start + 2;
  extract_number (destination, number_start);
}

/* Unless EXTRACT_MACROS is defined, route EXTRACT_NUMBER_AND_INCR
   through the function above.  */
#ifndef EXTRACT_MACROS
#undef EXTRACT_NUMBER_AND_INCR
#define EXTRACT_NUMBER_AND_INCR(dest, src) \
  extract_number_and_incr (&dest, &src)
#endif /* not EXTRACT_MACROS */

#endif /* DEBUG */
00458 
00459 /* If DEBUG is defined, Regex prints many voluminous messages about what
00460    it is doing (if the variable `debug' is nonzero).  If linked with the
00461    main program in `iregex.c', you can enter patterns and strings
00462    interactively.  And if linked with the main program in `main.c' and
00463    the other test files, you can run the already-written tests.  */
00464 
00465 #ifdef DEBUG
00466 
00467 /* We use standard I/O for debugging.  */
00468 #include <stdio.h>
00469 
00470 /* It is useful to test things that ``must'' be true when debugging.  */
00471 #include <assert.h>
00472 
/* Nonzero turns on the output of the DEBUG_PRINT* macros below.  */
static int debug = 0;

/* Each printing macro is wrapped in `do { ... } while (0)' so that it
   expands to exactly one statement.  Written as a bare `if (debug) ...',
   a use such as

     if (cond) DEBUG_PRINT1 ("x"); else other ();

   would attach the `else' to the macro's hidden `if'.  */
#define DEBUG_STATEMENT(e) e
#define DEBUG_PRINT1(x) do { if (debug) printf (x); } while (0)
#define DEBUG_PRINT2(x1, x2) do { if (debug) printf (x1, x2); } while (0)
#define DEBUG_PRINT3(x1, x2, x3)                                        \
  do { if (debug) printf (x1, x2, x3); } while (0)
#define DEBUG_PRINT4(x1, x2, x3, x4)                                    \
  do { if (debug) printf (x1, x2, x3, x4); } while (0)
#define DEBUG_PRINT_COMPILED_PATTERN(p, s, e)                           \
  do { if (debug) print_partial_compiled_pattern (s, e); } while (0)
#define DEBUG_PRINT_DOUBLE_STRING(w, s1, sz1, s2, sz2)                  \
  do { if (debug) print_double_string (w, s1, sz1, s2, sz2); } while (0)
00484 
00485 
00486 extern void printchar ();
00487 
00488 /* Print the fastmap in human-readable form.  */
00489 
00490 void
00491 print_fastmap (fastmap)
00492     char *fastmap;
00493 {
00494   unsigned was_a_range = 0;
00495   unsigned i = 0;  
00496   
00497   while (i < (1 << BYTEWIDTH))
00498     {
00499       if (fastmap[i++])
00500         {
00501           was_a_range = 0;
00502           printchar (i - 1);
00503           while (i < (1 << BYTEWIDTH)  &&  fastmap[i])
00504             {
00505               was_a_range = 1;
00506               i++;
00507             }
00508           if (was_a_range)
00509             {
00510               printf ("-");
00511               printchar (i - 1);
00512             }
00513         }
00514     }
00515   putchar ('\n'); 
00516 }
00517 
00518 
00519 /* Print a compiled pattern string in human-readable form, starting at
00520    the START pointer into it and ending just before the pointer END.  */
00521 
00522 void
00523 print_partial_compiled_pattern (start, end)
00524     unsigned char *start;
00525     unsigned char *end;
00526 {
00527   int mcnt, mcnt2;
00528   unsigned char *p = start;
00529   unsigned char *pend = end;
00530 
00531   if (start == NULL)
00532     {
00533       printf ("(null)\n");
00534       return;
00535     }
00536     
00537   /* Loop over pattern commands.  */
00538   while (p < pend)
00539     {
00540       switch ((re_opcode_t) *p++)
00541         {
00542         case no_op:
00543           printf ("/no_op");
00544           break;
00545 
00546         case exactn:
00547           mcnt = *p++;
00548           printf ("/exactn/%d", mcnt);
00549           do
00550             {
00551               putchar ('/');
00552               printchar (*p++);
00553             }
00554           while (--mcnt);
00555           break;
00556 
00557         case start_memory:
00558           mcnt = *p++;
00559           printf ("/start_memory/%d/%d", mcnt, *p++);
00560           break;
00561 
00562         case stop_memory:
00563           mcnt = *p++;
00564           printf ("/stop_memory/%d/%d", mcnt, *p++);
00565           break;
00566 
00567         case duplicate:
00568           printf ("/duplicate/%d", *p++);
00569           break;
00570 
00571         case anychar:
00572           printf ("/anychar");
00573           break;
00574 
00575         case charset:
00576         case charset_not:
00577           {
00578             register int c;
00579 
00580             printf ("/charset%s",
00581                     (re_opcode_t) *(p - 1) == charset_not ? "_not" : "");
00582             
00583             assert (p + *p < pend);
00584 
00585             for (c = 0; c < *p; c++)
00586               {
00587                 unsigned bit;
00588                 unsigned char map_byte = p[1 + c];
00589                 
00590                 putchar ('/');
00591 
00592                 for (bit = 0; bit < BYTEWIDTH; bit++)
00593                   if (map_byte & (1 << bit))
00594                     printchar (c * BYTEWIDTH + bit);
00595               }
00596             p += 1 + *p;
00597             break;
00598           }
00599 
00600         case begline:
00601           printf ("/begline");
00602           break;
00603 
00604         case endline:
00605           printf ("/endline");
00606           break;
00607 
00608         case on_failure_jump:
00609           extract_number_and_incr (&mcnt, &p);
00610           printf ("/on_failure_jump/0/%d", mcnt);
00611           break;
00612 
00613         case on_failure_keep_string_jump:
00614           extract_number_and_incr (&mcnt, &p);
00615           printf ("/on_failure_keep_string_jump/0/%d", mcnt);
00616           break;
00617 
00618         case dummy_failure_jump:
00619           extract_number_and_incr (&mcnt, &p);
00620           printf ("/dummy_failure_jump/0/%d", mcnt);
00621           break;
00622 
00623         case push_dummy_failure:
00624           printf ("/push_dummy_failure");
00625           break;
00626           
00627         case maybe_pop_jump:
00628           extract_number_and_incr (&mcnt, &p);
00629           printf ("/maybe_pop_jump/0/%d", mcnt);
00630           break;
00631 
00632         case pop_failure_jump:
00633           extract_number_and_incr (&mcnt, &p);
00634           printf ("/pop_failure_jump/0/%d", mcnt);
00635           break;          
00636           
00637         case jump_past_alt:
00638           extract_number_and_incr (&mcnt, &p);
00639           printf ("/jump_past_alt/0/%d", mcnt);
00640           break;          
00641           
00642         case jump:
00643           extract_number_and_incr (&mcnt, &p);
00644           printf ("/jump/0/%d", mcnt);
00645           break;
00646 
00647         case succeed_n: 
00648           extract_number_and_incr (&mcnt, &p);
00649           extract_number_and_incr (&mcnt2, &p);
00650           printf ("/succeed_n/0/%d/0/%d", mcnt, mcnt2);
00651           break;
00652         
00653         case jump_n: 
00654           extract_number_and_incr (&mcnt, &p);
00655           extract_number_and_incr (&mcnt2, &p);
00656           printf ("/jump_n/0/%d/0/%d", mcnt, mcnt2);
00657           break;
00658         
00659         case set_number_at: 
00660           extract_number_and_incr (&mcnt, &p);
00661           extract_number_and_incr (&mcnt2, &p);
00662           printf ("/set_number_at/0/%d/0/%d", mcnt, mcnt2);
00663           break;
00664         
00665         case wordbound:
00666           printf ("/wordbound");
00667           break;
00668 
00669         case notwordbound:
00670           printf ("/notwordbound");
00671           break;
00672 
00673         case wordbeg:
00674           printf ("/wordbeg");
00675           break;
00676           
00677         case wordend:
00678           printf ("/wordend");
00679           
00680 #ifdef emacs
00681         case before_dot:
00682           printf ("/before_dot");
00683           break;
00684 
00685         case at_dot:
00686           printf ("/at_dot");
00687           break;
00688 
00689         case after_dot:
00690           printf ("/after_dot");
00691           break;
00692 
00693         case syntaxspec:
00694           printf ("/syntaxspec");
00695           mcnt = *p++;
00696           printf ("/%d", mcnt);
00697           break;
00698           
00699         case notsyntaxspec:
00700           printf ("/notsyntaxspec");
00701           mcnt = *p++;
00702           printf ("/%d", mcnt);
00703           break;
00704 #endif /* emacs */
00705 
00706         case wordchar:
00707           printf ("/wordchar");
00708           break;
00709           
00710         case notwordchar:
00711           printf ("/notwordchar");
00712           break;
00713 
00714         case begbuf:
00715           printf ("/begbuf");
00716           break;
00717 
00718         case endbuf:
00719           printf ("/endbuf");
00720           break;
00721 
00722         default:
00723           printf ("?%d", *(p-1));
00724         }
00725     }
00726   printf ("/\n");
00727 }
00728 
00729 
00730 void
00731 print_compiled_pattern (bufp)
00732     struct re_pattern_buffer *bufp;
00733 {
00734   unsigned char *buffer = bufp->buffer;
00735 
00736   print_partial_compiled_pattern (buffer, buffer + bufp->used);
00737   printf ("%d bytes used/%d bytes allocated.\n", bufp->used, bufp->allocated);
00738 
00739   if (bufp->fastmap_accurate && bufp->fastmap)
00740     {
00741       printf ("fastmap: ");
00742       print_fastmap (bufp->fastmap);
00743     }
00744 
00745   printf ("re_nsub: %d\t", bufp->re_nsub);
00746   printf ("regs_alloc: %d\t", bufp->regs_allocated);
00747   printf ("can_be_null: %d\t", bufp->can_be_null);
00748   printf ("newline_anchor: %d\n", bufp->newline_anchor);
00749   printf ("no_sub: %d\t", bufp->no_sub);
00750   printf ("not_bol: %d\t", bufp->not_bol);
00751   printf ("not_eol: %d\t", bufp->not_eol);
00752   printf ("syntax: %d\n", bufp->syntax);
00753   /* Perhaps we should print the translate table?  */
00754 }
00755 
00756 
/* Print, via printchar, the text from WHERE onward in the pair of
   strings STRING1 (SIZE1 bytes) and STRING2 (SIZE2 bytes).  If WHERE
   lies in STRING1 (per FIRST_STRING_P), the rest of STRING1 is printed
   and then all of STRING2; otherwise printing starts at WHERE's offset
   from STRING2.  A null WHERE prints "(null)".  */
void
print_double_string (where, string1, size1, string2, size2)
    const char *where;
    const char *string1;
    const char *string2;
    int size1;
    int size2;
{
  unsigned idx;

  if (where == NULL)
    {
      printf ("(null)");
      return;
    }

  if (FIRST_STRING_P (where))
    {
      idx = where - string1;
      while (idx < size1)
        printchar (string1[idx++]);

      /* Continue with the second string from its beginning.  */
      where = string2;
    }

  idx = where - string2;
  while (idx < size2)
    printchar (string2[idx++]);
}
00783 
00784 #else /* not DEBUG */
00785 
00786 #undef assert
00787 #define assert(e)
00788 
00789 #define DEBUG_STATEMENT(e)
00790 #define DEBUG_PRINT1(x)
00791 #define DEBUG_PRINT2(x1, x2)
00792 #define DEBUG_PRINT3(x1, x2, x3)
00793 #define DEBUG_PRINT4(x1, x2, x3, x4)
00794 #define DEBUG_PRINT_COMPILED_PATTERN(p, s, e)
00795 #define DEBUG_PRINT_DOUBLE_STRING(w, s1, sz1, s2, sz2)
00796 
00797 #endif /* not DEBUG */
00798 
00799 /* Set by `re_set_syntax' to the current regexp syntax to recognize.  Can
00800    also be assigned to arbitrarily: each pattern buffer stores its own
00801    syntax, so it can be changed between regex compilations.  */
00802 reg_syntax_t re_syntax_options = RE_SYNTAX_EMACS;
00803 
00804 
00805 /* Specify the precise syntax of regexps for compilation.  This provides
00806    for compatibility for various utilities which historically have
00807    different, incompatible syntaxes.
00808 
00809    The argument SYNTAX is a bit mask comprised of the various bits
00810    defined in regex.h.  We return the old syntax.  */
00811 
00812 reg_syntax_t
00813 re_set_syntax (syntax)
00814     reg_syntax_t syntax;
00815 {
00816   reg_syntax_t ret = re_syntax_options;
00817   
00818   re_syntax_options = syntax;
00819   return ret;
00820 }
00821 
/* Error message for each of the error codes listed in regex.h, indexed
   by error code.  The entries must stay in the same order as the codes
   are declared there.  */

static char *re_error_msg[] =
  {
    /* REG_NOERROR */   NULL,
    /* REG_NOMATCH */   "No match",
    /* REG_BADPAT */    "Invalid regular expression",
    /* REG_ECOLLATE */  "Invalid collation character",
    /* REG_ECTYPE */    "Invalid character class name",
    /* REG_EESCAPE */   "Trailing backslash",
    /* REG_ESUBREG */   "Invalid back reference",
    /* REG_EBRACK */    "Unmatched [ or [^",
    /* REG_EPAREN */    "Unmatched ( or \\(",
    /* REG_EBRACE */    "Unmatched \\{",
    /* REG_BADBR */     "Invalid content of \\{\\}",
    /* REG_ERANGE */    "Invalid range end",
    /* REG_ESPACE */    "Memory exhausted",
    /* REG_BADRPT */    "Invalid preceding regular expression",
    /* REG_EEND */      "Premature end of regular expression",
    /* REG_ESIZE */     "Regular expression too big",
    /* REG_ERPAREN */   "Unmatched ) or \\)",
  };
00844 
00845 /* Subroutine declarations and macros for regex_compile.  */
00846 
00847 static void store_op1 (), store_op2 ();
00848 static void insert_op1 (), insert_op2 ();
00849 static boolean at_begline_loc_p (), at_endline_loc_p ();
00850 static boolean group_in_compile_stack ();
00851 static reg_errcode_t compile_range ();
00852 
00853 /* Fetch the next character in the uncompiled pattern---translating it 
00854    if necessary.  Also cast from a signed character in the constant
00855    string passed to us by the user to an unsigned char that we can use
00856    as an array index (in, e.g., `translate').  */
00857 #define PATFETCH(c)                                                     \
00858   do {if (p == pend) return REG_EEND;                                   \
00859     c = (unsigned char) *p++;                                           \
00860     if (translate) c = translate[c];                                    \
00861   } while (0)
00862 
00863 /* Fetch the next character in the uncompiled pattern, with no
00864    translation.  */
00865 #define PATFETCH_RAW(c)                                                 \
00866   do {if (p == pend) return REG_EEND;                                   \
00867     c = (unsigned char) *p++;                                           \
00868   } while (0)
00869 
00870 /* Go backwards one character in the pattern.  */
00871 #define PATUNFETCH p--
00872 
00873 
00874 /* If `translate' is non-null, return translate[D], else just D.  We
00875    cast the subscript to translate because some data is declared as
00876    `char *', to avoid warnings when a string constant is passed.  But
00877    when we use a character as a subscript we must make it unsigned.  */
00878 #define TRANSLATE(d) (translate ? translate[(unsigned char) (d)] : (d))
00879 
00880 
00881 /* Macros for outputting the compiled pattern into `buffer'.  */
00882 
00883 /* Size in bytes given to the pattern buffer by regex_compile when it comes in with `allocated' equal to zero.  */
00884 #define INIT_BUF_SIZE  32
00885 
00886 /* Make sure we have at least N more bytes of space in buffer.  Grows it with EXTEND_BUFFER as often as needed, which may move `b' and may return an error code from the calling function.  */
00887 #define GET_BUFFER_SPACE(n)                                             \
00888     while (b - bufp->buffer + (n) > bufp->allocated)                    \
00889       EXTEND_BUFFER ()
00890 
00891 /* Make sure we have one more byte of buffer space, then store C (cast to unsigned char) at `b' and advance `b'.  */
00892 #define BUF_PUSH(c)                                                     \
00893   do {                                                                  \
00894     GET_BUFFER_SPACE (1);                                               \
00895     *b++ = (unsigned char) (c);                                         \
00896   } while (0)
00897 
00898 
00899 /* Ensure we have two more bytes of buffer space and then append C1 and C2, in that order.  */
00900 #define BUF_PUSH_2(c1, c2)                                              \
00901   do {                                                                  \
00902     GET_BUFFER_SPACE (2);                                               \
00903     *b++ = (unsigned char) (c1);                                        \
00904     *b++ = (unsigned char) (c2);                                        \
00905   } while (0)
00906 
00907 
00908 /* As with BUF_PUSH_2, except for three bytes: space is reserved once, then C1, C2 and C3 are appended.  */
00909 #define BUF_PUSH_3(c1, c2, c3)                                          \
00910   do {                                                                  \
00911     GET_BUFFER_SPACE (3);                                               \
00912     *b++ = (unsigned char) (c1);                                        \
00913     *b++ = (unsigned char) (c2);                                        \
00914     *b++ = (unsigned char) (c3);                                        \
00915   } while (0)
00916 
00917 
00918 /* Store a jump with opcode OP at LOC to location TO.  The argument passed
00919    on is TO - LOC - 3, i.e. relative to the end of the three-byte jump.  LOC appears twice in the expansion.  */
00920 #define STORE_JUMP(op, loc, to) \
00921   store_op1 (op, loc, (to) - (loc) - 3)
00922 
00923 /* Likewise, for a two-argument jump; ARG is passed through unchanged as the second argument.  */
00924 #define STORE_JUMP2(op, loc, to, arg) \
00925   store_op2 (op, loc, (to) - (loc) - 3, arg)
00926 
00927 /* Like `STORE_JUMP', but for inserting.  Assume `b' is the buffer end; it is passed as the last argument.  */
00928 #define INSERT_JUMP(op, loc, to) \
00929   insert_op1 (op, loc, (to) - (loc) - 3, b)
00930 
00931 /* Like `STORE_JUMP2', but for inserting.  Assume `b' is the buffer end; it is passed as the last argument.  */
00932 #define INSERT_JUMP2(op, loc, to, arg) \
00933   insert_op2 (op, loc, (to) - (loc) - 3, arg, b)
00934 
00935 
00936 /* Largest size, in bytes, the compiled-pattern buffer may grow to.  This
00937    is not an arbitrary limit: the arguments which represent offsets into
00938    the pattern are two bytes long.  So if 2^16 bytes turns out to be too small, many things would have to change.  */
00939 #define MAX_BUF_SIZE (1L << 16)
00940 
00941 
00942 /* Extend the buffer to twice its current size (capped at MAX_BUF_SIZE)
00943    via realloc and reset the pointers that pointed into the old block to
00944    point to the correct places in the new one.  Returns REG_ESIZE from the
00945    caller if the buffer is already MAX_BUF_SIZE, REG_ESPACE if realloc fails.  */
00946 #define EXTEND_BUFFER()                                                 \
00947   do {                                                                  \
00948     unsigned char *old_buffer = bufp->buffer;                           \
00949     if (bufp->allocated == MAX_BUF_SIZE)                                \
00950       return REG_ESIZE;                                                 \
00951     bufp->allocated <<= 1;                                              \
00952     if (bufp->allocated > MAX_BUF_SIZE)                                 \
00953       bufp->allocated = MAX_BUF_SIZE;                                   \
00954     bufp->buffer = (unsigned char *) realloc (bufp->buffer, bufp->allocated);\
00955     if (bufp->buffer == NULL)                                           \
00956       return REG_ESPACE; /* NOTE(review): old block is not freed here and bufp->buffer is now NULL.  */ \
00957     /* If the buffer moved, move all the pointers into it.  */          \
00958     if (old_buffer != bufp->buffer)                                     \
00959       {                                                                 \
00960         b = (b - old_buffer) + bufp->buffer;                            \
00961         begalt = (begalt - old_buffer) + bufp->buffer;                  \
00962         if (fixup_alt_jump)                                             \
00963           fixup_alt_jump = (fixup_alt_jump - old_buffer) + bufp->buffer;\
00964         if (laststart)                                                  \
00965           laststart = (laststart - old_buffer) + bufp->buffer;          \
00966         if (pending_exact)                                              \
00967           pending_exact = (pending_exact - old_buffer) + bufp->buffer;  \
00968       }                                                                 \
00969   } while (0)
00970 
00971 
00972 /* Since we have one byte reserved for the register number argument to
00973    {start,stop}_memory, the maximum number of groups we can report
00974    things about is what fits in that byte.  */
00975 #define MAX_REGNUM 255
00976 
00977 /* But patterns can have more than `MAX_REGNUM' registers, so the type
00978    that counts them is wider than one byte.  We just ignore the excess.  */
00979 typedef unsigned regnum_t;
00980 
00981 
00982 /* Macros for the compile stack.  */
00983 
00984 /* Since offsets can go either forwards or backwards, this type needs to
00985    be able to hold values from -(MAX_BUF_SIZE - 1) to MAX_BUF_SIZE - 1.  */
00986 typedef int pattern_offset_t;
00987 
00988 typedef struct  /* One compile-stack entry; the fields are stored as offsets rather than pointers.  */
00989 {
00990   pattern_offset_t begalt_offset;       /* presumably `begalt' when the group was opened -- confirm at the push site */
00991   pattern_offset_t fixup_alt_jump;      /* presumably the saved `fixup_alt_jump' -- confirm at the push site */
00992   pattern_offset_t inner_group_offset;  /* NOTE(review): meaning not visible in this part of the file */
00993   pattern_offset_t laststart_offset;    /* presumably the saved `laststart' -- confirm at the push site */
00994   regnum_t regnum;                      /* Register number of the group (see `regnum' in regex_compile).  */
00995 } compile_stack_elt_t;
00996 
00997 
00998 typedef struct  /* Stack of unclosed groups kept by regex_compile.  */
00999 {
01000   compile_stack_elt_t *stack;           /* The elements; allocated in regex_compile.  */
01001   unsigned size;                        /* Number of elements allocated in `stack'.  */
01002   unsigned avail;                       /* Offset of next open position.  */
01003 } compile_stack_type;
01004 
01005 
01006 #define INIT_COMPILE_STACK_SIZE 32  /* Elements allocated for the stack when regex_compile starts.  */
01007 
01008 #define COMPILE_STACK_EMPTY  (compile_stack.avail == 0)  /* These macros refer to a variable named `compile_stack' in scope.  */
01009 #define COMPILE_STACK_FULL  (compile_stack.avail == compile_stack.size)
01010 
01011 /* The next available element, i.e. the slot at index `avail'.  */
01012 #define COMPILE_STACK_TOP (compile_stack.stack[compile_stack.avail])
01013 
01014 
01015 /* Set the bit for character C in a list.  `b' must point at the list's bitmap; C is converted to unsigned char and appears twice in the expansion, so it must not have side effects.  */
01016 #define SET_LIST_BIT(c)                               \
01017   (b[((unsigned char) (c)) / BYTEWIDTH]               \
01018    |= 1 << (((unsigned char) (c)) % BYTEWIDTH))
01019 
01020 
01021 /* Get the next unsigned number in the uncompiled pattern into NUM (assumed to be an int -- confirm at the call sites).  NUM is left untouched if no digit is found; `c' ends up holding the last character fetched.  A value too large for an int saturates at (int) (~0U >> 1), i.e. INT_MAX, instead of overflowing.  */
01022 #define GET_UNSIGNED_NUMBER(num)                                        \
01023   { if (p != pend)                                                      \
01024      {                                                                  \
01025        PATFETCH (c);                                                    \
01026        while (ISDIGIT (c))                                              \
01027          {                                                              \
01028            if (num < 0) num = 0;                                        \
01029            num = (num > ((int) (~0U >> 1) - (c - '0')) / 10             \
01030                   ? (int) (~0U >> 1) : num * 10 + c - '0');             \
01031            if (p == pend)                                               \
01032               break;                                                    \
01033            PATFETCH (c);                                                \
01034          }                                                              \
01035        }                                                                \
01036     }           
01037 
01038 #define CHAR_CLASS_MAX_LENGTH  6 /* Length of the longest class name accepted by IS_CHAR_CLASS, namely `xdigit'.  */
01039 
01040 #define IS_CHAR_CLASS(string) /* True if STRING is one of the twelve class names below (alphabetical).  */ \
01041    (STREQ (string, "alnum") || STREQ (string, "alpha")                  \
01042     || STREQ (string, "blank") || STREQ (string, "cntrl")               \
01043     || STREQ (string, "digit") || STREQ (string, "graph")               \
01044     || STREQ (string, "lower") || STREQ (string, "print")               \
01045     || STREQ (string, "punct") || STREQ (string, "space")               \
01046     || STREQ (string, "upper") || STREQ (string, "xdigit"))
01047 
01048 /* `regex_compile' compiles PATTERN (of length SIZE) according to SYNTAX.
01049    Returns one of error codes defined in `regex.h', or zero for success.
01050 
01051    Assumes the `allocated' (and perhaps `buffer') and `translate'
01052    fields are set in BUFP on entry.
01053 
01054    If it succeeds, results are put in BUFP (if it returns an error, the
01055    contents of BUFP are undefined):
01056      `buffer' is the compiled pattern;
01057      `syntax' is set to SYNTAX;
01058      `used' is set to the length of the compiled pattern;
01059      `fastmap_accurate' is zero;
01060      `re_nsub' is the number of subexpressions in PATTERN;
01061      `not_bol' and `not_eol' are zero;
01062    
01063    The `fastmap' and `newline_anchor' fields are neither
01064    examined nor set.  */
01065 
01066 static reg_errcode_t
01067 regex_compile (pattern, size, syntax, bufp)
01068       char *pattern;
01069      int size;
01070      reg_syntax_t syntax;
01071      struct re_pattern_buffer *bufp;
01072 {
01073   /* We fetch characters from PATTERN here.  Even though PATTERN is
01074      `char *' (i.e., signed), we declare these variables as unsigned, so
01075      they can be reliably used as array indices.  */
01076   register unsigned char c, c1;
01077   
01078   /* A random temporary spot in PATTERN.  */
01079    char *p1;
01080 
01081   /* Points to the end of the buffer, where we should append.  */
01082   register unsigned char *b;
01083   
01084   /* Keeps track of unclosed groups.  */
01085   compile_stack_type compile_stack;
01086 
01087   /* Points to the current (ending) position in the pattern.  */
01088    char *p = pattern;
01089    char *pend = pattern + size;
01090   
01091   /* How to translate the characters in the pattern.  */
01092   char *translate = bufp->translate;
01093 
01094   /* Address of the count-byte of the most recently inserted `exactn'
01095      command.  This makes it possible to tell if a new exact-match
01096      character can be added to that command or if the character requires
01097      a new `exactn' command.  */
01098   unsigned char *pending_exact = 0;
01099 
01100   /* Address of start of the most recently finished expression.
01101      This tells, e.g., postfix * where to find the start of its
01102      operand.  Reset at the beginning of groups and alternatives.  */
01103   unsigned char *laststart = 0;
01104 
01105   /* Address of beginning of regexp, or inside of last group.  */
01106   unsigned char *begalt;
01107 
01108   /* Place in the uncompiled pattern (i.e., the {) to
01109      which to go back if the interval is invalid.  */
01110    char *beg_interval;
01111                 
01112   /* Address of the place where a forward jump should go to the end of
01113      the containing expression.  Each alternative of an `or' -- except the
01114      last -- ends with a forward jump of this sort.  */
01115   unsigned char *fixup_alt_jump = 0;
01116 
01117   /* Counts open-groups as they are encountered.  Remembered for the
01118      matching close-group on the compile stack, so the same register
01119      number is put in the stop_memory as the start_memory.  */
01120   regnum_t regnum = 0;
01121 
01122 #ifdef DEBUG
01123   DEBUG_PRINT1 ("\nCompiling pattern: ");
01124   if (debug)
01125     {
01126       unsigned debug_count;
01127       
01128       for (debug_count = 0; debug_count < size; debug_count++)
01129         printchar (pattern[debug_count]);
01130       putchar ('\n');
01131     }
01132 #endif /* DEBUG */
01133 
01134   /* Initialize the compile stack.  */
01135   compile_stack.stack = TALLOC (INIT_COMPILE_STACK_SIZE, compile_stack_elt_t);
01136   if (compile_stack.stack == NULL)
01137     return REG_ESPACE;
01138 
01139   compile_stack.size = INIT_COMPILE_STACK_SIZE;
01140   compile_stack.avail = 0;
01141 
01142   /* Initialize the pattern buffer.  */
01143   bufp->syntax = syntax;
01144   bufp->fastmap_accurate = 0;
01145   bufp->not_bol = bufp->not_eol = 0;
01146 
01147   /* Set `used' to zero, so that if we return an error, the pattern
01148      printer (for debugging) will think there's no pattern.  We reset it
01149      at the end.  */
01150   bufp->used = 0;
01151   
01152   /* Always count groups, whether or not bufp->no_sub is set.  */
01153   bufp->re_nsub = 0;                            
01154 
01155 #if !defined (emacs) && !defined (SYNTAX_TABLE)
01156   /* Initialize the syntax table.  */
01157    init_syntax_once ();
01158 #endif
01159 
01160   if (bufp->allocated == 0)
01161     {
01162       if (bufp->buffer)
01163         { /* If zero allocated, but buffer is non-null, try to realloc
01164              enough space.  This loses if buffer's address is bogus, but
01165              that is the user's responsibility.  */
01166           RETALLOC (bufp->buffer, INIT_BUF_SIZE, unsigned char);
01167         }
01168       else
01169         { /* Caller did not allocate a buffer.  Do it for them.  */
01170           bufp->buffer = TALLOC (INIT_BUF_SIZE, unsigned char);
01171         }
01172       if (!bufp->buffer) return REG_ESPACE;
01173 
01174       bufp->allocated = INIT_BUF_SIZE;
01175     }
01176 
01177   begalt = b = bufp->buffer;
01178 
01179   /* Loop through the uncompiled pattern until we're at the end.  */
01180   while (p != pend)
01181     {
01182       PATFETCH (c);
01183 
01184       switch (c)
01185         {
01186         case '^':
01187           {
01188             if (   /* If at start of pattern, it's an operator.  */
01189                    p == pattern + 1
01190                    /* If context independent, it's an operator.  */
01191                 || syntax & RE_CONTEXT_INDEP_ANCHORS
01192                    /* Otherwise, depends on what's come before.  */
01193                 || at_begline_loc_p (pattern, p, syntax))
01194               BUF_PUSH (begline);
01195             else
01196               goto normal_char;
01197           }
01198           break;
01199 
01200 
01201         case '$':
01202           {
01203             if (   /* If at end of pattern, it's an operator.  */
01204                    p == pend 
01205                    /* If context independent, it's an operator.  */
01206                 || syntax & RE_CONTEXT_INDEP_ANCHORS
01207                    /* Otherwise, depends on what's next.  */
01208                 || at_endline_loc_p (p, pend, syntax))
01209                BUF_PUSH (endline);
01210              else
01211                goto normal_char;
01212            }
01213            break;
01214 
01215 
01216         case '+':
01217         case '?':
01218           if ((syntax & RE_BK_PLUS_QM)
01219               || (syntax & RE_LIMITED_OPS))
01220             goto normal_char;
01221         handle_plus:
01222         case '*':
01223           /* If there is no previous pattern... */
01224           if (!laststart)
01225             {
01226               if (syntax & RE_CONTEXT_INVALID_OPS)
01227                 return REG_BADRPT;
01228               else if (!(syntax & RE_CONTEXT_INDEP_OPS))
01229                 goto normal_char;
01230             }
01231 
01232           {
01233             /* Are we optimizing this jump?  */
01234             boolean keep_string_p = false;
01235             
01236             /* 1 means zero (many) matches is allowed.  */
01237             char zero_times_ok = 0, many_times_ok = 0;
01238 
01239             /* If there is a sequence of repetition chars, collapse it
01240                down to just one (the right one).  We can't combine
01241                interval operators with these because of, e.g., `a{2}*',
01242                which should only match an even number of `a's.  */
01243 
01244             for (;;)
01245               {
01246                 zero_times_ok |= c != '+';
01247                 many_times_ok |= c != '?';
01248 
01249                 if (p == pend)
01250                   break;
01251 
01252                 PATFETCH (c);
01253 
01254                 if (c == '*'
01255                     || (!(syntax & RE_BK_PLUS_QM) && (c == '+' || c == '?')))
01256                   ;
01257 
01258                 else if (syntax & RE_BK_PLUS_QM  &&  c == '\\')
01259                   {
01260                     if (p == pend) return REG_EESCAPE;
01261 
01262                     PATFETCH (c1);
01263                     if (!(c1 == '+' || c1 == '?'))
01264                       {
01265                         PATUNFETCH;
01266                         PATUNFETCH;
01267                         break;
01268                       }
01269 
01270                     c = c1;
01271                   }
01272                 else
01273                   {
01274                     PATUNFETCH;
01275                     break;
01276                   }
01277 
01278                 /* If we get here, we found another repeat character.  */
01279                }
01280 
01281             /* Star, etc. applied to an empty pattern is equivalent
01282                to an empty pattern.  */
01283             if (!laststart)  
01284               break;
01285 
01286             /* Now we know whether or not zero matches is allowed
01287                and also whether or not two or more matches is allowed.  */
01288             if (many_times_ok)
01289               { /* More than one repetition is allowed, so put in at the
01290                    end a backward relative jump from `b' to before the next
01291                    jump we're going to put in below (which jumps from
01292                    laststart to after this jump).  
01293 
01294                    But if we are at the `*' in the exact sequence `.*\n',
01295                    insert an unconditional jump backwards to the .,
01296                    instead of the beginning of the loop.  This way we only
01297                    push a failure point once, instead of every time
01298                    through the loop.  */
01299                 assert (p - 1 > pattern);
01300 
01301                 /* Allocate the space for the jump.  */
01302                 GET_BUFFER_SPACE (3);
01303 
01304                 /* We know we are not at the first character of the pattern,
01305                    because laststart was nonzero.  And we've already
01306                    incremented `p', by the way, to be the character after
01307                    the `*'.  Do we have to do something analogous here
01308                    for null bytes, because of RE_DOT_NOT_NULL?  */
01309                 if (TRANSLATE (*(p - 2)) == TRANSLATE ('.')
01310                     && zero_times_ok
01311                     && p < pend && TRANSLATE (*p) == TRANSLATE ('\n')
01312                     && !(syntax & RE_DOT_NEWLINE))
01313                   { /* We have .*\n.  */
01314                     STORE_JUMP (jump, b, laststart);
01315                     keep_string_p = true;
01316                   }
01317                 else
01318                   /* Anything else.  */
01319                   STORE_JUMP (maybe_pop_jump, b, laststart - 3);
01320 
01321                 /* We've added more stuff to the buffer.  */
01322                 b += 3;
01323               }
01324 
01325             /* On