00001
00002
00003
00004
00005
00006
00007
00008
00009
00010
00011
00012
00013
00014
00015
00016
00017
00018
00019
00020
00021
00022
00023
00024
00025 #include "../src/conf.h"
00026
00027 #if defined (_AIX) && !defined (REGEX_MALLOC)
00028 #pragma alloca
00029 #endif
00030
00031
00032 #if defined(HAVE_REGEX_H) || defined(HAVE_LIBRX)
00033
00034 #else
00035
00036 #define _GNU_SOURCE
00037
00038 #define HAVE_STRING_H 1
00039 #define HAVE_ALLOCA_H 0
00040
00041
00042 #include <sys/types.h>
00043 #include "gnuregex.h"
00044
00045
00046
00047 #ifdef emacs
00048
00049 #include "lisp.h"
00050 #include "buffer.h"
00051 #include "syntax.h"
00052
00053
00054 #undef NULL
00055
00056 #else
00057
00058
00059
00060
00061 #ifndef bcmp
00062 #define bcmp(s1, s2, n) memcmp ((s1), (s2), (n))
00063 #endif
00064 #ifndef bcopy
00065 #define bcopy(s, d, n) memcpy ((d), (s), (n))
00066 #endif
00067 #ifndef bzero
00068 #define bzero(s, n) memset ((s), 0, (n))
00069 #endif
00070
00071 #ifdef STDC_HEADERS
00072 #include <stdlib.h>
00073 #else
00074 char *malloc ();
00075 char *realloc ();
00076 #endif
00077
00078
00079
00080
00081
00082
00083 #ifndef Sword
00084 #define Sword 1
00085 #endif
00086
00087 #ifdef SYNTAX_TABLE
00088
00089 extern char *re_syntax_table;
00090
00091 #else
00092
00093
00094 #define CHAR_SET_SIZE 256
00095
00096 static char re_syntax_table[CHAR_SET_SIZE];
00097
00098 static void
00099 init_syntax_once ()
00100 {
00101 register int c;
00102 static int done = 0;
00103
00104 if (done)
00105 return;
00106
00107 bzero (re_syntax_table, sizeof re_syntax_table);
00108
00109 for (c = 'a'; c <= 'z'; c++)
00110 re_syntax_table[c] = Sword;
00111
00112 for (c = 'A'; c <= 'Z'; c++)
00113 re_syntax_table[c] = Sword;
00114
00115 for (c = '0'; c <= '9'; c++)
00116 re_syntax_table[c] = Sword;
00117
00118 re_syntax_table['_'] = Sword;
00119
00120 done = 1;
00121 }
00122
00123 #endif
00124
00125 #define SYNTAX(c) re_syntax_table[c]
00126
00127 #endif
00128
00129
00130
00131
00132
00133 #include <ctype.h>
00134
00135 #ifndef isascii
00136 #define isascii(c) 1
00137 #endif
00138
00139 #ifdef isblank
00140 #define ISBLANK(c) (isascii (c) && isblank (c))
00141 #else
00142 #define ISBLANK(c) ((c) == ' ' || (c) == '\t')
00143 #endif
00144 #ifdef isgraph
00145 #define ISGRAPH(c) (isascii (c) && isgraph (c))
00146 #else
00147 #define ISGRAPH(c) (isascii (c) && isprint (c) && !isspace (c))
00148 #endif
00149
00150 #define ISPRINT(c) (isascii (c) && isprint (c))
00151 #define ISDIGIT(c) (isascii (c) && isdigit (c))
00152 #define ISALNUM(c) (isascii (c) && isalnum (c))
00153 #define ISALPHA(c) (isascii (c) && isalpha (c))
00154 #define ISCNTRL(c) (isascii (c) && iscntrl (c))
00155 #define ISLOWER(c) (isascii (c) && islower (c))
00156 #define ISPUNCT(c) (isascii (c) && ispunct (c))
00157 #define ISSPACE(c) (isascii (c) && isspace (c))
00158 #define ISUPPER(c) (isascii (c) && isupper (c))
00159 #define ISXDIGIT(c) (isascii (c) && isxdigit (c))
00160
00161 #ifndef NULL
00162 #define NULL (void *)0
00163 #endif
00164
00165
00166
00167
00168
00169 #undef SIGN_EXTEND_CHAR
00170 #if __STDC__
00171 #define SIGN_EXTEND_CHAR(c) ((signed char) (c))
00172 #else
00173
00174 #define SIGN_EXTEND_CHAR(c) ((((unsigned char) (c)) ^ 128) - 128)
00175 #endif
00176
00177
00178
00179
00180
00181
00182
00183
00184
00185
00186
00187 #ifdef REGEX_MALLOC
00188
00189 #define REGEX_ALLOCATE malloc
00190 #define REGEX_REALLOCATE(source, osize, nsize) realloc (source, nsize)
00191
00192 #else
00193
00194
00195 #ifndef alloca
00196
00197
00198 #ifdef __GNUC__
00199 #define alloca __builtin_alloca
00200 #else
00201 #if HAVE_ALLOCA_H
00202 #include <alloca.h>
00203 #else
00204 #ifndef _AIX
00205 char *alloca ();
00206 #endif
00207 #endif
00208 #endif
00209
00210 #endif
00211
00212 #define REGEX_ALLOCATE alloca
00213
00214
00215 #define REGEX_REALLOCATE(source, osize, nsize) \
00216 (destination = (char *) alloca (nsize), \
00217 bcopy (source, destination, osize), \
00218 destination)
00219
00220 #endif
00221
00222
00223
00224
00225
00226 #define FIRST_STRING_P(ptr) \
00227 (size1 && string1 <= (ptr) && (ptr) <= string1 + size1)
00228
00229
00230 #define TALLOC(n, t) ((t *) malloc ((n) * sizeof (t)))
00231 #define RETALLOC(addr, n, t) ((addr) = (t *) realloc (addr, (n) * sizeof (t)))
00232 #define REGEX_TALLOC(n, t) ((t *) REGEX_ALLOCATE ((n) * sizeof (t)))
00233
00234 #define BYTEWIDTH 8
00235
00236 #define STREQ(s1, s2) ((strcmp (s1, s2) == 0))
00237
00238 #define MAX(a, b) ((a) > (b) ? (a) : (b))
00239 #define MIN(a, b) ((a) < (b) ? (a) : (b))
00240
00241 typedef char boolean;
00242 #define false 0
00243 #define true 1
00244
00245
00246
00247
00248
00249
00250
00251
00252
00253
/* Opcodes of the compiled pattern.  A compiled command is one opcode byte
   followed by that opcode's operands.  The operand layouts noted below are
   the ones print_partial_compiled_pattern decodes; "two-byte number" means
   the encoding written by STORE_NUMBER and read by EXTRACT_NUMBER.  */
typedef enum
{
  /* No operands.  */
  no_op = 0,

  /* Followed by one count byte, then that many literal bytes.  */
  exactn = 1,

  /* No operands.  */
  anychar,

  /* Followed by one byte giving the number of bitmap bytes, then the
     bitmap itself: bit (c % BYTEWIDTH) of bitmap byte (c / BYTEWIDTH)
     stands for character c.  */
  charset,

  /* Same operand layout as charset.  */
  charset_not,

  /* Followed by two one-byte operands.  */
  start_memory,

  /* Followed by two one-byte operands.  */
  stop_memory,

  /* Followed by one byte.  */
  duplicate,

  /* No operands.  */
  begline,

  /* No operands.  */
  endline,

  /* No operands.  */
  begbuf,

  /* No operands.  */
  endbuf,

  /* Followed by a two-byte number.  STORE_JUMP stores the displacement as
     (to) - (loc) - 3, i.e. relative to the end of the 3-byte command.  */
  jump,

  /* Followed by a two-byte number.  */
  jump_past_alt,

  /* Followed by a two-byte number.  */
  on_failure_jump,

  /* Followed by a two-byte number.  */
  on_failure_keep_string_jump,

  /* Followed by a two-byte number.  */
  pop_failure_jump,

  /* Followed by a two-byte number.  */
  maybe_pop_jump,

  /* Followed by a two-byte number.  */
  dummy_failure_jump,

  /* No operands.  */
  push_dummy_failure,

  /* Followed by two two-byte numbers.  */
  succeed_n,

  /* Followed by two two-byte numbers.  */
  jump_n,

  /* Followed by two two-byte numbers.  */
  set_number_at,

  /* The remaining opcodes take no operands unless noted.  */
  wordchar,
  notwordchar,

  wordbeg,
  wordend,

  wordbound,
  notwordbound

#ifdef emacs
  ,before_dot,
  at_dot,
  after_dot,

  /* Followed by one byte.  */
  syntaxspec,

  /* Followed by one byte.  */
  notsyntaxspec
#endif
} re_opcode_t;
00384
00385
00386
00387
00388
/* Store NUMBER in the two bytes starting at DESTINATION: the low-order
   eight bits go in DESTINATION[0] and NUMBER >> 8 goes in DESTINATION[1].
   Both arguments are evaluated twice.  */
#define STORE_NUMBER(destination, number)				\
  do {									\
    (destination)[0] = (number) & 0377;					\
    (destination)[1] = (number) >> 8;					\
  } while (0)

/* Same as STORE_NUMBER, then advance DESTINATION past the two bytes just
   written.  DESTINATION must therefore be a modifiable lvalue.  */
#define STORE_NUMBER_AND_INCR(destination, number)			\
  do {									\
    STORE_NUMBER (destination, number);					\
    (destination) += 2;							\
  } while (0)
00404
00405
00406
00407
/* Read the two-byte number stored at SOURCE (low-order byte first, as
   written by STORE_NUMBER) into DESTINATION.  The second byte is
   sign-extended, so the result ranges over -32768..32767.

   The high byte is scaled by multiplication rather than `<< 8': for a
   negative high byte a left shift would shift a negative value, which is
   undefined behaviour in ISO C, whereas the product is always in range.
   SOURCE is evaluated twice.  */
#define EXTRACT_NUMBER(destination, source)				\
  do {									\
    (destination) = *(source) & 0377;					\
    (destination) += SIGN_EXTEND_CHAR (*((source) + 1)) * (1 << 8);	\
  } while (0)
00413
00414 #ifdef DEBUG
/* Function form of EXTRACT_NUMBER, compiled only when DEBUG is defined.
   Decodes the two-byte number at SOURCE (low-order byte first, second byte
   sign-extended) and stores it in *DEST.

   DEST    receives the decoded value, in the range -32768..32767.
   SOURCE  points at the two encoded bytes; it is not modified.  */
static void
extract_number (dest, source)
    int *dest;
    unsigned char *source;
{
  /* Sign-extend the high byte without relying on how a conversion to a
     signed type behaves: map 0..255 onto -128..127 arithmetically.  */
  int high = (int) (source[1] ^ 128) - 128;

  /* Multiply instead of shifting: left-shifting a negative HIGH would be
     undefined behaviour, while HIGH * 256 always fits in an int.  */
  *dest = (source[0] & 0377) + high * (1 << 8);
}
00424
00425 #ifndef EXTRACT_MACROS
00426 #undef EXTRACT_NUMBER
00427 #define EXTRACT_NUMBER(dest, src) extract_number (&dest, src)
00428 #endif
00429
00430 #endif
00431
00432
00433
00434
/* Same as EXTRACT_NUMBER, then advance SOURCE past the two bytes just
   read.  SOURCE must therefore be a modifiable lvalue.  */
#define EXTRACT_NUMBER_AND_INCR(destination, source)			\
  do {									\
    EXTRACT_NUMBER (destination, source);				\
    (source) += 2;							\
  } while (0)
00440
00441 #ifdef DEBUG
/* Function form of EXTRACT_NUMBER_AND_INCR, compiled only when DEBUG is
   defined.  Decodes the two-byte number at *SOURCE into *DESTINATION via
   extract_number, then moves *SOURCE two bytes forward.  */
static void
extract_number_and_incr (destination, source)
    int *destination;
    unsigned char **source;
{
  unsigned char *cursor = *source;

  extract_number (destination, cursor);
  *source = cursor + 2;
}
00450
00451 #ifndef EXTRACT_MACROS
00452 #undef EXTRACT_NUMBER_AND_INCR
00453 #define EXTRACT_NUMBER_AND_INCR(dest, src) \
00454 extract_number_and_incr (&dest, &src)
00455 #endif
00456
00457 #endif
00458
00459
00460
00461
00462
00463
00464
00465 #ifdef DEBUG
00466
00467
00468 #include <stdio.h>
00469
00470
00471 #include <assert.h>
00472
00473 static int debug = 0;
00474
00475 #define DEBUG_STATEMENT(e) e
00476 #define DEBUG_PRINT1(x) if (debug) printf (x)
00477 #define DEBUG_PRINT2(x1, x2) if (debug) printf (x1, x2)
00478 #define DEBUG_PRINT3(x1, x2, x3) if (debug) printf (x1, x2, x3)
00479 #define DEBUG_PRINT4(x1, x2, x3, x4) if (debug) printf (x1, x2, x3, x4)
00480 #define DEBUG_PRINT_COMPILED_PATTERN(p, s, e) \
00481 if (debug) print_partial_compiled_pattern (s, e)
00482 #define DEBUG_PRINT_DOUBLE_STRING(w, s1, sz1, s2, sz2) \
00483 if (debug) print_double_string (w, s1, sz1, s2, sz2)
00484
00485
00486 extern void printchar ();
00487
00488
00489
00490 void
00491 print_fastmap (fastmap)
00492 char *fastmap;
00493 {
00494 unsigned was_a_range = 0;
00495 unsigned i = 0;
00496
00497 while (i < (1 << BYTEWIDTH))
00498 {
00499 if (fastmap[i++])
00500 {
00501 was_a_range = 0;
00502 printchar (i - 1);
00503 while (i < (1 << BYTEWIDTH) && fastmap[i])
00504 {
00505 was_a_range = 1;
00506 i++;
00507 }
00508 if (was_a_range)
00509 {
00510 printf ("-");
00511 printchar (i - 1);
00512 }
00513 }
00514 }
00515 putchar ('\n');
00516 }
00517
00518
00519
00520
00521
00522 void
00523 print_partial_compiled_pattern (start, end)
00524 unsigned char *start;
00525 unsigned char *end;
00526 {
00527 int mcnt, mcnt2;
00528 unsigned char *p = start;
00529 unsigned char *pend = end;
00530
00531 if (start == NULL)
00532 {
00533 printf ("(null)\n");
00534 return;
00535 }
00536
00537
00538 while (p < pend)
00539 {
00540 switch ((re_opcode_t) *p++)
00541 {
00542 case no_op:
00543 printf ("/no_op");
00544 break;
00545
00546 case exactn:
00547 mcnt = *p++;
00548 printf ("/exactn/%d", mcnt);
00549 do
00550 {
00551 putchar ('/');
00552 printchar (*p++);
00553 }
00554 while (--mcnt);
00555 break;
00556
00557 case start_memory:
00558 mcnt = *p++;
00559 printf ("/start_memory/%d/%d", mcnt, *p++);
00560 break;
00561
00562 case stop_memory:
00563 mcnt = *p++;
00564 printf ("/stop_memory/%d/%d", mcnt, *p++);
00565 break;
00566
00567 case duplicate:
00568 printf ("/duplicate/%d", *p++);
00569 break;
00570
00571 case anychar:
00572 printf ("/anychar");
00573 break;
00574
00575 case charset:
00576 case charset_not:
00577 {
00578 register int c;
00579
00580 printf ("/charset%s",
00581 (re_opcode_t) *(p - 1) == charset_not ? "_not" : "");
00582
00583 assert (p + *p < pend);
00584
00585 for (c = 0; c < *p; c++)
00586 {
00587 unsigned bit;
00588 unsigned char map_byte = p[1 + c];
00589
00590 putchar ('/');
00591
00592 for (bit = 0; bit < BYTEWIDTH; bit++)
00593 if (map_byte & (1 << bit))
00594 printchar (c * BYTEWIDTH + bit);
00595 }
00596 p += 1 + *p;
00597 break;
00598 }
00599
00600 case begline:
00601 printf ("/begline");
00602 break;
00603
00604 case endline:
00605 printf ("/endline");
00606 break;
00607
00608 case on_failure_jump:
00609 extract_number_and_incr (&mcnt, &p);
00610 printf ("/on_failure_jump/0/%d", mcnt);
00611 break;
00612
00613 case on_failure_keep_string_jump:
00614 extract_number_and_incr (&mcnt, &p);
00615 printf ("/on_failure_keep_string_jump/0/%d", mcnt);
00616 break;
00617
00618 case dummy_failure_jump:
00619 extract_number_and_incr (&mcnt, &p);
00620 printf ("/dummy_failure_jump/0/%d", mcnt);
00621 break;
00622
00623 case push_dummy_failure:
00624 printf ("/push_dummy_failure");
00625 break;
00626
00627 case maybe_pop_jump:
00628 extract_number_and_incr (&mcnt, &p);
00629 printf ("/maybe_pop_jump/0/%d", mcnt);
00630 break;
00631
00632 case pop_failure_jump:
00633 extract_number_and_incr (&mcnt, &p);
00634 printf ("/pop_failure_jump/0/%d", mcnt);
00635 break;
00636
00637 case jump_past_alt:
00638 extract_number_and_incr (&mcnt, &p);
00639 printf ("/jump_past_alt/0/%d", mcnt);
00640 break;
00641
00642 case jump:
00643 extract_number_and_incr (&mcnt, &p);
00644 printf ("/jump/0/%d", mcnt);
00645 break;
00646
00647 case succeed_n:
00648 extract_number_and_incr (&mcnt, &p);
00649 extract_number_and_incr (&mcnt2, &p);
00650 printf ("/succeed_n/0/%d/0/%d", mcnt, mcnt2);
00651 break;
00652
00653 case jump_n:
00654 extract_number_and_incr (&mcnt, &p);
00655 extract_number_and_incr (&mcnt2, &p);
00656 printf ("/jump_n/0/%d/0/%d", mcnt, mcnt2);
00657 break;
00658
00659 case set_number_at:
00660 extract_number_and_incr (&mcnt, &p);
00661 extract_number_and_incr (&mcnt2, &p);
00662 printf ("/set_number_at/0/%d/0/%d", mcnt, mcnt2);
00663 break;
00664
00665 case wordbound:
00666 printf ("/wordbound");
00667 break;
00668
00669 case notwordbound:
00670 printf ("/notwordbound");
00671 break;
00672
00673 case wordbeg:
00674 printf ("/wordbeg");
00675 break;
00676
00677 case wordend:
00678 printf ("/wordend");
00679
00680 #ifdef emacs
00681 case before_dot:
00682 printf ("/before_dot");
00683 break;
00684
00685 case at_dot:
00686 printf ("/at_dot");
00687 break;
00688
00689 case after_dot:
00690 printf ("/after_dot");
00691 break;
00692
00693 case syntaxspec:
00694 printf ("/syntaxspec");
00695 mcnt = *p++;
00696 printf ("/%d", mcnt);
00697 break;
00698
00699 case notsyntaxspec:
00700 printf ("/notsyntaxspec");
00701 mcnt = *p++;
00702 printf ("/%d", mcnt);
00703 break;
00704 #endif
00705
00706 case wordchar:
00707 printf ("/wordchar");
00708 break;
00709
00710 case notwordchar:
00711 printf ("/notwordchar");
00712 break;
00713
00714 case begbuf:
00715 printf ("/begbuf");
00716 break;
00717
00718 case endbuf:
00719 printf ("/endbuf");
00720 break;
00721
00722 default:
00723 printf ("?%d", *(p-1));
00724 }
00725 }
00726 printf ("/\n");
00727 }
00728
00729
00730 void
00731 print_compiled_pattern (bufp)
00732 struct re_pattern_buffer *bufp;
00733 {
00734 unsigned char *buffer = bufp->buffer;
00735
00736 print_partial_compiled_pattern (buffer, buffer + bufp->used);
00737 printf ("%d bytes used/%d bytes allocated.\n", bufp->used, bufp->allocated);
00738
00739 if (bufp->fastmap_accurate && bufp->fastmap)
00740 {
00741 printf ("fastmap: ");
00742 print_fastmap (bufp->fastmap);
00743 }
00744
00745 printf ("re_nsub: %d\t", bufp->re_nsub);
00746 printf ("regs_alloc: %d\t", bufp->regs_allocated);
00747 printf ("can_be_null: %d\t", bufp->can_be_null);
00748 printf ("newline_anchor: %d\n", bufp->newline_anchor);
00749 printf ("no_sub: %d\t", bufp->no_sub);
00750 printf ("not_bol: %d\t", bufp->not_bol);
00751 printf ("not_eol: %d\t", bufp->not_eol);
00752 printf ("syntax: %d\n", bufp->syntax);
00753
00754 }
00755
00756
/* Print, via printchar, the text from WHERE to the end of the two-part
   subject string STRING1 (SIZE1 bytes) followed by STRING2 (SIZE2 bytes).
   If FIRST_STRING_P accepts WHERE, the rest of STRING1 from WHERE on is
   printed and then all of STRING2; otherwise WHERE is taken to point into
   STRING2 and only the rest of STRING2 is printed.  A NULL WHERE prints
   "(null)".  Compiled only when DEBUG is defined.  */
void
print_double_string (where, string1, size1, string2, size2)
    const char *where;
    const char *string1;
    const char *string2;
    int size1;
    int size2;
{
  unsigned offset;

  if (where == NULL)
    {
      printf ("(null)");
      return;
    }

  if (FIRST_STRING_P (where))
    {
      /* Tail of the first string, starting at WHERE.  */
      offset = where - string1;
      while (offset < size1)
        {
          printchar (string1[offset]);
          offset++;
        }

      /* Continue with the whole of the second string.  */
      where = string2;
    }

  offset = where - string2;
  while (offset < size2)
    {
      printchar (string2[offset]);
      offset++;
    }
}
00783
00784 #else
00785
00786 #undef assert
00787 #define assert(e)
00788
00789 #define DEBUG_STATEMENT(e)
00790 #define DEBUG_PRINT1(x)
00791 #define DEBUG_PRINT2(x1, x2)
00792 #define DEBUG_PRINT3(x1, x2, x3)
00793 #define DEBUG_PRINT4(x1, x2, x3, x4)
00794 #define DEBUG_PRINT_COMPILED_PATTERN(p, s, e)
00795 #define DEBUG_PRINT_DOUBLE_STRING(w, s1, sz1, s2, sz2)
00796
00797 #endif
00798
00799
00800
00801
00802 reg_syntax_t re_syntax_options = RE_SYNTAX_EMACS;
00803
00804
00805
00806
00807
00808
00809
00810
00811
00812 reg_syntax_t
00813 re_set_syntax (syntax)
00814 reg_syntax_t syntax;
00815 {
00816 reg_syntax_t ret = re_syntax_options;
00817
00818 re_syntax_options = syntax;
00819 return ret;
00820 }
00821
00822
00823
00824
/* Human-readable message for each error, indexed by error number.  Entry 0
   is NULL (no message).  NOTE(review): the order presumably mirrors the
   reg_errcode_t enumeration declared in gnuregex.h -- confirm against that
   header before adding or reordering entries.  */
static char *re_error_msg[] =
  { NULL,
    "No match",
    "Invalid regular expression",
    "Invalid collation character",
    "Invalid character class name",
    "Trailing backslash",
    "Invalid back reference",
    "Unmatched [ or [^",
    "Unmatched ( or \\(",
    "Unmatched \\{",
    "Invalid content of \\{\\}",
    "Invalid range end",
    "Memory exhausted",
    "Invalid preceding regular expression",
    "Premature end of regular expression",
    "Regular expression too big",
    "Unmatched ) or \\)",
  };
00844
00845
00846
00847 static void store_op1 (), store_op2 ();
00848 static void insert_op1 (), insert_op2 ();
00849 static boolean at_begline_loc_p (), at_endline_loc_p ();
00850 static boolean group_in_compile_stack ();
00851 static reg_errcode_t compile_range ();
00852
00853
00854
00855
00856
/* Fetch the next pattern byte into C as an unsigned char and advance `p'.
   If `translate' is non-null the byte is mapped through it first.  When the
   pattern is exhausted (p == pend) this makes the ENCLOSING FUNCTION return
   REG_EEND.  Uses the caller's locals `p', `pend' and `translate'.  */
#define PATFETCH(c)							\
  do {if (p == pend) return REG_EEND;					\
    c = (unsigned char) *p++;						\
    if (translate) c = translate[c];					\
  } while (0)

/* Same as PATFETCH, but the byte is stored untranslated.  */
#define PATFETCH_RAW(c)							\
  do {if (p == pend) return REG_EEND;					\
    c = (unsigned char) *p++;						\
  } while (0)

/* Step `p' back by one byte, undoing one PATFETCH or PATFETCH_RAW.  */
#define PATUNFETCH p--

/* Yield D mapped through `translate' when a translate table is in use,
   and D itself otherwise.  D is converted to unsigned char before it is
   used as an index.  D is evaluated once.  */
#define TRANSLATE(d) (translate ? translate[(unsigned char) (d)] : (d))
00879
00880
00881
00882
00883
/* Number of bytes given to a pattern buffer that has nothing allocated.  */
#define INIT_BUF_SIZE 32

/* Grow the pattern buffer (by repeated EXTEND_BUFFER) until N more bytes
   fit after the write pointer `b'.  Uses the caller's locals `b' and
   `bufp'; EXTEND_BUFFER may make the enclosing function return.  */
#define GET_BUFFER_SPACE(n)						\
    while (b - bufp->buffer + (n) > bufp->allocated)			\
      EXTEND_BUFFER ()

/* Append the byte C to the pattern buffer, growing it first if needed.  */
#define BUF_PUSH(c)							\
  do {									\
    GET_BUFFER_SPACE (1);						\
    *b++ = (unsigned char) (c);						\
  } while (0)

/* Append the two bytes C1, C2, reserving room for both up front.  */
#define BUF_PUSH_2(c1, c2)						\
  do {									\
    GET_BUFFER_SPACE (2);						\
    *b++ = (unsigned char) (c1);					\
    *b++ = (unsigned char) (c2);					\
  } while (0)

/* Append the three bytes C1, C2, C3, reserving room for all up front.  */
#define BUF_PUSH_3(c1, c2, c3)						\
  do {									\
    GET_BUFFER_SPACE (3);						\
    *b++ = (unsigned char) (c1);					\
    *b++ = (unsigned char) (c2);					\
    *b++ = (unsigned char) (c3);					\
  } while (0)
00916
00917
00918
00919
/* Write the jump command OP at LOC with destination TO.  The stored
   operand is the displacement (to) - (loc) - 3, i.e. it is measured from
   the end of the three-byte command (opcode plus two-byte number).  */
#define STORE_JUMP(op, loc, to)						\
  store_op1 (op, loc, (to) - (loc) - 3)

/* Same as STORE_JUMP, with the extra operand ARG passed to store_op2.  */
#define STORE_JUMP2(op, loc, to, arg)					\
  store_op2 (op, loc, (to) - (loc) - 3, arg)

/* Like STORE_JUMP but goes through insert_op1, which also receives the
   current end of the buffer `b' (a local of the caller).  */
#define INSERT_JUMP(op, loc, to)					\
  insert_op1 (op, loc, (to) - (loc) - 3, b)

/* Like STORE_JUMP2 but goes through insert_op2, which also receives `b'.  */
#define INSERT_JUMP2(op, loc, to, arg)					\
  insert_op2 (op, loc, (to) - (loc) - 3, arg, b)
00934
00935
00936
00937
00938
/* Upper bound, in bytes, on the size of a compiled pattern buffer.  */
#define MAX_BUF_SIZE (1L << 16)

/* Double the size of the pattern buffer, capped at MAX_BUF_SIZE, and
   relocate every pointer into it that the compiler keeps as a local:
   `b', `begalt', and -- when non-null -- `fixup_alt_jump', `laststart' and
   `pending_exact'.

   Makes the ENCLOSING FUNCTION return REG_ESIZE if the buffer is already
   MAX_BUF_SIZE bytes, or REG_ESPACE if realloc fails.

   The realloc result is held in a temporary and the new size is committed
   only on success, so after a failed realloc bufp->buffer still points at
   the old, valid allocation and bufp->allocated still describes it.
   Assigning the result straight to bufp->buffer would replace it with NULL
   and leak the old block.  */
#define EXTEND_BUFFER()							\
  do {									\
    unsigned char *old_buffer = bufp->buffer;				\
    unsigned char *new_buffer;						\
    unsigned long new_allocated;					\
    if (bufp->allocated == MAX_BUF_SIZE)				\
      return REG_ESIZE;							\
    new_allocated = bufp->allocated;					\
    new_allocated <<= 1;						\
    if (new_allocated > MAX_BUF_SIZE)					\
      new_allocated = MAX_BUF_SIZE;					\
    new_buffer = (unsigned char *) realloc (old_buffer, new_allocated);	\
    if (new_buffer == NULL)						\
      return REG_ESPACE;						\
    bufp->buffer = new_buffer;						\
    bufp->allocated = new_allocated;					\
    /* If the block moved, rebase every pointer that referred to it.  */\
    if (old_buffer != new_buffer)					\
      {									\
        b = (b - old_buffer) + new_buffer;				\
        begalt = (begalt - old_buffer) + new_buffer;			\
        if (fixup_alt_jump)						\
          fixup_alt_jump = (fixup_alt_jump - old_buffer) + new_buffer;	\
        if (laststart)							\
          laststart = (laststart - old_buffer) + new_buffer;		\
        if (pending_exact)						\
          pending_exact = (pending_exact - old_buffer) + new_buffer;	\
      }									\
  } while (0)
00970
00971
00972
00973
00974
/* Largest register (group) number handled.  */
#define MAX_REGNUM 255

/* Type used for register (group) numbers.  */
typedef unsigned regnum_t;

/* Type used for offsets into the compiled pattern.  Positions are saved
   on the compile stack as offsets rather than pointers; EXTEND_BUFFER may
   move the buffer, which would invalidate saved pointers.  */
typedef int pattern_offset_t;

/* One compile-stack entry: the compiler state saved for a group.  */
typedef struct
{
  pattern_offset_t begalt_offset;
  pattern_offset_t fixup_alt_jump;
  pattern_offset_t inner_group_offset;
  pattern_offset_t laststart_offset;
  regnum_t regnum;
} compile_stack_elt_t;

/* The compile stack itself: a heap-allocated array of entries.  */
typedef struct
{
  compile_stack_elt_t *stack;
  unsigned size;	/* Number of entries allocated.  */
  unsigned avail;	/* Number of entries in use; index of the next free one.  */
} compile_stack_type;

/* Number of entries allocated for the compile stack initially.  */
#define INIT_COMPILE_STACK_SIZE 32

/* These macros refer to the caller's local variable `compile_stack'.  */
#define COMPILE_STACK_EMPTY  (compile_stack.avail == 0)
#define COMPILE_STACK_FULL  (compile_stack.avail == compile_stack.size)

/* The entry at index `avail', i.e. the next slot to be filled -- not the
   most recently pushed entry.  */
#define COMPILE_STACK_TOP (compile_stack.stack[compile_stack.avail])
01013
01014
01015
/* Set the bit for character C in the bitmap that starts at `b' (a local of
   the caller): bit (C % BYTEWIDTH) of byte (C / BYTEWIDTH).  C is converted
   to unsigned char first, so a negative char selects the same bit as its
   unsigned counterpart.

   Both uses of the argument are parenthesized so that an expression
   argument is converted as a whole; an unparenthesized `(unsigned char) c'
   would bind the cast to the first operand only and could select the wrong
   bit.  C is evaluated twice.  */
#define SET_LIST_BIT(c)							\
  (b[((unsigned char) (c)) / BYTEWIDTH]					\
   |= 1 << (((unsigned char) (c)) % BYTEWIDTH))
01019
01020
01021
/* Read a run of decimal digits from the pattern and accumulate its value
   in NUM.  A negative NUM is reset to 0 when the first digit is seen, so a
   caller can pre-set NUM to a negative value and detect "no digits read".
   On exit `c' holds the last character fetched (the first non-digit, unless
   the pattern ended).  Uses the caller's locals `p', `pend' and `c';
   PATFETCH may make the enclosing function return.
   NOTE(review): the accumulation num * 10 + c - '0' is not range-checked,
   so a very long digit string can overflow NUM -- confirm that callers
   bound the input.  */
#define GET_UNSIGNED_NUMBER(num)					\
  { if (p != pend)							\
     {									\
       PATFETCH (c);							\
       while (ISDIGIT (c))						\
         {								\
           if (num < 0)							\
              num = 0;							\
           num = num * 10 + c - '0';					\
           if (p == pend)						\
              break;							\
           PATFETCH (c);						\
         }								\
       }								\
    }

/* Length of the longest name accepted by IS_CHAR_CLASS ("xdigit").  */
#define CHAR_CLASS_MAX_LENGTH  6

/* Nonzero iff STRING is exactly one of the twelve character-class names
   listed below.  STRING is evaluated up to twelve times.  */
#define IS_CHAR_CLASS(string)						\
   (STREQ (string, "alpha") || STREQ (string, "upper")			\
    || STREQ (string, "lower") || STREQ (string, "digit")		\
    || STREQ (string, "alnum") || STREQ (string, "xdigit")		\
    || STREQ (string, "space") || STREQ (string, "print")		\
    || STREQ (string, "punct") || STREQ (string, "graph")		\
    || STREQ (string, "cntrl") || STREQ (string, "blank"))
01047
01048
01049
01050
01051
01052
01053
01054
01055
01056
01057
01058
01059
01060
01061
01062
01063
01064
01065
01066 static reg_errcode_t
01067 regex_compile (pattern, size, syntax, bufp)
01068 char *pattern;
01069 int size;
01070 reg_syntax_t syntax;
01071 struct re_pattern_buffer *bufp;
01072 {
01073
01074
01075
01076 register unsigned char c, c1;
01077
01078
01079 char *p1;
01080
01081
01082 register unsigned char *b;
01083
01084
01085 compile_stack_type compile_stack;
01086
01087
01088 char *p = pattern;
01089 char *pend = pattern + size;
01090
01091
01092 char *translate = bufp->translate;
01093
01094
01095
01096
01097
01098 unsigned char *pending_exact = 0;
01099
01100
01101
01102
01103 unsigned char *laststart = 0;
01104
01105
01106 unsigned char *begalt;
01107
01108
01109
01110 char *beg_interval;
01111
01112
01113
01114
01115 unsigned char *fixup_alt_jump = 0;
01116
01117
01118
01119
01120 regnum_t regnum = 0;
01121
01122 #ifdef DEBUG
01123 DEBUG_PRINT1 ("\nCompiling pattern: ");
01124 if (debug)
01125 {
01126 unsigned debug_count;
01127
01128 for (debug_count = 0; debug_count < size; debug_count++)
01129 printchar (pattern[debug_count]);
01130 putchar ('\n');
01131 }
01132 #endif
01133
01134
01135 compile_stack.stack = TALLOC (INIT_COMPILE_STACK_SIZE, compile_stack_elt_t);
01136 if (compile_stack.stack == NULL)
01137 return REG_ESPACE;
01138
01139 compile_stack.size = INIT_COMPILE_STACK_SIZE;
01140 compile_stack.avail = 0;
01141
01142
01143 bufp->syntax = syntax;
01144 bufp->fastmap_accurate = 0;
01145 bufp->not_bol = bufp->not_eol = 0;
01146
01147
01148
01149
01150 bufp->used = 0;
01151
01152
01153 bufp->re_nsub = 0;
01154
01155 #if !defined (emacs) && !defined (SYNTAX_TABLE)
01156
01157 init_syntax_once ();
01158 #endif
01159
01160 if (bufp->allocated == 0)
01161 {
01162 if (bufp->buffer)
01163 {
01164
01165
01166 RETALLOC (bufp->buffer, INIT_BUF_SIZE, unsigned char);
01167 }
01168 else
01169 {
01170 bufp->buffer = TALLOC (INIT_BUF_SIZE, unsigned char);
01171 }
01172 if (!bufp->buffer) return REG_ESPACE;
01173
01174 bufp->allocated = INIT_BUF_SIZE;
01175 }
01176
01177 begalt = b = bufp->buffer;
01178
01179
01180 while (p != pend)
01181 {
01182 PATFETCH (c);
01183
01184 switch (c)
01185 {
01186 case '^':
01187 {
01188 if (
01189 p == pattern + 1
01190
01191 || syntax & RE_CONTEXT_INDEP_ANCHORS
01192
01193 || at_begline_loc_p (pattern, p, syntax))
01194 BUF_PUSH (begline);
01195 else
01196 goto normal_char;
01197 }
01198 break;
01199
01200
01201 case '$':
01202 {
01203 if (
01204 p == pend
01205
01206 || syntax & RE_CONTEXT_INDEP_ANCHORS
01207
01208 || at_endline_loc_p (p, pend, syntax))
01209 BUF_PUSH (endline);
01210 else
01211 goto normal_char;
01212 }
01213 break;
01214
01215
01216 case '+':
01217 case '?':
01218 if ((syntax & RE_BK_PLUS_QM)
01219 || (syntax & RE_LIMITED_OPS))
01220 goto normal_char;
01221 handle_plus:
01222 case '*':
01223
01224 if (!laststart)
01225 {
01226 if (syntax & RE_CONTEXT_INVALID_OPS)
01227 return REG_BADRPT;
01228 else if (!(syntax & RE_CONTEXT_INDEP_OPS))
01229 goto normal_char;
01230 }
01231
01232 {
01233
01234 boolean keep_string_p = false;
01235
01236
01237 char zero_times_ok = 0, many_times_ok = 0;
01238
01239
01240
01241
01242
01243
01244 for (;;)
01245 {
01246 zero_times_ok |= c != '+';
01247 many_times_ok |= c != '?';
01248
01249 if (p == pend)
01250 break;
01251
01252 PATFETCH (c);
01253
01254 if (c == '*'
01255 || (!(syntax & RE_BK_PLUS_QM) && (c == '+' || c == '?')))
01256 ;
01257
01258 else if (syntax & RE_BK_PLUS_QM && c == '\\')
01259 {
01260 if (p == pend) return REG_EESCAPE;
01261
01262 PATFETCH (c1);
01263 if (!(c1 == '+' || c1 == '?'))
01264 {
01265 PATUNFETCH;
01266 PATUNFETCH;
01267 break;
01268 }
01269
01270 c = c1;
01271 }
01272 else
01273 {
01274 PATUNFETCH;
01275 break;
01276 }
01277
01278
01279 }
01280
01281
01282
01283 if (!laststart)
01284 break;
01285
01286
01287
01288 if (many_times_ok)
01289 {
01290
01291
01292
01293
01294
01295
01296
01297
01298
01299 assert (p - 1 > pattern);
01300
01301
01302 GET_BUFFER_SPACE (3);
01303
01304
01305
01306
01307
01308
01309 if (TRANSLATE (*(p - 2)) == TRANSLATE ('.')
01310 && zero_times_ok
01311 && p < pend && TRANSLATE (*p) == TRANSLATE ('\n')
01312 && !(syntax & RE_DOT_NEWLINE))
01313 {
01314 STORE_JUMP (jump, b, laststart);
01315 keep_string_p = true;
01316 }
01317 else
01318
01319 STORE_JUMP (maybe_pop_jump, b, laststart - 3);
01320
01321
01322 b += 3;
01323 }
01324
01325