00001
00002
00003
00004
00005
00006
00007
00008
00009
00010
00011
00012
00013
00014
00015
00016
00017
00018
00019
00020
00021 static reg_errcode_t re_compile_internal _RE_ARGS((regex_t *preg, const char * pattern,
00022 int length, reg_syntax_t syntax));
00023 static void re_compile_fastmap_iter _RE_ARGS((regex_t *bufp,
00024 const re_dfastate_t *init_state,
00025 char *fastmap));
00026 static reg_errcode_t init_dfa _RE_ARGS((re_dfa_t *dfa, int pat_len));
00027 static reg_errcode_t init_word_char _RE_ARGS((re_dfa_t *dfa));
00028 #ifdef RE_ENABLE_I18N
00029 static void free_charset _RE_ARGS((re_charset_t *cset));
00030 #endif
00031 static void free_workarea_compile _RE_ARGS((regex_t *preg));
00032 static reg_errcode_t create_initial_state _RE_ARGS((re_dfa_t *dfa));
00033 static reg_errcode_t analyze _RE_ARGS((re_dfa_t *dfa));
00034 static reg_errcode_t analyze_tree _RE_ARGS((re_dfa_t *dfa, bin_tree_t *node));
00035 static void calc_first _RE_ARGS((re_dfa_t *dfa, bin_tree_t *node));
00036 static void calc_next _RE_ARGS((re_dfa_t *dfa, bin_tree_t *node));
00037 static void calc_epsdest _RE_ARGS((re_dfa_t *dfa, bin_tree_t *node));
00038 static reg_errcode_t duplicate_node_closure _RE_ARGS((re_dfa_t *dfa, int top_org_node,
00039 int top_clone_node, int root_node,
00040 unsigned int constraint));
00041 static reg_errcode_t duplicate_node _RE_ARGS((int *new_idx, re_dfa_t *dfa, int org_idx,
00042 unsigned int constraint));
00043 static int search_duplicated_node _RE_ARGS((re_dfa_t *dfa, int org_node,
00044 unsigned int constraint));
00045 static reg_errcode_t calc_eclosure _RE_ARGS((re_dfa_t *dfa));
00046 static reg_errcode_t calc_eclosure_iter _RE_ARGS((re_node_set *new_set, re_dfa_t *dfa,
00047 int node, int root));
00048 static void calc_inveclosure _RE_ARGS((re_dfa_t *dfa));
00049 static int fetch_number _RE_ARGS((re_string_t *input, re_token_t *token,
00050 reg_syntax_t syntax));
00051 static re_token_t fetch_token _RE_ARGS((re_string_t *input, reg_syntax_t syntax));
00052 static int peek_token _RE_ARGS((re_token_t *token, re_string_t *input,
00053 reg_syntax_t syntax));
00054 static int peek_token_bracket _RE_ARGS((re_token_t *token, re_string_t *input,
00055 reg_syntax_t syntax));
00056 static bin_tree_t *parse _RE_ARGS((re_string_t *regexp, regex_t *preg,
00057 reg_syntax_t syntax, reg_errcode_t *err));
00058 static bin_tree_t *parse_reg_exp _RE_ARGS((re_string_t *regexp, regex_t *preg,
00059 re_token_t *token, reg_syntax_t syntax,
00060 int nest, reg_errcode_t *err));
00061 static bin_tree_t *parse_branch _RE_ARGS((re_string_t *regexp, regex_t *preg,
00062 re_token_t *token, reg_syntax_t syntax,
00063 int nest, reg_errcode_t *err));
00064 static bin_tree_t *parse_expression _RE_ARGS((re_string_t *regexp, regex_t *preg,
00065 re_token_t *token, reg_syntax_t syntax,
00066 int nest, reg_errcode_t *err));
00067 static bin_tree_t *parse_sub_exp _RE_ARGS((re_string_t *regexp, regex_t *preg,
00068 re_token_t *token, reg_syntax_t syntax,
00069 int nest, reg_errcode_t *err));
00070 static bin_tree_t *parse_dup_op _RE_ARGS((bin_tree_t *dup_elem, re_string_t *regexp,
00071 re_dfa_t *dfa, re_token_t *token,
00072 reg_syntax_t syntax, reg_errcode_t *err));
00073 static bin_tree_t *parse_bracket_exp _RE_ARGS((re_string_t *regexp, re_dfa_t *dfa,
00074 re_token_t *token, reg_syntax_t syntax,
00075 reg_errcode_t *err));
00076 static reg_errcode_t parse_bracket_element _RE_ARGS((bracket_elem_t *elem,
00077 re_string_t *regexp,
00078 re_token_t *token, int token_len,
00079 re_dfa_t *dfa,
00080 reg_syntax_t syntax));
00081 static reg_errcode_t parse_bracket_symbol _RE_ARGS((bracket_elem_t *elem,
00082 re_string_t *regexp,
00083 re_token_t *token));
00084 #ifndef _LIBC
00085 # ifdef RE_ENABLE_I18N
00086 static reg_errcode_t build_range_exp _RE_ARGS((re_bitset_ptr_t sbcset,
00087 re_charset_t *mbcset, int *range_alloc,
00088 bracket_elem_t *start_elem,
00089 bracket_elem_t *end_elem));
00090 static reg_errcode_t build_collating_symbol _RE_ARGS((re_bitset_ptr_t sbcset,
00091 re_charset_t *mbcset,
00092 int *coll_sym_alloc,
00093 const unsigned char *name));
00094 # else
00095 static reg_errcode_t build_range_exp _RE_ARGS((re_bitset_ptr_t sbcset,
00096 bracket_elem_t *start_elem,
00097 bracket_elem_t *end_elem));
00098 static reg_errcode_t build_collating_symbol _RE_ARGS((re_bitset_ptr_t sbcset,
00099 const unsigned char *name));
00100 # endif
00101 #endif
00102 #ifdef RE_ENABLE_I18N
00103 static reg_errcode_t build_equiv_class _RE_ARGS((re_bitset_ptr_t sbcset,
00104 re_charset_t *mbcset,
00105 int *equiv_class_alloc,
00106 const unsigned char *name));
00107 static reg_errcode_t build_charclass _RE_ARGS((RE_TRANSLATE_TYPE trans,
00108 re_bitset_ptr_t sbcset,
00109 re_charset_t *mbcset,
00110 int *char_class_alloc,
00111 const unsigned char *class_name,
00112 reg_syntax_t syntax));
00113 #else
00114 static reg_errcode_t build_equiv_class _RE_ARGS((re_bitset_ptr_t sbcset,
00115 const unsigned char *name));
00116 static reg_errcode_t build_charclass _RE_ARGS((RE_TRANSLATE_TYPE trans,
00117 re_bitset_ptr_t sbcset,
00118 const unsigned char *class_name,
00119 reg_syntax_t syntax));
00120 #endif
00121 static bin_tree_t *build_word_op _RE_ARGS((re_dfa_t *dfa, RE_TRANSLATE_TYPE trans,
00122 int not, reg_errcode_t *err));
00123 static void free_bin_tree _RE_ARGS((bin_tree_t *tree));
00124 static bin_tree_t *create_tree _RE_ARGS((bin_tree_t *left, bin_tree_t *right,
00125 re_token_type_t type, int index));
00126 static bin_tree_t *duplicate_tree _RE_ARGS((const bin_tree_t *src, re_dfa_t *dfa));
00127
00128
00129
00130
00131
00132
00133 const char __re_error_msgid[] attribute_hidden =
00134 {
00135 #define REG_NOERROR_IDX 0
00136 gettext_noop ("Success")
00137 "\0"
00138 #define REG_NOMATCH_IDX (REG_NOERROR_IDX + sizeof "Success")
00139 gettext_noop ("No match")
00140 "\0"
00141 #define REG_BADPAT_IDX (REG_NOMATCH_IDX + sizeof "No match")
00142 gettext_noop ("Invalid regular expression")
00143 "\0"
00144 #define REG_ECOLLATE_IDX (REG_BADPAT_IDX + sizeof "Invalid regular expression")
00145 gettext_noop ("Invalid collation character")
00146 "\0"
00147 #define REG_ECTYPE_IDX (REG_ECOLLATE_IDX + sizeof "Invalid collation character")
00148 gettext_noop ("Invalid character class name")
00149 "\0"
00150 #define REG_EESCAPE_IDX (REG_ECTYPE_IDX + sizeof "Invalid character class name")
00151 gettext_noop ("Trailing backslash")
00152 "\0"
00153 #define REG_ESUBREG_IDX (REG_EESCAPE_IDX + sizeof "Trailing backslash")
00154 gettext_noop ("Invalid back reference")
00155 "\0"
00156 #define REG_EBRACK_IDX (REG_ESUBREG_IDX + sizeof "Invalid back reference")
00157 gettext_noop ("Unmatched [ or [^")
00158 "\0"
00159 #define REG_EPAREN_IDX (REG_EBRACK_IDX + sizeof "Unmatched [ or [^")
00160 gettext_noop ("Unmatched ( or \\(")
00161 "\0"
00162 #define REG_EBRACE_IDX (REG_EPAREN_IDX + sizeof "Unmatched ( or \\(")
00163 gettext_noop ("Unmatched \\{")
00164 "\0"
00165 #define REG_BADBR_IDX (REG_EBRACE_IDX + sizeof "Unmatched \\{")
00166 gettext_noop ("Invalid content of \\{\\}")
00167 "\0"
00168 #define REG_ERANGE_IDX (REG_BADBR_IDX + sizeof "Invalid content of \\{\\}")
00169 gettext_noop ("Invalid range end")
00170 "\0"
00171 #define REG_ESPACE_IDX (REG_ERANGE_IDX + sizeof "Invalid range end")
00172 gettext_noop ("Memory exhausted")
00173 "\0"
00174 #define REG_BADRPT_IDX (REG_ESPACE_IDX + sizeof "Memory exhausted")
00175 gettext_noop ("Invalid preceding regular expression")
00176 "\0"
00177 #define REG_EEND_IDX (REG_BADRPT_IDX + sizeof "Invalid preceding regular expression")
00178 gettext_noop ("Premature end of regular expression")
00179 "\0"
00180 #define REG_ESIZE_IDX (REG_EEND_IDX + sizeof "Premature end of regular expression")
00181 gettext_noop ("Regular expression too big")
00182 "\0"
00183 #define REG_ERPAREN_IDX (REG_ESIZE_IDX + sizeof "Regular expression too big")
00184 gettext_noop ("Unmatched ) or \\)")
00185 };
00186
00187 const size_t __re_error_msgid_idx[] attribute_hidden =
00188 {
00189 REG_NOERROR_IDX,
00190 REG_NOMATCH_IDX,
00191 REG_BADPAT_IDX,
00192 REG_ECOLLATE_IDX,
00193 REG_ECTYPE_IDX,
00194 REG_EESCAPE_IDX,
00195 REG_ESUBREG_IDX,
00196 REG_EBRACK_IDX,
00197 REG_EPAREN_IDX,
00198 REG_EBRACE_IDX,
00199 REG_BADBR_IDX,
00200 REG_ERANGE_IDX,
00201 REG_ESPACE_IDX,
00202 REG_BADRPT_IDX,
00203 REG_EEND_IDX,
00204 REG_ESIZE_IDX,
00205 REG_ERPAREN_IDX
00206 };
00207
00208
00209
00210
00211
00212
00213
00214
00215
00216
00217 const char *
00218 re_compile_pattern (pattern, length, bufp)
00219 const char *pattern;
00220 size_t length;
00221 struct re_pattern_buffer *bufp;
00222 {
00223 reg_errcode_t ret;
00224
00225
00226
00227
00228 bufp->no_sub = 0;
00229
00230
00231 bufp->newline_anchor = 1;
00232
00233 ret = re_compile_internal (bufp, pattern, length, re_syntax_options);
00234
00235 if (!ret)
00236 return NULL;
00237 return gettext (__re_error_msgid + __re_error_msgid_idx[(int) ret]);
00238 }
00239 #ifdef _LIBC
00240 weak_alias (__re_compile_pattern, re_compile_pattern)
00241 #endif
00242
00243
00244
00245
00246
00247
00248 reg_syntax_t re_syntax_options;
00249
00250
00251
00252
00253
00254
00255
00256
00257
00258 reg_syntax_t
00259 re_set_syntax (syntax)
00260 reg_syntax_t syntax;
00261 {
00262 reg_syntax_t ret = re_syntax_options;
00263
00264 re_syntax_options = syntax;
00265 #ifdef RE_ENABLE_I18N
00266 re_mb_cur_max = MB_CUR_MAX;
00267 #endif
00268 return ret;
00269 }
00270 #ifdef _LIBC
00271 weak_alias (__re_set_syntax, re_set_syntax)
00272 #endif
00273
00274 int
00275 re_compile_fastmap (bufp)
00276 struct re_pattern_buffer *bufp;
00277 {
00278 re_dfa_t *dfa = (re_dfa_t *) bufp->buffer;
00279 char *fastmap = bufp->fastmap;
00280
00281 memset (fastmap, '\0', sizeof (char) * SBC_MAX);
00282 re_compile_fastmap_iter (bufp, dfa->init_state, fastmap);
00283 if (dfa->init_state != dfa->init_state_word)
00284 re_compile_fastmap_iter (bufp, dfa->init_state_word, fastmap);
00285 if (dfa->init_state != dfa->init_state_nl)
00286 re_compile_fastmap_iter (bufp, dfa->init_state_nl, fastmap);
00287 if (dfa->init_state != dfa->init_state_begbuf)
00288 re_compile_fastmap_iter (bufp, dfa->init_state_begbuf, fastmap);
00289 bufp->fastmap_accurate = 1;
00290 return 0;
00291 }
00292 #ifdef _LIBC
00293 weak_alias (__re_compile_fastmap, re_compile_fastmap)
00294 #endif
00295
00296 static inline void
00297 re_set_fastmap (char *fastmap, int icase, int ch)
00298 {
00299 fastmap[ch] = 1;
00300 if (icase)
00301 fastmap[tolower (ch)] = 1;
00302 }
00303
00304
00305
00306
00307 static void
00308 re_compile_fastmap_iter (bufp, init_state, fastmap)
00309 regex_t *bufp;
00310 const re_dfastate_t *init_state;
00311 char *fastmap;
00312 {
00313 re_dfa_t *dfa = (re_dfa_t *) bufp->buffer;
00314 int node_cnt;
00315 #ifdef RE_ENABLE_I18N
00316 int icase = (re_mb_cur_max == 1 && (bufp->syntax & RE_ICASE));
00317 #else
00318 int icase = (bufp->syntax & RE_ICASE);
00319 #endif
00320 for (node_cnt = 0; node_cnt < init_state->nodes.nelem; ++node_cnt)
00321 {
00322 int node = init_state->nodes.elems[node_cnt];
00323 re_token_type_t type = dfa->nodes[node].type;
00324
00325 if (type == CHARACTER)
00326 re_set_fastmap (fastmap, icase, dfa->nodes[node].opr.c);
00327 else if (type == SIMPLE_BRACKET)
00328 {
00329 int i, j, ch;
00330 for (i = 0, ch = 0; i < BITSET_UINTS; ++i)
00331 for (j = 0; j < UINT_BITS; ++j, ++ch)
00332 if (dfa->nodes[node].opr.sbcset[i] & (1UL << j))
00333 re_set_fastmap (fastmap, icase, ch);
00334 }
00335 #ifdef RE_ENABLE_I18N
00336 else if (type == COMPLEX_BRACKET)
00337 {
00338 int i;
00339 re_charset_t *cset = dfa->nodes[node].opr.mbcset;
00340 if (cset->non_match || cset->ncoll_syms || cset->nequiv_classes
00341 || cset->nranges || cset->nchar_classes)
00342 {
00343 # ifdef _LIBC
00344 if (_NL_CURRENT_WORD (LC_COLLATE, _NL_COLLATE_NRULES) != 0)
00345 {
00346
00347
00348
00349
00350
00351
00352 int j, ch;
00353 const int32_t *table = (const int32_t *)
00354 _NL_CURRENT (LC_COLLATE, _NL_COLLATE_TABLEMB);
00355 for (i = 0, ch = 0; i < BITSET_UINTS; ++i)
00356 for (j = 0; j < UINT_BITS; ++j, ++ch)
00357 if (table[ch] < 0)
00358 re_set_fastmap (fastmap, icase, ch);
00359 }
00360 # else
00361 if (re_mb_cur_max > 1)
00362 for (i = 0; i < SBC_MAX; ++i)
00363 if (__btowc (i) == WEOF)
00364 re_set_fastmap (fastmap, icase, i);
00365 # endif
00366 }
00367 for (i = 0; i < cset->nmbchars; ++i)
00368 {
00369 char buf[256];
00370 mbstate_t state;
00371 memset (&state, '\0', sizeof (state));
00372 __wcrtomb (buf, cset->mbchars[i], &state);
00373 re_set_fastmap (fastmap, icase, *(unsigned char *) buf);
00374 }
00375 }
00376 #endif
00377 else if (type == END_OF_RE || type == OP_PERIOD)
00378 {
00379 memset (fastmap, '\1', sizeof (char) * SBC_MAX);
00380 if (type == END_OF_RE)
00381 bufp->can_be_null = 1;
00382 return;
00383 }
00384 }
00385 }
00386
00387
00388
00389
00390
00391
00392
00393
00394
00395
00396
00397
00398
00399
00400
00401
00402
00403
00404
00405
00406
00407
00408
00409
00410
00411
00412
00413
00414
00415
00416
00417
00418
00419
00420
00421
00422
00423 int
00424 regcomp (preg, pattern, cflags)
00425 regex_t *__restrict preg;
00426 const char *__restrict pattern;
00427 int cflags;
00428 {
00429 reg_errcode_t ret;
00430 reg_syntax_t syntax = ((cflags & REG_EXTENDED) ? RE_SYNTAX_POSIX_EXTENDED
00431 : RE_SYNTAX_POSIX_BASIC);
00432
00433 preg->buffer = NULL;
00434 preg->allocated = 0;
00435 preg->used = 0;
00436
00437
00438 preg->fastmap = re_malloc (char, SBC_MAX);
00439 if (BE (preg->fastmap == NULL, 0))
00440 return REG_ESPACE;
00441
00442 syntax |= (cflags & REG_ICASE) ? RE_ICASE : 0;
00443
00444
00445 if (cflags & REG_NEWLINE)
00446 {
00447 syntax &= ~RE_DOT_NEWLINE;
00448 syntax |= RE_HAT_LISTS_NOT_NEWLINE;
00449
00450 preg->newline_anchor = 1;
00451 }
00452 else
00453 preg->newline_anchor = 0;
00454 preg->no_sub = !!(cflags & REG_NOSUB);
00455 preg->translate = NULL;
00456
00457 ret = re_compile_internal (preg, pattern, strlen (pattern), syntax);
00458
00459
00460
00461 if (ret == REG_ERPAREN)
00462 ret = REG_EPAREN;
00463
00464
00465 if (BE (ret == REG_NOERROR, 1))
00466
00467
00468 (void) re_compile_fastmap (preg);
00469 else
00470 {
00471
00472 re_free (preg->fastmap);
00473 preg->fastmap = NULL;
00474 }
00475
00476 return (int) ret;
00477 }
00478 #ifdef _LIBC
00479 weak_alias (__regcomp, regcomp)
00480 #endif
00481
00482
00483
00484
00485 size_t
00486 regerror (errcode, preg, errbuf, errbuf_size)
00487 int errcode;
00488 const regex_t *preg;
00489 char *errbuf;
00490 size_t errbuf_size;
00491 {
00492 const char *msg;
00493 size_t msg_size;
00494
00495 if (BE (errcode < 0
00496 || errcode >= (int) (sizeof (__re_error_msgid_idx)
00497 / sizeof (__re_error_msgid_idx[0])), 0))
00498
00499
00500
00501
00502 abort ();
00503
00504 msg = gettext (__re_error_msgid + __re_error_msgid_idx[errcode]);
00505
00506 msg_size = strlen (msg) + 1;
00507
00508 if (BE (errbuf_size != 0, 1))
00509 {
00510 if (BE (msg_size > errbuf_size, 0))
00511 {
00512 memcpy (errbuf, msg, errbuf_size - 1);
00513 errbuf[errbuf_size - 1] = 0;
00514 }
00515 else
00516 memcpy (errbuf, msg, msg_size);
00517 }
00518
00519 return msg_size;
00520 }
00521 #ifdef _LIBC
00522 weak_alias (__regerror, regerror)
00523 #endif
00524
00525
00526 static void
00527 free_dfa_content (re_dfa_t *dfa)
00528 {
00529 int i, j;
00530
00531 re_free (dfa->subexps);
00532
00533 for (i = 0; i < dfa->nodes_len; ++i)
00534 {
00535 re_token_t *node = dfa->nodes + i;
00536 #ifdef RE_ENABLE_I18N
00537 if (node->type == COMPLEX_BRACKET && node->duplicated == 0)
00538 free_charset (node->opr.mbcset);
00539 else
00540 #endif
00541 if (node->type == SIMPLE_BRACKET && node->duplicated == 0)
00542 re_free (node->opr.sbcset);
00543 }
00544 re_free (dfa->nexts);
00545 for (i = 0; i < dfa->nodes_len; ++i)
00546 {
00547 if (dfa->eclosures != NULL)
00548 re_node_set_free (dfa->eclosures + i);
00549 if (dfa->inveclosures != NULL)
00550 re_node_set_free (dfa->inveclosures + i);
00551 if (dfa->edests != NULL)
00552 re_node_set_free (dfa->edests + i);
00553 }
00554 re_free (dfa->edests);
00555 re_free (dfa->eclosures);
00556 re_free (dfa->inveclosures);
00557 re_free (dfa->nodes);
00558
00559 for (i = 0; i <= dfa->state_hash_mask; ++i)
00560 {
00561 struct re_state_table_entry *entry = dfa->state_table + i;
00562 for (j = 0; j < entry->num; ++j)
00563 {
00564 re_dfastate_t *state = entry->array[j];
00565 free_state (state);
00566 }
00567 re_free (entry->array);
00568 }
00569 re_free (dfa->state_table);
00570
00571 if (dfa->word_char != NULL)
00572 re_free (dfa->word_char);
00573 #ifdef DEBUG
00574 re_free (dfa->re_str);
00575 #endif
00576
00577 re_free (dfa);
00578 }
00579
00580
00581
00582
00583 void
00584 regfree (preg)
00585 regex_t *preg;
00586 {
00587 re_dfa_t *dfa = (re_dfa_t *) preg->buffer;
00588 if (BE (dfa != NULL, 1))
00589 free_dfa_content (dfa);
00590
00591 re_free (preg->fastmap);
00592 }
00593 #ifdef _LIBC
00594 weak_alias (__regfree, regfree)
00595 #endif
00596
00597
00598
00599
00600 #if defined _REGEX_RE_COMP || defined _LIBC
00601
00602
00603 static struct re_pattern_buffer re_comp_buf;
00604
00605 char *
00606 # ifdef _LIBC
00607
00608
00609
00610 weak_function
00611 # endif
00612 re_comp (s)
00613 const char *s;
00614 {
00615 reg_errcode_t ret;
00616 char *fastmap;
00617
00618 if (!s)
00619 {
00620 if (!re_comp_buf.buffer)
00621 return gettext ("No previous regular expression");
00622 return 0;
00623 }
00624
00625 if (re_comp_buf.buffer)
00626 {
00627 fastmap = re_comp_buf.fastmap;
00628 re_comp_buf.fastmap = NULL;
00629 __regfree (&re_comp_buf);
00630 memset (&re_comp_buf, '\0', sizeof (re_comp_buf));
00631 re_comp_buf.fastmap = fastmap;
00632 }
00633
00634 if (re_comp_buf.fastmap == NULL)
00635 {
00636 re_comp_buf.fastmap = (char *) malloc (SBC_MAX);
00637 if (re_comp_buf.fastmap == NULL)
00638 return (char *) gettext (__re_error_msgid
00639 + __re_error_msgid_idx[(int) REG_ESPACE]);
00640 }
00641
00642
00643
00644
00645
00646 re_comp_buf.newline_anchor = 1;
00647
00648 ret = re_compile_internal (&re_comp_buf, s, strlen (s), re_syntax_options);
00649
00650 if (!ret)
00651 return NULL;
00652
00653
00654 return (char *) gettext (__re_error_msgid + __re_error_msgid_idx[(int) ret]);
00655 }
00656
00657 #ifdef _LIBC
00658 libc_freeres_fn (free_mem)
00659 {
00660 __regfree (&re_comp_buf);
00661 }
00662 #endif
00663
00664 #endif
00665
00666
00667
00668
00669
00670 static reg_errcode_t
00671 re_compile_internal (preg, pattern, length, syntax)
00672 regex_t *preg;
00673 const char * pattern;
00674 int length;
00675 reg_syntax_t syntax;
00676 {
00677 reg_errcode_t err = REG_NOERROR;
00678 re_dfa_t *dfa;
00679 re_string_t regexp;
00680
00681
00682 preg->fastmap_accurate = 0;
00683 preg->syntax = syntax;
00684 preg->not_bol = preg->not_eol = 0;
00685 preg->used = 0;
00686 preg->re_nsub = 0;
00687 preg->can_be_null = 0;
00688 preg->regs_allocated = REGS_UNALLOCATED;
00689
00690
00691 dfa = (re_dfa_t *) preg->buffer;
00692 if (preg->allocated < sizeof (re_dfa_t))
00693 {
00694
00695
00696
00697
00698 dfa = re_realloc (preg->buffer, re_dfa_t, 1);
00699 if (dfa == NULL)
00700 return REG_ESPACE;
00701 preg->allocated = sizeof (re_dfa_t);
00702 }
00703 preg->buffer = (unsigned char *) dfa;
00704 preg->used = sizeof (re_dfa_t);
00705
00706 err = init_dfa (dfa, length);
00707 if (BE (err != REG_NOERROR, 0))
00708 {
00709 re_free (dfa);
00710 preg->buffer = NULL;
00711 preg->allocated = 0;
00712 return err;
00713 }
00714 #ifdef DEBUG
00715 dfa->re_str = re_malloc (char, length + 1);
00716 strncpy (dfa->re_str, pattern, length + 1);
00717 #endif
00718
00719 err = re_string_construct (®exp, pattern, length, preg->translate,
00720 syntax & RE_ICASE);
00721 if (BE (err != REG_NOERROR, 0))
00722 {
00723 re_free (dfa);
00724 preg->buffer = NULL;
00725 preg->allocated = 0;
00726 return err;
00727 }
00728
00729
00730 preg->re_nsub = 0;
00731 dfa->str_tree = parse (®exp, preg, syntax, &err);
00732 if (BE (dfa->str_tree == NULL, 0))
00733 goto re_compile_internal_free_return;
00734
00735
00736
00737 err = analyze (dfa);
00738 if (BE (err != REG_NOERROR, 0))
00739 goto re_compile_internal_free_return;
00740
00741
00742 err = create_initial_state (dfa);
00743
00744
00745 free_workarea_compile (preg);
00746 re_string_destruct (®exp);
00747
00748 if (BE (err != REG_NOERROR, 0))
00749 {
00750 re_compile_internal_free_return:
00751 free_dfa_content (dfa);
00752 preg->buffer = NULL;
00753 preg->allocated = 0;
00754 }
00755
00756 return err;
00757 }
00758
00759
00760
00761
00762 static reg_errcode_t
00763 init_dfa (dfa, pat_len)
00764 re_dfa_t *dfa;
00765 int pat_len;
00766 {
00767 int table_size;
00768
00769 memset (dfa, '\0', sizeof (re_dfa_t));
00770
00771 dfa->nodes_alloc = pat_len + 1;
00772 dfa->nodes = re_malloc (re_token_t, dfa->nodes_alloc);
00773
00774 dfa->states_alloc = pat_len + 1;
00775
00776
00777 for (table_size = 1; table_size > 0; table_size <<= 1)
00778 if (table_size > pat_len)
00779 break;
00780
00781 dfa->state_table = calloc (sizeof (struct re_state_table_entry), table_size);
00782 dfa->state_hash_mask = table_size - 1;
00783
00784 dfa->subexps_alloc = 1;
00785 dfa->subexps = re_malloc (re_subexp_t, dfa->subexps_alloc);
00786 dfa->word_char = NULL;
00787
00788 if (BE (dfa->nodes == NULL || dfa->state_table == NULL
00789 || dfa->subexps == NULL, 0))
00790 {
00791
00792
00793 dfa->subexps = NULL;
00794 dfa->state_table = NULL;
00795 dfa->nodes = NULL;
00796 return REG_ESPACE;
00797 }
00798 return REG_NOERROR;
00799 }
00800
00801
00802
00803
00804
00805 static reg_errcode_t
00806 init_word_char (dfa)
00807 re_dfa_t *dfa;
00808 {
00809 int i, j, ch;
00810 dfa->word_char = (re_bitset_ptr_t) calloc (sizeof (bitset), 1);
00811 if (BE (dfa->word_char == NULL, 0))
00812 return REG_ESPACE;
00813 for (i = 0, ch = 0; i < BITSET_UINTS; ++i)
00814 for (j = 0; j < UINT_BITS; ++j, ++ch)
00815 if (isalnum (ch) || ch == '_')
00816 dfa->word_char[i] |= 1UL << j;
00817 return REG_NOERROR;
00818 }
00819
00820
00821
00822 static void
00823 free_workarea_compile (preg)
00824 regex_t *preg;
00825 {
00826 re_dfa_t *dfa = (re_dfa_t *) preg->buffer;
00827 free_bin_tree (dfa->str_tree);
00828 dfa->str_tree = NULL;
00829 re_free (dfa->org_indices);
00830 dfa->org_indices = NULL;
00831 }
00832
00833
00834
00835 static reg_errcode_t
00836 create_initial_state (dfa)
00837 re_dfa_t *dfa;
00838 {
00839 int first, i;
00840 reg_errcode_t err;
00841 re_node_set init_nodes;
00842
00843
00844
00845 first = dfa->str_tree->first;
00846 dfa->init_node = first;
00847 err = re_node_set_init_copy (&init_nodes, dfa->eclosures + first);
00848 if (BE (err != REG_NOERROR, 0))
00849 return err;
00850
00851
00852
00853
00854
00855 if (dfa->nbackref > 0)
00856 for (i = 0; i < init_nodes.nelem; ++i)
00857 {
00858 int node_idx = init_nodes.elems[i];
00859 re_token_type_t type = dfa->nodes[node_idx].type;
00860
00861 int clexp_idx;
00862 if (type != OP_BACK_REF)
00863 continue;
00864 for (clexp_idx = 0; clexp_idx < init_nodes.nelem; ++clexp_idx)
00865 {
00866 re_token_t *clexp_node;
00867 clexp_node = dfa->nodes + init_nodes.elems[clexp_idx];
00868 if (clexp_node->type == OP_CLOSE_SUBEXP
00869 && clexp_node->opr.idx + 1 == dfa->nodes[node_idx].opr.idx)
00870 break;
00871 }
00872 if (clexp_idx == init_nodes.nelem)
00873 continue;
00874
00875 if (type == OP_BACK_REF)
00876 {
00877 int dest_idx = dfa->edests[node_idx].elems[0];
00878 if (!re_node_set_contains (&init_nodes, dest_idx))
00879 {
00880 re_node_set_merge (&init_nodes, dfa->eclosures + dest_idx);
00881 i = 0;
00882 }
00883 }
00884 }
00885
00886
00887 dfa->init_state = re_acquire_state_context (&err, dfa, &init_nodes, 0);
00888
00889 if (BE (dfa->init_state == NULL, 0))
00890 return err;
00891 if (dfa->init_state->has_constraint)
00892 {
00893 dfa->init_state_word = re_acquire_state_context (&err, dfa, &init_nodes,
00894 CONTEXT_WORD);
00895 dfa->init_state_nl = re_acquire_state_context (&err, dfa, &init_nodes,
00896 CONTEXT_NEWLINE);
00897 dfa->init_state_begbuf = re_acquire_state_context (&err, dfa,
00898 &init_nodes,
00899 CONTEXT_NEWLINE
00900 | CONTEXT_BEGBUF);
00901 if (BE (dfa->init_state_word == NULL || dfa->init_state_nl == NULL
00902 || dfa->init_state_begbuf == NULL, 0))
00903 return err;
00904 }
00905 else
00906 dfa->init_state_word = dfa->init_state_nl
00907 = dfa->init_state_begbuf = dfa->init_state;
00908
00909 re_node_set_free (&init_nodes);
00910 return REG_NOERROR;
00911 }
00912
00913
00914
00915
00916 static reg_errcode_t
00917 analyze (dfa)
00918 re_dfa_t *dfa;
00919 {
00920 int i;
00921 reg_errcode_t ret;
00922
00923
00924 dfa->nexts = re_malloc (int, dfa->nodes_alloc);
00925 dfa->org_indices = re_malloc (int, dfa->nodes_alloc);
00926 dfa->edests = re_malloc (re_node_set, dfa->nodes_alloc);
00927 dfa->eclosures = re_malloc (re_node_set, dfa->nodes_alloc);
00928 dfa->inveclosures = re_malloc (re_node_set, dfa->nodes_alloc);
00929 if (BE (dfa->nexts == NULL || dfa->org_indices == NULL || dfa->edests == NULL
00930 || dfa->eclosures == NULL || dfa->inveclosures == NULL, 0))
00931 return REG_ESPACE;
00932
00933 for (i = 0; i < dfa->nodes_len; ++i)
00934 {
00935 dfa->nexts[i] = -1;
00936 re_node_set_init_empty (dfa->edests + i);
00937 re_node_set_init_empty (dfa->eclosures + i);
00938 re_node_set_init_empty (dfa->inveclosures + i);
00939 }
00940
00941 ret = analyze_tree (dfa, dfa->str_tree);
00942 if (BE (ret == REG_NOERROR, 1))
00943 {
00944 ret = calc_eclosure (dfa);
00945 if (ret == REG_NOERROR)
00946 calc_inveclosure (dfa);
00947 }
00948 return ret;
00949 }
00950
00951
00952
00953
00954
00955 static reg_errcode_t
00956 analyze_tree (dfa, node)
00957 re_dfa_t *dfa;
00958 bin_tree_t *node;
00959 {
00960 reg_errcode_t ret;
00961 if (node->first == -1)
00962 calc_first (dfa, node);
00963 if (node->next == -1)
00964 calc_next (dfa, node);
00965 if (node->eclosure.nelem == 0)
00966 calc_epsdest (dfa, node);
00967
00968 if (node->left != NULL)
00969 {
00970 ret = analyze_tree (dfa, node->left);
00971 if (BE (ret != REG_NOERROR, 0))
00972 return ret;
00973 }
00974
00975 if (node->right != NULL)
00976 {
00977 ret = analyze_tree (dfa, node->right);
00978 if (BE (ret != REG_NOERROR, 0))
00979 return ret;
00980 }
00981 return REG_NOERROR;
00982 }
00983
00984
00985 static void
00986 calc_first (dfa, node)
00987 re_dfa_t *dfa;
00988 bin_tree_t *node;
00989 {
00990 int idx, type;
00991 idx = node->node_idx;
00992 type = (node->type == 0) ? dfa->nodes[idx].type : node->type;
00993
00994 switch (type)
00995 {
00996 #ifdef DEBUG
00997 case OP_OPEN_BRACKET:
00998 case OP_CLOSE_BRACKET:
00999 case OP_OPEN_DUP_NUM:
01000 case OP_CLOSE_DUP_NUM:
01001 case OP_NON_MATCH_LIST:
01002 case OP_OPEN_COLL_ELEM:
01003 case OP_CLOSE_COLL_ELEM:
01004 case OP_OPEN_EQUIV_CLASS:
01005 case OP_CLOSE_EQUIV_CLASS:
01006 case OP_OPEN_CHAR_CLASS:
01007 case OP_CLOSE_CHAR_CLASS:
01008
01009 assert (0);
01010 #endif
01011 case END_OF_RE:
01012 case CHARACTER:
01013 case OP_PERIOD:
01014 case OP_DUP_ASTERISK:
01015 case OP_DUP_QUESTION:
01016 #ifdef RE_ENABLE_I18N
01017 case COMPLEX_BRACKET:
01018 #endif
01019 case SIMPLE_BRACKET:
01020 case OP_BACK_REF:
01021 case ANCHOR:
01022 case OP_OPEN_SUBEXP:
01023 case OP_CLOSE_SUBEXP:
01024 node->first = idx;
01025 break;
01026 case OP_DUP_PLUS:
01027 #ifdef DEBUG
01028 assert (node->left != NULL);
01029 #endif
01030 if (node->left->first == -1)
01031 calc_first (dfa, node->left);
01032 node->first = node->left->first;
01033 break;
01034 case OP_ALT:
01035 node->first = idx;
01036 break;
01037
01038 default:
01039 #ifdef DEBUG
01040 assert (node->left != NULL);
01041 #endif
01042 if (node->left->first == -1)
01043 calc_first (dfa, node->left);
01044 node->first = node->left->first;
01045 break;
01046 }
01047 }
01048
01049
01050
01051 static void
01052 calc_next (dfa, node)
01053 re_dfa_t *dfa;
01054 bin_tree_t *node;
01055 {
01056 int idx, type;
01057 bin_tree_t *parent = node->parent;
01058 if (parent == NULL)
01059 {
01060 node->next = -1;
01061 idx = node->node_idx;
01062 if (node->type == 0)
01063 dfa->nexts[idx] = node->next;
01064 return;
01065 }
01066
01067 idx = parent->node_idx;
01068 type = (parent->type == 0) ? dfa->nodes[idx].type : parent->type;
01069
01070 switch (type)
01071 {
01072 case OP_DUP_ASTERISK:
01073 case OP_DUP_PLUS:
01074 node->next = idx;
01075 break;
01076 case CONCAT:
01077 if (parent->left == node)
01078 {
01079 if (parent->right->first == -1)
01080 calc_first (dfa, parent->right);
01081 node->next = parent->right->first;