00001
00002
00003
00004
00005
00006
00007
00008
00009
00010
00011
00012
00013
00014
00015
00016
00017
00018
00019
00020
00021 static reg_errcode_t match_ctx_init _RE_ARGS((re_match_context_t *cache, int eflags,
00022 re_string_t *input, int n));
00023 static void match_ctx_clean _RE_ARGS((re_match_context_t *mctx));
00024 static void match_ctx_free _RE_ARGS((re_match_context_t *cache));
00025 static void match_ctx_free_subtops _RE_ARGS((re_match_context_t *mctx));
00026 static reg_errcode_t match_ctx_add_entry _RE_ARGS((re_match_context_t *cache, int node,
00027 int str_idx, int from, int to));
00028 static int search_cur_bkref_entry _RE_ARGS((re_match_context_t *mctx, int str_idx));
00029 static void match_ctx_clear_flag _RE_ARGS((re_match_context_t *mctx));
00030 static reg_errcode_t match_ctx_add_subtop _RE_ARGS((re_match_context_t *mctx, int node,
00031 int str_idx));
00032 static re_sub_match_last_t * match_ctx_add_sublast _RE_ARGS((re_sub_match_top_t *subtop,
00033 int node, int str_idx));
00034 static void sift_ctx_init _RE_ARGS((re_sift_context_t *sctx, re_dfastate_t **sifted_sts,
00035 re_dfastate_t **limited_sts, int last_node,
00036 int last_str_idx, int check_subexp));
00037 static reg_errcode_t re_search_internal _RE_ARGS((const regex_t *preg,
00038 const char *string, int length,
00039 int start, int range, int stop,
00040 size_t nmatch, regmatch_t pmatch[],
00041 int eflags));
00042 static int re_search_2_stub _RE_ARGS((struct re_pattern_buffer *bufp,
00043 const char *string1, int length1,
00044 const char *string2, int length2,
00045 int start, int range, struct re_registers *regs,
00046 int stop, int ret_len));
00047 static int re_search_stub _RE_ARGS((struct re_pattern_buffer *bufp,
00048 const char *string, int length, int start,
00049 int range, int stop, struct re_registers *regs,
00050 int ret_len));
00051 static unsigned re_copy_regs _RE_ARGS((struct re_registers *regs, regmatch_t *pmatch,
00052 int nregs, int regs_allocated));
00053 static inline re_dfastate_t *acquire_init_state_context _RE_ARGS((reg_errcode_t *err,
00054 const regex_t *preg,
00055 const re_match_context_t *mctx,
00056 int idx));
00057 static reg_errcode_t prune_impossible_nodes _RE_ARGS((const regex_t *preg,
00058 re_match_context_t *mctx));
00059 static int check_matching _RE_ARGS((const regex_t *preg, re_match_context_t *mctx,
00060 int fl_search, int fl_longest_match));
00061 static int check_halt_node_context _RE_ARGS((const re_dfa_t *dfa, int node,
00062 unsigned int context));
00063 static int check_halt_state_context _RE_ARGS((const regex_t *preg,
00064 const re_dfastate_t *state,
00065 const re_match_context_t *mctx, int idx));
00066 static void update_regs _RE_ARGS((re_dfa_t *dfa, regmatch_t *pmatch, int cur_node,
00067 int cur_idx, int nmatch));
00068 static int proceed_next_node _RE_ARGS((const regex_t *preg, int nregs, regmatch_t *regs,
00069 const re_match_context_t *mctx,
00070 int *pidx, int node, re_node_set *eps_via_nodes,
00071 struct re_fail_stack_t *fs));
00072 static reg_errcode_t push_fail_stack _RE_ARGS((struct re_fail_stack_t *fs,
00073 int str_idx, int *dests, int nregs,
00074 regmatch_t *regs,
00075 re_node_set *eps_via_nodes));
00076 static int pop_fail_stack _RE_ARGS((struct re_fail_stack_t *fs, int *pidx, int nregs,
00077 regmatch_t *regs, re_node_set *eps_via_nodes));
00078 static reg_errcode_t set_regs _RE_ARGS((const regex_t *preg,
00079 const re_match_context_t *mctx,
00080 size_t nmatch, regmatch_t *pmatch,
00081 int fl_backtrack));
00082 static reg_errcode_t free_fail_stack_return _RE_ARGS((struct re_fail_stack_t *fs));
00083
00084 #ifdef RE_ENABLE_I18N
00085 static int sift_states_iter_mb _RE_ARGS((const regex_t *preg,
00086 const re_match_context_t *mctx,
00087 re_sift_context_t *sctx,
00088 int node_idx, int str_idx, int max_str_idx));
00089 #endif
00090 static reg_errcode_t sift_states_backward _RE_ARGS((const regex_t *preg,
00091 re_match_context_t *mctx,
00092 re_sift_context_t *sctx));
00093 static reg_errcode_t update_cur_sifted_state _RE_ARGS((const regex_t *preg,
00094 re_match_context_t *mctx,
00095 re_sift_context_t *sctx,
00096 int str_idx,
00097 re_node_set *dest_nodes));
00098 static reg_errcode_t add_epsilon_src_nodes _RE_ARGS((re_dfa_t *dfa,
00099 re_node_set *dest_nodes,
00100 const re_node_set *candidates));
00101 static reg_errcode_t sub_epsilon_src_nodes _RE_ARGS((re_dfa_t *dfa, int node,
00102 re_node_set *dest_nodes,
00103 const re_node_set *and_nodes));
00104 static int check_dst_limits _RE_ARGS((re_dfa_t *dfa, re_node_set *limits,
00105 re_match_context_t *mctx, int dst_node,
00106 int dst_idx, int src_node, int src_idx));
00107 static int check_dst_limits_calc_pos _RE_ARGS((re_dfa_t *dfa, re_match_context_t *mctx,
00108 int limit, re_node_set *eclosures,
00109 int subexp_idx, int node, int str_idx));
00110 static reg_errcode_t check_subexp_limits _RE_ARGS((re_dfa_t *dfa,
00111 re_node_set *dest_nodes,
00112 const re_node_set *candidates,
00113 re_node_set *limits,
00114 struct re_backref_cache_entry *bkref_ents,
00115 int str_idx));
00116 static reg_errcode_t sift_states_bkref _RE_ARGS((const regex_t *preg,
00117 re_match_context_t *mctx,
00118 re_sift_context_t *sctx,
00119 int str_idx, re_node_set *dest_nodes));
00120 static reg_errcode_t clean_state_log_if_need _RE_ARGS((re_match_context_t *mctx,
00121 int next_state_log_idx));
00122 static reg_errcode_t merge_state_array _RE_ARGS((re_dfa_t *dfa, re_dfastate_t **dst,
00123 re_dfastate_t **src, int num));
00124 static re_dfastate_t *transit_state _RE_ARGS((reg_errcode_t *err, const regex_t *preg,
00125 re_match_context_t *mctx,
00126 re_dfastate_t *state, int fl_search));
00127 static reg_errcode_t check_subexp_matching_top _RE_ARGS((re_dfa_t *dfa,
00128 re_match_context_t *mctx,
00129 re_node_set *cur_nodes,
00130 int str_idx));
00131 static re_dfastate_t *transit_state_sb _RE_ARGS((reg_errcode_t *err, const regex_t *preg,
00132 re_dfastate_t *pstate,
00133 int fl_search,
00134 re_match_context_t *mctx));
00135 #ifdef RE_ENABLE_I18N
00136 static reg_errcode_t transit_state_mb _RE_ARGS((const regex_t *preg,
00137 re_dfastate_t *pstate,
00138 re_match_context_t *mctx));
00139 #endif
00140 static reg_errcode_t transit_state_bkref _RE_ARGS((const regex_t *preg,
00141 re_node_set *nodes,
00142 re_match_context_t *mctx));
00143 static reg_errcode_t get_subexp _RE_ARGS((const regex_t *preg, re_match_context_t *mctx,
00144 int bkref_node, int bkref_str_idx));
00145 static reg_errcode_t get_subexp_sub _RE_ARGS((const regex_t *preg,
00146 re_match_context_t *mctx,
00147 re_sub_match_top_t *sub_top,
00148 re_sub_match_last_t *sub_last,
00149 int bkref_node, int bkref_str));
00150 static int find_subexp_node _RE_ARGS((re_dfa_t *dfa, re_node_set *nodes,
00151 int subexp_idx, int fl_open));
00152 static reg_errcode_t check_arrival _RE_ARGS((const regex_t *preg,
00153 re_match_context_t *mctx,
00154 state_array_t *path, int top_node,
00155 int top_str, int last_node, int last_str,
00156 int fl_open));
00157 static reg_errcode_t check_arrival_add_next_nodes _RE_ARGS((const regex_t *preg,
00158 re_dfa_t *dfa,
00159 re_match_context_t *mctx,
00160 int str_idx,
00161 re_node_set *cur_nodes,
00162 re_node_set *next_nodes));
00163 static reg_errcode_t check_arrival_expand_ecl _RE_ARGS((re_dfa_t *dfa,
00164 re_node_set *cur_nodes,
00165 int ex_subexp, int fl_open));
00166 static reg_errcode_t check_arrival_expand_ecl_sub _RE_ARGS((re_dfa_t *dfa,
00167 re_node_set *dst_nodes,
00168 int target, int ex_subexp,
00169 int fl_open));
00170 static reg_errcode_t expand_bkref_cache _RE_ARGS((const regex_t *preg,
00171 re_match_context_t *mctx,
00172 re_node_set *cur_nodes, int cur_str,
00173 int last_str, int subexp_num,
00174 int fl_open));
00175 static re_dfastate_t **build_trtable _RE_ARGS((const regex_t *dfa,
00176 const re_dfastate_t *state,
00177 int fl_search));
00178 #ifdef RE_ENABLE_I18N
00179 static int check_node_accept_bytes _RE_ARGS((const regex_t *preg, int node_idx,
00180 const re_string_t *input, int idx));
00181 # ifdef _LIBC
00182 static unsigned int find_collation_sequence_value _RE_ARGS((const unsigned char *mbs,
00183 size_t name_len));
00184 # endif
00185 #endif
00186 static int group_nodes_into_DFAstates _RE_ARGS((const regex_t *dfa,
00187 const re_dfastate_t *state,
00188 re_node_set *states_node,
00189 bitset *states_ch));
00190 static int check_node_accept _RE_ARGS((const regex_t *preg, const re_token_t *node,
00191 const re_match_context_t *mctx, int idx));
00192 static reg_errcode_t extend_buffers _RE_ARGS((re_match_context_t *mctx));
00193
00194
00195
00196
00197
00198
00199
00200
00201
00202
00203
00204
00205
00206
00207
00208
00209
00210 int
00211 regexec (preg, string, nmatch, pmatch, eflags)
00212 const regex_t *__restrict preg;
00213 const char *__restrict string;
00214 size_t nmatch;
00215 regmatch_t pmatch[];
00216 int eflags;
00217 {
00218 reg_errcode_t err;
00219 int length = strlen (string);
00220 if (preg->no_sub)
00221 err = re_search_internal (preg, string, length, 0, length, length, 0,
00222 NULL, eflags);
00223 else
00224 err = re_search_internal (preg, string, length, 0, length, length, nmatch,
00225 pmatch, eflags);
00226 return err != REG_NOERROR;
00227 }
00228 #ifdef _LIBC
00229 weak_alias (__regexec, regexec)
00230 #endif
00231
00232
00233
00234
00235
00236
00237
00238
00239
00240
00241
00242
00243
00244
00245
00246
00247
00248
00249
00250
00251
00252
00253
00254
00255
00256
00257
00258
00259
00260
00261 int
00262 re_match (bufp, string, length, start, regs)
00263 struct re_pattern_buffer *bufp;
00264 const char *string;
00265 int length, start;
00266 struct re_registers *regs;
00267 {
00268 return re_search_stub (bufp, string, length, start, 0, length, regs, 1);
00269 }
00270 #ifdef _LIBC
00271 weak_alias (__re_match, re_match)
00272 #endif
00273
00274 int
00275 re_search (bufp, string, length, start, range, regs)
00276 struct re_pattern_buffer *bufp;
00277 const char *string;
00278 int length, start, range;
00279 struct re_registers *regs;
00280 {
00281 return re_search_stub (bufp, string, length, start, range, length, regs, 0);
00282 }
00283 #ifdef _LIBC
00284 weak_alias (__re_search, re_search)
00285 #endif
00286
00287 int
00288 re_match_2 (bufp, string1, length1, string2, length2, start, regs, stop)
00289 struct re_pattern_buffer *bufp;
00290 const char *string1, *string2;
00291 int length1, length2, start, stop;
00292 struct re_registers *regs;
00293 {
00294 return re_search_2_stub (bufp, string1, length1, string2, length2,
00295 start, 0, regs, stop, 1);
00296 }
00297 #ifdef _LIBC
00298 weak_alias (__re_match_2, re_match_2)
00299 #endif
00300
00301 int
00302 re_search_2 (bufp, string1, length1, string2, length2, start, range, regs, stop)
00303 struct re_pattern_buffer *bufp;
00304 const char *string1, *string2;
00305 int length1, length2, start, range, stop;
00306 struct re_registers *regs;
00307 {
00308 return re_search_2_stub (bufp, string1, length1, string2, length2,
00309 start, range, regs, stop, 0);
00310 }
00311 #ifdef _LIBC
00312 weak_alias (__re_search_2, re_search_2)
00313 #endif
00314
00315 static int
00316 re_search_2_stub (bufp, string1, length1, string2, length2, start, range, regs,
00317 stop, ret_len)
00318 struct re_pattern_buffer *bufp;
00319 const char *string1, *string2;
00320 int length1, length2, start, range, stop, ret_len;
00321 struct re_registers *regs;
00322 {
00323 const char *str;
00324 int rval;
00325 int len = length1 + length2;
00326 int free_str = 0;
00327
00328 if (BE (length1 < 0 || length2 < 0 || stop < 0, 0))
00329 return -2;
00330
00331
00332 if (length2 > 0)
00333 if (length1 > 0)
00334 {
00335 char *s = re_malloc (char, len);
00336
00337 if (BE (s == NULL, 0))
00338 return -2;
00339 memcpy (s, string1, length1);
00340 memcpy (s + length1, string2, length2);
00341 str = s;
00342 free_str = 1;
00343 }
00344 else
00345 str = string2;
00346 else
00347 str = string1;
00348
00349 rval = re_search_stub (bufp, str, len, start, range, stop, regs,
00350 ret_len);
00351 if (free_str)
00352 re_free ((char *) str);
00353 return rval;
00354 }
00355
00356
00357
00358
00359
00360
00361 static int
00362 re_search_stub (bufp, string, length, start, range, stop, regs, ret_len)
00363 struct re_pattern_buffer *bufp;
00364 const char *string;
00365 int length, start, range, stop, ret_len;
00366 struct re_registers *regs;
00367 {
00368 reg_errcode_t result;
00369 regmatch_t *pmatch;
00370 int nregs, rval;
00371 int eflags = 0;
00372
00373
00374 if (BE (start < 0 || start > length, 0))
00375 return -1;
00376 if (BE (start + range > length, 0))
00377 range = length - start;
00378 else if (BE (start + range < 0, 0))
00379 range = -start;
00380
00381 eflags |= (bufp->not_bol) ? REG_NOTBOL : 0;
00382 eflags |= (bufp->not_eol) ? REG_NOTEOL : 0;
00383
00384
00385 if (range > 0 && bufp->fastmap != NULL && !bufp->fastmap_accurate)
00386 re_compile_fastmap (bufp);
00387
00388 if (BE (bufp->no_sub, 0))
00389 regs = NULL;
00390
00391
00392 if (regs == NULL)
00393 nregs = 1;
00394 else if (BE (bufp->regs_allocated == REGS_FIXED &&
00395 regs->num_regs < bufp->re_nsub + 1, 0))
00396 {
00397 nregs = regs->num_regs;
00398 if (BE (nregs < 1, 0))
00399 {
00400
00401 regs = NULL;
00402 nregs = 1;
00403 }
00404 }
00405 else
00406 nregs = bufp->re_nsub + 1;
00407 pmatch = re_malloc (regmatch_t, nregs);
00408 if (BE (pmatch == NULL, 0))
00409 return -2;
00410
00411 result = re_search_internal (bufp, string, length, start, range, stop,
00412 nregs, pmatch, eflags);
00413
00414 rval = 0;
00415
00416
00417 if (result != REG_NOERROR)
00418 rval = -1;
00419 else if (regs != NULL)
00420 {
00421
00422 bufp->regs_allocated = re_copy_regs (regs, pmatch, nregs,
00423 bufp->regs_allocated);
00424 if (BE (bufp->regs_allocated == REGS_UNALLOCATED, 0))
00425 rval = -2;
00426 }
00427
00428 if (BE (rval == 0, 1))
00429 {
00430 if (ret_len)
00431 {
00432 assert (pmatch[0].rm_so == start);
00433 rval = pmatch[0].rm_eo - start;
00434 }
00435 else
00436 rval = pmatch[0].rm_so;
00437 }
00438 re_free (pmatch);
00439 return rval;
00440 }
00441
00442 static unsigned
00443 re_copy_regs (regs, pmatch, nregs, regs_allocated)
00444 struct re_registers *regs;
00445 regmatch_t *pmatch;
00446 int nregs, regs_allocated;
00447 {
00448 int rval = REGS_REALLOCATE;
00449 int i;
00450 int need_regs = nregs + 1;
00451
00452
00453
00454
00455 if (regs_allocated == REGS_UNALLOCATED)
00456 {
00457 regs->start = re_malloc (regoff_t, need_regs);
00458 if (BE (regs->start == NULL, 0))
00459 return REGS_UNALLOCATED;
00460 regs->end = re_malloc (regoff_t, need_regs);
00461 if (BE (regs->end == NULL, 0))
00462 {
00463 re_free (regs->start);
00464 return REGS_UNALLOCATED;
00465 }
00466 regs->num_regs = need_regs;
00467 }
00468 else if (regs_allocated == REGS_REALLOCATE)
00469 {
00470
00471
00472 if (need_regs > regs->num_regs)
00473 {
00474 regs->start = re_realloc (regs->start, regoff_t, need_regs);
00475 if (BE (regs->start == NULL, 0))
00476 {
00477 if (regs->end != NULL)
00478 re_free (regs->end);
00479 return REGS_UNALLOCATED;
00480 }
00481 regs->end = re_realloc (regs->end, regoff_t, need_regs);
00482 if (BE (regs->end == NULL, 0))
00483 {
00484 re_free (regs->start);
00485 return REGS_UNALLOCATED;
00486 }
00487 regs->num_regs = need_regs;
00488 }
00489 }
00490 else
00491 {
00492 assert (regs_allocated == REGS_FIXED);
00493
00494 assert (regs->num_regs >= nregs);
00495 rval = REGS_FIXED;
00496 }
00497
00498
00499 for (i = 0; i < nregs; ++i)
00500 {
00501 regs->start[i] = pmatch[i].rm_so;
00502 regs->end[i] = pmatch[i].rm_eo;
00503 }
00504 for ( ; i < regs->num_regs; ++i)
00505 regs->start[i] = regs->end[i] = -1;
00506
00507 return rval;
00508 }
00509
00510
00511
00512
00513
00514
00515
00516
00517
00518
00519
00520
00521
00522
00523 void
00524 re_set_registers (bufp, regs, num_regs, starts, ends)
00525 struct re_pattern_buffer *bufp;
00526 struct re_registers *regs;
00527 unsigned num_regs;
00528 regoff_t *starts, *ends;
00529 {
00530 if (num_regs)
00531 {
00532 bufp->regs_allocated = REGS_REALLOCATE;
00533 regs->num_regs = num_regs;
00534 regs->start = starts;
00535 regs->end = ends;
00536 }
00537 else
00538 {
00539 bufp->regs_allocated = REGS_UNALLOCATED;
00540 regs->num_regs = 0;
00541 regs->start = regs->end = (regoff_t *) 0;
00542 }
00543 }
00544 #ifdef _LIBC
00545 weak_alias (__re_set_registers, re_set_registers)
00546 #endif
00547
00548
00549
00550
00551 #if defined _REGEX_RE_COMP || defined _LIBC
00552 int
00553 # ifdef _LIBC
00554 weak_function
00555 # endif
00556 re_exec (s)
00557 const char *s;
00558 {
00559 return 0 == regexec (&re_comp_buf, s, 0, NULL, 0);
00560 }
00561 #endif
00562
00563 static re_node_set empty_set;
00564
00565
00566
00567
00568
00569
00570
00571
00572
00573
00574
00575
00576 static reg_errcode_t
00577 re_search_internal (preg, string, length, start, range, stop, nmatch, pmatch,
00578 eflags)
00579 const regex_t *preg;
00580 const char *string;
00581 int length, start, range, stop, eflags;
00582 size_t nmatch;
00583 regmatch_t pmatch[];
00584 {
00585 reg_errcode_t err;
00586 re_dfa_t *dfa = (re_dfa_t *)preg->buffer;
00587 re_string_t input;
00588 int left_lim, right_lim, incr;
00589 int fl_longest_match, match_first, match_last = -1;
00590 int fast_translate, sb;
00591 re_match_context_t mctx;
00592 char *fastmap = ((preg->fastmap != NULL && preg->fastmap_accurate
00593 && range && !preg->can_be_null) ? preg->fastmap : NULL);
00594
00595
00596 if (BE (preg->used == 0 || dfa->init_state == NULL
00597 || dfa->init_state_word == NULL || dfa->init_state_nl == NULL
00598 || dfa->init_state_begbuf == NULL, 0))
00599 return REG_NOMATCH;
00600
00601 re_node_set_init_empty (&empty_set);
00602 memset (&mctx, '\0', sizeof (re_match_context_t));
00603
00604
00605 fl_longest_match = (nmatch != 0 || dfa->nbackref);
00606
00607 err = re_string_allocate (&input, string, length, dfa->nodes_len + 1,
00608 preg->translate, preg->syntax & RE_ICASE);
00609 if (BE (err != REG_NOERROR, 0))
00610 goto free_return;
00611 input.stop = stop;
00612
00613 err = match_ctx_init (&mctx, eflags, &input, dfa->nbackref * 2);
00614 if (BE (err != REG_NOERROR, 0))
00615 goto free_return;
00616
00617
00618
00619
00620
00621 if (nmatch > 1 || dfa->has_mb_node)
00622 {
00623 mctx.state_log = re_malloc (re_dfastate_t *, dfa->nodes_len + 1);
00624 if (BE (mctx.state_log == NULL, 0))
00625 {
00626 err = REG_ESPACE;
00627 goto free_return;
00628 }
00629 }
00630 else
00631 mctx.state_log = NULL;
00632
00633 #ifdef DEBUG
00634
00635 assert (start + range >= 0 && start + range <= length);
00636 #endif
00637
00638 match_first = start;
00639 input.tip_context = ((eflags & REG_NOTBOL) ? CONTEXT_BEGBUF
00640 : CONTEXT_NEWLINE | CONTEXT_BEGBUF);
00641
00642
00643 incr = (range < 0) ? -1 : 1;
00644 left_lim = (range < 0) ? start + range : start;
00645 right_lim = (range < 0) ? start : start + range;
00646 #ifdef RE_ENABLE_I18N
00647 sb = re_mb_cur_max == 1;
00648 #else
00649 sb = 1;
00650 #endif
00651 fast_translate = sb || !(preg->syntax & RE_ICASE || preg->translate);
00652
00653 for (;;)
00654 {
00655
00656 if (fastmap)
00657 {
00658 if (BE (fast_translate, 1))
00659 {
00660 unsigned RE_TRANSLATE_TYPE t
00661 = (unsigned RE_TRANSLATE_TYPE) preg->translate;
00662 if (BE (range >= 0, 1))
00663 {
00664 if (BE (t != NULL, 0))
00665 {
00666 while (BE (match_first < right_lim, 1)
00667 && !fastmap[t[(unsigned char) string[match_first]]])
00668 ++match_first;
00669 }
00670 else
00671 {
00672 while (BE (match_first < right_lim, 1)
00673 && !fastmap[(unsigned char) string[match_first]])
00674 ++match_first;
00675 }
00676 if (BE (match_first == right_lim, 0))
00677 {
00678 int ch = match_first >= length
00679 ? 0 : (unsigned char) string[match_first];
00680 if (!fastmap[t ? t[ch] : ch])
00681 break;
00682 }
00683 }
00684 else
00685 {
00686 while (match_first >= left_lim)
00687 {
00688 int ch = match_first >= length
00689 ? 0 : (unsigned char) string[match_first];
00690 if (fastmap[t ? t[ch] : ch])
00691 break;
00692 --match_first;
00693 }
00694 if (match_first < left_lim)
00695 break;
00696 }
00697 }
00698 else
00699 {
00700 int ch;
00701
00702 do
00703 {
00704
00705
00706
00707
00708
00709
00710 if (input.raw_mbs_idx + input.valid_len <= match_first
00711 || match_first < input.raw_mbs_idx)
00712 {
00713 err = re_string_reconstruct (&input, match_first, eflags,
00714 preg->newline_anchor);
00715 if (BE (err != REG_NOERROR, 0))
00716 goto free_return;
00717 }
00718
00719
00720 ch = ((match_first >= length) ? 0
00721 : re_string_byte_at (&input,
00722 match_first - input.raw_mbs_idx));
00723 if (fastmap[ch])
00724 break;
00725 match_first += incr;
00726 }
00727 while (match_first >= left_lim && match_first <= right_lim);
00728 if (! fastmap[ch])
00729 break;
00730 }
00731 }
00732
00733
00734
00735 err = re_string_reconstruct (&input, match_first, eflags,
00736 preg->newline_anchor);
00737 if (BE (err != REG_NOERROR, 0))
00738 goto free_return;
00739 #ifdef RE_ENABLE_I18N
00740
00741
00742 if (sb || re_string_first_byte (&input, 0))
00743 #endif
00744 {
00745
00746
00747 mctx.state_log_top = mctx.nbkref_ents = mctx.max_mb_elem_len = 0;
00748 match_last = check_matching (preg, &mctx, 0, fl_longest_match);
00749 if (match_last != -1)
00750 {
00751 if (BE (match_last == -2, 0))
00752 {
00753 err = REG_ESPACE;
00754 goto free_return;
00755 }
00756 else
00757 {
00758 mctx.match_last = match_last;
00759 if ((!preg->no_sub && nmatch > 1) || dfa->nbackref)
00760 {
00761 re_dfastate_t *pstate = mctx.state_log[match_last];
00762 mctx.last_node = check_halt_state_context (preg, pstate,
00763 &mctx, match_last);
00764 }
00765 if ((!preg->no_sub && nmatch > 1 && dfa->has_plural_match)
00766 || dfa->nbackref)
00767 {
00768 err = prune_impossible_nodes (preg, &mctx);
00769 if (err == REG_NOERROR)
00770 break;
00771 if (BE (err != REG_NOMATCH, 0))
00772 goto free_return;
00773 }
00774 else
00775 break;
00776 }
00777 }
00778 match_ctx_clean (&mctx);
00779 }
00780
00781 match_first += incr;
00782 if (match_first < left_lim || right_lim < match_first)
00783 break;
00784 }
00785
00786
00787 if (match_last != -1 && nmatch > 0)
00788 {
00789 int reg_idx;
00790
00791
00792 for (reg_idx = 0; reg_idx < nmatch; ++reg_idx)
00793 pmatch[reg_idx].rm_so = pmatch[reg_idx].rm_eo = -1;
00794
00795
00796 pmatch[0].rm_so = 0;
00797 pmatch[0].rm_eo = mctx.match_last;
00798
00799 if (!preg->no_sub && nmatch > 1)
00800 {
00801 err = set_regs (preg, &mctx, nmatch, pmatch,
00802 dfa->has_plural_match && dfa->nbackref > 0);
00803 if (BE (err != REG_NOERROR, 0))
00804 goto free_return;
00805 }
00806
00807
00808
00809 for (reg_idx = 0; reg_idx < nmatch; ++reg_idx)
00810 if (pmatch[reg_idx].rm_so != -1)
00811 {
00812 pmatch[reg_idx].rm_so += match_first;
00813 pmatch[reg_idx].rm_eo += match_first;
00814 }
00815 }
00816 err = (match_last == -1) ? REG_NOMATCH : REG_NOERROR;
00817 free_return:
00818 re_free (mctx.state_log);
00819 if (dfa->nbackref)
00820 match_ctx_free (&mctx);
00821 re_string_destruct (&input);
00822 return err;
00823 }
00824
00825 static reg_errcode_t
00826 prune_impossible_nodes (preg, mctx)
00827 const regex_t *preg;
00828 re_match_context_t *mctx;
00829 {
00830 int halt_node, match_last;
00831 reg_errcode_t ret;
00832 re_dfa_t *dfa = (re_dfa_t *)preg->buffer;
00833 re_dfastate_t **sifted_states;
00834 re_dfastate_t **lim_states = NULL;
00835 re_sift_context_t sctx;
00836 #ifdef DEBUG
00837 assert (mctx->state_log != NULL);
00838 #endif
00839 match_last = mctx->match_last;
00840 halt_node = mctx->last_node;
00841 sifted_states = re_malloc (re_dfastate_t *, match_last + 1);
00842 if (BE (sifted_states == NULL, 0))
00843 {
00844 ret = REG_ESPACE;
00845 goto free_return;
00846 }
00847 if (dfa->nbackref)
00848 {
00849 lim_states = re_malloc (re_dfastate_t *, match_last + 1);
00850 if (BE (lim_states == NULL, 0))
00851 {
00852 ret = REG_ESPACE;
00853 goto free_return;
00854 }
00855 while (1)
00856 {
00857 memset (lim_states, '\0',
00858 sizeof (re_dfastate_t *) * (match_last + 1));
00859 match_ctx_clear_flag (mctx);
00860 sift_ctx_init (&sctx, sifted_states, lim_states, halt_node,
00861 match_last, 0);
00862 ret = sift_states_backward (preg, mctx, &sctx);
00863 re_node_set_free (&sctx.limits);
00864 if (BE (ret != REG_NOERROR, 0))
00865 goto free_return;
00866 if (sifted_states[0] != NULL || lim_states[0] != NULL)
00867 break;
00868 do
00869 {
00870 --match_last;
00871 if (match_last < 0)
00872 {
00873 ret = REG_NOMATCH;
00874 goto free_return;
00875 }
00876 } while (!mctx->state_log[match_last]->halt);
00877 halt_node = check_halt_state_context (preg,
00878 mctx->state_log[match_last],
00879 mctx, match_last);
00880 }
00881 ret = merge_state_array (dfa, sifted_states, lim_states,
00882 match_last + 1);
00883 re_free (lim_states);
00884 lim_states = NULL;
00885 if (BE (ret != REG_NOERROR, 0))
00886 goto free_return;
00887 }
00888 else
00889 {
00890 sift_ctx_init (&sctx, sifted_states, lim_states, halt_node,
00891 match_last, 0);
00892 ret = sift_states_backward (preg, mctx, &sctx);
00893 re_node_set_free (&sctx.limits);
00894 if (BE (ret != REG_NOERROR, 0))
00895 goto free_return;
00896 }
00897 re_free (mctx->state_log);
00898 mctx->state_log = sifted_states;
00899 sifted_states = NULL;
00900 mctx->last_node = halt_node;
00901 mctx->match_last = match_last;
00902 ret = REG_NOERROR;
00903 free_return:
00904 re_free (sifted_states);
00905 re_free (lim_states);
00906 return ret;
00907 }
00908
00909
00910
00911
00912
00913 static inline re_dfastate_t *
00914 acquire_init_state_context (err, preg, mctx, idx)
00915 reg_errcode_t *err;
00916 const regex_t *preg;
00917 const re_match_context_t *mctx;
00918 int idx;
00919 {
00920 re_dfa_t *dfa = (re_dfa_t *) preg->buffer;
00921
00922 *err = REG_NOERROR;
00923 if (dfa->init_state->has_constraint)
00924 {
00925 unsigned int context;
00926 context = re_string_context_at (mctx->input, idx - 1, mctx->eflags,
00927 preg->newline_anchor);
00928 if (IS_WORD_CONTEXT (context))
00929 return dfa->init_state_word;
00930 else if (IS_ORDINARY_CONTEXT (context))
00931 return dfa->init_state;
00932 else if (IS_BEGBUF_CONTEXT (context) && IS_NEWLINE_CONTEXT (context))
00933 return dfa->init_state_begbuf;
00934 else if (IS_NEWLINE_CONTEXT (context))
00935 return dfa->init_state_nl;
00936 else if (IS_BEGBUF_CONTEXT (context))
00937 {
00938
00939 return re_acquire_state_context (err, dfa,
00940 dfa->init_state->entrance_nodes,
00941 context);
00942 }
00943 else
00944
00945 return dfa->init_state;
00946 }
00947 else
00948 return dfa->init_state;
00949 }
00950
00951
00952
00953
00954
00955
00956
00957
00958
00959 static int
00960 check_matching (preg, mctx, fl_search, fl_longest_match)
00961 const regex_t *preg;
00962 re_match_context_t *mctx;
00963 int fl_search, fl_longest_match;
00964 {
00965 re_dfa_t *dfa = (re_dfa_t *) preg->buffer;
00966 reg_errcode_t err;
00967 int match = 0;
00968 int match_last = -1;
00969 int cur_str_idx = re_string_cur_idx (mctx->input);
00970 re_dfastate_t *cur_state;
00971
00972 cur_state = acquire_init_state_context (&err, preg, mctx, cur_str_idx);
00973
00974 if (BE (cur_state == NULL, 0))
00975 return -2;
00976 if (mctx->state_log != NULL)
00977 mctx->state_log[cur_str_idx] = cur_state;
00978
00979
00980
00981 if (dfa->nbackref)
00982 {
00983 err = check_subexp_matching_top (dfa, mctx, &cur_state->nodes, 0);
00984 if (BE (err != REG_NOERROR, 0))
00985 return err;
00986 }
00987
00988 if (cur_state->has_backref)
00989 {
00990 err = transit_state_bkref (preg, &cur_state->nodes, mctx);
00991 if (BE (err != REG_NOERROR, 0))
00992 return err;
00993 }
00994
00995
00996 if (cur_state->halt)
00997 {
00998 if (!cur_state->has_constraint
00999 || check_halt_state_context (preg, cur_state, mctx, cur_str_idx))
01000 {
01001 if (!fl_longest_match)
01002 return cur_str_idx;
01003 else
01004 {
01005 match_last = cur_str_idx;
01006 match = 1;
01007 }
01008 }
01009 }
01010
01011 while (!re_string_eoi (mctx->input))
01012 {
01013 cur_state = transit_state (&err, preg, mctx, cur_state,
01014 fl_search && !match);
01015 if (cur_state == NULL)
01016 {
01017 cur_str_idx = re_string_cur_idx (mctx->input);
01018 if (BE (err != REG_NOERROR, 0))
01019 return -2;
01020 if (fl_search && !match)
01021 {
01022
01023
01024 #ifdef RE_ENABLE_I18N
01025 if (re_mb_cur_max == 1
01026 || re_string_first_byte (mctx->input, cur_str_idx))
01027 #endif
01028 cur_state = acquire_init_state_context (&err, preg, mctx,
01029 cur_str_idx);
01030 if (BE (cur_state == NULL && err != REG_NOERROR, 0))
01031 return -2;
01032 if (mctx->state_log != NULL)
01033 mctx->state_log[cur_str_idx] = cur_state;
01034 }
01035 else if (!fl_longest_match && match)
01036 break;
01037 else
01038 {
01039 if (mctx->state_log == NULL)
01040 break;
01041 else
01042 {
01043 int max = mctx->state_log_top;
01044 for (; cur_str_idx <= max; ++cur_str_idx)
01045 if (mctx->