Main Page | Class List | Directories | File List | Class Members | File Members

regexec.c

Go to the documentation of this file.
00001 /* Extended regular expression matching and search library.
00002    Copyright (C) 2002, 2003 Free Software Foundation, Inc.
00003    This file is part of the GNU C Library.
00004    Contributed by Isamu Hasegawa <isamu@yamato.ibm.com>.
00005 
00006    The GNU C Library is free software; you can redistribute it and/or
00007    modify it under the terms of the GNU Lesser General Public
00008    License as published by the Free Software Foundation; either
00009    version 2.1 of the License, or (at your option) any later version.
00010 
00011    The GNU C Library is distributed in the hope that it will be useful,
00012    but WITHOUT ANY WARRANTY; without even the implied warranty of
00013    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
00014    Lesser General Public License for more details.
00015 
00016    You should have received a copy of the GNU Lesser General Public
00017    License along with the GNU C Library; if not, write to the Free
00018    Software Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA
00019    02111-1307 USA.  */
00020 
00021 static reg_errcode_t match_ctx_init _RE_ARGS((re_match_context_t *cache, int eflags,
00022                                      re_string_t *input, int n));
00023 static void match_ctx_clean _RE_ARGS((re_match_context_t *mctx));
00024 static void match_ctx_free _RE_ARGS((re_match_context_t *cache));
00025 static void match_ctx_free_subtops _RE_ARGS((re_match_context_t *mctx));
00026 static reg_errcode_t match_ctx_add_entry _RE_ARGS((re_match_context_t *cache, int node,
00027                                           int str_idx, int from, int to));
00028 static int search_cur_bkref_entry _RE_ARGS((re_match_context_t *mctx, int str_idx));
00029 static void match_ctx_clear_flag _RE_ARGS((re_match_context_t *mctx));
00030 static reg_errcode_t match_ctx_add_subtop _RE_ARGS((re_match_context_t *mctx, int node,
00031                                            int str_idx));
00032 static re_sub_match_last_t * match_ctx_add_sublast _RE_ARGS((re_sub_match_top_t *subtop,
00033                                                    int node, int str_idx));
00034 static void sift_ctx_init _RE_ARGS((re_sift_context_t *sctx, re_dfastate_t **sifted_sts,
00035                            re_dfastate_t **limited_sts, int last_node,
00036                            int last_str_idx, int check_subexp));
00037 static reg_errcode_t re_search_internal _RE_ARGS((const regex_t *preg,
00038                                          const char *string, int length,
00039                                          int start, int range, int stop,
00040                                          size_t nmatch, regmatch_t pmatch[],
00041                                          int eflags));
00042 static int re_search_2_stub _RE_ARGS((struct re_pattern_buffer *bufp,
00043                              const char *string1, int length1,
00044                              const char *string2, int length2,
00045                              int start, int range, struct re_registers *regs,
00046                              int stop, int ret_len));
00047 static int re_search_stub _RE_ARGS((struct re_pattern_buffer *bufp,
00048                            const char *string, int length, int start,
00049                            int range, int stop, struct re_registers *regs,
00050                            int ret_len));
00051 static unsigned re_copy_regs _RE_ARGS((struct re_registers *regs, regmatch_t *pmatch,
00052                               int nregs, int regs_allocated));
00053 static inline re_dfastate_t *acquire_init_state_context _RE_ARGS((reg_errcode_t *err,
00054                                                          const regex_t *preg,
00055                                                          const re_match_context_t *mctx,
00056                                                          int idx));
00057 static reg_errcode_t prune_impossible_nodes _RE_ARGS((const regex_t *preg,
00058                                              re_match_context_t *mctx));
00059 static int check_matching _RE_ARGS((const regex_t *preg, re_match_context_t *mctx,
00060                            int fl_search, int fl_longest_match));
00061 static int check_halt_node_context _RE_ARGS((const re_dfa_t *dfa, int node,
00062                                     unsigned int context));
00063 static int check_halt_state_context _RE_ARGS((const regex_t *preg,
00064                                      const re_dfastate_t *state,
00065                                      const re_match_context_t *mctx, int idx));
00066 static void update_regs _RE_ARGS((re_dfa_t *dfa, regmatch_t *pmatch, int cur_node,
00067                          int cur_idx, int nmatch));
00068 static int proceed_next_node _RE_ARGS((const regex_t *preg, int nregs, regmatch_t *regs,
00069                               const re_match_context_t *mctx,
00070                               int *pidx, int node, re_node_set *eps_via_nodes,
00071                               struct re_fail_stack_t *fs));
00072 static reg_errcode_t push_fail_stack _RE_ARGS((struct re_fail_stack_t *fs, 
00073                                       int str_idx, int *dests, int nregs,
00074                                       regmatch_t *regs,
00075                                       re_node_set *eps_via_nodes));
00076 static int pop_fail_stack _RE_ARGS((struct re_fail_stack_t *fs, int *pidx, int nregs,
00077                            regmatch_t *regs, re_node_set *eps_via_nodes));
00078 static reg_errcode_t set_regs _RE_ARGS((const regex_t *preg,
00079                                const re_match_context_t *mctx,
00080                                size_t nmatch, regmatch_t *pmatch,
00081                                int fl_backtrack));
00082 static reg_errcode_t free_fail_stack_return _RE_ARGS((struct re_fail_stack_t *fs));
00083 
00084 #ifdef RE_ENABLE_I18N
00085 static int sift_states_iter_mb _RE_ARGS((const regex_t *preg,
00086                                 const re_match_context_t *mctx,
00087                                 re_sift_context_t *sctx,
00088                                 int node_idx, int str_idx, int max_str_idx));
00089 #endif /* RE_ENABLE_I18N */
00090 static reg_errcode_t sift_states_backward _RE_ARGS((const regex_t *preg,
00091                                            re_match_context_t *mctx,
00092                                            re_sift_context_t *sctx));
00093 static reg_errcode_t update_cur_sifted_state _RE_ARGS((const regex_t *preg,
00094                                               re_match_context_t *mctx,
00095                                               re_sift_context_t *sctx,
00096                                               int str_idx,
00097                                               re_node_set *dest_nodes));
00098 static reg_errcode_t add_epsilon_src_nodes _RE_ARGS((re_dfa_t *dfa,
00099                                             re_node_set *dest_nodes,
00100                                             const re_node_set *candidates));
00101 static reg_errcode_t sub_epsilon_src_nodes _RE_ARGS((re_dfa_t *dfa, int node,
00102                                             re_node_set *dest_nodes,
00103                                             const re_node_set *and_nodes));
00104 static int check_dst_limits _RE_ARGS((re_dfa_t *dfa, re_node_set *limits,
00105                              re_match_context_t *mctx, int dst_node,
00106                              int dst_idx, int src_node, int src_idx));
00107 static int check_dst_limits_calc_pos _RE_ARGS((re_dfa_t *dfa, re_match_context_t *mctx,
00108                                       int limit, re_node_set *eclosures,
00109                                       int subexp_idx, int node, int str_idx));
00110 static reg_errcode_t check_subexp_limits _RE_ARGS((re_dfa_t *dfa,
00111                                           re_node_set *dest_nodes,
00112                                           const re_node_set *candidates,
00113                                           re_node_set *limits,
00114                                           struct re_backref_cache_entry *bkref_ents,
00115                                           int str_idx));
00116 static reg_errcode_t sift_states_bkref _RE_ARGS((const regex_t *preg,
00117                                         re_match_context_t *mctx,
00118                                         re_sift_context_t *sctx,
00119                                         int str_idx, re_node_set *dest_nodes));
00120 static reg_errcode_t clean_state_log_if_need _RE_ARGS((re_match_context_t *mctx,
00121                                               int next_state_log_idx));
00122 static reg_errcode_t merge_state_array _RE_ARGS((re_dfa_t *dfa, re_dfastate_t **dst,
00123                                         re_dfastate_t **src, int num));
00124 static re_dfastate_t *transit_state _RE_ARGS((reg_errcode_t *err, const regex_t *preg,
00125                                      re_match_context_t *mctx,
00126                                      re_dfastate_t *state, int fl_search));
00127 static reg_errcode_t check_subexp_matching_top _RE_ARGS((re_dfa_t *dfa,
00128                                                 re_match_context_t *mctx,
00129                                                 re_node_set *cur_nodes,
00130                                                 int str_idx));
00131 static re_dfastate_t *transit_state_sb _RE_ARGS((reg_errcode_t *err, const regex_t *preg,
00132                                         re_dfastate_t *pstate,
00133                                         int fl_search,
00134                                         re_match_context_t *mctx));
00135 #ifdef RE_ENABLE_I18N
00136 static reg_errcode_t transit_state_mb _RE_ARGS((const regex_t *preg,
00137                                        re_dfastate_t *pstate,
00138                                        re_match_context_t *mctx));
00139 #endif /* RE_ENABLE_I18N */
00140 static reg_errcode_t transit_state_bkref _RE_ARGS((const regex_t *preg,
00141                                           re_node_set *nodes,
00142                                           re_match_context_t *mctx));
00143 static reg_errcode_t get_subexp _RE_ARGS((const regex_t *preg, re_match_context_t *mctx,
00144                                  int bkref_node, int bkref_str_idx));
00145 static reg_errcode_t get_subexp_sub _RE_ARGS((const regex_t *preg,
00146                                      re_match_context_t *mctx,
00147                                      re_sub_match_top_t *sub_top,
00148                                      re_sub_match_last_t *sub_last,
00149                                      int bkref_node, int bkref_str));
00150 static int find_subexp_node _RE_ARGS((re_dfa_t *dfa, re_node_set *nodes,
00151                              int subexp_idx, int fl_open));
00152 static reg_errcode_t check_arrival _RE_ARGS((const regex_t *preg,
00153                                     re_match_context_t *mctx,
00154                                     state_array_t *path, int top_node,
00155                                     int top_str, int last_node, int last_str,
00156                                     int fl_open));
00157 static reg_errcode_t check_arrival_add_next_nodes _RE_ARGS((const regex_t *preg,
00158                                                    re_dfa_t *dfa,
00159                                                    re_match_context_t *mctx,
00160                                                    int str_idx,
00161                                                    re_node_set *cur_nodes,
00162                                                    re_node_set *next_nodes));
00163 static reg_errcode_t check_arrival_expand_ecl _RE_ARGS((re_dfa_t *dfa,
00164                                                re_node_set *cur_nodes,
00165                                                int ex_subexp, int fl_open));
00166 static reg_errcode_t check_arrival_expand_ecl_sub _RE_ARGS((re_dfa_t *dfa,
00167                                                    re_node_set *dst_nodes,
00168                                                    int target, int ex_subexp,
00169                                                    int fl_open));
00170 static reg_errcode_t expand_bkref_cache _RE_ARGS((const regex_t *preg,
00171                                          re_match_context_t *mctx,
00172                                          re_node_set *cur_nodes, int cur_str,
00173                                          int last_str, int subexp_num,
00174                                          int fl_open));
00175 static re_dfastate_t **build_trtable _RE_ARGS((const regex_t *dfa,
00176                                       const re_dfastate_t *state,
00177                                       int fl_search));
00178 #ifdef RE_ENABLE_I18N
00179 static int check_node_accept_bytes _RE_ARGS((const regex_t *preg, int node_idx,
00180                                     const re_string_t *input, int idx));
00181 # ifdef _LIBC
00182 static unsigned int find_collation_sequence_value _RE_ARGS((const unsigned char *mbs,
00183                                                    size_t name_len));
00184 # endif /* _LIBC */
00185 #endif /* RE_ENABLE_I18N */
00186 static int group_nodes_into_DFAstates _RE_ARGS((const regex_t *dfa,
00187                                        const re_dfastate_t *state,
00188                                        re_node_set *states_node,
00189                                        bitset *states_ch));
00190 static int check_node_accept _RE_ARGS((const regex_t *preg, const re_token_t *node,
00191                               const re_match_context_t *mctx, int idx));
00192 static reg_errcode_t extend_buffers _RE_ARGS((re_match_context_t *mctx));
00193 
00194 /* Entry point for POSIX code.  */
00195 
00196 /* regexec searches for a given pattern, specified by PREG, in the
00197    string STRING.
00198 
00199    If NMATCH is zero or REG_NOSUB was set in the cflags argument to
00200    `regcomp', we ignore PMATCH.  Otherwise, we assume PMATCH has at
00201    least NMATCH elements, and we set them to the offsets of the
00202    corresponding matched substrings.
00203 
00204    EFLAGS specifies `execution flags' which affect matching: if
00205    REG_NOTBOL is set, then ^ does not match at the beginning of the
00206    string; if REG_NOTEOL is set, then $ does not match at the end.
00207 
00208    We return 0 if we find a match and REG_NOMATCH if not.  */
00209 
00210 int
00211 regexec (preg, string, nmatch, pmatch, eflags)
00212     const regex_t *__restrict preg;
00213     const char *__restrict string;
00214     size_t nmatch;
00215     regmatch_t pmatch[];
00216     int eflags;
00217 {
00218   reg_errcode_t err;
00219   int length = strlen (string);
00220   if (preg->no_sub)
00221     err = re_search_internal (preg, string, length, 0, length, length, 0,
00222                               NULL, eflags);
00223   else
00224     err = re_search_internal (preg, string, length, 0, length, length, nmatch,
00225                               pmatch, eflags);
00226   return err != REG_NOERROR;
00227 }
00228 #ifdef _LIBC
00229 weak_alias (__regexec, regexec)
00230 #endif
00231 
00232 /* Entry points for GNU code.  */
00233 
00234 /* re_match, re_search, re_match_2, re_search_2
00235 
00236    The former two functions operate on STRING with length LENGTH,
00237    while the later two operate on concatenation of STRING1 and STRING2
00238    with lengths LENGTH1 and LENGTH2, respectively.
00239 
00240    re_match() matches the compiled pattern in BUFP against the string,
00241    starting at index START.
00242 
00243    re_search() first tries matching at index START, then it tries to match
00244    starting from index START + 1, and so on.  The last start position tried
00245    is START + RANGE.  (Thus RANGE = 0 forces re_search to operate the same
00246    way as re_match().)
00247 
00248    The parameter STOP of re_{match,search}_2 specifies that no match exceeding
00249    the first STOP characters of the concatenation of the strings should be
00250    concerned.
00251 
00252    If REGS is not NULL, and BUFP->no_sub is not set, the offsets of the match
00253    and all groups is stroed in REGS.  (For the "_2" variants, the offsets are
00254    computed relative to the concatenation, not relative to the individual
00255    strings.)
00256 
00257    On success, re_match* functions return the length of the match, re_search*
00258    return the position of the start of the match.  Return value -1 means no
00259    match was found and -2 indicates an internal error.  */
00260 
00261 int
00262 re_match (bufp, string, length, start, regs)
00263     struct re_pattern_buffer *bufp;
00264     const char *string;
00265     int length, start;
00266     struct re_registers *regs;
00267 {
00268   return re_search_stub (bufp, string, length, start, 0, length, regs, 1);
00269 }
00270 #ifdef _LIBC
00271 weak_alias (__re_match, re_match)
00272 #endif
00273 
00274 int
00275 re_search (bufp, string, length, start, range, regs)
00276     struct re_pattern_buffer *bufp;
00277     const char *string;
00278     int length, start, range;
00279     struct re_registers *regs;
00280 {
00281   return re_search_stub (bufp, string, length, start, range, length, regs, 0);
00282 }
00283 #ifdef _LIBC
00284 weak_alias (__re_search, re_search)
00285 #endif
00286 
00287 int
00288 re_match_2 (bufp, string1, length1, string2, length2, start, regs, stop)
00289     struct re_pattern_buffer *bufp;
00290     const char *string1, *string2;
00291     int length1, length2, start, stop;
00292     struct re_registers *regs;
00293 {
00294   return re_search_2_stub (bufp, string1, length1, string2, length2,
00295                            start, 0, regs, stop, 1);
00296 }
00297 #ifdef _LIBC
00298 weak_alias (__re_match_2, re_match_2)
00299 #endif
00300 
00301 int
00302 re_search_2 (bufp, string1, length1, string2, length2, start, range, regs, stop)
00303     struct re_pattern_buffer *bufp;
00304     const char *string1, *string2;
00305     int length1, length2, start, range, stop;
00306     struct re_registers *regs;
00307 {
00308   return re_search_2_stub (bufp, string1, length1, string2, length2,
00309                            start, range, regs, stop, 0);
00310 }
00311 #ifdef _LIBC
00312 weak_alias (__re_search_2, re_search_2)
00313 #endif
00314 
00315 static int
00316 re_search_2_stub (bufp, string1, length1, string2, length2, start, range, regs,
00317                   stop, ret_len)
00318     struct re_pattern_buffer *bufp;
00319     const char *string1, *string2;
00320     int length1, length2, start, range, stop, ret_len;
00321     struct re_registers *regs;
00322 {
00323   const char *str;
00324   int rval;
00325   int len = length1 + length2;
00326   int free_str = 0;
00327 
00328   if (BE (length1 < 0 || length2 < 0 || stop < 0, 0))
00329     return -2;
00330 
00331   /* Concatenate the strings.  */
00332   if (length2 > 0)
00333     if (length1 > 0)
00334       {
00335         char *s = re_malloc (char, len);
00336 
00337         if (BE (s == NULL, 0))
00338           return -2;
00339         memcpy (s, string1, length1);
00340         memcpy (s + length1, string2, length2);
00341         str = s;
00342         free_str = 1;
00343       }
00344     else
00345       str = string2;
00346   else
00347     str = string1;
00348 
00349   rval = re_search_stub (bufp, str, len, start, range, stop, regs,
00350                          ret_len);
00351   if (free_str)
00352     re_free ((char *) str);
00353   return rval;
00354 }
00355 
00356 /* The parameters have the same meaning as those of re_search.
00357    Additional parameters:
00358    If RET_LEN is nonzero the length of the match is returned (re_match style);
00359    otherwise the position of the match is returned.  */
00360 
00361 static int
00362 re_search_stub (bufp, string, length, start, range, stop, regs, ret_len)
00363     struct re_pattern_buffer *bufp;
00364     const char *string;
00365     int length, start, range, stop, ret_len;
00366     struct re_registers *regs;
00367 {
00368   reg_errcode_t result;
00369   regmatch_t *pmatch;
00370   int nregs, rval;
00371   int eflags = 0;
00372 
00373   /* Check for out-of-range.  */
00374   if (BE (start < 0 || start > length, 0))
00375     return -1;
00376   if (BE (start + range > length, 0))
00377     range = length - start;
00378   else if (BE (start + range < 0, 0))
00379     range = -start;
00380 
00381   eflags |= (bufp->not_bol) ? REG_NOTBOL : 0;
00382   eflags |= (bufp->not_eol) ? REG_NOTEOL : 0;
00383 
00384   /* Compile fastmap if we haven't yet.  */
00385   if (range > 0 && bufp->fastmap != NULL && !bufp->fastmap_accurate)
00386     re_compile_fastmap (bufp);
00387 
00388   if (BE (bufp->no_sub, 0))
00389     regs = NULL;
00390 
00391   /* We need at least 1 register.  */
00392   if (regs == NULL)
00393     nregs = 1;
00394   else if (BE (bufp->regs_allocated == REGS_FIXED &&
00395                regs->num_regs < bufp->re_nsub + 1, 0))
00396     {
00397       nregs = regs->num_regs;
00398       if (BE (nregs < 1, 0))
00399         {
00400           /* Nothing can be copied to regs.  */
00401           regs = NULL;
00402           nregs = 1;
00403         }
00404     }
00405   else
00406     nregs = bufp->re_nsub + 1;
00407   pmatch = re_malloc (regmatch_t, nregs);
00408   if (BE (pmatch == NULL, 0))
00409     return -2;
00410 
00411   result = re_search_internal (bufp, string, length, start, range, stop,
00412                                nregs, pmatch, eflags);
00413 
00414   rval = 0;
00415 
00416   /* I hope we needn't fill ther regs with -1's when no match was found.  */
00417   if (result != REG_NOERROR)
00418     rval = -1;
00419   else if (regs != NULL)
00420     {
00421       /* If caller wants register contents data back, copy them.  */
00422       bufp->regs_allocated = re_copy_regs (regs, pmatch, nregs,
00423                                            bufp->regs_allocated);
00424       if (BE (bufp->regs_allocated == REGS_UNALLOCATED, 0))
00425         rval = -2;
00426     }
00427 
00428   if (BE (rval == 0, 1))
00429     {
00430       if (ret_len)
00431         {
00432           assert (pmatch[0].rm_so == start);
00433           rval = pmatch[0].rm_eo - start;
00434         }
00435       else
00436         rval = pmatch[0].rm_so;
00437     }
00438   re_free (pmatch);
00439   return rval;
00440 }
00441 
00442 static unsigned
00443 re_copy_regs (regs, pmatch, nregs, regs_allocated)
00444     struct re_registers *regs;
00445     regmatch_t *pmatch;
00446     int nregs, regs_allocated;
00447 {
00448   int rval = REGS_REALLOCATE;
00449   int i;
00450   int need_regs = nregs + 1;
00451   /* We need one extra element beyond `num_regs' for the `-1' marker GNU code
00452      uses.  */
00453 
00454   /* Have the register data arrays been allocated?  */
00455   if (regs_allocated == REGS_UNALLOCATED)
00456     { /* No.  So allocate them with malloc.  */
00457       regs->start = re_malloc (regoff_t, need_regs);
00458       if (BE (regs->start == NULL, 0))
00459         return REGS_UNALLOCATED;
00460       regs->end = re_malloc (regoff_t, need_regs);
00461       if (BE (regs->end == NULL, 0))
00462         {
00463           re_free (regs->start);
00464           return REGS_UNALLOCATED;
00465         }
00466       regs->num_regs = need_regs;
00467     }
00468   else if (regs_allocated == REGS_REALLOCATE)
00469     { /* Yes.  If we need more elements than were already
00470          allocated, reallocate them.  If we need fewer, just
00471          leave it alone.  */
00472       if (need_regs > regs->num_regs)
00473         {
00474           regs->start = re_realloc (regs->start, regoff_t, need_regs);
00475           if (BE (regs->start == NULL, 0))
00476             {
00477               if (regs->end != NULL)
00478                 re_free (regs->end);
00479               return REGS_UNALLOCATED;
00480             }
00481           regs->end = re_realloc (regs->end, regoff_t, need_regs);
00482           if (BE (regs->end == NULL, 0))
00483             {
00484               re_free (regs->start);
00485               return REGS_UNALLOCATED;
00486             }
00487           regs->num_regs = need_regs;
00488         }
00489     }
00490   else
00491     {
00492       assert (regs_allocated == REGS_FIXED);
00493       /* This function may not be called with REGS_FIXED and nregs too big.  */
00494       assert (regs->num_regs >= nregs);
00495       rval = REGS_FIXED;
00496     }
00497 
00498   /* Copy the regs.  */
00499   for (i = 0; i < nregs; ++i)
00500     {
00501       regs->start[i] = pmatch[i].rm_so;
00502       regs->end[i] = pmatch[i].rm_eo;
00503     }
00504   for ( ; i < regs->num_regs; ++i)
00505     regs->start[i] = regs->end[i] = -1;
00506 
00507   return rval;
00508 }
00509 
00510 /* Set REGS to hold NUM_REGS registers, storing them in STARTS and
00511    ENDS.  Subsequent matches using PATTERN_BUFFER and REGS will use
00512    this memory for recording register information.  STARTS and ENDS
00513    must be allocated using the malloc library routine, and must each
00514    be at least NUM_REGS * sizeof (regoff_t) bytes long.
00515 
00516    If NUM_REGS == 0, then subsequent matches should allocate their own
00517    register data.
00518 
00519    Unless this function is called, the first search or match using
00520    PATTERN_BUFFER will allocate its own register data, without
00521    freeing the old data.  */
00522 
00523 void
00524 re_set_registers (bufp, regs, num_regs, starts, ends)
00525     struct re_pattern_buffer *bufp;
00526     struct re_registers *regs;
00527     unsigned num_regs;
00528     regoff_t *starts, *ends;
00529 {
00530   if (num_regs)
00531     {
00532       bufp->regs_allocated = REGS_REALLOCATE;
00533       regs->num_regs = num_regs;
00534       regs->start = starts;
00535       regs->end = ends;
00536     }
00537   else
00538     {
00539       bufp->regs_allocated = REGS_UNALLOCATED;
00540       regs->num_regs = 0;
00541       regs->start = regs->end = (regoff_t *) 0;
00542     }
00543 }
00544 #ifdef _LIBC
00545 weak_alias (__re_set_registers, re_set_registers)
00546 #endif
00547 
00548 /* Entry points compatible with 4.2 BSD regex library.  We don't define
00549    them unless specifically requested.  */
00550 
00551 #if defined _REGEX_RE_COMP || defined _LIBC
00552 int
00553 # ifdef _LIBC
00554 weak_function
00555 # endif
00556 re_exec (s)
00557      const char *s;
00558 {
00559   return 0 == regexec (&re_comp_buf, s, 0, NULL, 0);
00560 }
00561 #endif /* _REGEX_RE_COMP */
00562 
00563 static re_node_set empty_set;
00564 
00565 /* Internal entry point.  */
00566 
00567 /* Searches for a compiled pattern PREG in the string STRING, whose
00568    length is LENGTH.  NMATCH, PMATCH, and EFLAGS have the same
00569    mingings with regexec.  START, and RANGE have the same meanings
00570    with re_search.
00571    Return REG_NOERROR if we find a match, and REG_NOMATCH if not,
00572    otherwise return the error code.
00573    Note: We assume front end functions already check ranges.
00574    (START + RANGE >= 0 && START + RANGE <= LENGTH)  */
00575 
00576 static reg_errcode_t
00577 re_search_internal (preg, string, length, start, range, stop, nmatch, pmatch,
00578                     eflags)
00579     const regex_t *preg;
00580     const char *string;
00581     int length, start, range, stop, eflags;
00582     size_t nmatch;
00583     regmatch_t pmatch[];
00584 {
00585   reg_errcode_t err;
00586   re_dfa_t *dfa = (re_dfa_t *)preg->buffer;
00587   re_string_t input;
00588   int left_lim, right_lim, incr;
00589   int fl_longest_match, match_first, match_last = -1;
00590   int fast_translate, sb;
00591   re_match_context_t mctx;
00592   char *fastmap = ((preg->fastmap != NULL && preg->fastmap_accurate
00593                     && range && !preg->can_be_null) ? preg->fastmap : NULL);
00594 
00595   /* Check if the DFA haven't been compiled.  */
00596   if (BE (preg->used == 0 || dfa->init_state == NULL
00597           || dfa->init_state_word == NULL || dfa->init_state_nl == NULL
00598           || dfa->init_state_begbuf == NULL, 0))
00599     return REG_NOMATCH;
00600 
00601   re_node_set_init_empty (&empty_set);
00602   memset (&mctx, '\0', sizeof (re_match_context_t));
00603 
00604   /* We must check the longest matching, if nmatch > 0.  */
00605   fl_longest_match = (nmatch != 0 || dfa->nbackref);
00606 
00607   err = re_string_allocate (&input, string, length, dfa->nodes_len + 1,
00608                             preg->translate, preg->syntax & RE_ICASE);
00609   if (BE (err != REG_NOERROR, 0))
00610     goto free_return;
00611   input.stop = stop;
00612 
00613   err = match_ctx_init (&mctx, eflags, &input, dfa->nbackref * 2);
00614   if (BE (err != REG_NOERROR, 0))
00615     goto free_return;
00616 
00617   /* We will log all the DFA states through which the dfa pass,
00618      if nmatch > 1, or this dfa has "multibyte node", which is a
00619      back-reference or a node which can accept multibyte character or
00620      multi character collating element.  */
00621   if (nmatch > 1 || dfa->has_mb_node)
00622     {
00623       mctx.state_log = re_malloc (re_dfastate_t *, dfa->nodes_len + 1);
00624       if (BE (mctx.state_log == NULL, 0))
00625         {
00626           err = REG_ESPACE;
00627           goto free_return;
00628         }
00629     }
00630   else
00631     mctx.state_log = NULL;
00632 
00633 #ifdef DEBUG
00634   /* We assume front-end functions already check them.  */
00635   assert (start + range >= 0 && start + range <= length);
00636 #endif
00637 
00638   match_first = start;
00639   input.tip_context = ((eflags & REG_NOTBOL) ? CONTEXT_BEGBUF
00640                        : CONTEXT_NEWLINE | CONTEXT_BEGBUF);
00641 
00642   /* Check incrementally whether of not the input string match.  */
00643   incr = (range < 0) ? -1 : 1;
00644   left_lim = (range < 0) ? start + range : start;
00645   right_lim = (range < 0) ? start : start + range;
00646 #ifdef RE_ENABLE_I18N
00647   sb = re_mb_cur_max == 1;
00648 #else
00649   sb = 1;
00650 #endif
00651   fast_translate = sb || !(preg->syntax & RE_ICASE || preg->translate);
00652 
00653   for (;;)
00654     {
00655       /* At first get the current byte from input string.  */
00656       if (fastmap)
00657         {
00658           if (BE (fast_translate, 1))
00659             {
00660               unsigned RE_TRANSLATE_TYPE t
00661                 = (unsigned RE_TRANSLATE_TYPE) preg->translate;
00662               if (BE (range >= 0, 1))
00663                 {
00664                   if (BE (t != NULL, 0))
00665                     {
00666                       while (BE (match_first < right_lim, 1)
00667                              && !fastmap[t[(unsigned char) string[match_first]]])
00668                         ++match_first;
00669                     }
00670                   else
00671                     {
00672                       while (BE (match_first < right_lim, 1)
00673                              && !fastmap[(unsigned char) string[match_first]])
00674                         ++match_first;
00675                     }
00676                   if (BE (match_first == right_lim, 0))
00677                     {
00678                       int ch = match_first >= length
00679                                ? 0 : (unsigned char) string[match_first];
00680                       if (!fastmap[t ? t[ch] : ch])
00681                         break;
00682                     }
00683                 }
00684               else
00685                 {
00686                   while (match_first >= left_lim)
00687                     {
00688                       int ch = match_first >= length
00689                                ? 0 : (unsigned char) string[match_first];
00690                       if (fastmap[t ? t[ch] : ch])
00691                         break;
00692                       --match_first;
00693                     }
00694                   if (match_first < left_lim)
00695                     break;
00696                 }
00697             }
00698           else
00699             {
00700               int ch;
00701 
00702               do
00703                 {
00704                   /* In this case, we can't determine easily the current byte,
00705                      since it might be a component byte of a multibyte
00706                      character.  Then we use the constructed buffer
00707                      instead.  */
00708                   /* If MATCH_FIRST is out of the valid range, reconstruct the
00709                      buffers.  */
00710                   if (input.raw_mbs_idx + input.valid_len <= match_first
00711                       || match_first < input.raw_mbs_idx)
00712                     {
00713                       err = re_string_reconstruct (&input, match_first, eflags,
00714                                                    preg->newline_anchor);
00715                       if (BE (err != REG_NOERROR, 0))
00716                         goto free_return;
00717                     }
00718                   /* If MATCH_FIRST is out of the buffer, leave it as '\0'.
00719                      Note that MATCH_FIRST must not be smaller than 0.  */
00720                   ch = ((match_first >= length) ? 0
00721                        : re_string_byte_at (&input,
00722                                             match_first - input.raw_mbs_idx));
00723                   if (fastmap[ch])
00724                     break;
00725                   match_first += incr;
00726                 }
00727               while (match_first >= left_lim && match_first <= right_lim);
00728               if (! fastmap[ch])
00729                 break;
00730             }
00731         }
00732 
00733       /* Reconstruct the buffers so that the matcher can assume that
00734          the matching starts from the begining of the buffer.  */
00735       err = re_string_reconstruct (&input, match_first, eflags,
00736                                    preg->newline_anchor);
00737       if (BE (err != REG_NOERROR, 0))
00738         goto free_return;
00739 #ifdef RE_ENABLE_I18N
00740      /* Eliminate it when it is a component of a multibyte character
00741          and isn't the head of a multibyte character.  */
00742       if (sb || re_string_first_byte (&input, 0))
00743 #endif
00744         {
00745           /* It seems to be appropriate one, then use the matcher.  */
00746           /* We assume that the matching starts from 0.  */
00747           mctx.state_log_top = mctx.nbkref_ents = mctx.max_mb_elem_len = 0;
00748           match_last = check_matching (preg, &mctx, 0, fl_longest_match);
00749           if (match_last != -1)
00750             {
00751               if (BE (match_last == -2, 0))
00752                 {
00753                   err = REG_ESPACE;
00754                   goto free_return;
00755                 }
00756               else
00757                 {
00758                   mctx.match_last = match_last;
00759                   if ((!preg->no_sub && nmatch > 1) || dfa->nbackref)
00760                     {
00761                       re_dfastate_t *pstate = mctx.state_log[match_last];
00762                       mctx.last_node = check_halt_state_context (preg, pstate,
00763                                                                  &mctx, match_last);
00764                     }
00765                   if ((!preg->no_sub && nmatch > 1 && dfa->has_plural_match)
00766                       || dfa->nbackref)
00767                     {
00768                       err = prune_impossible_nodes (preg, &mctx);
00769                       if (err == REG_NOERROR)
00770                         break;
00771                       if (BE (err != REG_NOMATCH, 0))
00772                         goto free_return;
00773                     }
00774                   else
00775                     break; /* We found a matching.  */
00776                 }
00777             }
00778           match_ctx_clean (&mctx);
00779         }
00780       /* Update counter.  */
00781       match_first += incr;
00782       if (match_first < left_lim || right_lim < match_first)
00783         break;
00784     }
00785 
00786   /* Set pmatch[] if we need.  */
00787   if (match_last != -1 && nmatch > 0)
00788     {
00789       int reg_idx;
00790 
00791       /* Initialize registers.  */
00792       for (reg_idx = 0; reg_idx < nmatch; ++reg_idx)
00793         pmatch[reg_idx].rm_so = pmatch[reg_idx].rm_eo = -1;
00794 
00795       /* Set the points where matching start/end.  */
00796       pmatch[0].rm_so = 0;
00797       pmatch[0].rm_eo = mctx.match_last;
00798 
00799       if (!preg->no_sub && nmatch > 1)
00800         {
00801           err = set_regs (preg, &mctx, nmatch, pmatch,
00802                           dfa->has_plural_match && dfa->nbackref > 0);
00803           if (BE (err != REG_NOERROR, 0))
00804             goto free_return;
00805         }
00806 
00807       /* At last, add the offset to the each registers, since we slided
00808          the buffers so that We can assume that the matching starts from 0.  */
00809       for (reg_idx = 0; reg_idx < nmatch; ++reg_idx)
00810         if (pmatch[reg_idx].rm_so != -1)
00811           {
00812             pmatch[reg_idx].rm_so += match_first;
00813             pmatch[reg_idx].rm_eo += match_first;
00814           }
00815     }
00816   err = (match_last == -1) ? REG_NOMATCH : REG_NOERROR;
00817  free_return:
00818   re_free (mctx.state_log);
00819   if (dfa->nbackref)
00820     match_ctx_free (&mctx);
00821   re_string_destruct (&input);
00822   return err;
00823 }
00824 
00825 static reg_errcode_t
00826 prune_impossible_nodes (preg, mctx)
00827      const regex_t *preg;
00828      re_match_context_t *mctx;
00829 {
00830   int halt_node, match_last;
00831   reg_errcode_t ret;
00832   re_dfa_t *dfa = (re_dfa_t *)preg->buffer;
00833   re_dfastate_t **sifted_states;
00834   re_dfastate_t **lim_states = NULL;
00835   re_sift_context_t sctx;
00836 #ifdef DEBUG
00837   assert (mctx->state_log != NULL);
00838 #endif
00839   match_last = mctx->match_last;
00840   halt_node = mctx->last_node;
00841   sifted_states = re_malloc (re_dfastate_t *, match_last + 1);
00842   if (BE (sifted_states == NULL, 0))
00843     {
00844       ret = REG_ESPACE;
00845       goto free_return;
00846     }
00847   if (dfa->nbackref)
00848     {
00849       lim_states = re_malloc (re_dfastate_t *, match_last + 1);
00850       if (BE (lim_states == NULL, 0))
00851         {
00852           ret = REG_ESPACE;
00853           goto free_return;
00854         }
00855       while (1)
00856         {
00857           memset (lim_states, '\0',
00858                   sizeof (re_dfastate_t *) * (match_last + 1));
00859           match_ctx_clear_flag (mctx);
00860           sift_ctx_init (&sctx, sifted_states, lim_states, halt_node,
00861                          match_last, 0);
00862           ret = sift_states_backward (preg, mctx, &sctx);
00863           re_node_set_free (&sctx.limits);
00864           if (BE (ret != REG_NOERROR, 0))
00865               goto free_return;
00866           if (sifted_states[0] != NULL || lim_states[0] != NULL)
00867             break;
00868           do
00869             {
00870               --match_last;
00871               if (match_last < 0)
00872                 {
00873                   ret = REG_NOMATCH;
00874                   goto free_return;
00875                 }
00876             } while (!mctx->state_log[match_last]->halt);
00877           halt_node = check_halt_state_context (preg,
00878                                                 mctx->state_log[match_last],
00879                                                 mctx, match_last);
00880         }
00881       ret = merge_state_array (dfa, sifted_states, lim_states,
00882                                match_last + 1);
00883       re_free (lim_states);
00884       lim_states = NULL;
00885       if (BE (ret != REG_NOERROR, 0))
00886         goto free_return;
00887     }
00888   else
00889     {
00890       sift_ctx_init (&sctx, sifted_states, lim_states, halt_node,
00891                      match_last, 0);
00892       ret = sift_states_backward (preg, mctx, &sctx);
00893       re_node_set_free (&sctx.limits);
00894       if (BE (ret != REG_NOERROR, 0))
00895         goto free_return;
00896     }
00897   re_free (mctx->state_log);
00898   mctx->state_log = sifted_states;
00899   sifted_states = NULL;
00900   mctx->last_node = halt_node;
00901   mctx->match_last = match_last;
00902   ret = REG_NOERROR;
00903  free_return:
00904   re_free (sifted_states);
00905   re_free (lim_states);
00906   return ret;
00907 }
00908 
00909 /* Acquire an initial state and return it.
00910    We must select appropriate initial state depending on the context,
00911    since initial states may have constraints like "<", "^", etc..  */
00912 
00913 static inline re_dfastate_t *
00914 acquire_init_state_context (err, preg, mctx, idx)
00915      reg_errcode_t *err;
00916      const regex_t *preg;
00917      const re_match_context_t *mctx;
00918      int idx;
00919 {
00920   re_dfa_t *dfa = (re_dfa_t *) preg->buffer;
00921 
00922   *err = REG_NOERROR;
00923   if (dfa->init_state->has_constraint)
00924     {
00925       unsigned int context;
00926       context =  re_string_context_at (mctx->input, idx - 1, mctx->eflags,
00927                                        preg->newline_anchor);
00928       if (IS_WORD_CONTEXT (context))
00929         return dfa->init_state_word;
00930       else if (IS_ORDINARY_CONTEXT (context))
00931         return dfa->init_state;
00932       else if (IS_BEGBUF_CONTEXT (context) && IS_NEWLINE_CONTEXT (context))
00933         return dfa->init_state_begbuf;
00934       else if (IS_NEWLINE_CONTEXT (context))
00935         return dfa->init_state_nl;
00936       else if (IS_BEGBUF_CONTEXT (context))
00937         {
00938           /* It is relatively rare case, then calculate on demand.  */
00939           return  re_acquire_state_context (err, dfa,
00940                                             dfa->init_state->entrance_nodes,
00941                                             context);
00942         }
00943       else
00944         /* Must not happen?  */
00945         return dfa->init_state;
00946     }
00947   else
00948     return dfa->init_state;
00949 }
00950 
00951 /* Check whether the regular expression match input string INPUT or not,
00952    and return the index where the matching end, return -1 if not match,
00953    or return -2 in case of an error.
00954    FL_SEARCH means we must search where the matching starts,
00955    FL_LONGEST_MATCH means we want the POSIX longest matching.
00956    Note that the matcher assume that the maching starts from the current
00957    index of the buffer.  */
00958 
00959 static int
00960 check_matching (preg, mctx, fl_search, fl_longest_match)
00961     const regex_t *preg;
00962     re_match_context_t *mctx;
00963     int fl_search, fl_longest_match;
00964 {
00965   re_dfa_t *dfa = (re_dfa_t *) preg->buffer;
00966   reg_errcode_t err;
00967   int match = 0;
00968   int match_last = -1;
00969   int cur_str_idx = re_string_cur_idx (mctx->input);
00970   re_dfastate_t *cur_state;
00971 
00972   cur_state = acquire_init_state_context (&err, preg, mctx, cur_str_idx);
00973   /* An initial state must not be NULL(invalid state).  */
00974   if (BE (cur_state == NULL, 0))
00975     return -2;
00976   if (mctx->state_log != NULL)
00977     mctx->state_log[cur_str_idx] = cur_state;
00978 
00979   /* Check OP_OPEN_SUBEXP in the initial state in case that we use them
00980      later.  E.g. Processing back references.  */
00981   if (dfa->nbackref)
00982     {
00983       err = check_subexp_matching_top (dfa, mctx, &cur_state->nodes, 0);
00984       if (BE (err != REG_NOERROR, 0))
00985         return err;
00986     }
00987 
00988   if (cur_state->has_backref)
00989     {
00990       err = transit_state_bkref (preg, &cur_state->nodes, mctx);
00991       if (BE (err != REG_NOERROR, 0))
00992         return err;
00993     }
00994 
00995   /* If the RE accepts NULL string.  */
00996   if (cur_state->halt)
00997     {
00998       if (!cur_state->has_constraint
00999           || check_halt_state_context (preg, cur_state, mctx, cur_str_idx))
01000         {
01001           if (!fl_longest_match)
01002             return cur_str_idx;
01003           else
01004             {
01005               match_last = cur_str_idx;
01006               match = 1;
01007             }
01008         }
01009     }
01010 
01011   while (!re_string_eoi (mctx->input))
01012     {
01013       cur_state = transit_state (&err, preg, mctx, cur_state,
01014                                  fl_search && !match);
01015       if (cur_state == NULL) /* Reached at the invalid state or an error.  */
01016         {
01017           cur_str_idx = re_string_cur_idx (mctx->input);
01018           if (BE (err != REG_NOERROR, 0))
01019             return -2;
01020           if (fl_search && !match)
01021             {
01022               /* Restart from initial state, since we are searching
01023                  the point from where matching start.  */
01024 #ifdef RE_ENABLE_I18N
01025               if (re_mb_cur_max == 1
01026                   || re_string_first_byte (mctx->input, cur_str_idx))
01027 #endif /* RE_ENABLE_I18N */
01028                 cur_state = acquire_init_state_context (&err, preg, mctx,
01029                                                         cur_str_idx);
01030               if (BE (cur_state == NULL && err != REG_NOERROR, 0))
01031                 return -2;
01032               if (mctx->state_log != NULL)
01033                 mctx->state_log[cur_str_idx] = cur_state;
01034             }
01035           else if (!fl_longest_match && match)
01036             break;
01037           else /* (fl_longest_match && match) || (!fl_search && !match)  */
01038             {
01039               if (mctx->state_log == NULL)
01040                 break;
01041               else
01042                 {
01043                   int max = mctx->state_log_top;
01044                   for (; cur_str_idx <= max; ++cur_str_idx)
01045                     if (mctx->