Main Page | Class List | Directories | File List | Class Members | File Members

regcomp.c File Reference

Go to the source code of this file.

Defines

#define REG_NOERROR_IDX   0
#define REG_NOMATCH_IDX   (REG_NOERROR_IDX + sizeof "Success")
#define REG_BADPAT_IDX   (REG_NOMATCH_IDX + sizeof "No match")
#define REG_ECOLLATE_IDX   (REG_BADPAT_IDX + sizeof "Invalid regular expression")
#define REG_ECTYPE_IDX   (REG_ECOLLATE_IDX + sizeof "Invalid collation character")
#define REG_EESCAPE_IDX   (REG_ECTYPE_IDX + sizeof "Invalid character class name")
#define REG_ESUBREG_IDX   (REG_EESCAPE_IDX + sizeof "Trailing backslash")
#define REG_EBRACK_IDX   (REG_ESUBREG_IDX + sizeof "Invalid back reference")
#define REG_EPAREN_IDX   (REG_EBRACK_IDX + sizeof "Unmatched [ or [^")
#define REG_EBRACE_IDX   (REG_EPAREN_IDX + sizeof "Unmatched ( or \\(")
#define REG_BADBR_IDX   (REG_EBRACE_IDX + sizeof "Unmatched \\{")
#define REG_ERANGE_IDX   (REG_BADBR_IDX + sizeof "Invalid content of \\{\\}")
#define REG_ESPACE_IDX   (REG_ERANGE_IDX + sizeof "Invalid range end")
#define REG_BADRPT_IDX   (REG_ESPACE_IDX + sizeof "Memory exhausted")
#define REG_EEND_IDX   (REG_BADRPT_IDX + sizeof "Invalid preceding regular expression")
#define REG_ESIZE_IDX   (REG_EEND_IDX + sizeof "Premature end of regular expression")
#define REG_ERPAREN_IDX   (REG_ESIZE_IDX + sizeof "Regular expression too big")
#define BRACKET_NAME_BUF_SIZE   32
#define BUILD_CHARCLASS_LOOP(ctype_func)

Functions

static reg_errcode_t re_compile_internal _RE_ARGS ((regex_t *preg, const char *pattern, int length, reg_syntax_t syntax))
const char * re_compile_pattern (char *pattern, size_t length, struct re_pattern_buffer *bufp) const
reg_syntax_t re_set_syntax (reg_syntax_t syntax)
int re_compile_fastmap (struct re_pattern_buffer *bufp)
static void re_set_fastmap (char *fastmap, int icase, int ch)
static void re_compile_fastmap_iter (regex_t *bufp, const re_dfastate_t *init_state, char *fastmap)
int regcomp (regex_t *__restrict preg, const char *__restrict pattern, int cflags)
size_t regerror (int errcode, const regex_t *preg, char *errbuf, size_t errbuf_size)
static void free_dfa_content (re_dfa_t *dfa)
void regfree (regex_t *preg)
static reg_errcode_t re_compile_internal (regex_t *preg, const char *pattern, int length, reg_syntax_t syntax)
static reg_errcode_t init_dfa (re_dfa_t *dfa, int pat_len)
static reg_errcode_t init_word_char (re_dfa_t *dfa)
static void free_workarea_compile (regex_t *preg)
static reg_errcode_t create_initial_state (re_dfa_t *dfa)
static reg_errcode_t analyze (re_dfa_t *dfa)
static reg_errcode_t analyze_tree (re_dfa_t *dfa, bin_tree_t *node)
static void calc_first (re_dfa_t *dfa, bin_tree_t *node)
static void calc_next (re_dfa_t *dfa, bin_tree_t *node)
static void calc_epsdest (re_dfa_t *dfa, bin_tree_t *node)
static reg_errcode_t duplicate_node_closure (re_dfa_t *dfa, int top_org_node, int top_clone_node, int root_node, unsigned int init_constraint)
static int search_duplicated_node (re_dfa_t *dfa, int org_node, unsigned int constraint)
static reg_errcode_t duplicate_node (int *new_idx, re_dfa_t *dfa, int org_idx, unsigned int constraint)
static void calc_inveclosure (re_dfa_t *dfa)
static reg_errcode_t calc_eclosure (re_dfa_t *dfa)
static reg_errcode_t calc_eclosure_iter (re_node_set *new_set, re_dfa_t *dfa, int node, int root)
static re_token_t fetch_token (re_string_t *input, reg_syntax_t syntax)
static int peek_token (re_token_t *token, re_string_t *input, reg_syntax_t syntax)
static int peek_token_bracket (re_token_t *token, re_string_t *input, reg_syntax_t syntax)
static bin_tree_tparse (re_string_t *regexp, regex_t *preg, reg_syntax_t syntax, reg_errcode_t *err)
static bin_tree_tparse_reg_exp (re_string_t *regexp, regex_t *preg, re_token_t *token, reg_syntax_t syntax, int nest, reg_errcode_t *err)
static bin_tree_tparse_branch (re_string_t *regexp, regex_t *preg, re_token_t *token, reg_syntax_t syntax, int nest, reg_errcode_t *err)
static bin_tree_tparse_expression (re_string_t *regexp, regex_t *preg, re_token_t *token, reg_syntax_t syntax, int nest, reg_errcode_t *err)
static bin_tree_tparse_sub_exp (re_string_t *regexp, regex_t *preg, re_token_t *token, reg_syntax_t syntax, int nest, reg_errcode_t *err)
static bin_tree_tparse_dup_op (bin_tree_t *dup_elem, re_string_t *regexp, re_dfa_t *dfa, re_token_t *token, reg_syntax_t syntax, reg_errcode_t *err)
static reg_errcode_t build_range_exp (re_bitset_ptr_t sbcset, bracket_elem_t *start_elem, bracket_elem_t *end_elem)
static reg_errcode_t build_collating_symbol (re_bitset_ptr_t sbcset, const unsigned char *name)
static bin_tree_tparse_bracket_exp (re_string_t *regexp, re_dfa_t *dfa, re_token_t *token, reg_syntax_t syntax, reg_errcode_t *err)
static reg_errcode_t parse_bracket_element (bracket_elem_t *elem, re_string_t *regexp, re_token_t *token, int token_len, re_dfa_t *dfa, reg_syntax_t syntax)
static reg_errcode_t parse_bracket_symbol (bracket_elem_t *elem, re_string_t *regexp, re_token_t *token)
static reg_errcode_t build_equiv_class (re_bitset_ptr_t sbcset, const unsigned char *name)
static reg_errcode_t build_charclass (RE_TRANSLATE_TYPE trans, re_bitset_ptr_t sbcset, const unsigned char *class_name, reg_syntax_t syntax)
static bin_tree_tbuild_word_op (re_dfa_t *dfa, RE_TRANSLATE_TYPE trans, int not, reg_errcode_t *err)
static int fetch_number (re_string_t *input, re_token_t *token, reg_syntax_t syntax)
static bin_tree_tcreate_tree (bin_tree_t *left, bin_tree_t *right, re_token_type_t type, int index)
static void free_bin_tree (bin_tree_t *tree)
static bin_tree_tduplicate_tree (bin_tree_t *src, re_dfa_t *dfa) const

Variables

const char __re_error_msgid[] attribute_hidden
reg_syntax_t re_syntax_options


Define Documentation

#define BRACKET_NAME_BUF_SIZE   32
 

Definition at line 2359 of file regcomp.c.

Referenced by parse_bracket_exp(), and parse_bracket_symbol().

#define BUILD_CHARCLASS_LOOP ctype_func   ) 
 

Value:

for (i = 0; i < SBC_MAX; ++i)   \
      {                                 \
        if (ctype_func (i))             \
          {                                     \
            int ch = trans ? trans[i] : i;      \
            bitset_set (sbcset, ch);            \
          }                                     \
      }

Referenced by build_charclass().

#define REG_BADBR_IDX   (REG_EBRACE_IDX + sizeof "Unmatched \\{")
 

#define REG_BADPAT_IDX   (REG_NOMATCH_IDX + sizeof "No match")
 

#define REG_BADRPT_IDX   (REG_ESPACE_IDX + sizeof "Memory exhausted")
 

#define REG_EBRACE_IDX   (REG_EPAREN_IDX + sizeof "Unmatched ( or \\(")
 

#define REG_EBRACK_IDX   (REG_ESUBREG_IDX + sizeof "Invalid back reference")
 

#define REG_ECOLLATE_IDX   (REG_BADPAT_IDX + sizeof "Invalid regular expression")
 

#define REG_ECTYPE_IDX   (REG_ECOLLATE_IDX + sizeof "Invalid collation character")
 

#define REG_EEND_IDX   (REG_BADRPT_IDX + sizeof "Invalid preceding regular expression")
 

#define REG_EESCAPE_IDX   (REG_ECTYPE_IDX + sizeof "Invalid character class name")
 

#define REG_EPAREN_IDX   (REG_EBRACK_IDX + sizeof "Unmatched [ or [^")
 

#define REG_ERANGE_IDX   (REG_BADBR_IDX + sizeof "Invalid content of \\{\\}")
 

#define REG_ERPAREN_IDX   (REG_ESIZE_IDX + sizeof "Regular expression too big")
 

#define REG_ESIZE_IDX   (REG_EEND_IDX + sizeof "Premature end of regular expression")
 

#define REG_ESPACE_IDX   (REG_ERANGE_IDX + sizeof "Invalid range end")
 

#define REG_ESUBREG_IDX   (REG_EESCAPE_IDX + sizeof "Trailing backslash")
 

#define REG_NOERROR_IDX   0
 

#define REG_NOMATCH_IDX   (REG_NOERROR_IDX + sizeof "Success")
 


Function Documentation

static reg_errcode_t re_compile_internal _RE_ARGS (regex_t *preg, const char *pattern, int length, reg_syntax_t syntax)   )  [static]
 

static reg_errcode_t analyze re_dfa_t dfa  )  [static]
 

Definition at line 917 of file regcomp.c.

References analyze_tree(), BE, calc_eclosure(), calc_inveclosure(), i, NULL, re_malloc, re_node_set_init_empty, REG_ESPACE, and REG_NOERROR.

Referenced by re_compile_internal().

00919 {
00920   int i;
00921   reg_errcode_t ret;
00922 
00923   /* Allocate arrays.  */
00924   dfa->nexts = re_malloc (int, dfa->nodes_alloc);
00925   dfa->org_indices = re_malloc (int, dfa->nodes_alloc);
00926   dfa->edests = re_malloc (re_node_set, dfa->nodes_alloc);
00927   dfa->eclosures = re_malloc (re_node_set, dfa->nodes_alloc);
00928   dfa->inveclosures = re_malloc (re_node_set, dfa->nodes_alloc);
00929   if (BE (dfa->nexts == NULL || dfa->org_indices == NULL || dfa->edests == NULL
00930           || dfa->eclosures == NULL || dfa->inveclosures == NULL, 0))
00931     return REG_ESPACE;
00932   /* Initialize them.  */
00933   for (i = 0; i < dfa->nodes_len; ++i)
00934     {
00935       dfa->nexts[i] = -1;
00936       re_node_set_init_empty (dfa->edests + i);
00937       re_node_set_init_empty (dfa->eclosures + i);
00938       re_node_set_init_empty (dfa->inveclosures + i);
00939     }
00940 
00941   ret = analyze_tree (dfa, dfa->str_tree);
00942   if (BE (ret == REG_NOERROR, 1))
00943     {
00944       ret = calc_eclosure (dfa);
00945       if (ret == REG_NOERROR)
00946         calc_inveclosure (dfa);
00947     }
00948   return ret;
00949 }

static reg_errcode_t analyze_tree re_dfa_t dfa,
bin_tree_t node
[static]
 

Definition at line 956 of file regcomp.c.

References BE, calc_epsdest(), calc_first(), calc_next(), bin_tree_t::eclosure, bin_tree_t::first, bin_tree_t::left, re_node_set::nelem, bin_tree_t::next, NULL, REG_NOERROR, and bin_tree_t::right.

Referenced by analyze().

00959 {
00960   reg_errcode_t ret;
00961   if (node->first == -1)
00962     calc_first (dfa, node);
00963   if (node->next == -1)
00964     calc_next (dfa, node);
00965   if (node->eclosure.nelem == 0)
00966     calc_epsdest (dfa, node);
00967   /* Calculate "first" etc. for the left child.  */
00968   if (node->left != NULL)
00969     {
00970       ret = analyze_tree (dfa, node->left);
00971       if (BE (ret != REG_NOERROR, 0))
00972         return ret;
00973     }
00974   /* Calculate "first" etc. for the right child.  */
00975   if (node->right != NULL)
00976     {
00977       ret = analyze_tree (dfa, node->right);
00978       if (BE (ret != REG_NOERROR, 0))
00979         return ret;
00980     }
00981   return REG_NOERROR;
00982 }

static reg_errcode_t build_charclass RE_TRANSLATE_TYPE  trans,
re_bitset_ptr_t  sbcset,
const unsigned char *  class_name,
reg_syntax_t  syntax
[static]
 

Definition at line 3217 of file regcomp.c.

References __wctype, BE, BUILD_CHARCLASS_LOOP, i, isblank, name, NULL, RE_ICASE, re_realloc, REG_ECTYPE, REG_ESPACE, and REG_NOERROR.

Referenced by build_word_op(), and parse_bracket_exp().

03223 {
03224   int i;
03225   const char *name = (const char *) class_name;
03226 
03227   /* In case of REG_ICASE "upper" and "lower" match the both of
03228      upper and lower cases.  */
03229   if ((syntax & RE_ICASE)
03230       && (strcmp (name, "upper") == 0 || strcmp (name, "lower") == 0))
03231     name = "alpha";
03232 
03233 #ifdef RE_ENABLE_I18N
03234   /* Check the space of the arrays.  */
03235   if (*char_class_alloc == mbcset->nchar_classes)
03236     {
03237       /* Not enough, realloc it.  */
03238       /* +1 in case of mbcset->nchar_classes is 0.  */
03239       *char_class_alloc = 2 * mbcset->nchar_classes + 1;
03240       /* Use realloc since array is NULL if *alloc == 0.  */
03241       mbcset->char_classes = re_realloc (mbcset->char_classes, wctype_t,
03242                                          *char_class_alloc);
03243       if (BE (mbcset->char_classes == NULL, 0))
03244         return REG_ESPACE;
03245     }
03246   mbcset->char_classes[mbcset->nchar_classes++] = __wctype (name);
03247 #endif /* RE_ENABLE_I18N */
03248 
03249 #define BUILD_CHARCLASS_LOOP(ctype_func)\
03250     for (i = 0; i < SBC_MAX; ++i)       \
03251       {                                 \
03252         if (ctype_func (i))             \
03253           {                                     \
03254             int ch = trans ? trans[i] : i;      \
03255             bitset_set (sbcset, ch);            \
03256           }                                     \
03257       }
03258 
03259   if (strcmp (name, "alnum") == 0)
03260     BUILD_CHARCLASS_LOOP (isalnum)
03261   else if (strcmp (name, "cntrl") == 0)
03262     BUILD_CHARCLASS_LOOP (iscntrl)
03263   else if (strcmp (name, "lower") == 0)
03264     BUILD_CHARCLASS_LOOP (islower)
03265   else if (strcmp (name, "space") == 0)
03266     BUILD_CHARCLASS_LOOP (isspace)
03267   else if (strcmp (name, "alpha") == 0)
03268     BUILD_CHARCLASS_LOOP (isalpha)
03269   else if (strcmp (name, "digit") == 0)
03270     BUILD_CHARCLASS_LOOP (isdigit)
03271   else if (strcmp (name, "print") == 0)
03272     BUILD_CHARCLASS_LOOP (isprint)
03273   else if (strcmp (name, "upper") == 0)
03274     BUILD_CHARCLASS_LOOP (isupper)
03275   else if (strcmp (name, "blank") == 0)
03276     BUILD_CHARCLASS_LOOP (isblank)
03277   else if (strcmp (name, "graph") == 0)
03278     BUILD_CHARCLASS_LOOP (isgraph)
03279   else if (strcmp (name, "punct") == 0)
03280     BUILD_CHARCLASS_LOOP (ispunct)
03281   else if (strcmp (name, "xdigit") == 0)
03282     BUILD_CHARCLASS_LOOP (isxdigit)
03283   else
03284     return REG_ECTYPE;
03285 
03286   return REG_NOERROR;
03287 }

static reg_errcode_t build_collating_symbol re_bitset_ptr_t  sbcset,
const unsigned char *  name
[static]
 

Definition at line 2485 of file regcomp.c.

References BE, bitset_set, REG_ECOLLATE, and REG_NOERROR.

Referenced by parse_bracket_exp().

02489 {
02490   size_t name_len = strlen ((const char *) name);
02491   if (BE (name_len != 1, 0))
02492     return REG_ECOLLATE;
02493   else
02494     {
02495       bitset_set (sbcset, name[0]);
02496       return REG_NOERROR;
02497     }
02498 }

static reg_errcode_t build_equiv_class re_bitset_ptr_t  sbcset,
const unsigned char *  name
[static]
 

Definition at line 3125 of file regcomp.c.

References BE, bitset_set, len, NULL, re_realloc, REG_ECOLLATE, REG_ESPACE, REG_NOERROR, and SBC_MAX.

Referenced by parse_bracket_exp().

03129 {
03130 #if defined _LIBC && defined RE_ENABLE_I18N
03131   uint32_t nrules = _NL_CURRENT_WORD (LC_COLLATE, _NL_COLLATE_NRULES);
03132   if (nrules != 0)
03133     {
03134       const int32_t *table, *indirect;
03135       const unsigned char *weights, *extra, *cp;
03136       unsigned char char_buf[2];
03137       int32_t idx1, idx2;
03138       unsigned int ch;
03139       size_t len;
03140       /* This #include defines a local function!  */
03141 # include <locale/weight.h>
03142       /* Calculate the index for equivalence class.  */
03143       cp = name;
03144       table = (const int32_t *) _NL_CURRENT (LC_COLLATE, _NL_COLLATE_TABLEMB);
03145       weights = (const unsigned char *) _NL_CURRENT (LC_COLLATE,
03146                                                _NL_COLLATE_WEIGHTMB);
03147       extra = (const unsigned char *) _NL_CURRENT (LC_COLLATE,
03148                                                    _NL_COLLATE_EXTRAMB);
03149       indirect = (const int32_t *) _NL_CURRENT (LC_COLLATE,
03150                                                 _NL_COLLATE_INDIRECTMB);
03151       idx1 = findidx (&cp);
03152       if (BE (idx1 == 0 || cp < name + strlen ((const char *) name), 0))
03153         /* This isn't a valid character.  */
03154         return REG_ECOLLATE;
03155 
03156       /* Build single byte matcing table for this equivalence class.  */
03157       char_buf[1] = (unsigned char) '\0';
03158       len = weights[idx1];
03159       for (ch = 0; ch < SBC_MAX; ++ch)
03160         {
03161           char_buf[0] = ch;
03162           cp = char_buf;
03163           idx2 = findidx (&cp);
03164 /*
03165           idx2 = table[ch];
03166 */
03167           if (idx2 == 0)
03168             /* This isn't a valid character.  */
03169             continue;
03170           if (len == weights[idx2])
03171             {
03172               int cnt = 0;
03173               while (cnt <= len &&
03174                      weights[idx1 + 1 + cnt] == weights[idx2 + 1 + cnt])
03175                 ++cnt;
03176 
03177               if (cnt > len)
03178                 bitset_set (sbcset, ch);
03179             }
03180         }
03181       /* Check whether the array has enough space.  */
03182       if (*equiv_class_alloc == mbcset->nequiv_classes)
03183         {
03184           /* Not enough, realloc it.  */
03185           /* +1 in case of mbcset->nequiv_classes is 0.  */
03186           *equiv_class_alloc = 2 * mbcset->nequiv_classes + 1;
03187           /* Use realloc since the array is NULL if *alloc == 0.  */
03188           mbcset->equiv_classes = re_realloc (mbcset->equiv_classes, int32_t,
03189                                               *equiv_class_alloc);
03190           if (BE (mbcset->equiv_classes == NULL, 0))
03191             return REG_ESPACE;
03192         }
03193       mbcset->equiv_classes[mbcset->nequiv_classes++] = idx1;
03194     }
03195   else
03196 #endif /* _LIBC && RE_ENABLE_I18N */
03197     {
03198       if (BE (strlen ((const char *) name) != 1, 0))
03199         return REG_ECOLLATE;
03200       bitset_set (sbcset, *name);
03201     }
03202   return REG_NOERROR;
03203 }

static reg_errcode_t build_range_exp re_bitset_ptr_t  sbcset,
bracket_elem_t start_elem,
bracket_elem_t end_elem
[static]
 

Definition at line 2375 of file regcomp.c.

References __btowc, BE, bitset_set, CHAR_CLASS, COLL_SYM, EQUIV_CLASS, NULL, bracket_elem_t::opr, re_realloc, REG_ECOLLATE, REG_ERANGE, REG_ESPACE, REG_NOERROR, SB_CHAR, SBC_MAX, and bracket_elem_t::type.

Referenced by parse_bracket_exp().

02379 {
02380   unsigned int start_ch, end_ch;
02381   /* Equivalence Classes and Character Classes can't be a range start/end.  */
02382   if (BE (start_elem->type == EQUIV_CLASS || start_elem->type == CHAR_CLASS
02383           || end_elem->type == EQUIV_CLASS || end_elem->type == CHAR_CLASS,
02384           0))
02385     return REG_ERANGE;
02386 
02387   /* We can handle no multi character collating elements without libc
02388      support.  */
02389   if (BE ((start_elem->type == COLL_SYM
02390            && strlen ((char *) start_elem->opr.name) > 1)
02391           || (end_elem->type == COLL_SYM
02392               && strlen ((char *) end_elem->opr.name) > 1), 0))
02393     return REG_ECOLLATE;
02394 
02395 # ifdef RE_ENABLE_I18N
02396   {
02397     wchar_t wc, start_wc, end_wc;
02398     wchar_t cmp_buf[6] = {L'\0', L'\0', L'\0', L'\0', L'\0', L'\0'};
02399 
02400     start_ch = ((start_elem->type == SB_CHAR) ? start_elem->opr.ch
02401                 : ((start_elem->type == COLL_SYM) ? start_elem->opr.name[0]
02402                    : 0));
02403     end_ch = ((end_elem->type == SB_CHAR) ? end_elem->opr.ch
02404               : ((end_elem->type == COLL_SYM) ? end_elem->opr.name[0]
02405                  : 0));
02406     start_wc = ((start_elem->type == SB_CHAR || start_elem->type == COLL_SYM)
02407                 ? __btowc (start_ch) : start_elem->opr.wch);
02408     end_wc = ((end_elem->type == SB_CHAR || end_elem->type == COLL_SYM)
02409               ? __btowc (end_ch) : end_elem->opr.wch);
02410     cmp_buf[0] = start_wc;
02411     cmp_buf[4] = end_wc;
02412     if (wcscoll (cmp_buf, cmp_buf + 4) > 0)
02413       return REG_ERANGE;
02414 
02415     /* Check the space of the arrays.  */
02416     if (*range_alloc == mbcset->nranges)
02417       {
02418         /* There are not enough space, need realloc.  */
02419         wchar_t *new_array_start, *new_array_end;
02420         int new_nranges;
02421 
02422         /* +1 in case of mbcset->nranges is 0.  */
02423         new_nranges = 2 * mbcset->nranges + 1;
02424         /* Use realloc since mbcset->range_starts and mbcset->range_ends
02425            are NULL if *range_alloc == 0.  */
02426         new_array_start = re_realloc (mbcset->range_starts, wchar_t,
02427                                       new_nranges);
02428         new_array_end = re_realloc (mbcset->range_ends, wchar_t,
02429                                     new_nranges);
02430 
02431         if (BE (new_array_start == NULL || new_array_end == NULL, 0))
02432           return REG_ESPACE;
02433 
02434         mbcset->range_starts = new_array_start;
02435         mbcset->range_ends = new_array_end;
02436         *range_alloc = new_nranges;
02437       }
02438 
02439     mbcset->range_starts[mbcset->nranges] = start_wc;
02440     mbcset->range_ends[mbcset->nranges++] = end_wc;
02441 
02442     /* Build the table for single byte characters.  */
02443     for (wc = 0; wc <= SBC_MAX; ++wc)
02444       {
02445         cmp_buf[2] = wc;
02446         if (wcscoll (cmp_buf, cmp_buf + 2) <= 0
02447             && wcscoll (cmp_buf + 2, cmp_buf + 4) <= 0)
02448           bitset_set (sbcset, wc);
02449       }
02450   }
02451 # else /* not RE_ENABLE_I18N */
02452   {
02453     unsigned int ch;
02454     start_ch = ((start_elem->type == SB_CHAR ) ? start_elem->opr.ch
02455                 : ((start_elem->type == COLL_SYM) ? start_elem->opr.name[0]
02456                    : 0));
02457     end_ch = ((end_elem->type == SB_CHAR ) ? end_elem->opr.ch
02458               : ((end_elem->type == COLL_SYM) ? end_elem->opr.name[0]
02459                  : 0));
02460     if (start_ch > end_ch)
02461       return REG_ERANGE;
02462     /* Build the table for single byte characters.  */
02463     for (ch = 0; ch <= SBC_MAX; ++ch)
02464       if (start_ch <= ch  && ch <= end_ch)
02465         bitset_set (sbcset, ch);
02466   }
02467 # endif /* not RE_ENABLE_I18N */
02468   return REG_NOERROR;
02469 }

static bin_tree_t* build_word_op re_dfa_t dfa,
RE_TRANSLATE_TYPE  trans,
int  not,
reg_errcode_t err
[static]
 

Definition at line 3290 of file regcomp.c.

References __btowc, BE, bitset_not(), bitset_set, BITSET_UINTS, build_charclass(), create_tree(), i, NULL, OP_ALT, re_token_t::opr, re_dfa_add_node(), re_free, REG_ESPACE, REG_NOERROR, SBC_MAX, SIMPLE_BRACKET, and re_token_t::type.

Referenced by parse_expression().

03295 {
03296   re_bitset_ptr_t sbcset;
03297 #ifdef RE_ENABLE_I18N
03298   re_charset_t *mbcset;
03299   int alloc = 0;
03300 #else /* not RE_ENABLE_I18N */
03301   int non_match = 0;
03302 #endif /* not RE_ENABLE_I18N */
03303   reg_errcode_t ret;
03304   re_token_t br_token;
03305   bin_tree_t *tree;
03306   int new_idx;
03307 
03308   sbcset = (re_bitset_ptr_t) calloc (sizeof (unsigned int), BITSET_UINTS);
03309 #ifdef RE_ENABLE_I18N
03310   mbcset = (re_charset_t *) calloc (sizeof (re_charset_t), 1);
03311 #endif /* RE_ENABLE_I18N */
03312 
03313 #ifdef RE_ENABLE_I18N
03314   if (BE (sbcset == NULL || mbcset == NULL, 0))
03315 #else /* not RE_ENABLE_I18N */
03316   if (BE (sbcset == NULL, 0))
03317 #endif /* not RE_ENABLE_I18N */
03318     {
03319       *err = REG_ESPACE;
03320       return NULL;
03321     }
03322 
03323   if (not)
03324     {
03325 #ifdef RE_ENABLE_I18N
03326       int i;
03327       /*
03328       if (syntax & RE_HAT_LISTS_NOT_NEWLINE)
03329         bitset_set(cset->sbcset, '\0');
03330       */
03331       mbcset->non_match = 1;
03332       if (re_mb_cur_max > 1)
03333         for (i = 0; i < SBC_MAX; ++i)
03334           if (__btowc (i) == WEOF)
03335             bitset_set (sbcset, i);
03336 #else /* not RE_ENABLE_I18N */
03337       non_match = 1;
03338 #endif /* not RE_ENABLE_I18N */
03339     }
03340 
03341   /* We don't care the syntax in this case.  */
03342   ret = build_charclass (trans, sbcset,
03343 #ifdef RE_ENABLE_I18N
03344                          mbcset, &alloc,
03345 #endif /* RE_ENABLE_I18N */
03346                          (const unsigned char *) "alpha", 0);
03347 
03348   if (BE (ret != REG_NOERROR, 0))
03349     {
03350       re_free (sbcset);
03351 #ifdef RE_ENABLE_I18N
03352       free_charset (mbcset);
03353 #endif /* RE_ENABLE_I18N */
03354       *err = ret;
03355       return NULL;
03356     }
03357   /* \w match '_' also.  */
03358   bitset_set (sbcset, '_');
03359 
03360   /* If it is non-matching list.  */
03361 #ifdef RE_ENABLE_I18N
03362   if (mbcset->non_match)
03363 #else /* not RE_ENABLE_I18N */
03364   if (non_match)
03365 #endif /* not RE_ENABLE_I18N */
03366     bitset_not (sbcset);
03367 
03368   /* Build a tree for simple bracket.  */
03369   br_token.type = SIMPLE_BRACKET;
03370   br_token.opr.sbcset = sbcset;
03371   new_idx = re_dfa_add_node (dfa, br_token, 0);
03372   tree = create_tree (NULL, NULL, 0, new_idx);
03373   if (BE (new_idx == -1 || tree == NULL, 0))
03374     goto build_word_op_espace;
03375 
03376 #ifdef RE_ENABLE_I18N
03377   if (re_mb_cur_max > 1)
03378     {
03379       re_token_t alt_token;
03380       bin_tree_t *mbc_tree;
03381       /* Build a tree for complex bracket.  */
03382       br_token.type = COMPLEX_BRACKET;
03383       br_token.opr.mbcset = mbcset;
03384       dfa->has_mb_node = 1;
03385       new_idx = re_dfa_add_node (dfa, br_token, 0);
03386       mbc_tree = create_tree (NULL, NULL, 0, new_idx);
03387       if (BE (new_idx == -1 || mbc_tree == NULL, 0))
03388         goto build_word_op_espace;
03389       /* Then join them by ALT node.  */
03390       alt_token.type = OP_ALT;
03391       new_idx = re_dfa_add_node (dfa, alt_token, 0);
03392       tree = create_tree (tree, mbc_tree, 0, new_idx);
03393       if (BE (new_idx != -1 && mbc_tree != NULL, 1))
03394         return tree;
03395     }
03396   else
03397     {
03398       free_charset (mbcset);
03399       return tree;
03400     }
03401 #else /* not RE_ENABLE_I18N */
03402   return tree;
03403 #endif /* not RE_ENABLE_I18N */
03404 
03405  build_word_op_espace:
03406   re_free (sbcset);
03407 #ifdef RE_ENABLE_I18N
03408   free_charset (mbcset);
03409 #endif /* RE_ENABLE_I18N */
03410   *err = REG_ESPACE;
03411   return NULL;
03412 }

static reg_errcode_t calc_eclosure re_dfa_t dfa  )  [static]
 

Definition at line 1338 of file regcomp.c.

References BE, calc_eclosure_iter(), err(), re_node_set_free, and REG_NOERROR.

Referenced by analyze().

01340 {
01341   int node_idx, incomplete;
01342 #ifdef DEBUG
01343   assert (dfa->nodes_len > 0);
01344 #endif
01345   incomplete = 0;
01346   /* For each nodes, calculate epsilon closure.  */
01347   for (node_idx = 0; ; ++node_idx)
01348     {
01349       reg_errcode_t err;
01350       re_node_set eclosure_elem;
01351       if (node_idx == dfa->nodes_len)
01352         {
01353           if (!incomplete)
01354             break;
01355           incomplete = 0;
01356           node_idx = 0;
01357         }
01358 
01359 #ifdef DEBUG
01360       assert (dfa->eclosures[node_idx].nelem != -1);
01361 #endif
01362       /* If we have already calculated, skip it.  */
01363       if (dfa->eclosures[node_idx].nelem != 0)
01364         continue;
01365       /* Calculate epsilon closure of `node_idx'.  */
01366       err = calc_eclosure_iter (&eclosure_elem, dfa, node_idx, 1);
01367       if (BE (err != REG_NOERROR, 0))
01368         return err;
01369 
01370       if (dfa->eclosures[node_idx].nelem == 0)
01371         {
01372           incomplete = 1;
01373           re_node_set_free (&eclosure_elem);
01374         }
01375     }
01376   return REG_NOERROR;
01377 }

static reg_errcode_t calc_eclosure_iter re_node_set new_set,
re_dfa_t dfa,
int  node,
int  root
[static]
 

Definition at line 1382 of file regcomp.c.

References ANCHOR, BE, duplicate_node_closure(), re_dfa_t::eclosures, re_dfa_t::edests, re_node_set::elems, err(), i, IS_EPSILON_NODE, re_node_set::nelem, re_dfa_t::nodes, re_token_t::opr, re_node_set_alloc(), re_node_set_free, re_node_set_insert(), re_node_set_merge(), REG_NOERROR, and re_token_t::type.

Referenced by calc_eclosure().

01386 {
01387   reg_errcode_t err;
01388   unsigned int constraint;
01389   int i, incomplete;
01390   re_node_set eclosure;
01391   incomplete = 0;
01392   err = re_node_set_alloc (&eclosure, dfa->edests[node].nelem + 1);
01393   if (BE (err != REG_NOERROR, 0))
01394     return err;
01395 
01396   /* This indicates that we are calculating this node now.
01397      We reference this value to avoid infinite loop.  */
01398   dfa->eclosures[node].nelem = -1;
01399 
01400   constraint = ((dfa->nodes[node].type == ANCHOR)
01401                 ? dfa->nodes[node].opr.ctx_type : 0);
01402   /* If the current node has constraints, duplicate all nodes.
01403      Since they must inherit the constraints.  */
01404   if (constraint && !dfa->nodes[dfa->edests[node].elems[0]].duplicated)
01405     {
01406       int org_node, cur_node;
01407       org_node = cur_node = node;
01408       err = duplicate_node_closure (dfa, node, node, node, constraint);
01409       if (BE (err != REG_NOERROR, 0))
01410         return err;
01411     }
01412 
01413   /* Expand each epsilon destination nodes.  */
01414   if (IS_EPSILON_NODE(dfa->nodes[node].type))
01415     for (i = 0; i < dfa->edests[node].nelem; ++i)
01416       {
01417         re_node_set eclosure_elem;
01418         int edest = dfa->edests[node].elems[i];
01419         /* If calculating the epsilon closure of `edest' is in progress,
01420            return intermediate result.  */
01421         if (dfa->eclosures[edest].nelem == -1)
01422           {
01423             incomplete = 1;
01424             continue;
01425           }
01426         /* If we haven't calculated the epsilon closure of `edest' yet,
01427            calculate now. Otherwise use calculated epsilon closure.  */
01428         if (dfa->eclosures[edest].nelem == 0)
01429           {
01430             err = calc_eclosure_iter (&eclosure_elem, dfa, edest, 0);
01431             if (BE (err != REG_NOERROR, 0))
01432               return err;
01433           }
01434         else
01435           eclosure_elem = dfa->eclosures[edest];
01436         /* Merge the epsilon closure of `edest'.  */
01437         re_node_set_merge (&eclosure, &eclosure_elem);
01438         /* If the epsilon closure of `edest' is incomplete,
01439            the epsilon closure of this node is also incomplete.  */
01440         if (dfa->eclosures[edest].nelem == 0)
01441           {
01442             incomplete = 1;
01443             re_node_set_free (&eclosure_elem);
01444           }
01445       }
01446 
01447   /* Epsilon closures include itself.  */
01448   re_node_set_insert (&eclosure, node);
01449   if (incomplete && !root)
01450     dfa->eclosures[node].nelem = 0;
01451   else
01452     dfa->eclosures[node] = eclosure;
01453   *new_set = eclosure;
01454   return REG_NOERROR;
01455 }

static void calc_epsdest re_dfa_t dfa,
bin_tree_t node
[static]
 

Definition at line 1099 of file regcomp.c.

References ANCHOR, calc_first(), calc_next(), bin_tree_t::first, bin_tree_t::left, bin_tree_t::next, bin_tree_t::node_idx, NULL, OP_ALT, OP_BACK_REF, OP_CLOSE_SUBEXP, OP_DUP_ASTERISK, OP_DUP_PLUS, OP_DUP_QUESTION, OP_OPEN_SUBEXP, re_node_set_init_1(), re_node_set_init_2(), bin_tree_t::right, and bin_tree_t::type.

Referenced by analyze_tree().

01102 {
01103   int idx;
01104   idx = node->node_idx;
01105   if (node->type == 0)
01106     {
01107       if (dfa->nodes[idx].type == OP_DUP_ASTERISK
01108           || dfa->nodes[idx].type == OP_DUP_PLUS
01109           || dfa->nodes[idx].type == OP_DUP_QUESTION)
01110         {
01111           if (node->left->first == -1)
01112             calc_first (dfa, node->left);
01113           if (node->next == -1)
01114             calc_next (dfa, node);
01115           re_node_set_init_2 (dfa->edests + idx, node->left->first,
01116                               node->next);
01117         }
01118       else if (dfa->nodes[idx].type == OP_ALT)
01119         {
01120           int left, right;
01121           if (node->left != NULL)
01122             {
01123               if (node->left->first == -1)
01124                 calc_first (dfa, node->left);
01125               left = node->left->first;
01126             }
01127           else
01128             {
01129               if (node->next == -1)
01130                 calc_next (dfa, node);
01131               left = node->next;
01132             }
01133           if (node->right != NULL)
01134             {
01135               if (node->right->first == -1)
01136                 calc_first (dfa, node->right);
01137               right = node->right->first;
01138             }
01139           else
01140             {
01141               if (node->next == -1)
01142                 calc_next (dfa, node);
01143               right = node->next;
01144             }
01145           re_node_set_init_2 (dfa->edests + idx, left, right);
01146         }
01147       else if (dfa->nodes[idx].type == ANCHOR
01148                || dfa->nodes[idx].type == OP_OPEN_SUBEXP
01149                || dfa->nodes[idx].type == OP_CLOSE_SUBEXP
01150                || dfa->node