00001
00002
00003
00004
00005
00006
00007
00008
00009
00010
00011
00012
00013
00014
00015
00016
00017
00018
00019
00020
00021
00022
00023
00024
00025
00026 #include "awk.h"
00027
00028 static reg_syntax_t syn;
00029
00030
00031
00032 Regexp *
00033 make_regexp(const char *s, size_t len, int ignorecase)
00034 {
00035 Regexp *rp;
00036 const char *rerr;
00037 const char *src = s;
00038 char *temp;
00039 const char *end = s + len;
00040 register char *dest;
00041 register int c, c2;
00042 #ifdef MBS_SUPPORT
00043
00044
00045 size_t is_multibyte = 0;
00046 mbstate_t mbs;
00047
00048 if (gawk_mb_cur_max > 1)
00049 memset(&mbs, 0, sizeof(mbstate_t));
00050 #endif
00051
00052
00053
00054
00055
00056
00057
00058
00059 emalloc(dest, char *, len + 2, "make_regexp");
00060 temp = dest;
00061
00062 while (src < end) {
00063 #ifdef MBS_SUPPORT
00064 if (gawk_mb_cur_max > 1 && !is_multibyte) {
00065
00066
00067 is_multibyte = mbrlen(src, end - src, &mbs);
00068 if ((is_multibyte == 1) || (is_multibyte == (size_t) -1)
00069 || (is_multibyte == (size_t) -2 || (is_multibyte == 0))) {
00070
00071 is_multibyte = 0;
00072 }
00073 }
00074 #endif
00075
00076 if (
00077 #ifdef MBS_SUPPORT
00078
00079
00080 (gawk_mb_cur_max == 1 || ! is_multibyte) &&
00081 #endif
00082 (*src == '\\')) {
00083 c = *++src;
00084 switch (c) {
00085 case 'a':
00086 case 'b':
00087 case 'f':
00088 case 'n':
00089 case 'r':
00090 case 't':
00091 case 'v':
00092 case 'x':
00093 case '0':
00094 case '1':
00095 case '2':
00096 case '3':
00097 case '4':
00098 case '5':
00099 case '6':
00100 case '7':
00101 c2 = parse_escape(&src);
00102 if (c2 < 0)
00103 cant_happen();
00104
00105
00106
00107
00108
00109 if (do_traditional && ! do_posix && (ISDIGIT(c) || c == 'x')
00110 && strchr("()|*+?.^$\\[]", c2) != NULL)
00111 *dest++ = '\\';
00112 *dest++ = (char) c2;
00113 break;
00114 case '8':
00115 case '9':
00116 *dest++ = c;
00117 src++;
00118 break;
00119 case 'y':
00120
00121 if (! do_traditional) {
00122 *dest++ = '\\';
00123 *dest++ = 'b';
00124 src++;
00125 break;
00126 }
00127
00128 default:
00129 *dest++ = '\\';
00130 *dest++ = (char) c;
00131 src++;
00132 break;
00133 }
00134 } else
00135 *dest++ = *src++;
00136 #ifdef MBS_SUPPORT
00137 if (gawk_mb_cur_max > 1 && is_multibyte)
00138 is_multibyte--;
00139 #endif
00140 }
00141
00142 *dest = '\0' ;
00143 emalloc(rp, Regexp *, sizeof(*rp), "make_regexp");
00144 memset((char *) rp, 0, sizeof(*rp));
00145 rp->pat.allocated = 0;
00146 emalloc(rp->pat.fastmap, char *, 256, "make_regexp");
00147
00148 if (ignorecase)
00149 rp->pat.translate = casetable;
00150 else
00151 rp->pat.translate = NULL;
00152 len = dest - temp;
00153 if ((rerr = re_compile_pattern(temp, len, &(rp->pat))) != NULL)
00154 fatal("%s: /%s/", rerr, temp);
00155
00156
00157 rp->pat.newline_anchor = FALSE;
00158
00159 free(temp);
00160 return rp;
00161 }
00162
00163
00164
00165 int
00166 research(Regexp *rp, register const char *str, int start,
00167 register size_t len, int need_start)
00168 {
00169 const char *ret = str;
00170
00171 if (ret) {
00172
00173
00174
00175
00176 int res = re_search(&(rp->pat), str, start+len,
00177 start, len, need_start ? &(rp->regs) : NULL);
00178
00179
00180
00181
00182
00183
00184
00185
00186
00187
00188
00189
00190
00191 if (res == -2) {
00192
00193 fatal(_("regex match failed, not enough memory to match string \"%.*s%s\""),
00194 (int) (len > 10 ? 10 : len), str + start,
00195 len > 10 ? "..." : "");
00196 }
00197 return res;
00198 } else
00199 return -1;
00200 }
00201
00202
00203
00204 void
00205 refree(Regexp *rp)
00206 {
00207
00208
00209
00210
00211
00212
00213 rp->pat.translate = NULL;
00214
00215 regfree(& rp->pat);
00216 if (rp->regs.start)
00217 free(rp->regs.start);
00218 if (rp->regs.end)
00219 free(rp->regs.end);
00220 free(rp);
00221 }
00222
00223
00224
00225 Regexp *
00226 re_update(NODE *t)
00227 {
00228 NODE *t1;
00229
00230 if ((t->re_flags & CASE) == IGNORECASE) {
00231 if ((t->re_flags & CONST) != 0) {
00232 assert(t->type == Node_regex);
00233 return t->re_reg;
00234 }
00235 t1 = force_string(tree_eval(t->re_exp));
00236 if (t->re_text != NULL) {
00237 if (cmp_nodes(t->re_text, t1) == 0) {
00238 free_temp(t1);
00239 return t->re_reg;
00240 }
00241 unref(t->re_text);
00242 }
00243 t->re_text = dupnode(t1);
00244 free_temp(t1);
00245 }
00246 if (t->re_reg != NULL)
00247 refree(t->re_reg);
00248 if (t->re_text == NULL || (t->re_flags & CASE) != IGNORECASE) {
00249 t1 = force_string(tree_eval(t->re_exp));
00250 unref(t->re_text);
00251 t->re_text = dupnode(t1);
00252 free_temp(t1);
00253 }
00254 t->re_reg = make_regexp(t->re_text->stptr, t->re_text->stlen,
00255 IGNORECASE);
00256 t->re_flags &= ~CASE;
00257 t->re_flags |= IGNORECASE;
00258 return t->re_reg;
00259 }
00260
00261
00262
00263 void
00264 resetup()
00265 {
00266 if (do_posix)
00267 syn = RE_SYNTAX_POSIX_AWK;
00268 else if (do_traditional)
00269 syn = RE_SYNTAX_AWK;
00270 else
00271 syn = RE_SYNTAX_GNU_AWK;
00272
00273
00274
00275
00276
00277 if (do_intervals)
00278 syn |= RE_INTERVALS;
00279
00280 (void) re_set_syntax(syn);
00281 }
00282
00283
00284
00285 int
00286 reisstring(const char *text, size_t len, Regexp *re, const char *buf)
00287 {
00288 static char metas[] = ".*+(){}[]|?^$\\";
00289 int i;
00290 int res;
00291 const char *matched;
00292
00293
00294 for (i = 0; i < len; i++) {
00295 if (strchr(metas, text[i]) != NULL) {
00296 return FALSE;
00297 }
00298 }
00299
00300
00301 matched = &buf[RESTART(re, buf)];
00302
00303 res = STREQN(text, matched, len);
00304
00305 return res;
00306 }
00307
00308
00309
00310 int
00311 remaybelong(const char *text, size_t len)
00312 {
00313 while (len--) {
00314 if (strchr("*+|?", *text++) != NULL) {
00315 return TRUE;
00316 }
00317 }
00318
00319 return FALSE;
00320 }
00321
00322
00323
00324 const char *
00325 reflags2str(int flagval)
00326 {
00327 static const struct flagtab values[] = {
00328 { RE_BACKSLASH_ESCAPE_IN_LISTS, "RE_BACKSLASH_ESCAPE_IN_LISTS" },
00329 { RE_BK_PLUS_QM, "RE_BK_PLUS_QM" },
00330 { RE_CHAR_CLASSES, "RE_CHAR_CLASSES" },
00331 { RE_CONTEXT_INDEP_ANCHORS, "RE_CONTEXT_INDEP_ANCHORS" },
00332 { RE_CONTEXT_INDEP_OPS, "RE_CONTEXT_INDEP_OPS" },
00333 { RE_CONTEXT_INVALID_OPS, "RE_CONTEXT_INVALID_OPS" },
00334 { RE_DOT_NEWLINE, "RE_DOT_NEWLINE" },
00335 { RE_DOT_NOT_NULL, "RE_DOT_NOT_NULL" },
00336 { RE_HAT_LISTS_NOT_NEWLINE, "RE_HAT_LISTS_NOT_NEWLINE" },
00337 { RE_INTERVALS, "RE_INTERVALS" },
00338 { RE_LIMITED_OPS, "RE_LIMITED_OPS" },
00339 { RE_NEWLINE_ALT, "RE_NEWLINE_ALT" },
00340 { RE_NO_BK_BRACES, "RE_NO_BK_BRACES" },
00341 { RE_NO_BK_PARENS, "RE_NO_BK_PARENS" },
00342 { RE_NO_BK_REFS, "RE_NO_BK_REFS" },
00343 { RE_NO_BK_VBAR, "RE_NO_BK_VBAR" },
00344 { RE_NO_EMPTY_RANGES, "RE_NO_EMPTY_RANGES" },
00345 { RE_UNMATCHED_RIGHT_PAREN_ORD, "RE_UNMATCHED_RIGHT_PAREN_ORD" },
00346 { RE_NO_POSIX_BACKTRACKING, "RE_NO_POSIX_BACKTRACKING" },
00347 { RE_NO_GNU_OPS, "RE_NO_GNU_OPS" },
00348 { RE_DEBUG, "RE_DEBUG" },
00349 { RE_INVALID_INTERVAL_ORD, "RE_INVALID_INTERVAL_ORD" },
00350 { RE_ICASE, "RE_ICASE" },
00351 { 0, NULL },
00352 };
00353
00354 return genflags2str(flagval, values);
00355 }