00001
00002
00003
00004
00005
00006
00007
00008
00009
00010
00011
00012
00013
00014
00015
00016
00017
00018
00019
00020
00021 static void re_string_construct_common _RE_ARGS((const char *str, int len,
00022 re_string_t *pstr,
00023 RE_TRANSLATE_TYPE trans, int icase));
00024 #ifdef RE_ENABLE_I18N
00025 static int re_string_skip_chars _RE_ARGS((re_string_t *pstr, int new_raw_idx,
00026 wint_t *last_wc));
00027 #endif
00028 static re_dfastate_t *create_newstate_common _RE_ARGS((re_dfa_t *dfa,
00029 const re_node_set *nodes,
00030 unsigned int hash));
00031 static reg_errcode_t register_state _RE_ARGS((re_dfa_t *dfa, re_dfastate_t *newstate,
00032 unsigned int hash));
00033 static re_dfastate_t *create_ci_newstate _RE_ARGS((re_dfa_t *dfa,
00034 const re_node_set *nodes,
00035 unsigned int hash));
00036 static re_dfastate_t *create_cd_newstate _RE_ARGS((re_dfa_t *dfa,
00037 const re_node_set *nodes,
00038 unsigned int context,
00039 unsigned int hash));
00040 static inline unsigned int calc_state_hash _RE_ARGS((const re_node_set *nodes,
00041 unsigned int context));
00042
00043
00044
00045
00046
00047
00048 static reg_errcode_t
00049 re_string_allocate (pstr, str, len, init_len, trans, icase)
00050 re_string_t *pstr;
00051 const char *str;
00052 int len, init_len, icase;
00053 RE_TRANSLATE_TYPE trans;
00054 {
00055 reg_errcode_t ret;
00056 int init_buf_len = (len + 1 < init_len) ? len + 1: init_len;
00057 re_string_construct_common (str, len, pstr, trans, icase);
00058 pstr->stop = pstr->len;
00059
00060 ret = re_string_realloc_buffers (pstr, init_buf_len);
00061 if (BE (ret != REG_NOERROR, 0))
00062 return ret;
00063
00064 pstr->mbs_case = (MBS_CASE_ALLOCATED (pstr) ? pstr->mbs_case
00065 : (unsigned char *) str);
00066 pstr->mbs = MBS_ALLOCATED (pstr) ? pstr->mbs : pstr->mbs_case;
00067 pstr->valid_len = (MBS_CASE_ALLOCATED (pstr) || MBS_ALLOCATED (pstr)
00068 #ifdef RE_ENABLE_I18N
00069 || re_mb_cur_max > 1
00070 #endif
00071 ) ? pstr->valid_len : len;
00072 return REG_NOERROR;
00073 }
00074
00075
00076
00077 static reg_errcode_t
00078 re_string_construct (pstr, str, len, trans, icase)
00079 re_string_t *pstr;
00080 const char *str;
00081 int len, icase;
00082 RE_TRANSLATE_TYPE trans;
00083 {
00084 reg_errcode_t ret;
00085 re_string_construct_common (str, len, pstr, trans, icase);
00086 pstr->stop = pstr->len;
00087
00088 pstr->valid_len = 0;
00089
00090 if (len > 0)
00091 {
00092 ret = re_string_realloc_buffers (pstr, len + 1);
00093 if (BE (ret != REG_NOERROR, 0))
00094 return ret;
00095 }
00096 pstr->mbs_case = (MBS_CASE_ALLOCATED (pstr) ? pstr->mbs_case
00097 : (unsigned char *) str);
00098 pstr->mbs = MBS_ALLOCATED (pstr) ? pstr->mbs : pstr->mbs_case;
00099
00100 if (icase)
00101 {
00102 #ifdef RE_ENABLE_I18N
00103 if (re_mb_cur_max > 1)
00104 build_wcs_upper_buffer (pstr);
00105 else
00106 #endif
00107 build_upper_buffer (pstr);
00108 }
00109 else
00110 {
00111 #ifdef RE_ENABLE_I18N
00112 if (re_mb_cur_max > 1)
00113 build_wcs_buffer (pstr);
00114 else
00115 #endif
00116 {
00117 if (trans != NULL)
00118 re_string_translate_buffer (pstr);
00119 else
00120 pstr->valid_len = len;
00121 }
00122 }
00123
00124
00125 pstr->valid_len = pstr->bufs_len;
00126 return REG_NOERROR;
00127 }
00128
00129
00130
00131 static reg_errcode_t
00132 re_string_realloc_buffers (pstr, new_buf_len)
00133 re_string_t *pstr;
00134 int new_buf_len;
00135 {
00136 #ifdef RE_ENABLE_I18N
00137 if (re_mb_cur_max > 1)
00138 {
00139 wint_t *new_array = re_realloc (pstr->wcs, wint_t, new_buf_len);
00140 if (BE (new_array == NULL, 0))
00141 return REG_ESPACE;
00142 pstr->wcs = new_array;
00143 }
00144 #endif
00145 if (MBS_ALLOCATED (pstr))
00146 {
00147 unsigned char *new_array = re_realloc (pstr->mbs, unsigned char,
00148 new_buf_len);
00149 if (BE (new_array == NULL, 0))
00150 return REG_ESPACE;
00151 pstr->mbs = new_array;
00152 }
00153 if (MBS_CASE_ALLOCATED (pstr))
00154 {
00155 unsigned char *new_array = re_realloc (pstr->mbs_case, unsigned char,
00156 new_buf_len);
00157 if (BE (new_array == NULL, 0))
00158 return REG_ESPACE;
00159 pstr->mbs_case = new_array;
00160 if (!MBS_ALLOCATED (pstr))
00161 pstr->mbs = pstr->mbs_case;
00162 }
00163 pstr->bufs_len = new_buf_len;
00164 return REG_NOERROR;
00165 }
00166
00167
00168 static void
00169 re_string_construct_common (str, len, pstr, trans, icase)
00170 const char *str;
00171 int len;
00172 re_string_t *pstr;
00173 RE_TRANSLATE_TYPE trans;
00174 int icase;
00175 {
00176 memset (pstr, '\0', sizeof (re_string_t));
00177 pstr->raw_mbs = (const unsigned char *) str;
00178 pstr->len = len;
00179 pstr->trans = trans;
00180 pstr->icase = icase ? 1 : 0;
00181 }
00182
00183 #ifdef RE_ENABLE_I18N
00184
00185
00186
00187
00188
00189
00190
00191
00192
00193
00194
00195
00196 static void
00197 build_wcs_buffer (pstr)
00198 re_string_t *pstr;
00199 {
00200 mbstate_t prev_st;
00201 int byte_idx, end_idx, mbclen, remain_len;
00202
00203
00204 end_idx = (pstr->bufs_len > pstr->len)? pstr->len : pstr->bufs_len;
00205 for (byte_idx = pstr->valid_len; byte_idx < end_idx;)
00206 {
00207 wchar_t wc;
00208 remain_len = end_idx - byte_idx;
00209 prev_st = pstr->cur_state;
00210 mbclen = mbrtowc (&wc, ((const char *) pstr->raw_mbs + pstr->raw_mbs_idx
00211 + byte_idx), remain_len, &pstr->cur_state);
00212 if (BE (mbclen == (size_t) -2, 0))
00213 {
00214
00215 pstr->cur_state = prev_st;
00216 break;
00217 }
00218 else if (BE (mbclen == (size_t) -1 || mbclen == 0, 0))
00219 {
00220
00221 mbclen = 1;
00222 wc = (wchar_t) pstr->raw_mbs[pstr->raw_mbs_idx + byte_idx];
00223 pstr->cur_state = prev_st;
00224 }
00225
00226
00227 if (pstr->trans != NULL && mbclen == 1)
00228 {
00229 int ch = pstr->trans[pstr->raw_mbs[pstr->raw_mbs_idx + byte_idx]];
00230 pstr->mbs_case[byte_idx] = ch;
00231 }
00232
00233 pstr->wcs[byte_idx++] = wc;
00234
00235 for (remain_len = byte_idx + mbclen - 1; byte_idx < remain_len ;)
00236 pstr->wcs[byte_idx++] = WEOF;
00237 }
00238 pstr->valid_len = byte_idx;
00239 }
00240
00241
00242
00243
00244 static void
00245 build_wcs_upper_buffer (pstr)
00246 re_string_t *pstr;
00247 {
00248 mbstate_t prev_st;
00249 int byte_idx, end_idx, mbclen, remain_len;
00250
00251
00252 end_idx = (pstr->bufs_len > pstr->len)? pstr->len : pstr->bufs_len;
00253 for (byte_idx = pstr->valid_len; byte_idx < end_idx;)
00254 {
00255 wchar_t wc;
00256 remain_len = end_idx - byte_idx;
00257 prev_st = pstr->cur_state;
00258 mbclen = mbrtowc (&wc, ((const char *) pstr->raw_mbs + pstr->raw_mbs_idx
00259 + byte_idx), remain_len, &pstr->cur_state);
00260 if (BE (mbclen == (size_t) -2, 0))
00261 {
00262
00263 pstr->cur_state = prev_st;
00264 break;
00265 }
00266 else if (mbclen == 1 || mbclen == (size_t) -1 || mbclen == 0)
00267 {
00268
00269 int ch = pstr->raw_mbs[pstr->raw_mbs_idx + byte_idx];
00270
00271 if (pstr->trans != NULL && mbclen == 1)
00272 {
00273 ch = pstr->trans[ch];
00274 pstr->mbs_case[byte_idx] = ch;
00275 }
00276 pstr->wcs[byte_idx] = iswlower (wc) ? toupper (wc) : wc;
00277 pstr->mbs[byte_idx++] = islower (ch) ? toupper (ch) : ch;
00278 if (BE (mbclen == (size_t) -1, 0))
00279 pstr->cur_state = prev_st;
00280 }
00281 else
00282 {
00283 if (iswlower (wc))
00284 wcrtomb ((char *) pstr->mbs + byte_idx, towupper (wc), &prev_st);
00285 else
00286 memcpy (pstr->mbs + byte_idx,
00287 pstr->raw_mbs + pstr->raw_mbs_idx + byte_idx, mbclen);
00288 pstr->wcs[byte_idx++] = iswlower (wc) ? toupper (wc) : wc;
00289
00290 for (remain_len = byte_idx + mbclen - 1; byte_idx < remain_len ;)
00291 pstr->wcs[byte_idx++] = WEOF;
00292 }
00293 }
00294 pstr->valid_len = byte_idx;
00295 }
00296
00297
00298
00299
00300 static int
00301 re_string_skip_chars (pstr, new_raw_idx, last_wc)
00302 re_string_t *pstr;
00303 int new_raw_idx;
00304 wint_t *last_wc;
00305 {
00306 mbstate_t prev_st;
00307 int rawbuf_idx, mbclen;
00308 wchar_t wc = 0;
00309
00310
00311 for (rawbuf_idx = pstr->raw_mbs_idx + pstr->valid_len;
00312 rawbuf_idx < new_raw_idx;)
00313 {
00314 int remain_len;
00315 remain_len = pstr->len - rawbuf_idx;
00316 prev_st = pstr->cur_state;
00317 mbclen = mbrtowc (&wc, (const char *) pstr->raw_mbs + rawbuf_idx,
00318 remain_len, &pstr->cur_state);
00319 if (BE (mbclen == (size_t) -2 || mbclen == (size_t) -1 || mbclen == 0, 0))
00320 {
00321
00322 mbclen = 1;
00323 pstr->cur_state = prev_st;
00324 }
00325
00326 rawbuf_idx += mbclen;
00327 }
00328 *last_wc = (wint_t) wc;
00329 return rawbuf_idx;
00330 }
00331 #endif
00332
00333
00334
00335
00336 static void
00337 build_upper_buffer (pstr)
00338 re_string_t *pstr;
00339 {
00340 int char_idx, end_idx;
00341 end_idx = (pstr->bufs_len > pstr->len) ? pstr->len : pstr->bufs_len;
00342
00343 for (char_idx = pstr->valid_len; char_idx < end_idx; ++char_idx)
00344 {
00345 int ch = pstr->raw_mbs[pstr->raw_mbs_idx + char_idx];
00346 if (pstr->trans != NULL)
00347 {
00348 ch = pstr->trans[ch];
00349 pstr->mbs_case[char_idx] = ch;
00350 }
00351 if (islower (ch))
00352 pstr->mbs[char_idx] = toupper (ch);
00353 else
00354 pstr->mbs[char_idx] = ch;
00355 }
00356 pstr->valid_len = char_idx;
00357 }
00358
00359
00360
00361 static void
00362 re_string_translate_buffer (pstr)
00363 re_string_t *pstr;
00364 {
00365 int buf_idx, end_idx;
00366 end_idx = (pstr->bufs_len > pstr->len) ? pstr->len : pstr->bufs_len;
00367
00368 for (buf_idx = pstr->valid_len; buf_idx < end_idx; ++buf_idx)
00369 {
00370 int ch = pstr->raw_mbs[pstr->raw_mbs_idx + buf_idx];
00371 pstr->mbs_case[buf_idx] = pstr->trans[ch];
00372 }
00373
00374 pstr->valid_len = buf_idx;
00375 }
00376
00377
00378
00379
00380
00381 static reg_errcode_t
00382 re_string_reconstruct (pstr, idx, eflags, newline)
00383 re_string_t *pstr;
00384 int idx, eflags, newline;
00385 {
00386 int offset = idx - pstr->raw_mbs_idx;
00387 if (offset < 0)
00388 {
00389
00390 #ifdef RE_ENABLE_I18N
00391 if (re_mb_cur_max > 1)
00392 memset (&pstr->cur_state, '\0', sizeof (mbstate_t));
00393 #endif
00394 pstr->len += pstr->raw_mbs_idx;
00395 pstr->stop += pstr->raw_mbs_idx;
00396 pstr->valid_len = pstr->raw_mbs_idx = 0;
00397 pstr->tip_context = ((eflags & REG_NOTBOL) ? CONTEXT_BEGBUF
00398 : CONTEXT_NEWLINE | CONTEXT_BEGBUF);
00399 if (!MBS_CASE_ALLOCATED (pstr))
00400 pstr->mbs_case = (unsigned char *) pstr->raw_mbs;
00401 if (!MBS_ALLOCATED (pstr) && !MBS_CASE_ALLOCATED (pstr))
00402 pstr->mbs = (unsigned char *) pstr->raw_mbs;
00403 offset = idx;
00404 }
00405
00406 if (offset != 0)
00407 {
00408
00409 if (offset < pstr->valid_len)
00410 {
00411
00412 pstr->tip_context = re_string_context_at (pstr, offset - 1, eflags,
00413 newline);
00414 #ifdef RE_ENABLE_I18N
00415 if (re_mb_cur_max > 1)
00416 memmove (pstr->wcs, pstr->wcs + offset,
00417 (pstr->valid_len - offset) * sizeof (wint_t));
00418 #endif
00419 if (MBS_ALLOCATED (pstr))
00420 memmove (pstr->mbs, pstr->mbs + offset,
00421 pstr->valid_len - offset);
00422 if (MBS_CASE_ALLOCATED (pstr))
00423 memmove (pstr->mbs_case, pstr->mbs_case + offset,
00424 pstr->valid_len - offset);
00425 pstr->valid_len -= offset;
00426 #if DEBUG
00427 assert (pstr->valid_len > 0);
00428 #endif
00429 }
00430 else
00431 {
00432
00433 pstr->valid_len = 0;
00434 #ifdef RE_ENABLE_I18N
00435 if (re_mb_cur_max > 1)
00436 {
00437 int wcs_idx;
00438 wint_t wc;
00439 pstr->valid_len = re_string_skip_chars (pstr, idx, &wc) - idx;
00440 for (wcs_idx = 0; wcs_idx < pstr->valid_len; ++wcs_idx)
00441 pstr->wcs[wcs_idx] = WEOF;
00442 if (pstr->trans && wc <= 0xff)
00443 wc = pstr->trans[wc];
00444 pstr->tip_context = (IS_WIDE_WORD_CHAR (wc) ? CONTEXT_WORD
00445 : ((newline && IS_WIDE_NEWLINE (wc))
00446 ? CONTEXT_NEWLINE : 0));
00447 }
00448 else
00449 #endif
00450 {
00451 int c = pstr->raw_mbs[pstr->raw_mbs_idx + offset - 1];
00452 if (pstr->trans)
00453 c = pstr->trans[c];
00454 pstr->tip_context = (IS_WORD_CHAR (c) ? CONTEXT_WORD
00455 : ((newline && IS_NEWLINE (c))
00456 ? CONTEXT_NEWLINE : 0));
00457 }
00458 }
00459 if (!MBS_CASE_ALLOCATED (pstr))
00460 {
00461 pstr->mbs_case += offset;
00462
00463 if (!MBS_ALLOCATED (pstr))
00464 pstr->mbs += offset;
00465 }
00466 }
00467 pstr->raw_mbs_idx = idx;
00468 pstr->len -= offset;
00469 pstr->stop -= offset;
00470
00471
00472 #ifdef RE_ENABLE_I18N
00473 if (re_mb_cur_max > 1)
00474 {
00475 if (pstr->icase)
00476 build_wcs_upper_buffer (pstr);
00477 else
00478 build_wcs_buffer (pstr);
00479 }
00480 else
00481 #endif
00482 {
00483 if (pstr->icase)
00484 build_upper_buffer (pstr);
00485 else if (pstr->trans != NULL)
00486 re_string_translate_buffer (pstr);
00487 }
00488 pstr->cur_idx = 0;
00489
00490 return REG_NOERROR;
00491 }
00492
00493 static void
00494 re_string_destruct (pstr)
00495 re_string_t *pstr;
00496 {
00497 #ifdef RE_ENABLE_I18N
00498 re_free (pstr->wcs);
00499 #endif
00500 if (MBS_ALLOCATED (pstr))
00501 re_free (pstr->mbs);
00502 if (MBS_CASE_ALLOCATED (pstr))
00503 re_free (pstr->mbs_case);
00504 }
00505
00506
00507
00508 static unsigned int
00509 re_string_context_at (input, idx, eflags, newline_anchor)
00510 const re_string_t *input;
00511 int idx, eflags, newline_anchor;
00512 {
00513 int c;
00514 if (idx < 0 || idx == input->len)
00515 {
00516 if (idx < 0)
00517
00518
00519 return input->tip_context;
00520 else
00521 return ((eflags & REG_NOTEOL) ? CONTEXT_ENDBUF
00522 : CONTEXT_NEWLINE | CONTEXT_ENDBUF);
00523 }
00524 #ifdef RE_ENABLE_I18N
00525 if (re_mb_cur_max > 1)
00526 {
00527 wint_t wc;
00528 int wc_idx = idx;
00529 while(input->wcs[wc_idx] == WEOF)
00530 {
00531 #ifdef DEBUG
00532
00533 assert (wc_idx >= 0);
00534 #endif
00535 --wc_idx;
00536 if (wc_idx < 0)
00537 return input->tip_context;
00538 }
00539 wc = input->wcs[wc_idx];
00540 if (IS_WIDE_WORD_CHAR (wc))
00541 return CONTEXT_WORD;
00542 return (newline_anchor && IS_WIDE_NEWLINE (wc)) ? CONTEXT_NEWLINE : 0;
00543 }
00544 else
00545 #endif
00546 {
00547 c = re_string_byte_at (input, idx);
00548 if (IS_WORD_CHAR (c))
00549 return CONTEXT_WORD;
00550 return (newline_anchor && IS_NEWLINE (c)) ? CONTEXT_NEWLINE : 0;
00551 }
00552 }
00553
00554
00555
00556 static reg_errcode_t
00557 re_node_set_alloc (set, size)
00558 re_node_set *set;
00559 int size;
00560 {
00561 set->alloc = size;
00562 set->nelem = 0;
00563 set->elems = re_malloc (int, size);
00564 if (BE (set->elems == NULL, 0))
00565 return REG_ESPACE;
00566 return REG_NOERROR;
00567 }
00568
00569 static reg_errcode_t
00570 re_node_set_init_1 (set, elem)
00571 re_node_set *set;
00572 int elem;
00573 {
00574 set->alloc = 1;
00575 set->nelem = 1;
00576 set->elems = re_malloc (int, 1);
00577 if (BE (set->elems == NULL, 0))
00578 {
00579 set->alloc = set->nelem = 0;
00580 return REG_ESPACE;
00581 }
00582 set->elems[0] = elem;
00583 return REG_NOERROR;
00584 }
00585
00586 static reg_errcode_t
00587 re_node_set_init_2 (set, elem1, elem2)
00588 re_node_set *set;
00589 int elem1, elem2;
00590 {
00591 set->alloc = 2;
00592 set->elems = re_malloc (int, 2);
00593 if (BE (set->elems == NULL, 0))
00594 return REG_ESPACE;
00595 if (elem1 == elem2)
00596 {
00597 set->nelem = 1;
00598 set->elems[0] = elem1;
00599 }
00600 else
00601 {
00602 set->nelem = 2;
00603 if (elem1 < elem2)
00604 {
00605 set->elems[0] = elem1;
00606 set->elems[1] = elem2;
00607 }
00608 else
00609 {
00610 set->elems[0] = elem2;
00611 set->elems[1] = elem1;
00612 }
00613 }
00614 return REG_NOERROR;
00615 }
00616
00617 static reg_errcode_t
00618 re_node_set_init_copy (dest, src)
00619 re_node_set *dest;
00620 const re_node_set *src;
00621 {
00622 dest->nelem = src->nelem;
00623 if (src->nelem > 0)
00624 {
00625 dest->alloc = dest->nelem;
00626 dest->elems = re_malloc (int, dest->alloc);
00627 if (BE (dest->elems == NULL, 0))
00628 {
00629 dest->alloc = dest->nelem = 0;
00630 return REG_ESPACE;
00631 }
00632 memcpy (dest->elems, src->elems, src->nelem * sizeof (int));
00633 }
00634 else
00635 re_node_set_init_empty (dest);
00636 return REG_NOERROR;
00637 }
00638
00639
00640
00641
00642
00643 static reg_errcode_t
00644 re_node_set_add_intersect (dest, src1, src2)
00645 re_node_set *dest;
00646 const re_node_set *src1, *src2;
00647 {
00648 int i1, i2, id;
00649 if (src1->nelem > 0 && src2->nelem > 0)
00650 {
00651 if (src1->nelem + src2->nelem + dest->nelem > dest->alloc)
00652 {
00653 dest->alloc = src1->nelem + src2->nelem + dest->nelem;
00654 dest->elems = re_realloc (dest->elems, int, dest->alloc);
00655 if (BE (dest->elems == NULL, 0))
00656 return REG_ESPACE;
00657 }
00658 }
00659 else
00660 return REG_NOERROR;
00661
00662 for (i1 = i2 = id = 0 ; i1 < src1->nelem && i2 < src2->nelem ;)
00663 {
00664 if (src1->elems[i1] > src2->elems[i2])
00665 {
00666 ++i2;
00667 continue;
00668 }
00669 if (src1->elems[i1] == src2->elems[i2])
00670 {
00671 while (id < dest->nelem && dest->elems[id] < src2->elems[i2])
00672 ++id;
00673 if (id < dest->nelem && dest->elems[id] == src2->elems[i2])
00674 ++id;
00675 else
00676 {
00677 memmove (dest->elems + id + 1, dest->elems + id,
00678 sizeof (int) * (dest->nelem - id));
00679 dest->elems[id++] = src2->elems[i2++];
00680 ++dest->nelem;
00681 }
00682 }
00683 ++i1;
00684 }
00685 return REG_NOERROR;
00686 }
00687
00688
00689
00690
00691 static reg_errcode_t
00692 re_node_set_init_union (dest, src1, src2)
00693 re_node_set *dest;
00694 const re_node_set *src1, *src2;
00695 {
00696 int i1, i2, id;
00697 if (src1 != NULL && src1->nelem > 0 && src2 != NULL && src2->nelem > 0)
00698 {
00699 dest->alloc = src1->nelem + src2->nelem;
00700 dest->elems = re_malloc (int, dest->alloc);
00701 if (BE (dest->elems == NULL, 0))
00702 return REG_ESPACE;
00703 }
00704 else
00705 {
00706 if (src1 != NULL && src1->nelem > 0)
00707 return re_node_set_init_copy (dest, src1);
00708 else if (src2 != NULL && src2->nelem > 0)
00709 return re_node_set_init_copy (dest, src2);
00710 else
00711 re_node_set_init_empty (dest);
00712 return REG_NOERROR;
00713 }
00714 for (i1 = i2 = id = 0 ; i1 < src1->nelem && i2 < src2->nelem ;)
00715 {
00716 if (src1->elems[i1] > src2->elems[i2])
00717 {
00718 dest->elems[id++] = src2->elems[i2++];
00719 continue;
00720 }
00721 if (src1->elems[i1] == src2->elems[i2])
00722 ++i2;
00723 dest->elems[id++] = src1->elems[i1++];
00724 }
00725 if (i1 < src1->nelem)
00726 {
00727 memcpy (dest->elems + id, src1->elems + i1,
00728 (src1->nelem - i1) * sizeof (int));
00729 id += src1->nelem - i1;
00730 }
00731 else if (i2 < src2->nelem)
00732 {
00733 memcpy (dest->elems + id, src2->elems + i2,
00734 (src2->nelem - i2) * sizeof (int));
00735 id += src2->nelem - i2;
00736 }
00737 dest->nelem = id;
00738 return REG_NOERROR;
00739 }
00740
00741
00742
00743
00744 static reg_errcode_t
00745 re_node_set_merge (dest, src)
00746 re_node_set *dest;
00747 const re_node_set *src;
00748 {
00749 int si, di;
00750 if (src == NULL || src->nelem == 0)
00751 return REG_NOERROR;
00752 if (dest->alloc < src->nelem + dest->nelem)
00753 {
00754 int *new_buffer;
00755 dest->alloc = 2 * (src->nelem + dest->alloc);
00756 new_buffer = re_realloc (dest->elems, int, dest->alloc);
00757 if (BE (new_buffer == NULL, 0))
00758 return REG_ESPACE;
00759 dest->elems = new_buffer;
00760 }
00761
00762 for (si = 0, di = 0 ; si < src->nelem && di < dest->nelem ;)
00763 {
00764 int cp_from, ncp, mid, right, src_elem = src->elems[si];
00765
00766 right = dest->nelem;
00767 while (di < right)
00768 {
00769 mid = (di + right) / 2;
00770 if (dest->elems[mid] < src_elem)
00771 di = mid + 1;
00772 else
00773 right = mid;
00774 }
00775 if (di >= dest->nelem)
00776 break;
00777
00778 if (dest->elems[di] == src_elem)
00779 {
00780
00781 ++di;
00782 ++si;
00783 continue;
00784 }
00785
00786
00787 cp_from = si;
00788 while (si < src->nelem && src->elems[si] < dest->elems[di])
00789 ++si;
00790
00791 ncp = si - cp_from;
00792 memmove (dest->elems + di + ncp, dest->elems + di,
00793 sizeof (int) * (dest->nelem - di));
00794 memcpy (dest->elems + di, src->elems + cp_from,
00795 sizeof (int) * ncp);
00796
00797 di += ncp;
00798 dest->nelem += ncp;
00799 }
00800
00801
00802 if (si < src->nelem)
00803 {
00804 memcpy (dest->elems + di, src->elems + si,
00805 sizeof (int) * (src->nelem - si));
00806 dest->nelem += src->nelem - si;
00807 }
00808 return REG_NOERROR;
00809 }
00810
00811
00812
00813
00814
00815 static int
00816 re_node_set_insert (set, elem)
00817 re_node_set *set;
00818 int elem;
00819 {
00820 int idx, right, mid;
00821
00822 if (set->elems == NULL || set->alloc == 0)
00823 {
00824 if (BE (re_node_set_init_1 (set, elem) == REG_NOERROR, 1))
00825 return 1;
00826 else
00827 return -1;
00828 }
00829
00830
00831 idx = 0;
00832 right = set->nelem;
00833 while (idx < right)
00834 {
00835 mid = (idx + right) / 2;
00836 if (set->elems[mid] < elem)
00837 idx = mid + 1;
00838 else
00839 right = mid;
00840 }
00841
00842
00843 if (set->alloc < set->nelem + 1)
00844 {
00845 int *new_array;
00846 set->alloc = set->alloc * 2;
00847 new_array = re_malloc (int, set->alloc);
00848 if (BE (new_array == NULL, 0))
00849 return -1;
00850
00851 if (idx > 0)
00852 memcpy (new_array, set->elems, sizeof (int) * (idx));
00853
00854 if (set->nelem - idx > 0)
00855 memcpy (new_array + idx + 1, set->elems + idx,
00856 sizeof (int) * (set->nelem - idx));
00857 re_free (set->elems);
00858 set->elems = new_array;
00859 }
00860 else
00861 {
00862
00863 if (set->nelem - idx > 0)
00864 memmove (set->elems + idx + 1, set->elems + idx,
00865 sizeof (int) * (set->nelem - idx));
00866 }
00867
00868 set->elems[idx] = elem;
00869 ++set->nelem;
00870 return 1;
00871 }
00872
00873
00874
00875
00876 static int
00877 re_node_set_compare (set1, set2)
00878 const re_node_set *set1, *set2;
00879 {
00880 int i;
00881 if (set1 == NULL || set2 == NULL || set1->nelem != set2->nelem)
00882 return 0;
00883 for (i = 0 ; i < set1->nelem ; i++)
00884 if (set1->elems[i] != set2->elems[i])
00885 return 0;
00886 return 1;
00887 }
00888
00889
00890
00891 static int
00892 re_node_set_contains (set, elem)
00893 const re_node_set *set;
00894 int elem;
00895 {
00896 int idx, right, mid;
00897 if (set->nelem <= 0)
00898 return 0;
00899
00900
00901 idx = 0;
00902 right = set->nelem - 1;
00903 while (idx < right)
00904 {
00905 mid = (idx + right) / 2;
00906 if (set->elems[mid] < elem)
00907 idx = mid + 1;
00908 else
00909 right = mid;
00910 }
00911 return set->elems[idx] == elem ? idx + 1 : 0;
00912 }
00913
00914 static void
00915 re_node_set_remove_at (set, idx)
00916 re_node_set *set;
00917 int idx;
00918 {
00919 if (idx < 0 || idx >= set->nelem)
00920 return;
00921 if (idx < set->nelem - 1)
00922 memmove (set->elems + idx, set->elems + idx + 1,
00923 sizeof (int) * (set->nelem - idx - 1));
00924 --set->nelem;
00925 }
00926
00927
00928
00929
00930
00931 static int
00932 re_dfa_add_node (dfa, token, mode)
00933 re_dfa_t *dfa;
00934 re_token_t token;
00935 int mode;
00936 {
00937 if (dfa->nodes_len >= dfa->nodes_alloc)
00938 {
00939 re_token_t *new_array;
00940 dfa->nodes_alloc *= 2;
00941 new_array = re_realloc (dfa->nodes, re_token_t, dfa->nodes_alloc);
00942 if (BE (new_array == NULL, 0))
00943 return -1;
00944 else
00945 dfa->nodes = new_array;
00946 if (mode)
00947 {
00948 int *new_nexts, *new_indices;
00949 re_node_set *new_edests, *new_eclosures, *new_inveclosures;
00950
00951 new_nexts = re_realloc (dfa->nexts, int, dfa->nodes_alloc);
00952 new_indices = re_realloc (dfa->org_indices, int, dfa->nodes_alloc);
00953 new_edests = re_realloc (dfa->edests, re_node_set, dfa->nodes_alloc);
00954 new_eclosures = re_realloc (dfa->eclosures, re_node_set,
00955 dfa->nodes_alloc);
00956 new_inveclosures = re_realloc (dfa->inveclosures, re_node_set,
00957 dfa->nodes_alloc);
00958 if (BE (new_nexts == NULL || new_indices == NULL
00959 || new_edests == NULL || new_eclosures == NULL
00960 || new_inveclosures == NULL, 0))
00961 return -1;
00962 dfa->nexts = new_nexts;
00963 dfa->org_indices = new_indices;
00964 dfa->edests = new_edests;
00965 dfa->eclosures = new_eclosures;
00966 dfa->inveclosures = new_inveclosures;
00967 }
00968 }
00969 dfa->nodes[dfa->nodes_len] = token;
00970 dfa->nodes[dfa->nodes_len].duplicated = 0;
00971 dfa->nodes[dfa->nodes_len].constraint = 0;
00972 return dfa->nodes_len++;
00973 }
00974
00975 static unsigned int inline
00976 calc_state_hash (nodes, context)
00977 const re_node_set *nodes;
00978 unsigned int context;
00979 {
00980 unsigned int hash = nodes->nelem + context;
00981 int i;
00982 for (i = 0 ; i < nodes->nelem ; i++)
00983 hash += nodes->elems[i];
00984 return hash;
00985 }
00986
00987
00988
00989
00990
00991
00992
00993
00994
00995
00996 static re_dfastate_t*
00997 re_acquire_state (err, dfa, nodes)
00998 reg_errcode_t *err;
00999 re_dfa_t *dfa;
01000 const re_node_set *nodes;
01001 {
01002 unsigned int hash;
01003 re_dfastate_t *new_state;
01004 struct re_state_table_entry *spot;
01005 int i;
01006 if (BE (nodes->nelem == 0, 0))
01007 {
01008 *err = REG_NOERROR;
01009 return NULL;
01010 }
01011 hash = calc_state_hash (nodes, 0);
01012 spot = dfa->state_table + (hash & dfa->state_hash_mask);
01013
01014 for (i = 0 ; i < spot->num ; i++)
01015 {
01016 re_dfastate_t *state = spot->array[i];
01017 if (hash != state->hash)
01018 continue;
01019 if (re_node_set_compare (&state->nodes, nodes))
01020 return state;
01021 }
01022
01023
01024 new_state = create_ci_newstate (dfa, nodes, hash);
01025 if (BE (new_state != NULL, 1))
01026 return new_state;
01027 else
01028 {
01029 *err = REG_ESPACE;
01030 return NULL;
01031 }
01032 }
01033
01034
01035
01036
01037
01038
01039
01040
01041
01042
01043
01044 static re_dfastate_t*
01045 re_acquire_state_context (err, dfa, nodes, context)
01046 reg_errcode_t *err;
01047 re_dfa_t *dfa;
01048 const re_node_set *nodes;
01049 unsigned int context;
01050 {
01051 unsigned int hash;
01052 re_dfastate_t *new_state;
01053 struct re_state_table_entry *spot;
01054 int i;
01055 if (nodes->nelem == 0)
01056 {
01057 *err = REG_NOERROR;
01058 return NULL;
01059 }
01060 hash = calc_state_hash (nodes, context);
01061 spot = dfa->state_table + (hash & dfa->state_hash_mask);
01062
01063 for (i = 0 ; i < spot->num ; i++)
01064 {
01065 re_dfastate_t *state = spot->array[i];
01066 if (hash != state->hash)
01067 continue;
01068 if (re_node_set_compare (state->entrance_nodes, nodes)
01069 && state->context == context)
01070 return state;
01071 }
01072
01073 new_state = create_cd_newstate (dfa, nodes, context, hash);
01074 if (BE (new_state != NULL, 1))
01075 return new_state;
01076 else
01077 {
01078 *err = REG_ESPACE;
01079 return NULL;
01080 }
01081 }
01082
01083
01084
01085
01086 static re_dfastate_t *
01087 create_newstate_common (dfa, nodes, hash)
01088 re_dfa_t *dfa;
01089 const re_node_set *nodes;
01090 unsigned int hash;
01091 {
01092 re_dfastate_t *newstate;
01093 reg_errcode_t err;
01094 newstate = (re_dfastate_t *) calloc (sizeof (re_dfastate_t), 1);
01095 if (BE (newstate == NULL, 0))
01096 return NULL;
01097 err = re_node_set_init_copy (&newstate->nodes, nodes);
01098 if (BE (err != REG_NOERROR, 0))
01099 {
01100 re_free (newstate);
01101 return NULL;
01102 }
01103 newstate->trtable = NULL;
01104 newstate->trtable_search = NULL;
01105 newstate->hash = hash;
01106 return newstate;
01107 }
01108
01109
01110
01111
01112 static reg_errcode_t
01113 register_state (dfa, newstate, hash)
01114 re_dfa_t *dfa;
01115 re_dfastate_t *newstate;
01116 unsigned int hash;
01117 {
01118 struct re_state_table_entry *spot;
01119 spot = dfa->state_table + (hash & dfa->state_hash_mask);
01120
01121 if (spot->alloc <= spot->num)
01122 {
01123 re_dfastate_t **new_array;
01124 spot->alloc = 2 * spot->num + 2;
01125 new_array = re_realloc (spot->array, re_dfastate_t *, spot->alloc);
01126 if (BE (new_array == NULL, 0))
01127 return REG_ESPACE;
01128 spot->array = new_array;
01129 }
01130 spot->array[spot->num++] = newstate;
01131 return REG_NOERROR;
01132 }
01133
01134
01135
01136
01137 static re_dfastate_t *
01138 create_ci_newstate (dfa, nodes, hash)
01139 re_dfa_t *dfa;
01140 const re_node_set *nodes;
01141 unsigned int hash;
01142 {
01143 int i;
01144 reg_errcode_t err;
01145 re_dfastate_t *newstate;
01146 newstate = create_newstate_common (dfa, nodes, hash);
01147 if (BE (newstate == NULL, 0))
01148 return NULL;
01149 newstate->entrance_nodes = &newstate->nodes;
01150
01151 for (i = 0 ; i < nodes->nelem ; i++)
01152 {
01153 re_token_t *node = dfa->nodes + nodes->elems[i];
01154 re_token_type_t type = node->type;
01155 if (type == CHARACTER && !node->constraint)
01156 continue;
01157
01158
01159 else if (type == END_OF_RE)
01160 newstate->halt = 1;
01161 #ifdef RE_ENABLE_I18N
01162 else if (type == COMPLEX_BRACKET
01163 || (type == OP_PERIOD && re_mb_cur_max > 1))
01164 newstate->accept_mb = 1;
01165 #endif
01166 else if (type == OP_BACK_REF)
01167 newstate->has_backref = 1;
01168 else if (type == ANCHOR || node->constraint)
01169 newstate->has_constraint = 1;
01170 }
01171 err = register_state (dfa, newstate, hash);
01172 if (BE (err != REG_NOERROR, 0))
01173 {
01174 free_state (newstate);
01175 newstate = NULL;
01176 }
01177 return newstate;
01178 }
01179
01180
01181
01182
01183 static re_dfastate_t *
01184 create_cd_newstate (dfa, nodes, context, hash)
01185 re_dfa_t *dfa;
01186 const re_node_set *nodes;
01187 unsigned int context, hash;
01188 {
01189 int i, nctx_nodes = 0;
01190 reg_errcode_t err;
01191 re_dfastate_t *newstate;
01192
01193 newstate = create_newstate_common (dfa, nodes, hash);
01194 if (BE (newstate == NULL, 0))
01195 return NULL;
01196 newstate->context = context;
01197 newstate->entrance_nodes = &newstate->nodes;
01198
01199 for (i = 0 ; i < nodes->nelem ; i++)
01200 {
01201 unsigned int constraint = 0;
01202 re_token_t *node = dfa->nodes + nodes->elems[i];
01203 re_token_type_t type = node->type;
01204 if (node->constraint)
01205 constraint = node->constraint;
01206
01207 if (type == CHARACTER && !constraint)
01208 continue;
01209
01210 else if (type == END_OF_RE)
01211 newstate->halt = 1;
01212 #ifdef RE_ENABLE_I18N
01213 else if (type == COMPLEX_BRACKET
01214 || (type == OP_PERIOD && re_mb_cur_max > 1))
01215 newstate->accept_mb = 1;
01216 #endif
01217 else if (type == OP_BACK_REF)
01218 newstate->has_backref = 1;
01219 else if (type == ANCHOR)
01220 constraint = node->opr.ctx_type;
01221
01222 if (constraint)
01223 {
01224 if (newstate->entrance_nodes == &newstate->nodes)
01225 {
01226 newstate->entrance_nodes = re_malloc (re_node_set, 1);
01227 if (BE (newstate->entrance_nodes == NULL, 0))
01228 {
01229 free_state (newstate);
01230 return NULL;
01231 }
01232 re_node_set_init_copy (newstate->entrance_nodes, nodes);
01233 nctx_nodes = 0;
01234 newstate->has_constraint = 1;
01235 }
01236
01237 if (NOT_SATISFY_PREV_CONSTRAINT (constraint,context))
01238 {
01239 re_node_set_remove_at (&newstate->nodes, i - nctx_nodes);
01240 ++nctx_nodes;
01241 }
01242 }
01243 }
01244 err = register_state (dfa, newstate, hash);
01245 if (BE (err != REG_NOERROR, 0))
01246 {
01247 free_state (newstate);
01248 newstate = NULL;
01249 }
01250 return newstate;
01251 }
01252
01253 static void
01254 free_state (state)
01255 re_dfastate_t *state;
01256 {
01257 if (state->entrance_nodes != &state->nodes)
01258 {
01259 re_node_set_free (state->entrance_nodes);
01260 re_free (state->entrance_nodes);
01261 }
01262 re_node_set_free (&state->nodes);
01263 re_free (state->trtable);
01264 re_free (state->trtable_search);
01265 re_free (state);
01266 }