00001
00002
00003
00004
/* Fallback definition: when the including file has not supplied its own
   IS_INVALID_CHAR, the macro ignores its arguments and expands to the
   constant 0, so no multi-byte sequence is ever reported as invalid. */
#if !defined(IS_INVALID_CHAR)
#define IS_INVALID_CHAR(enc, ptr, n) (0)
#endif
00008
/* Expands to the `case BT_LEADn:` arm of a switch over BYTE_TYPE for a
   character that starts an n-byte sequence at ptr.

     - fewer than n bytes left before `end`  -> return XML_TOK_PARTIAL_CHAR
     - IS_INVALID_CHAR says the sequence is bad
                                             -> *nextTokPtr = ptr,
                                                return XML_TOK_INVALID
     - otherwise                             -> advance ptr by n and break

   `n` must be a literal digit (it is token-pasted onto BT_LEAD), and ptr
   must be a modifiable lvalue.  The expansion also uses the identifiers
   `enc` and `end`, which are not parameters and must therefore be in scope
   wherever the macro is expanded.

   The parameters are parenthesised wherever they take part in an
   expression; they are passed through untouched to IS_INVALID_CHAR, whose
   definition is not visible here and may token-paste its arguments. */
#define INVALID_LEAD_CASE(n, ptr, nextTokPtr) \
    case BT_LEAD ## n: \
      if (end - (ptr) < (n)) \
        return XML_TOK_PARTIAL_CHAR; \
      if (IS_INVALID_CHAR(enc, ptr, n)) { \
        *(nextTokPtr) = (ptr); \
        return XML_TOK_INVALID; \
      } \
      (ptr) += (n); \
      break;
00019
/* Expands to the switch arms shared by every scanner that must reject
   bad input:
     - BT_LEAD2 / BT_LEAD3 / BT_LEAD4 are handled by INVALID_LEAD_CASE;
     - BT_TRAIL, BT_MALFORM and BT_NONXML store ptr in *nextTokPtr and
       return XML_TOK_INVALID.
   Because it expands INVALID_LEAD_CASE, `enc` and `end` must be in scope
   at the point of use. */
#define INVALID_CASES(ptr, nextTokPtr) \
  INVALID_LEAD_CASE(2, ptr, nextTokPtr) \
  INVALID_LEAD_CASE(3, ptr, nextTokPtr) \
  INVALID_LEAD_CASE(4, ptr, nextTokPtr) \
  case BT_TRAIL: \
  case BT_MALFORM: \
  case BT_NONXML: \
    *(nextTokPtr) = (ptr); \
    return XML_TOK_INVALID;
00029
/* Expands to the `case BT_LEADn:` arm used while scanning the interior of
   a name.

     - fewer than n bytes left before end     -> return XML_TOK_PARTIAL_CHAR
     - IS_NAME_CHAR rejects the n-byte char   -> *nextTokPtr = ptr,
                                                 return XML_TOK_INVALID
     - otherwise                              -> advance ptr by n and break

   `n` must be a literal digit (it is token-pasted onto BT_LEAD) and ptr
   must be a modifiable lvalue.  Parameters are parenthesised wherever they
   appear in an expression, matching INVALID_LEAD_CASE; they are handed to
   IS_NAME_CHAR untouched because that macro is defined elsewhere and may
   token-paste its arguments. */
#define CHECK_NAME_CASE(n, enc, ptr, end, nextTokPtr) \
   case BT_LEAD ## n: \
     if ((end) - (ptr) < (n)) \
       return XML_TOK_PARTIAL_CHAR; \
     if (!IS_NAME_CHAR(enc, ptr, n)) { \
       *(nextTokPtr) = (ptr); \
       return XML_TOK_INVALID; \
     } \
     (ptr) += (n); \
     break;
00040
/* Expands to every switch arm that accepts one name character at ptr:

     - BT_NONASCII: rejected (XML_TOK_INVALID, *nextTokPtr = ptr) unless
       IS_NAME_CHAR_MINBPC accepts it; an accepted character deliberately
       falls through to the single-unit arm below;
     - BT_NMSTRT, BT_HEX, BT_DIGIT, BT_NAME, BT_MINUS: advance ptr by
       MINBPC(enc) and break;
     - BT_LEAD2 / BT_LEAD3 / BT_LEAD4: delegated to CHECK_NAME_CASE.

   ptr must be a modifiable lvalue.  Parameters are parenthesised wherever
   they appear in an expression of this macro's own. */
#define CHECK_NAME_CASES(enc, ptr, end, nextTokPtr) \
  case BT_NONASCII: \
    if (!IS_NAME_CHAR_MINBPC(enc, ptr)) { \
      *(nextTokPtr) = (ptr); \
      return XML_TOK_INVALID; \
    } \
    /* fall through */ \
  case BT_NMSTRT: \
  case BT_HEX: \
  case BT_DIGIT: \
  case BT_NAME: \
  case BT_MINUS: \
    (ptr) += MINBPC(enc); \
    break; \
  CHECK_NAME_CASE(2, enc, ptr, end, nextTokPtr) \
  CHECK_NAME_CASE(3, enc, ptr, end, nextTokPtr) \
  CHECK_NAME_CASE(4, enc, ptr, end, nextTokPtr)
00057
/* Expands to the `case BT_LEADn:` arm used for the first character of a
   name.

     - fewer than n bytes left before end      -> return XML_TOK_PARTIAL_CHAR
     - IS_NMSTRT_CHAR rejects the n-byte char  -> *nextTokPtr = ptr,
                                                  return XML_TOK_INVALID
     - otherwise                               -> advance ptr by n and break

   `n` must be a literal digit (it is token-pasted onto BT_LEAD) and ptr
   must be a modifiable lvalue.  Parameters are parenthesised wherever they
   appear in an expression, matching INVALID_LEAD_CASE; they are handed to
   IS_NMSTRT_CHAR untouched because that macro is defined elsewhere and may
   token-paste its arguments. */
#define CHECK_NMSTRT_CASE(n, enc, ptr, end, nextTokPtr) \
   case BT_LEAD ## n: \
     if ((end) - (ptr) < (n)) \
       return XML_TOK_PARTIAL_CHAR; \
     if (!IS_NMSTRT_CHAR(enc, ptr, n)) { \
       *(nextTokPtr) = (ptr); \
       return XML_TOK_INVALID; \
     } \
     (ptr) += (n); \
     break;
00068
/* Expands to every switch arm that accepts one name-start character at
   ptr:

     - BT_NONASCII: rejected (XML_TOK_INVALID, *nextTokPtr = ptr) unless
       IS_NMSTRT_CHAR_MINBPC accepts it; an accepted character deliberately
       falls through to the single-unit arm below;
     - BT_NMSTRT, BT_HEX: advance ptr by MINBPC(enc) and break;
     - BT_LEAD2 / BT_LEAD3 / BT_LEAD4: delegated to CHECK_NMSTRT_CASE.

   ptr must be a modifiable lvalue.  Parameters are parenthesised wherever
   they appear in an expression of this macro's own. */
#define CHECK_NMSTRT_CASES(enc, ptr, end, nextTokPtr) \
  case BT_NONASCII: \
    if (!IS_NMSTRT_CHAR_MINBPC(enc, ptr)) { \
      *(nextTokPtr) = (ptr); \
      return XML_TOK_INVALID; \
    } \
    /* fall through */ \
  case BT_NMSTRT: \
  case BT_HEX: \
    (ptr) += MINBPC(enc); \
    break; \
  CHECK_NMSTRT_CASE(2, enc, ptr, end, nextTokPtr) \
  CHECK_NMSTRT_CASE(3, enc, ptr, end, nextTokPtr) \
  CHECK_NMSTRT_CASE(4, enc, ptr, end, nextTokPtr)
00082
/* Fallback definition: when the including file has not defined PREFIX,
   identifiers wrapped in PREFIX(...) are left exactly as written. */
#if !defined(PREFIX)
#define PREFIX(ident) ident
#endif
00086
00087
00088
00089 static int PTRCALL
00090 PREFIX(scanComment)(const ENCODING *enc, const char *ptr,
00091 const char *end, const char **nextTokPtr)
00092 {
00093 if (ptr != end) {
00094 if (!CHAR_MATCHES(enc, ptr, ASCII_MINUS)) {
00095 *nextTokPtr = ptr;
00096 return XML_TOK_INVALID;
00097 }
00098 ptr += MINBPC(enc);
00099 while (ptr != end) {
00100 switch (BYTE_TYPE(enc, ptr)) {
00101 INVALID_CASES(ptr, nextTokPtr)
00102 case BT_MINUS:
00103 if ((ptr += MINBPC(enc)) == end)
00104 return XML_TOK_PARTIAL;
00105 if (CHAR_MATCHES(enc, ptr, ASCII_MINUS)) {
00106 if ((ptr += MINBPC(enc)) == end)
00107 return XML_TOK_PARTIAL;
00108 if (!CHAR_MATCHES(enc, ptr, ASCII_GT)) {
00109 *nextTokPtr = ptr;
00110 return XML_TOK_INVALID;
00111 }
00112 *nextTokPtr = ptr + MINBPC(enc);
00113 return XML_TOK_COMMENT;
00114 }
00115 break;
00116 default:
00117 ptr += MINBPC(enc);
00118 break;
00119 }
00120 }
00121 }
00122 return XML_TOK_PARTIAL;
00123 }
00124
00125
00126
00127 static int PTRCALL
00128 PREFIX(scanDecl)(const ENCODING *enc, const char *ptr,
00129 const char *end, const char **nextTokPtr)
00130 {
00131 if (ptr == end)
00132 return XML_TOK_PARTIAL;
00133 switch (BYTE_TYPE(enc, ptr)) {
00134 case BT_MINUS:
00135 return PREFIX(scanComment)(enc, ptr + MINBPC(enc), end, nextTokPtr);
00136 case BT_LSQB:
00137 *nextTokPtr = ptr + MINBPC(enc);
00138 return XML_TOK_COND_SECT_OPEN;
00139 case BT_NMSTRT:
00140 case BT_HEX:
00141 ptr += MINBPC(enc);
00142 break;
00143 default:
00144 *nextTokPtr = ptr;
00145 return XML_TOK_INVALID;
00146 }
00147 while (ptr != end) {
00148 switch (BYTE_TYPE(enc, ptr)) {
00149 case BT_PERCNT:
00150 if (ptr + MINBPC(enc) == end)
00151 return XML_TOK_PARTIAL;
00152
00153 switch (BYTE_TYPE(enc, ptr + MINBPC(enc))) {
00154 case BT_S: case BT_CR: case BT_LF: case BT_PERCNT:
00155 *nextTokPtr = ptr;
00156 return XML_TOK_INVALID;
00157 }
00158
00159 case BT_S: case BT_CR: case BT_LF:
00160 *nextTokPtr = ptr;
00161 return XML_TOK_DECL_OPEN;
00162 case BT_NMSTRT:
00163 case BT_HEX:
00164 ptr += MINBPC(enc);
00165 break;
00166 default:
00167 *nextTokPtr = ptr;
00168 return XML_TOK_INVALID;
00169 }
00170 }
00171 return XML_TOK_PARTIAL;
00172 }
00173
00174 static int PTRCALL
00175 PREFIX(checkPiTarget)(const ENCODING *enc, const char *ptr,
00176 const char *end, int *tokPtr)
00177 {
00178 int upper = 0;
00179 *tokPtr = XML_TOK_PI;
00180 if (end - ptr != MINBPC(enc)*3)
00181 return 1;
00182 switch (BYTE_TO_ASCII(enc, ptr)) {
00183 case ASCII_x:
00184 break;
00185 case ASCII_X:
00186 upper = 1;
00187 break;
00188 default:
00189 return 1;
00190 }
00191 ptr += MINBPC(enc);
00192 switch (BYTE_TO_ASCII(enc, ptr)) {
00193 case ASCII_m:
00194 break;
00195 case ASCII_M:
00196 upper = 1;
00197 break;
00198 default:
00199 return 1;
00200 }
00201 ptr += MINBPC(enc);
00202 switch (BYTE_TO_ASCII(enc, ptr)) {
00203 case ASCII_l:
00204 break;
00205 case ASCII_L:
00206 upper = 1;
00207 break;
00208 default:
00209 return 1;
00210 }
00211 if (upper)
00212 return 0;
00213 *tokPtr = XML_TOK_XML_DECL;
00214 return 1;
00215 }
00216
00217
00218
00219 static int PTRCALL
00220 PREFIX(scanPi)(const ENCODING *enc, const char *ptr,
00221 const char *end, const char **nextTokPtr)
00222 {
00223 int tok;
00224 const char *target = ptr;
00225 if (ptr == end)
00226 return XML_TOK_PARTIAL;
00227 switch (BYTE_TYPE(enc, ptr)) {
00228 CHECK_NMSTRT_CASES(enc, ptr, end, nextTokPtr)
00229 default:
00230 *nextTokPtr = ptr;
00231 return XML_TOK_INVALID;
00232 }
00233 while (ptr != end) {
00234 switch (BYTE_TYPE(enc, ptr)) {
00235 CHECK_NAME_CASES(enc, ptr, end, nextTokPtr)
00236 case BT_S: case BT_CR: case BT_LF:
00237 if (!PREFIX(checkPiTarget)(enc, target, ptr, &tok)) {
00238 *nextTokPtr = ptr;
00239 return XML_TOK_INVALID;
00240 }
00241 ptr += MINBPC(enc);
00242 while (ptr != end) {
00243 switch (BYTE_TYPE(enc, ptr)) {
00244 INVALID_CASES(ptr, nextTokPtr)
00245 case BT_QUEST:
00246 ptr += MINBPC(enc);
00247 if (ptr == end)
00248 return XML_TOK_PARTIAL;
00249 if (CHAR_MATCHES(enc, ptr, ASCII_GT)) {
00250 *nextTokPtr = ptr + MINBPC(enc);
00251 return tok;
00252 }
00253 break;
00254 default:
00255 ptr += MINBPC(enc);
00256 break;
00257 }
00258 }
00259 return XML_TOK_PARTIAL;
00260 case BT_QUEST:
00261 if (!PREFIX(checkPiTarget)(enc, target, ptr, &tok)) {
00262 *nextTokPtr = ptr;
00263 return XML_TOK_INVALID;
00264 }
00265 ptr += MINBPC(enc);
00266 if (ptr == end)
00267 return XML_TOK_PARTIAL;
00268 if (CHAR_MATCHES(enc, ptr, ASCII_GT)) {
00269 *nextTokPtr = ptr + MINBPC(enc);
00270 return tok;
00271 }
00272
00273 default:
00274 *nextTokPtr = ptr;
00275 return XML_TOK_INVALID;
00276 }
00277 }
00278 return XML_TOK_PARTIAL;
00279 }
00280
00281 static int PTRCALL
00282 PREFIX(scanCdataSection)(const ENCODING *enc, const char *ptr,
00283 const char *end, const char **nextTokPtr)
00284 {
00285 static const char CDATA_LSQB[] = { ASCII_C, ASCII_D, ASCII_A,
00286 ASCII_T, ASCII_A, ASCII_LSQB };
00287 int i;
00288
00289 if (end - ptr < 6 * MINBPC(enc))
00290 return XML_TOK_PARTIAL;
00291 for (i = 0; i < 6; i++, ptr += MINBPC(enc)) {
00292 if (!CHAR_MATCHES(enc, ptr, CDATA_LSQB[i])) {
00293 *nextTokPtr = ptr;
00294 return XML_TOK_INVALID;
00295 }
00296 }
00297 *nextTokPtr = ptr;
00298 return XML_TOK_CDATA_SECT_OPEN;
00299 }
00300
00301 static int PTRCALL
00302 PREFIX(cdataSectionTok)(const ENCODING *enc, const char *ptr,
00303 const char *end, const char **nextTokPtr)
00304 {
00305 if (ptr == end)
00306 return XML_TOK_NONE;
00307 if (MINBPC(enc) > 1) {
00308 size_t n = end - ptr;
00309 if (n & (MINBPC(enc) - 1)) {
00310 n &= ~(MINBPC(enc) - 1);
00311 if (n == 0)
00312 return XML_TOK_PARTIAL;
00313 end = ptr + n;
00314 }
00315 }
00316 switch (BYTE_TYPE(enc, ptr)) {
00317 case BT_RSQB:
00318 ptr += MINBPC(enc);
00319 if (ptr == end)
00320 return XML_TOK_PARTIAL;
00321 if (!CHAR_MATCHES(enc, ptr, ASCII_RSQB))
00322 break;
00323 ptr += MINBPC(enc);
00324 if (ptr == end)
00325 return XML_TOK_PARTIAL;
00326 if (!CHAR_MATCHES(enc, ptr, ASCII_GT)) {
00327 ptr -= MINBPC(enc);
00328 break;
00329 }
00330 *nextTokPtr = ptr + MINBPC(enc);
00331 return XML_TOK_CDATA_SECT_CLOSE;
00332 case BT_CR:
00333 ptr += MINBPC(enc);
00334 if (ptr == end)
00335 return XML_TOK_PARTIAL;
00336 if (BYTE_TYPE(enc, ptr) == BT_LF)
00337 ptr += MINBPC(enc);
00338 *nextTokPtr = ptr;
00339 return XML_TOK_DATA_NEWLINE;
00340 case BT_LF:
00341 *nextTokPtr = ptr + MINBPC(enc);
00342 return XML_TOK_DATA_NEWLINE;
00343 INVALID_CASES(ptr, nextTokPtr)
00344 default:
00345 ptr += MINBPC(enc);
00346 break;
00347 }
00348 while (ptr != end) {
00349 switch (BYTE_TYPE(enc, ptr)) {
00350 #define LEAD_CASE(n) \
00351 case BT_LEAD ## n: \
00352 if (end - ptr < n || IS_INVALID_CHAR(enc, ptr, n)) { \
00353 *nextTokPtr = ptr; \
00354 return XML_TOK_DATA_CHARS; \
00355 } \
00356 ptr += n; \
00357 break;
00358 LEAD_CASE(2) LEAD_CASE(3) LEAD_CASE(4)
00359 #undef LEAD_CASE
00360 case BT_NONXML:
00361 case BT_MALFORM:
00362 case BT_TRAIL:
00363 case BT_CR:
00364 case BT_LF:
00365 case BT_RSQB:
00366 *nextTokPtr = ptr;
00367 return XML_TOK_DATA_CHARS;
00368 default:
00369 ptr += MINBPC(enc);
00370 break;
00371 }
00372 }
00373 *nextTokPtr = ptr;
00374 return XML_TOK_DATA_CHARS;
00375 }
00376
00377
00378
00379 static int PTRCALL
00380 PREFIX(scanEndTag)(const ENCODING *enc, const char *ptr,
00381 const char *end, const char **nextTokPtr)
00382 {
00383 if (ptr == end)
00384 return XML_TOK_PARTIAL;
00385 switch (BYTE_TYPE(enc, ptr)) {
00386 CHECK_NMSTRT_CASES(enc, ptr, end, nextTokPtr)
00387 default:
00388 *nextTokPtr = ptr;
00389 return XML_TOK_INVALID;
00390 }
00391 while (ptr != end) {
00392 switch (BYTE_TYPE(enc, ptr)) {
00393 CHECK_NAME_CASES(enc, ptr, end, nextTokPtr)
00394 case BT_S: case BT_CR: case BT_LF:
00395 for (ptr += MINBPC(enc); ptr != end; ptr += MINBPC(enc)) {
00396 switch (BYTE_TYPE(enc, ptr)) {
00397 case BT_S: case BT_CR: case BT_LF:
00398 break;
00399 case BT_GT:
00400 *nextTokPtr = ptr + MINBPC(enc);
00401 return XML_TOK_END_TAG;
00402 default:
00403 *nextTokPtr = ptr;
00404 return XML_TOK_INVALID;
00405 }
00406 }
00407 return XML_TOK_PARTIAL;
00408 #ifdef XML_NS
00409 case BT_COLON:
00410
00411
00412 ptr += MINBPC(enc);
00413 break;
00414 #endif
00415 case BT_GT:
00416 *nextTokPtr = ptr + MINBPC(enc);
00417 return XML_TOK_END_TAG;
00418 default:
00419 *nextTokPtr = ptr;
00420 return XML_TOK_INVALID;
00421 }
00422 }
00423 return XML_TOK_PARTIAL;
00424 }
00425
00426
00427
00428 static int PTRCALL
00429 PREFIX(scanHexCharRef)(const ENCODING *enc, const char *ptr,
00430 const char *end, const char **nextTokPtr)
00431 {
00432 if (ptr != end) {
00433 switch (BYTE_TYPE(enc, ptr)) {
00434 case BT_DIGIT:
00435 case BT_HEX:
00436 break;
00437 default:
00438 *nextTokPtr = ptr;
00439 return XML_TOK_INVALID;
00440 }
00441 for (ptr += MINBPC(enc); ptr != end; ptr += MINBPC(enc)) {
00442 switch (BYTE_TYPE(enc, ptr)) {
00443 case BT_DIGIT:
00444 case BT_HEX:
00445 break;
00446 case BT_SEMI:
00447 *nextTokPtr = ptr + MINBPC(enc);
00448 return XML_TOK_CHAR_REF;
00449 default:
00450 *nextTokPtr = ptr;
00451 return XML_TOK_INVALID;
00452 }
00453 }
00454 }
00455 return XML_TOK_PARTIAL;
00456 }
00457
00458
00459
00460 static int PTRCALL
00461 PREFIX(scanCharRef)(const ENCODING *enc, const char *ptr,
00462 const char *end, const char **nextTokPtr)
00463 {
00464 if (ptr != end) {
00465 if (CHAR_MATCHES(enc, ptr, ASCII_x))
00466 return PREFIX(scanHexCharRef)(enc, ptr + MINBPC(enc), end, nextTokPtr);
00467 switch (BYTE_TYPE(enc, ptr)) {
00468 case BT_DIGIT:
00469 break;
00470 default:
00471 *nextTokPtr = ptr;
00472 return XML_TOK_INVALID;
00473 }
00474 for (ptr += MINBPC(enc); ptr != end; ptr += MINBPC(enc)) {
00475 switch (BYTE_TYPE(enc, ptr)) {
00476 case BT_DIGIT:
00477 break;
00478 case BT_SEMI:
00479 *nextTokPtr = ptr + MINBPC(enc);
00480 return XML_TOK_CHAR_REF;
00481 default:
00482 *nextTokPtr = ptr;
00483 return XML_TOK_INVALID;
00484 }
00485 }
00486 }
00487 return XML_TOK_PARTIAL;
00488 }
00489
00490
00491
00492 static int PTRCALL
00493 PREFIX(scanRef)(const ENCODING *enc, const char *ptr, const char *end,
00494 const char **nextTokPtr)
00495 {
00496 if (ptr == end)
00497 return XML_TOK_PARTIAL;
00498 switch (BYTE_TYPE(enc, ptr)) {
00499 CHECK_NMSTRT_CASES(enc, ptr, end, nextTokPtr)
00500 case BT_NUM:
00501 return PREFIX(scanCharRef)(enc, ptr + MINBPC(enc), end, nextTokPtr);
00502 default:
00503 *nextTokPtr = ptr;
00504 return XML_TOK_INVALID;
00505 }
00506 while (ptr != end) {
00507 switch (BYTE_TYPE(enc, ptr)) {
00508 CHECK_NAME_CASES(enc, ptr, end, nextTokPtr)
00509 case BT_SEMI:
00510 *nextTokPtr = ptr + MINBPC(enc);
00511 return XML_TOK_ENTITY_REF;
00512 default:
00513 *nextTokPtr = ptr;
00514 return XML_TOK_INVALID;
00515 }
00516 }
00517 return XML_TOK_PARTIAL;
00518 }
00519
00520
00521
00522 static int PTRCALL
00523 PREFIX(scanAtts)(const ENCODING *enc, const char *ptr, const char *end,
00524 const char **nextTokPtr)
00525 {
00526 #ifdef XML_NS
00527 int hadColon = 0;
00528 #endif
00529 while (ptr != end) {
00530 switch (BYTE_TYPE(enc, ptr)) {
00531 CHECK_NAME_CASES(enc, ptr, end, nextTokPtr)
00532 #ifdef XML_NS
00533 case BT_COLON:
00534 if (hadColon) {
00535 *nextTokPtr = ptr;
00536 return XML_TOK_INVALID;
00537 }
00538 hadColon = 1;
00539 ptr += MINBPC(enc);
00540 if (ptr == end)
00541 return XML_TOK_PARTIAL;
00542 switch (BYTE_TYPE(enc, ptr)) {
00543 CHECK_NMSTRT_CASES(enc, ptr, end, nextTokPtr)
00544 default:
00545 *nextTokPtr = ptr;
00546 return XML_TOK_INVALID;
00547 }
00548 break;
00549 #endif
00550 case BT_S: case BT_CR: case BT_LF:
00551 for (;;) {
00552 int t;
00553
00554 ptr += MINBPC(enc);
00555 if (ptr == end)
00556 return XML_TOK_PARTIAL;
00557 t = BYTE_TYPE(enc, ptr);
00558 if (t == BT_EQUALS)
00559 break;
00560 switch (t) {
00561 case BT_S:
00562 case BT_LF:
00563 case BT_CR:
00564 break;
00565 default:
00566 *nextTokPtr = ptr;
00567 return XML_TOK_INVALID;
00568 }
00569 }
00570
00571 case BT_EQUALS:
00572 {
00573 int open;
00574 #ifdef XML_NS
00575 hadColon = 0;
00576 #endif
00577 for (;;) {
00578 ptr += MINBPC(enc);
00579 if (ptr == end)
00580 return XML_TOK_PARTIAL;
00581 open = BYTE_TYPE(enc, ptr);
00582 if (open == BT_QUOT || open == BT_APOS)
00583 break;
00584 switch (open) {
00585 case BT_S:
00586 case BT_LF:
00587 case BT_CR:
00588 break;
00589 default:
00590 *nextTokPtr = ptr;
00591 return XML_TOK_INVALID;
00592 }
00593 }
00594 ptr += MINBPC(enc);
00595
00596 for (;;) {
00597 int t;
00598 if (ptr == end)
00599 return XML_TOK_PARTIAL;
00600 t = BYTE_TYPE(enc, ptr);
00601 if (t == open)
00602 break;
00603 switch (t) {
00604 INVALID_CASES(ptr, nextTokPtr)
00605 case BT_AMP:
00606 {
00607 int tok = PREFIX(scanRef)(enc, ptr + MINBPC(enc), end, &ptr);
00608 if (tok <= 0) {
00609 if (tok == XML_TOK_INVALID)
00610 *nextTokPtr = ptr;
00611 return tok;
00612 }
00613 break;
00614 }
00615 case BT_LT:
00616 *nextTokPtr = ptr;
00617 return XML_TOK_INVALID;
00618 default:
00619 ptr += MINBPC(enc);
00620 break;
00621 }
00622 }
00623 ptr += MINBPC(enc);
00624 if (ptr == end)
00625 return XML_TOK_PARTIAL;
00626 switch (BYTE_TYPE(enc, ptr)) {
00627 case BT_S:
00628 case BT_CR:
00629 case BT_LF:
00630 break;
00631 case BT_SOL:
00632 goto sol;
00633 case BT_GT:
00634 goto gt;
00635 default:
00636 *nextTokPtr = ptr;
00637 return XML_TOK_INVALID;
00638 }
00639
00640 for (;;) {
00641 ptr += MINBPC(enc);
00642 if (ptr == end)
00643 return XML_TOK_PARTIAL;
00644 switch (BYTE_TYPE(enc, ptr)) {
00645 CHECK_NMSTRT_CASES(enc, ptr, end, nextTokPtr)
00646 case BT_S: case BT_CR: case BT_LF:
00647 continue;
00648 case BT_GT:
00649 gt:
00650 *nextTokPtr = ptr + MINBPC(enc);
00651 return XML_TOK_START_TAG_WITH_ATTS;
00652 case BT_SOL:
00653 sol:
00654 ptr += MINBPC(enc);
00655 if (ptr == end)
00656 return XML_TOK_PARTIAL;
00657 if (!CHAR_MATCHES(enc, ptr, ASCII_GT)) {
00658 *nextTokPtr = ptr;
00659 return XML_TOK_INVALID;
00660 }
00661 *nextTokPtr = ptr + MINBPC(enc);
00662 return XML_TOK_EMPTY_ELEMENT_WITH_ATTS;
00663 default:
00664 *nextTokPtr = ptr;
00665 return XML_TOK_INVALID;
00666 }
00667 break;
00668 }
00669 break;
00670 }
00671 default:
00672 *nextTokPtr = ptr;
00673 return XML_TOK_INVALID;
00674 }
00675 }
00676 return XML_TOK_PARTIAL;
00677 }
00678
00679
00680
00681 static int PTRCALL
00682 PREFIX(scanLt)(const ENCODING *enc, const char *ptr, const char *end,
00683 const char **nextTokPtr)
00684 {
00685 #ifdef XML_NS
00686 int hadColon;
00687 #endif
00688 if (ptr == end)
00689 return XML_TOK_PARTIAL;
00690 switch (BYTE_TYPE(enc, ptr)) {
00691 CHECK_NMSTRT_CASES(enc, ptr, end, nextTokPtr)
00692 case BT_EXCL:
00693 if ((ptr += MINBPC(enc)) == end)
00694 return XML_TOK_PARTIAL;
00695 switch (BYTE_TYPE(enc, ptr)) {
00696 case BT_MINUS:
00697 return PREFIX(scanComment)(enc, ptr + MINBPC(enc), end, nextTokPtr);
00698 case BT_LSQB:
00699 return PREFIX(scanCdataSection)(enc, ptr + MINBPC(enc),
00700 end, nextTokPtr);
00701 }
00702 *nextTokPtr = ptr;
00703 return XML_TOK_INVALID;
00704 case BT_QUEST:
00705 return PREFIX(scanPi)(enc, ptr + MINBPC(enc), end, nextTokPtr);
00706 case BT_SOL:
00707 return PREFIX(scanEndTag)(enc, ptr + MINBPC(enc), end, nextTokPtr);
00708 default:
00709 *nextTokPtr = ptr;
00710 return XML_TOK_INVALID;
00711 }
00712 #ifdef XML_NS
00713 hadColon = 0;
00714 #endif
00715
00716 while (ptr != end) {
00717 switch (BYTE_TYPE(enc, ptr)) {
00718 CHECK_NAME_CASES(enc, ptr, end, nextTokPtr)
00719 #ifdef XML_NS
00720 case BT_COLON:
00721 if (hadColon) {
00722 *nextTokPtr = ptr;
00723 return XML_TOK_INVALID;
00724 }
00725 hadColon = 1;
00726 ptr += MINBPC(enc);
00727 if (ptr == end)
00728 return XML_TOK_PARTIAL;
00729 switch (BYTE_TYPE(enc, ptr)) {
00730 CHECK_NMSTRT_CASES(enc, ptr, end, nextTokPtr)
00731 default:
00732 *nextTokPtr = ptr;
00733 return XML_TOK_INVALID;
00734 }
00735 break;
00736 #endif
00737 case BT_S: case BT_CR: case BT_LF:
00738 {
00739 ptr += MINBPC(enc);
00740 while (ptr != end) {
00741 switch (BYTE_TYPE(enc, ptr)) {
00742 CHECK_NMSTRT_CASES(enc, ptr, end, nextTokPtr)
00743 case BT_GT:
00744 goto gt;
00745 case BT_SOL:
00746 goto sol;
00747 case BT_S: case BT_CR: case BT_LF:
00748 ptr += MINBPC(enc);
00749 continue;
00750 default:
00751 *nextTokPtr = ptr;
00752 return XML_TOK_INVALID;
00753 }
00754 return PREFIX(scanAtts)(enc, ptr, end, nextTokPtr);
00755 }
00756 return XML_TOK_PARTIAL;
00757 }
00758 case BT_GT:
00759 gt:
00760 *nextTokPtr = ptr + MINBPC(enc);
00761 return XML_TOK_START_TAG_NO_ATTS;
00762 case BT_SOL:
00763 sol:
00764 ptr += MINBPC(enc);
00765 if (ptr == end)
00766 return XML_TOK_PARTIAL;
00767 if (!CHAR_MATCHES(enc, ptr, ASCII_GT)) {
00768 *nextTokPtr = ptr;
00769 return XML_TOK_INVALID;
00770 }
00771 *nextTokPtr = ptr + MINBPC(enc);
00772 return XML_TOK_EMPTY_ELEMENT_NO_ATTS;
00773 default:
00774 *nextTokPtr = ptr;
00775 return XML_TOK_INVALID;
00776 }
00777 }
00778 return XML_TOK_PARTIAL;
00779 }
00780
00781 static int PTRCALL
00782 PREFIX(contentTok)(const ENCODING *enc, const char *ptr, const char *end,
00783 const char **nextTokPtr)
00784 {
00785 if (ptr == end)
00786 return XML_TOK_NONE;
00787 if (MINBPC(enc) > 1) {
00788 size_t n = end - ptr;
00789 if (n & (MINBPC(enc) - 1)) {
00790 n &= ~(MINBPC(enc) - 1);
00791 if (n == 0)
00792 return XML_TOK_PARTIAL;
00793 end = ptr + n;
00794 }
00795 }
00796 switch (BYTE_TYPE(enc, ptr)) {
00797 case BT_LT:
00798 return PREFIX(scanLt)(enc, ptr + MINBPC(enc), end, nextTokPtr);
00799 case BT_AMP:
00800 return PREFIX(scanRef)(enc, ptr + MINBPC(enc), end, nextTokPtr);
00801 case BT_CR:
00802 ptr += MINBPC(enc);
00803 if (ptr == end)
00804 return XML_TOK_TRAILING_CR;
00805 if (BYTE_TYPE(enc, ptr) == BT_LF)
00806 ptr += MINBPC(enc);
00807 *nextTokPtr = ptr;
00808 return XML_TOK_DATA_NEWLINE;
00809 case BT_LF:
00810 *nextTokPtr = ptr + MINBPC(enc);
00811 return XML_TOK_DATA_NEWLINE;
00812 case BT_RSQB:
00813 ptr += MINBPC(enc);
00814 if (ptr == end)
00815 return XML_TOK_TRAILING_RSQB;
00816 if (!CHAR_MATCHES(enc, ptr, ASCII_RSQB))
00817 break;
00818 ptr += MINBPC(enc);
00819 if (ptr == end)
00820 return XML_TOK_TRAILING_RSQB;
00821 if (!CHAR_MATCHES(enc, ptr, ASCII_GT)) {
00822 ptr -= MINBPC(enc);
00823 break;
00824 }
00825 *nextTokPtr = ptr;
00826 return XML_TOK_INVALID;
00827 INVALID_CASES(ptr, nextTokPtr)
00828 default:
00829 ptr += MINBPC(enc);
00830 break;
00831 }
00832 while (ptr != end) {
00833 switch (BYTE_TYPE(enc, ptr)) {
00834 #define LEAD_CASE(n) \
00835 case BT_LEAD ## n: \
00836 if (end - ptr < n || IS_INVALID_CHAR(enc, ptr, n)) { \
00837 *nextTokPtr = ptr; \
00838 return XML_TOK_DATA_CHARS; \
00839 } \
00840 ptr += n; \
00841 break;
00842 LEAD_CASE(2) LEAD_CASE(3) LEAD_CASE(4)
00843 #undef LEAD_CASE
00844 case BT_RSQB:
00845 if (ptr + MINBPC(enc) != end) {
00846 if (!CHAR_MATCHES(enc, ptr + MINBPC(enc), ASCII_RSQB)) {
00847 ptr += MINBPC(enc);
00848 break;
00849 }
00850 if (ptr + 2*MINBPC(enc) != end) {
00851 if (!CHAR_MATCHES(enc, ptr + 2*MINBPC(enc), ASCII_GT)) {
00852 ptr += MINBPC(enc);
00853 break;
00854 }
00855 *nextTokPtr = ptr + 2*MINBPC(enc);
00856 return XML_TOK_INVALID;
00857 }
00858 }
00859
00860 case BT_AMP:
00861 case BT_LT:
00862 case BT_NONXML:
00863 case BT_MALFORM:
00864 case BT_TRAIL:
00865 case BT_CR:
00866 case BT_LF:
00867 *nextTokPtr = ptr;
00868 return XML_TOK_DATA_CHARS;
00869 default:
00870 ptr += MINBPC(enc);
00871 break;
00872 }
00873 }
00874 *nextTokPtr = ptr;
00875 return XML_TOK_DATA_CHARS;
00876 }
00877
00878
00879
00880 static int PTRCALL
00881 PREFIX(scanPercent)(const ENCODING *enc, const char *ptr, const char *end,
00882 const char **nextTokPtr)
00883 {
00884 if (ptr == end)
00885 return -XML_TOK_PERCENT;
00886 switch (BYTE_TYPE(enc, ptr)) {
00887 CHECK_NMSTRT_CASES(enc, ptr, end, nextTokPtr)
00888 case BT_S: case BT_LF: case BT_CR: case BT_PERCNT:
00889 *nextTokPtr = ptr;
00890 return XML_TOK_PERCENT;
00891 default:
00892 *nextTokPtr = ptr;
00893 return XML_TOK_INVALID;
00894 }
00895 while (ptr != end) {
00896 switch (BYTE_TYPE(enc, ptr)) {
00897 CHECK_NAME_CASES(enc, ptr, end, nextTokPtr)
00898 case BT_SEMI:
00899 *nextTokPtr = ptr + MINBPC(enc);
00900 return XML_TOK_PARAM_ENTITY_REF;
00901 default:
00902 *nextTokPtr = ptr;
00903 return XML_TOK_INVALID;
00904 }
00905 }
00906 return XML_TOK_PARTIAL;
00907 }
00908
00909 static int PTRCALL
00910 PREFIX(scanPoundName)(const ENCODING *enc, const char *ptr, const char *end,
00911 const char **nextTokPtr)
00912 {
00913 if (ptr == end)
00914 return XML_TOK_PARTIAL;
00915 switch (BYTE_TYPE(enc, ptr)) {
00916 CHECK_NMSTRT_CASES(enc, ptr, end, nextTokPtr)
00917 default:
00918 *nextTokPtr = ptr;
00919 return XML_TOK_INVALID;
00920 }
00921 while (ptr != end) {
00922 switch (BYTE_TYPE(enc, ptr)) {
00923 CHECK_NAME_CASES(enc, ptr, end, nextTokPtr)
00924 case BT_CR: case BT_LF: case BT_S:
00925 case BT_RPAR: case BT_GT: case BT_PERCNT: case BT_VERBAR:
00926 *nextTokPtr = ptr;
00927 return XML_TOK_POUND_NAME;
00928 default:
00929 *nextTokPtr = ptr;
00930 return XML_TOK_INVALID;
00931 }
00932 }
00933 return -XML_TOK_POUND_NAME;
00934 }
00935
00936 static int PTRCALL
00937 PREFIX(scanLit)(int open, const ENCODING *enc,
00938 const char *ptr, const char *end,
00939 const char **nextTokPtr)
00940 {
00941 while (ptr != end) {
00942 int t = BYTE_TYPE(enc, ptr);
00943 switch (t) {
00944 INVALID_CASES(ptr, nextTokPtr)
00945 case BT_QUOT:
00946 case BT_APOS:
00947 ptr += MINBPC(enc);
00948 if (t != open)
00949 break;
00950 if (ptr == end)
00951 return -XML_TOK_LITERAL;
00952 *nextTokPtr = ptr;
00953 switch (BYTE_TYPE(enc, ptr)) {
00954 case BT_S: case BT_CR: case BT_LF:
00955 case BT_GT: case BT_PERCNT: case BT_LSQB:
00956 return XML_TOK_LITERAL;
00957 default:
00958 return XML_TOK_INVALID;
00959 }
00960 default:
00961 ptr += MINBPC(enc);
00962 break;
00963 }
00964 }
00965 return XML_TOK_PARTIAL;
00966 }
00967
00968 static int PTRCALL
00969 PREFIX(prologTok)(const ENCODING *enc, const char *ptr, const char *end,
00970 const char **nextTokPtr)
00971 {
00972 int tok;
00973 if (ptr == end)
00974 return XML_TOK_NONE;
00975 if (MINBPC(enc) > 1) {
00976 size_t n = end - ptr;
00977 if (n & (MINBPC(enc) - 1)) {
00978 n &= ~(MINBPC(enc) - 1);
00979 if (n == 0)
00980 return XML_TOK_PARTIAL;
00981 end = ptr + n;
00982 }
00983 }
00984 switch (BYTE_TYPE(enc, ptr)) {
00985 case BT_QUOT:
00986 return PREFIX(scanLit)(BT_QUOT, enc, ptr + MINBPC(enc), end, nextTokPtr);
00987 case BT_APOS:
00988 return PREFIX(scanLit)(BT_APOS, enc, ptr + MINBPC(enc), end, nextTokPtr);
00989 case BT_LT:
00990 {
00991 ptr += MINBPC(enc);
00992 if (ptr == end)
00993 return XML_TOK_PARTIAL;
00994 switch (BYTE_TYPE(enc, ptr)) {
00995 case BT_EXCL:
00996 return PREFIX(scanDecl)(enc, ptr + MINBPC(enc), end, nextTokPtr);
00997 case BT_QUEST:
00998 return PREFIX(scanPi)(enc, ptr + MINBPC(enc), end, nextTokPtr);
00999 case BT_NMSTRT:
01000 case BT_HEX:
01001 case BT_NONASCII:
01002 case BT_LEAD2:
01003 case BT_LEAD3:
01004 case BT_LEAD4:
01005 *nextTokPtr = ptr - MINBPC(enc);
01006 return XML_TOK_INSTANCE_START;
01007 }
01008 *nextTokPtr = ptr;
01009 return XML_TOK_INVALID;
01010 }
01011 case BT_CR:
01012 if (ptr + MINBPC(enc) == end) {
01013 *nextTokPtr = end;
01014
01015 return -XML_TOK_PROLOG_S;
01016 }
01017
01018 case BT_S: case BT_LF:
01019 for (;;) {
01020 ptr += MINBPC(enc);
01021 if (ptr == end)
01022 break;
01023 switch (BYTE_TYPE(enc, ptr)) {
01024 case BT_S: case BT_LF:
01025 break;
01026 case BT_CR:
01027
01028 if (ptr + MINBPC(enc) != end)
01029 break;
01030
01031 default:
01032 *nextTokPtr = ptr;
01033 return XML_TOK_PROLOG_S;
01034 }
01035 }
01036 *nextTokPtr = ptr;
01037 return XML_TOK_PROLOG_S;
01038 case BT_PERCNT:
01039 return PREFIX(scanPercent)(enc, ptr + MINBPC(enc), end, nextTokPtr);
01040 case BT_COMMA:
01041 *nextTokPtr = ptr + MINBPC(enc);
01042 return XML_TOK_COMMA;
01043 case BT_LSQB:
01044 *nextTokPtr = ptr + MINBPC(enc);
01045 return XML_TOK_OPEN_BRACKET;
01046 case BT_RSQB:
01047 ptr += MINBPC(enc);
01048 if (ptr == end)
01049 return -XML_TOK_CLOSE_BRACKET;
01050 if (CHAR_MATCHES(enc, ptr, ASCII_RSQB)) {
01051 if (ptr + MINBPC(enc) == end)
01052 return XML_TOK_PARTIAL;
01053 if (CHAR_MATCHES(enc, ptr + MINBPC(enc), ASCII_GT)) {
01054 *nextTokPtr = ptr + 2*MINBPC(enc);
01055 return XML_TOK_COND_SECT_CLOSE;
01056 }
01057 }
01058 *nextTokPtr = ptr;
01059 return XML_TOK_CLOSE_BRACKET;
01060 case BT_LPAR:
01061 *nextTokPtr = ptr + MINBPC(enc);
01062 return XML_TOK_OPEN_PAREN;
01063 case BT_RPAR:
01064 ptr += MINBPC(enc);
01065 if (ptr == end)
01066 return -XML_TOK_CLOSE_PAREN;
01067 switch (BYTE_TYPE(enc, ptr)) {
01068 case BT_AST:
01069 *nextTokPtr = ptr + MINBPC(enc);
01070 return XML_TOK_CLOSE_PAREN_ASTERISK;
01071 case BT_QUEST:
01072 *nextTokPtr = ptr + MINBPC(enc);
01073 return XML_TOK_CLOSE_PAREN_QUESTION;
01074 case BT_PLUS:
01075 *nextTokPtr = ptr + MINBPC(enc);
01076 return XML_TOK_CLOSE_PAREN_PLUS;
01077 case BT_CR: case BT_LF: case BT_S:
01078 case BT_GT: case BT_COMMA: case BT_VERBAR:
01079 case BT_RPAR:
01080 *nextTokPtr = ptr;
01081 return XML_TOK_CLOSE_PAREN;
01082 }
01083 *nextTokPtr = ptr;
01084 return XML_TOK_INVALID;
01085 case BT_VERBAR:
01086 *nextTokPtr = ptr + MINBPC(enc);
01087 return XML_TOK_OR;
01088 case BT_GT:
01089 *nextTokPtr = ptr + MINBPC(enc);
01090 return XML_TOK_DECL_CLOSE;
01091 case BT_NUM:
01092 return PREFIX(scanPoundName)(enc, ptr + MINBPC(enc), end, nextTokPtr);
01093 #define LEAD_CASE(n) \
01094 case BT_LEAD ## n: \
01095 if (end - ptr < n) \
01096 return XML_TOK_PARTIAL_CHAR; \
01097 if (IS_NMSTRT_CHAR(enc, ptr, n)) { \
01098 ptr += n; \
01099 tok = XML_TOK_NAME; \
01100 break; \
01101 } \
01102 if (IS_NAME_CHAR(enc, ptr, n)) { \
01103 ptr += n; \
01104 tok = XML_TOK_NMTOKEN; \
01105 break; \
01106 } \
01107 *nextTokPtr = ptr; \
01108 return XML_TOK_INVALID;
01109 LEAD_CASE(2) LEAD_CASE(3) LEAD_CASE(4)
01110 #undef LEAD_CASE
01111 case BT_NMSTRT:
01112 case BT_HEX:
01113 tok = XML_TOK_NAME;
01114 ptr += MINBPC(enc);
01115 break;
01116 case BT_DIGIT:
01117 case BT_NAME:
01118 case BT_MINUS:
01119 #ifdef XML_NS
01120 case BT_COLON:
01121 #endif
01122 tok = XML_TOK_NMTOKEN;
01123 ptr += MINBPC(enc);
01124 break;
01125 case BT_NONASCII:
01126 if (IS_NMSTRT_CHAR_MINBPC(enc, ptr)) {
01127 ptr += MINBPC(enc);
01128 tok = XML_TOK_NAME;
01129 break;
01130 }
01131 if (IS_NAME_CHAR_MINBPC(enc, ptr)) {
01132 ptr += MINBPC(enc);
01133 tok = XML_TOK_NMTOKEN;
01134 break;
01135 }
01136
01137 default:
01138 *nextTokPtr = ptr;
01139 return XML_TOK_INVALID;
01140 }
01141 while (ptr != end) {
01142 switch (BYTE_TYPE(enc, ptr)) {
01143 CHECK_NAME_CASES(enc, ptr, end, nextTokPtr)
01144 case BT_GT: case BT_RPAR: case BT_COMMA:
01145 case BT_VERBAR: case BT_LSQB: case BT_PERCNT:
01146 case BT_S: case BT_CR: case BT_LF:
01147 *nextTokPtr = ptr;
01148 return tok;
01149 #ifdef XML_NS
01150 case BT_COLON:
01151 ptr += MINBPC(enc);
01152 switch (tok) {
01153 case XML_TOK_NAME:
01154 if (ptr == end)
01155 return XML_TOK_PARTIAL;
01156 tok = XML_TOK_PREFIXED_NAME;
01157 switch (BYTE_TYPE(enc, ptr)) {
01158 CHECK_NAME_CASES(enc, ptr, end, nextTokPtr)
01159 default:
01160 tok = XML_TOK_NMTOKEN;
01161 break;
01162 }
01163 break;
01164 case XML_TOK_PREFIXED_NAME:
01165 tok = XML_TOK_NMTOKEN;
01166 break;
01167 }
01168 break;
01169 #endif
01170 case BT_PLUS:
01171 if (tok == XML_TOK_NMTOKEN) {
01172 *nextTokPtr = ptr;
01173 return XML_TOK_INVALID;
01174 }
01175 *nextTokPtr = ptr + MINBPC(enc);
01176 return XML_TOK_NAME_PLUS;
01177 case BT_AST:
01178 if (tok == XML_TOK_NMTOKEN) {
01179 *nextTokPtr = ptr;
01180 return XML_TOK_INVALID;
01181 }
01182 *nextTokPtr = ptr + MINBPC(enc);
01183 return XML_TOK_NAME_ASTERISK;
01184 case BT_QUEST:
01185 if (tok == XML_TOK_NMTOKEN) {
01186 *nextTokPtr = ptr;
01187 return XML_TOK_INVALID;
01188 }
01189 *nextTokPtr = ptr + MINBPC(enc);