#include <stddef.h>
#include "expat_external.h"
#include "internal.h"
#include "xmltok.h"
#include "nametab.h"
#include "xmltok_impl.h"
#include "ascii.h"
#include "xmltok_impl.c"
#include "asciitab.h"
#include "utf8tab.h"
#include "iasciitab.h"
#include "latin1tab.h"
#include "xmltok_ns.c"
Go to the source code of this file.
Classes | |
| struct | normal_encoding |
| struct | unknown_encoding |
Defines | |
| #define | IGNORE_SECTION_TOK_VTABLE |
| #define | VTABLE1 |
| #define | VTABLE VTABLE1, PREFIX(toUtf8), PREFIX(toUtf16) |
| #define | UCS2_GET_NAMING(pages, hi, lo) (namingBitmap[(pages[hi] << 3) + ((lo) >> 5)] & (1 << ((lo) & 0x1F))) |
| #define | UTF8_GET_NAMING2(pages, byte) |
| #define | UTF8_GET_NAMING3(pages, byte) |
| #define | UTF8_GET_NAMING(pages, p, n) |
| #define | UTF8_INVALID2(p) ((*p) < 0xC2 || ((p)[1] & 0x80) == 0 || ((p)[1] & 0xC0) == 0xC0) |
| #define | UTF8_INVALID3(p) |
| #define | UTF8_INVALID4(p) |
| #define | utf8_isName4 isNever |
| #define | utf8_isNmstrt4 isNever |
| #define | AS_NORMAL_ENCODING(enc) ((const struct normal_encoding *) (enc)) |
| #define | STANDARD_VTABLE(E) |
| #define | NORMAL_VTABLE(E) |
| #define | MINBPC(enc) 1 |
| #define | SB_BYTE_TYPE(enc, p) (((struct normal_encoding *)(enc))->type[(unsigned char)*(p)]) |
| #define | BYTE_TYPE(enc, p) SB_BYTE_TYPE(enc, p) |
| #define | BYTE_TO_ASCII(enc, p) (*(p)) |
| #define | IS_NAME_CHAR(enc, p, n) (AS_NORMAL_ENCODING(enc)->isName ## n(enc, p)) |
| #define | IS_NMSTRT_CHAR(enc, p, n) (AS_NORMAL_ENCODING(enc)->isNmstrt ## n(enc, p)) |
| #define | IS_INVALID_CHAR(enc, p, n) (AS_NORMAL_ENCODING(enc)->isInvalid ## n(enc, p)) |
| #define | IS_NAME_CHAR_MINBPC(enc, p) (0) |
| #define | IS_NMSTRT_CHAR_MINBPC(enc, p) (0) |
| #define | CHAR_MATCHES(enc, p, c) (*(p) == c) |
| #define | PREFIX(ident) normal_ ## ident |
| #define | BT_COLON BT_NMSTRT |
| #define | DEFINE_UTF16_TO_UTF8(E) |
| #define | DEFINE_UTF16_TO_UTF16(E) |
| #define | SET2(ptr, ch) (((ptr)[0] = ((ch) & 0xff)), ((ptr)[1] = ((ch) >> 8))) |
| #define | GET_LO(ptr) ((unsigned char)(ptr)[0]) |
| #define | GET_HI(ptr) ((unsigned char)(ptr)[1]) |
| #define | LITTLE2_BYTE_TYPE(enc, p) |
| #define | LITTLE2_BYTE_TO_ASCII(enc, p) ((p)[1] == 0 ? (p)[0] : -1) |
| #define | LITTLE2_CHAR_MATCHES(enc, p, c) ((p)[1] == 0 && (p)[0] == c) |
| #define | LITTLE2_IS_NAME_CHAR_MINBPC(enc, p) UCS2_GET_NAMING(namePages, (unsigned char)p[1], (unsigned char)p[0]) |
| #define | LITTLE2_IS_NMSTRT_CHAR_MINBPC(enc, p) UCS2_GET_NAMING(nmstrtPages, (unsigned char)p[1], (unsigned char)p[0]) |
| #define | BIG2_BYTE_TYPE(enc, p) |
| #define | BIG2_BYTE_TO_ASCII(enc, p) ((p)[0] == 0 ? (p)[1] : -1) |
| #define | BIG2_CHAR_MATCHES(enc, p, c) ((p)[0] == 0 && (p)[1] == c) |
| #define | BIG2_IS_NAME_CHAR_MINBPC(enc, p) UCS2_GET_NAMING(namePages, (unsigned char)p[0], (unsigned char)p[1]) |
| #define | BIG2_IS_NMSTRT_CHAR_MINBPC(enc, p) UCS2_GET_NAMING(nmstrtPages, (unsigned char)p[0], (unsigned char)p[1]) |
| #define | AS_UNKNOWN_ENCODING(enc) ((const struct unknown_encoding *) (enc)) |
| #define | INIT_ENC_INDEX(enc) ((int)(enc)->initEnc.isUtf16) |
| #define | SET_INIT_ENC_INDEX(enc, i) ((enc)->initEnc.isUtf16 = (char)i) |
| #define | NS(x) x |
Enumerations | |
| enum | { UTF8_cval1 = 0x00, UTF8_cval2 = 0xc0, UTF8_cval3 = 0xe0, UTF8_cval4 = 0xf0 } |
| enum | { UNKNOWN_ENC = -1, ISO_8859_1_ENC = 0, US_ASCII_ENC, UTF_8_ENC, UTF_16_ENC, UTF_16BE_ENC, UTF_16LE_ENC, NO_ENC } |
Functions | |
| static int PTRFASTCALL | isNever (const ENCODING *enc, const char *p) |
| static int PTRFASTCALL | utf8_isName2 (const ENCODING *enc, const char *p) |
| static int PTRFASTCALL | utf8_isName3 (const ENCODING *enc, const char *p) |
| static int PTRFASTCALL | utf8_isNmstrt2 (const ENCODING *enc, const char *p) |
| static int PTRFASTCALL | utf8_isNmstrt3 (const ENCODING *enc, const char *p) |
| static int PTRFASTCALL | utf8_isInvalid2 (const ENCODING *enc, const char *p) |
| static int PTRFASTCALL | utf8_isInvalid3 (const ENCODING *enc, const char *p) |
| static int PTRFASTCALL | utf8_isInvalid4 (const ENCODING *enc, const char *p) |
| static int FASTCALL | checkCharRefNumber (int) |
| static void PTRCALL | utf8_toUtf8 (const ENCODING *enc, const char **fromP, const char *fromLim, char **toP, const char *toLim) |
| static void PTRCALL | utf8_toUtf16 (const ENCODING *enc, const char **fromP, const char *fromLim, unsigned short **toP, const unsigned short *toLim) |
| static void PTRCALL | latin1_toUtf8 (const ENCODING *enc, const char **fromP, const char *fromLim, char **toP, const char *toLim) |
| static void PTRCALL | latin1_toUtf16 (const ENCODING *enc, const char **fromP, const char *fromLim, unsigned short **toP, const unsigned short *toLim) |
| static void PTRCALL | ascii_toUtf8 (const ENCODING *enc, const char **fromP, const char *fromLim, char **toP, const char *toLim) |
| static int PTRFASTCALL | unicode_byte_type (char hi, char lo) |
| static int FASTCALL | streqci (const char *s1, const char *s2) |
| static void PTRCALL | initUpdatePosition (const ENCODING *enc, const char *ptr, const char *end, POSITION *pos) |
| static int | toAscii (const ENCODING *enc, const char *ptr, const char *end) |
| static int FASTCALL | isSpace (int c) |
| static int | parsePseudoAttribute (const ENCODING *enc, const char *ptr, const char *end, const char **namePtr, const char **nameEndPtr, const char **valPtr, const char **nextTokPtr) |
| static int | doParseXmlDecl (const ENCODING *(*encodingFinder)(const ENCODING *, const char *, const char *), int isGeneralTextEntity, const ENCODING *enc, const char *ptr, const char *end, const char **badPtr, const char **versionPtr, const char **versionEndPtr, const char **encodingName, const ENCODING **encoding, int *standalone) |
| int FASTCALL | XmlUtf8Encode (int c, char *buf) |
| int FASTCALL | XmlUtf16Encode (int charNum, unsigned short *buf) |
| int | XmlSizeOfUnknownEncoding (void) |
| static int PTRFASTCALL | unknown_isName (const ENCODING *enc, const char *p) |
| static int PTRFASTCALL | unknown_isNmstrt (const ENCODING *enc, const char *p) |
| static int PTRFASTCALL | unknown_isInvalid (const ENCODING *enc, const char *p) |
| static void PTRCALL | unknown_toUtf8 (const ENCODING *enc, const char **fromP, const char *fromLim, char **toP, const char *toLim) |
| static void PTRCALL | unknown_toUtf16 (const ENCODING *enc, const char **fromP, const char *fromLim, unsigned short **toP, const unsigned short *toLim) |
| ENCODING * | XmlInitUnknownEncoding (void *mem, int *table, CONVERTER convert, void *userData) |
| static int FASTCALL | getEncodingIndex (const char *name) |
| static int | initScan (const ENCODING **encodingTable, const INIT_ENCODING *enc, int state, const char *ptr, const char *end, const char **nextTokPtr) |
Variables | |
| static const struct normal_encoding | utf8_encoding |
| static const struct normal_encoding | internal_utf8_encoding |
| static const struct normal_encoding | latin1_encoding |
| static const struct normal_encoding | ascii_encoding |
| static const struct normal_encoding | little2_encoding |
| static const struct normal_encoding | internal_little2_encoding |
| static const struct normal_encoding | big2_encoding |
| static const struct normal_encoding | internal_big2_encoding |
| static const char | KW_version [] |
| static const char | KW_encoding [] |
| static const char | KW_standalone [] |
| static const char | KW_yes [] |
| static const char | KW_no [] |
| static const char | KW_ISO_8859_1 [] |
| static const char | KW_US_ASCII [] |
| static const char | KW_UTF_8 [] |
| static const char | KW_UTF_16 [] |
| static const char | KW_UTF_16BE [] |
| static const char | KW_UTF_16LE [] |
|
|
|
|
|
|
|
|
|
|
|
Value: ((p)[0] == 0 \ ? ((struct normal_encoding *)(enc))->type[(unsigned char)(p)[1]] \ : unicode_byte_type((p)[0], (p)[1])) Definition at line 921 of file xmltok.c. Referenced by unicode_byte_type(). |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
Value: static void PTRCALL \ E ## toUtf16(const ENCODING *enc, \ const char **fromP, const char *fromLim, \ unsigned short **toP, const unsigned short *toLim) \ { \ /* Avoid copying first half only of surrogate */ \ if (fromLim - *fromP > ((toLim - *toP) << 1) \ && (GET_HI(fromLim - 2) & 0xF8) == 0xD8) \ fromLim -= 2; \ for (; *fromP != fromLim && *toP != toLim; *fromP += 2) \ *(*toP)++ = (GET_HI(*fromP) << 8) | GET_LO(*fromP); \ } |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
Definition at line 1624 of file xmltok.c. Referenced by XmlInitUnknownEncoding(). |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
Value: ((p)[1] == 0 \ ? ((struct normal_encoding *)(enc))->type[(unsigned char)*(p)] \ : unicode_byte_type((p)[1], (p)[0])) |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
Value: E ## isName2, \ E ## isName3, \ E ## isName4, \ E ## isNmstrt2, \ E ## isNmstrt3, \ E ## isNmstrt4, \ E ## isInvalid2, \ E ## isInvalid3, \ E ## isInvalid4 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
Definition at line 206 of file xmltok.c. Referenced by ascii_toUtf8(). |
|
|
|
|
|
Value: ((n) == 2 \ ? UTF8_GET_NAMING2(pages, (const unsigned char *)(p)) \ : ((n) == 3 \ ? UTF8_GET_NAMING3(pages, (const unsigned char *)(p)) \ : 0)) |
|
|
Value: (namingBitmap[((pages)[(((byte)[0]) >> 2) & 7] << 3) \ + ((((byte)[0]) & 3) << 1) \ + ((((byte)[1]) >> 5) & 1)] \ & (1 << (((byte)[1]) & 0x1F))) Definition at line 51 of file xmltok.c. Referenced by utf8_isName2(), and utf8_isNmstrt2(). |
|
|
Value: (namingBitmap[((pages)[((((byte)[0]) & 0xF) << 4) \ + ((((byte)[1]) >> 2) & 0xF)] \ << 3) \ + ((((byte)[1]) & 3) << 1) \ + ((((byte)[2]) >> 5) & 1)] \ & (1 << (((byte)[2]) & 0x1F))) Definition at line 62 of file xmltok.c. Referenced by utf8_isName3(), and utf8_isNmstrt3(). |
|
|
Definition at line 87 of file xmltok.c. Referenced by utf8_isInvalid2(). |
|
|
Value: (((p)[2] & 0x80) == 0 \ || \ ((*p) == 0xEF && (p)[1] == 0xBF \ ? \ (p)[2] > 0xBD \ : \ ((p)[2] & 0xC0) == 0xC0) \ || \ ((*p) == 0xE0 \ ? \ (p)[1] < 0xA0 || ((p)[1] & 0xC0) == 0xC0 \ : \ ((p)[1] & 0x80) == 0 \ || \ ((*p) == 0xED ? (p)[1] > 0x9F : ((p)[1] & 0xC0) == 0xC0))) Definition at line 90 of file xmltok.c. Referenced by utf8_isInvalid3(). |
|
|
Value: (((p)[3] & 0x80) == 0 || ((p)[3] & 0xC0) == 0xC0 \ || \ ((p)[2] & 0x80) == 0 || ((p)[2] & 0xC0) == 0xC0 \ || \ ((*p) == 0xF0 \ ? \ (p)[1] < 0x90 || ((p)[1] & 0xC0) == 0xC0 \ : \ ((p)[1] & 0x80) == 0 \ || \ ((*p) == 0xF4 ? (p)[1] > 0x8F : ((p)[1] & 0xC0) == 0xC0))) Definition at line 107 of file xmltok.c. Referenced by utf8_isInvalid4(). |
|
|
|
|
|
|
|
|
Definition at line 42 of file xmltok.c. Referenced by ascii_toUtf8(). |
|
|
Value: { PREFIX(prologTok), PREFIX(contentTok), \
PREFIX(cdataSectionTok) IGNORE_SECTION_TOK_VTABLE }, \
{ PREFIX(attributeValueTok), PREFIX(entityValueTok) }, \
PREFIX(sameName), \
PREFIX(nameMatchesAscii), \
PREFIX(nameLength), \
PREFIX(skipS), \
PREFIX(getAtts), \
PREFIX(charRefNumber), \
PREFIX(predefinedEntityName), \
PREFIX(updatePosition), \
PREFIX(isPublicId)
|
|
|
Definition at line 308 of file xmltok.c.
00308 { /* UTF8_cvalN is value of masked first byte of N byte sequence */
00309   UTF8_cval1 = 0x00,
00310   UTF8_cval2 = 0xc0,
00311   UTF8_cval3 = 0xe0,
00312   UTF8_cval4 = 0xf0
00313 };
|
|
|
Definition at line 1565 of file xmltok.c.
01567 {
01568         *nextTokPtr = ptr + 3;
01569         *encPtr = encodingTable[UTF_8_ENC];
01570         return XML_TOK_BOM;
01571       }
01572       break;
01573     default:
01574       if (ptr[0] == '\0') {
01575         /* 0 isn't a legal data character. Furthermore a document
|
|
||||||||||||||||||||||||
|
Definition at line 706 of file xmltok.c.
References STANDARD_VTABLE, and VTABLE.
00709 {
00710   { VTABLE, 2, 0,
00711 #if BYTEORDER == 1234
00712     1
|
|
|
Definition at line 1313 of file xmltok.c.
01319 {
01320   const struct unknown_encoding *uenc = AS_UNKNOWN_ENCODING(enc);
01321   while (*fromP != fromLim && *toP != toLim) {
01322     unsigned short c = uenc->utf16[(unsigned char)**fromP];
01323     if (c == 0) {
01324       c = (unsigned short)
01325           uenc->convert(uenc->userData, *fromP);
01326       *fromP += (AS_NORMAL_ENCODING(enc)->type[(unsigned char)**fromP]
01327                  - (BT_LEAD2 - 2));
01328     }
01329     else
|
|
||||||||||||||||||||||||||||||||||||||||||||||||
|
Definition at line 1221 of file xmltok.c.
01221 {
01222   if (charNum < 0)
01223     return 0;
01224   if (charNum < 0x10000) {
01225     buf[0] = (unsigned short)charNum;
01226     return 1;
01227   }
01228   if (charNum < 0x110000) {
01229     charNum -= 0x10000;
01230     buf[0] = (unsigned short)((charNum >> 10) + 0xD800);
01231     buf[1] = (unsigned short)((charNum & 0x3FF) + 0xDC00);
01232     return 2;
01233   }
01234   return 0;
01235 }
01236
01237 struct unknown_encoding {
01238   struct normal_encoding normal;
01239   CONVERTER convert;
01240   void *userData;
01241   unsigned short utf16[256];
01242   char utf8[256][4];
01243 };
01244
01245 #define AS_UNKNOWN_ENCODING(enc) ((const struct unknown_encoding *) (enc))
01246
01247 int
01248 XmlSizeOfUnknownEncoding(void)
01249 {
01250   return sizeof(struct unknown_encoding);
01251 }
01252
01253 static int PTRFASTCALL
01254 unknown_isName(const ENCODING *enc, const char *p)
01255 {
01256   const struct unknown_encoding *uenc = AS_UNKNOWN_ENCODING(enc);
01257   int c = uenc->convert(uenc->userData, p);
01258   if (c & ~0xFFFF)
01259     return 0;
01260   return UCS2_GET_NAMING(namePages, c >> 8, c & 0xFF);
01261 }
01262
01263 static int PTRFASTCALL
01264 unknown_isNmstrt(const ENCODING *enc, const char *p)
01265 {
01266   const struct unknown_encoding *uenc = AS_UNKNOWN_ENCODING(enc);
01267   int c = uenc->convert(uenc->userData, p);
01268   if (c & ~0xFFFF)
01269     return 0;
01270   return UCS2_GET_NAMING(nmstrtPages, c >> 8, c & 0xFF);
01271 }
01272
01273 static int PTRFASTCALL
01274 unknown_isInvalid(const ENCODING *enc, const char *p)
01275 {
01276   const struct unknown_encoding *uenc = AS_UNKNOWN_ENCODING(enc);
01277   int c = uenc->convert(uenc->userData, p);
01278   return (c & ~0xFFFF) || checkCharRefNumber(c) < 0;
01279 }
01280
01281 static void PTRCALL
01282 unknown_toUtf8(const ENCODING *enc,
01283                const char **fromP, const char *fromLim,
01284                char **toP, const char *toLim)
01285 {
01286   const struct unknown_encoding *uenc = AS_UNKNOWN_ENCODING(enc);
01287   char buf[XML_UTF8_ENCODE_MAX];
01288   for (;;) {
01289     const char *utf8;
01290     int n;
01291     if (*fromP == fromLim)
01292       break;
01293     utf8 = uenc->utf8[(unsigned char)**fromP];
01294     n = *utf8++;
01295     if (n == 0) {
01296       int c = uenc->convert(uenc->userData, *fromP);
01297       n = XmlUtf8Encode(c, buf);
01298       if (n > toLim - *toP)
01299         break;
01300       utf8 = buf;
01301       *fromP += (AS_NORMAL_ENCODING(enc)->type[(unsigned char)**fromP]
01302                  - (BT_LEAD2 - 2));
01303     }
01304     else {
01305       if (n > toLim - *toP)
01306         break;
01307       (*fromP)++;
01308     }
01309     do {
01310       *(*toP)++ = *utf8++;
|
|
|
Definition at line 1601 of file xmltok.c.
|
|
||||||||||||||||||||||||||||
|
|
|
||||||||||||||||||||
|
Definition at line 1080 of file xmltok.c.
|
|
||||||||||||
|
Definition at line 121 of file xmltok.c.
|
|
|
Definition at line 1099 of file xmltok.c.
01102 {
01103     if (versionPtr)
01104       *versionPtr = val;
01105     if (versionEndPtr)
01106       *versionEndPtr = ptr;
01107     if (!parsePseudoAttribute(enc, ptr, end, &name, &nameEnd, &val, &ptr)) {
01108       *badPtr = ptr;
01109       return 0;
|
|
||||||||||||||||||||||||
|
Definition at line 599 of file xmltok.c.
00600 { \
00601   /* Avoid copying first half only of surrogate */ \
00602   if (fromLim - *fromP > ((toLim - *toP) << 1) \
00603       && (GET_HI(fromLim - 2) & 0xF8) == 0xD8) \
00604     fromLim -= 2; \
00605   for (; *fromP != fromLim && *toP != toLim; *fromP += 2) \
|
|
||||||||||||||||||||||||
|
|