00001
00002
00003
00004
00005 #include "Python.h"
00006 #include <ctype.h>
00007
00008 #include "compile.h"
00009 #include "frameobject.h"
00010 #include "expat.h"
00011 /* Expat version folded into one comparable integer: major*10000 + minor*100 + micro. */
00012 #define XML_COMBINED_VERSION (10000*XML_MAJOR_VERSION+100*XML_MINOR_VERSION+XML_MICRO_VERSION)
00013
00014 #ifndef PyDoc_STRVAR
00015
00016 /* Fallback definitions, used only when the headers included above do not */
00017 /* already provide PyDoc_STRVAR: */
00018 /*   PyDoc_STR(str)          expands to the string literal unchanged;  */
00019 /*   PyDoc_VAR(name)         declares a static char array called name; */
00020 /*   PyDoc_STRVAR(name, str) declares that array initialised to str.   */
00021
00022
00023 #define PyDoc_STR(str) str
00024 #define PyDoc_VAR(name) static char name[]
00025 #define PyDoc_STRVAR(name,str) PyDoc_VAR(name) = PyDoc_STR(str)
00026 #endif
00027
00028 #if (PY_MAJOR_VERSION == 2 && PY_MINOR_VERSION < 2)
00029 /* Python 2.x before 2.2: define Py_USING_UNICODE here so the unicode code paths below are compiled. */
00030 #define Py_USING_UNICODE
00031 #else /* otherwise FIX_TRACE enables trace_frame()/trace_frame_exc() and their use in call_with_frame() */
00032 #define FIX_TRACE
00033 #endif
00034 /* Handler slot numbers: each value indexes xmlparseobject.handlers[] and handler_info[]. */
00035 enum HandlerTypes {
00036 StartElement,
00037 EndElement,
00038 ProcessingInstruction,
00039 CharacterData,
00040 UnparsedEntityDecl,
00041 NotationDecl,
00042 StartNamespaceDecl,
00043 EndNamespaceDecl,
00044 Comment,
00045 StartCdataSection,
00046 EndCdataSection,
00047 Default,
00048 DefaultHandlerExpand,
00049 NotStandalone,
00050 ExternalEntityRef,
00051 StartDoctypeDecl,
00052 EndDoctypeDecl,
00053 EntityDecl,
00054 XmlDecl,
00055 ElementDecl,
00056 AttlistDecl,
00057 #if XML_COMBINED_VERSION >= 19504
00058 SkippedEntity, /* only present when XML_COMBINED_VERSION >= 19504 */
00059 #endif
00060 _DummyDecl /* final enumerator; not referenced by the code visible in this section */
00061 };
00062 /* Exception class that set_error() instantiates and raises. */
00063 static PyObject *ErrorObject;
00064
00065
00066
00067
00068 /* Per-parser state; instances are allocated with Xmlparsetype as their Python type. */
00069 typedef struct {
00070 PyObject_HEAD
00071
00072 XML_Parser itself; /* underlying Expat parser handle */
00073 int returns_unicode; /* nonzero: convert strings with the *_to_unicode helpers, else the *_to_utf8 ones */
00074
00075 int ordered_attributes; /* nonzero: StartElement attributes are passed as a flat list, else as a dict */
00076 int specified_attributes; /* nonzero: attribute count comes from XML_GetSpecifiedAttributeCount() */
00077 int in_callback; /* set to 1 while a Python handler is being called, 0 otherwise */
00078 int ns_prefixes; /* copied to child parsers; not otherwise used in this section */
00079 XML_Char *buffer; /* character-data buffer, or NULL when character data is delivered unbuffered */
00080
00081 int buffer_size; /* capacity of buffer */
00082 int buffer_used; /* amount of buffer currently filled */
00083 PyObject *intern; /* dict used by string_intern() to share equal strings; may be NULL */
00084 PyObject **handlers; /* Python callables indexed by enum HandlerTypes; a NULL entry means "not set" */
00085 } xmlparseobject;
00086 /* NOTE(review): presumably the default size of xmlparseobject.buffer; it is not referenced in this section. */
00087 #define CHARACTER_DATA_BUFFER_SIZE 8192
00088
00089 static PyTypeObject Xmlparsetype;
00090 /* xmlhandlersetter registers a C callback on an Expat parser; xmlhandler is that callback as an untyped pointer. */
00091 typedef void (*xmlhandlersetter)(XML_Parser self, void *meth);
00092 typedef void* xmlhandler;
00093 /* One entry per handler slot; users scan the table until an entry whose name is NULL. */
00094 struct HandlerInfo {
00095 const char *name; /* handler name as a C string */
00096 xmlhandlersetter setter; /* installs `handler` on an XML_Parser */
00097 xmlhandler handler; /* C callback handed to setter */
00098 PyCodeObject *tb_code; /* code object created lazily and cached by getcode() */
00099 PyObject *nameobj; /* Python string for name, cached by get_handler_name() */
00100 };
00101
00102 static struct HandlerInfo handler_info[64];
00103
00104
00105
00106
00107 static int
00108 set_error_attr(PyObject *err, char *name, int value)
00109 {
00110 PyObject *v = PyInt_FromLong(value);
00111
00112 if (v != NULL && PyObject_SetAttrString(err, name, v) == -1) {
00113 Py_DECREF(v);
00114 return 0;
00115 }
00116 Py_DECREF(v);
00117 return 1;
00118 }
00119
00120
00121
00122
00123 static PyObject *
00124 set_error(xmlparseobject *self, enum XML_Error code)
00125 {
00126 PyObject *err;
00127 char buffer[256];
00128 XML_Parser parser = self->itself;
00129 int lineno = XML_GetErrorLineNumber(parser);
00130 int column = XML_GetErrorColumnNumber(parser);
00131
00132
00133
00134 sprintf(buffer, "%.200s: line %i, column %i",
00135 XML_ErrorString(code), lineno, column);
00136 err = PyObject_CallFunction(ErrorObject, "s", buffer);
00137 if ( err != NULL
00138 && set_error_attr(err, "code", code)
00139 && set_error_attr(err, "offset", column)
00140 && set_error_attr(err, "lineno", lineno)) {
00141 PyErr_SetObject(ErrorObject, err);
00142 }
00143 Py_DECREF(err);
00144 return NULL;
00145 }
00146
00147 static int
00148 have_handler(xmlparseobject *self, int type)
00149 {
00150 PyObject *handler = self->handlers[type];
00151 return handler != NULL;
00152 }
00153
00154 static PyObject *
00155 get_handler_name(struct HandlerInfo *hinfo)
00156 {
00157 PyObject *name = hinfo->nameobj;
00158 if (name == NULL) {
00159 name = PyString_FromString(hinfo->name);
00160 hinfo->nameobj = name;
00161 }
00162 Py_XINCREF(name);
00163 return name;
00164 }
00165
00166
00167 #ifdef Py_USING_UNICODE
00168
00169
00170
00171 static PyObject *
00172 conv_string_to_unicode(const XML_Char *str)
00173 {
00174
00175
00176
00177 if (str == NULL) {
00178 Py_INCREF(Py_None);
00179 return Py_None;
00180 }
00181 return PyUnicode_DecodeUTF8(str, strlen(str), "strict");
00182 }
00183
00184 static PyObject *
00185 conv_string_len_to_unicode(const XML_Char *str, int len)
00186 {
00187
00188
00189
00190 if (str == NULL) {
00191 Py_INCREF(Py_None);
00192 return Py_None;
00193 }
00194 return PyUnicode_DecodeUTF8((const char *)str, len, "strict");
00195 }
00196 #endif
00197
00198
00199
00200
00201 static PyObject *
00202 conv_string_to_utf8(const XML_Char *str)
00203 {
00204
00205
00206
00207 if (str == NULL) {
00208 Py_INCREF(Py_None);
00209 return Py_None;
00210 }
00211 return PyString_FromString(str);
00212 }
00213
00214 static PyObject *
00215 conv_string_len_to_utf8(const XML_Char *str, int len)
00216 {
00217
00218
00219
00220 if (str == NULL) {
00221 Py_INCREF(Py_None);
00222 return Py_None;
00223 }
00224 return PyString_FromStringAndSize((const char *)str, len);
00225 }
00226
00227
00228 /* Forward declaration: flag_error() calls clear_handlers() before its definition, which is not part of this section. */
00229 static void clear_handlers(xmlparseobject *self, int initial);
00230
00231
00232
00233
00234 static int
00235 error_external_entity_ref_handler(XML_Parser parser,
00236 const XML_Char *context,
00237 const XML_Char *base,
00238 const XML_Char *systemId,
00239 const XML_Char *publicId)
00240 {
00241 return 0;
00242 }
00243
00244 static void
00245 flag_error(xmlparseobject *self)
00246 {
00247 clear_handlers(self, 0);
00248 XML_SetExternalEntityRefHandler(self->itself,
00249 error_external_entity_ref_handler);
00250 }
00251
00252 static PyCodeObject*
00253 getcode(enum HandlerTypes slot, char* func_name, int lineno)
00254 {
00255 PyObject *code = NULL;
00256 PyObject *name = NULL;
00257 PyObject *nulltuple = NULL;
00258 PyObject *filename = NULL;
00259
00260 if (handler_info[slot].tb_code == NULL) {
00261 code = PyString_FromString("");
00262 if (code == NULL)
00263 goto failed;
00264 name = PyString_FromString(func_name);
00265 if (name == NULL)
00266 goto failed;
00267 nulltuple = PyTuple_New(0);
00268 if (nulltuple == NULL)
00269 goto failed;
00270 filename = PyString_FromString(__FILE__);
00271 handler_info[slot].tb_code =
00272 PyCode_New(0,
00273 0,
00274 0,
00275 0,
00276 code,
00277 nulltuple,
00278 nulltuple,
00279 nulltuple,
00280 #if PYTHON_API_VERSION >= 1010
00281 nulltuple,
00282 nulltuple,
00283 #endif
00284 filename,
00285 name,
00286 lineno,
00287 code
00288 );
00289 if (handler_info[slot].tb_code == NULL)
00290 goto failed;
00291 Py_DECREF(code);
00292 Py_DECREF(nulltuple);
00293 Py_DECREF(filename);
00294 Py_DECREF(name);
00295 }
00296 return handler_info[slot].tb_code;
00297 failed:
00298 Py_XDECREF(code);
00299 Py_XDECREF(name);
00300 return NULL;
00301 }
00302
00303 #ifdef FIX_TRACE
00304 static int
00305 trace_frame(PyThreadState *tstate, PyFrameObject *f, int code, PyObject *val)
00306 {
00307 int result = 0;
00308 if (!tstate->use_tracing || tstate->tracing)
00309 return 0;
00310 if (tstate->c_profilefunc != NULL) {
00311 tstate->tracing++;
00312 result = tstate->c_profilefunc(tstate->c_profileobj,
00313 f, code , val);
00314 tstate->use_tracing = ((tstate->c_tracefunc != NULL)
00315 || (tstate->c_profilefunc != NULL));
00316 tstate->tracing--;
00317 if (result)
00318 return result;
00319 }
00320 if (tstate->c_tracefunc != NULL) {
00321 tstate->tracing++;
00322 result = tstate->c_tracefunc(tstate->c_traceobj,
00323 f, code , val);
00324 tstate->use_tracing = ((tstate->c_tracefunc != NULL)
00325 || (tstate->c_profilefunc != NULL));
00326 tstate->tracing--;
00327 }
00328 return result;
00329 }
00330
00331 static int
00332 trace_frame_exc(PyThreadState *tstate, PyFrameObject *f)
00333 {
00334 PyObject *type, *value, *traceback, *arg;
00335 int err;
00336
00337 if (tstate->c_tracefunc == NULL)
00338 return 0;
00339
00340 PyErr_Fetch(&type, &value, &traceback);
00341 if (value == NULL) {
00342 value = Py_None;
00343 Py_INCREF(value);
00344 }
00345 #if PY_VERSION_HEX < 0x02040000
00346 arg = Py_BuildValue("(OOO)", type, value, traceback);
00347 #else
00348 arg = PyTuple_Pack(3, type, value, traceback);
00349 #endif
00350 if (arg == NULL) {
00351 PyErr_Restore(type, value, traceback);
00352 return 0;
00353 }
00354 err = trace_frame(tstate, f, PyTrace_EXCEPTION, arg);
00355 Py_DECREF(arg);
00356 if (err == 0)
00357 PyErr_Restore(type, value, traceback);
00358 else {
00359 Py_XDECREF(type);
00360 Py_XDECREF(value);
00361 Py_XDECREF(traceback);
00362 }
00363 return err;
00364 }
00365 #endif
00366
00367 static PyObject*
00368 call_with_frame(PyCodeObject *c, PyObject* func, PyObject* args,
00369 xmlparseobject *self)
00370 {
00371 PyThreadState *tstate = PyThreadState_GET();
00372 PyFrameObject *f;
00373 PyObject *res;
00374
00375 if (c == NULL)
00376 return NULL;
00377
00378 f = PyFrame_New(tstate, c, PyEval_GetGlobals(), NULL);
00379 if (f == NULL)
00380 return NULL;
00381 tstate->frame = f;
00382 #ifdef FIX_TRACE
00383 if (trace_frame(tstate, f, PyTrace_CALL, Py_None) < 0) {
00384 return NULL;
00385 }
00386 #endif
00387 res = PyEval_CallObject(func, args);
00388 if (res == NULL) {
00389 if (tstate->curexc_traceback == NULL)
00390 PyTraceBack_Here(f);
00391 XML_StopParser(self->itself, XML_FALSE);
00392 #ifdef FIX_TRACE
00393 if (trace_frame_exc(tstate, f) < 0) {
00394 return NULL;
00395 }
00396 }
00397 else {
00398 if (trace_frame(tstate, f, PyTrace_RETURN, res) < 0) {
00399 Py_XDECREF(res);
00400 res = NULL;
00401 }
00402 }
00403 #else
00404 }
00405 #endif
00406 tstate->frame = f->f_back;
00407 Py_DECREF(f);
00408 return res;
00409 }
00410 /* STRING_CONV_FUNC: converter applied to NUL-terminated XML_Char strings handed to Python handlers. */
00411 #ifndef Py_USING_UNICODE
00412 #define STRING_CONV_FUNC conv_string_to_utf8
00413 #else
00414 /* This variant needs a variable named `self` in scope; it picks the converter from self->returns_unicode. */
00415 #define STRING_CONV_FUNC (self->returns_unicode \
00416 ? conv_string_to_unicode : conv_string_to_utf8)
00417 #endif
00418
00419 static PyObject*
00420 string_intern(xmlparseobject *self, const char* str)
00421 {
00422 PyObject *result = STRING_CONV_FUNC(str);
00423 PyObject *value;
00424 if (!self->intern)
00425 return result;
00426 value = PyDict_GetItem(self->intern, result);
00427 if (!value) {
00428 if (PyDict_SetItem(self->intern, result, result) == 0)
00429 return result;
00430 else
00431 return NULL;
00432 }
00433 Py_INCREF(value);
00434 Py_DECREF(result);
00435 return value;
00436 }
00437
00438
00439
00440
00441 static int
00442 call_character_handler(xmlparseobject *self, const XML_Char *buffer, int len)
00443 {
00444 PyObject *args;
00445 PyObject *temp;
00446
00447 args = PyTuple_New(1);
00448 if (args == NULL)
00449 return -1;
00450 #ifdef Py_USING_UNICODE
00451 temp = (self->returns_unicode
00452 ? conv_string_len_to_unicode(buffer, len)
00453 : conv_string_len_to_utf8(buffer, len));
00454 #else
00455 temp = conv_string_len_to_utf8(buffer, len);
00456 #endif
00457 if (temp == NULL) {
00458 Py_DECREF(args);
00459 flag_error(self);
00460 return -1;
00461 }
00462 PyTuple_SET_ITEM(args, 0, temp);
00463
00464 self->in_callback = 1;
00465 temp = call_with_frame(getcode(CharacterData, "CharacterData", __LINE__),
00466 self->handlers[CharacterData], args, self);
00467
00468 self->in_callback = 0;
00469 Py_DECREF(args);
00470 if (temp == NULL) {
00471 flag_error(self);
00472 return -1;
00473 }
00474 Py_DECREF(temp);
00475 return 0;
00476 }
00477
00478 static int
00479 flush_character_buffer(xmlparseobject *self)
00480 {
00481 int rc;
00482 if (self->buffer == NULL || self->buffer_used == 0)
00483 return 0;
00484 rc = call_character_handler(self, self->buffer, self->buffer_used);
00485 self->buffer_used = 0;
00486 return rc;
00487 }
00488
00489 static void
00490 my_CharacterDataHandler(void *userData, const XML_Char *data, int len)
00491 {
00492 xmlparseobject *self = (xmlparseobject *) userData;
00493 if (self->buffer == NULL)
00494 call_character_handler(self, data, len);
00495 else {
00496 if ((self->buffer_used + len) > self->buffer_size) {
00497 if (flush_character_buffer(self) < 0)
00498 return;
00499
00500
00501
00502 if (!have_handler(self, CharacterData))
00503 return;
00504 }
00505 if (len > self->buffer_size) {
00506 call_character_handler(self, data, len);
00507 self->buffer_used = 0;
00508 }
00509 else {
00510 memcpy(self->buffer + self->buffer_used,
00511 data, len * sizeof(XML_Char));
00512 self->buffer_used += len;
00513 }
00514 }
00515 }
00516
00517 static void
00518 my_StartElementHandler(void *userData,
00519 const XML_Char *name, const XML_Char *atts[])
00520 {
00521 xmlparseobject *self = (xmlparseobject *)userData;
00522
00523 if (have_handler(self, StartElement)) {
00524 PyObject *container, *rv, *args;
00525 int i, max;
00526
00527 if (flush_character_buffer(self) < 0)
00528 return;
00529
00530
00531
00532 if (self->specified_attributes) {
00533 max = XML_GetSpecifiedAttributeCount(self->itself);
00534 }
00535 else {
00536 max = 0;
00537 while (atts[max] != NULL)
00538 max += 2;
00539 }
00540
00541 if (self->ordered_attributes)
00542 container = PyList_New(max);
00543 else
00544 container = PyDict_New();
00545 if (container == NULL) {
00546 flag_error(self);
00547 return;
00548 }
00549 for (i = 0; i < max; i += 2) {
00550 PyObject *n = string_intern(self, (XML_Char *) atts[i]);
00551 PyObject *v;
00552 if (n == NULL) {
00553 flag_error(self);
00554 Py_DECREF(container);
00555 return;
00556 }
00557 v = STRING_CONV_FUNC((XML_Char *) atts[i+1]);
00558 if (v == NULL) {
00559 flag_error(self);
00560 Py_DECREF(container);
00561 Py_DECREF(n);
00562 return;
00563 }
00564 if (self->ordered_attributes) {
00565 PyList_SET_ITEM(container, i, n);
00566 PyList_SET_ITEM(container, i+1, v);
00567 }
00568 else if (PyDict_SetItem(container, n, v)) {
00569 flag_error(self);
00570 Py_DECREF(n);
00571 Py_DECREF(v);
00572 return;
00573 }
00574 else {
00575 Py_DECREF(n);
00576 Py_DECREF(v);
00577 }
00578 }
00579 args = Py_BuildValue("(NN)", string_intern(self, name), container);
00580 if (args == NULL) {
00581 Py_DECREF(container);
00582 return;
00583 }
00584
00585 self->in_callback = 1;
00586 rv = call_with_frame(getcode(StartElement, "StartElement", __LINE__),
00587 self->handlers[StartElement], args, self);
00588 self->in_callback = 0;
00589 Py_DECREF(args);
00590 if (rv == NULL) {
00591 flag_error(self);
00592 return;
00593 }
00594 Py_DECREF(rv);
00595 }
00596 }
00597 /* RC_HANDLER generates my_<NAME>Handler: when a Python handler is set for NAME it flushes buffered character data, builds the arguments with Py_BuildValue PARAM_FORMAT, calls the handler, runs CONVERSION on the result and returns RETURN; failures call flag_error(). */
00598 #define RC_HANDLER(RC, NAME, PARAMS, INIT, PARAM_FORMAT, CONVERSION, \
00599 RETURN, GETUSERDATA) \
00600 static RC \
00601 my_##NAME##Handler PARAMS {\
00602 xmlparseobject *self = GETUSERDATA ; \
00603 PyObject *args = NULL; \
00604 PyObject *rv = NULL; \
00605 INIT \
00606 \
00607 if (have_handler(self, NAME)) { \
00608 if (flush_character_buffer(self) < 0) \
00609 return RETURN; \
00610 args = Py_BuildValue PARAM_FORMAT ;\
00611 if (!args) { flag_error(self); return RETURN;} \
00612 self->in_callback = 1; \
00613 rv = call_with_frame(getcode(NAME,#NAME,__LINE__), \
00614 self->handlers[NAME], args, self); \
00615 self->in_callback = 0; \
00616 Py_DECREF(args); \
00617 if (rv == NULL) { \
00618 flag_error(self); \
00619 return RETURN; \
00620 } \
00621 CONVERSION \
00622 Py_DECREF(rv); \
00623 } \
00624 return RETURN; \
00625 }
00626 /* VOID_HANDLER: RC_HANDLER for callbacks that return void and receive the parser object as userData. */
00627 #define VOID_HANDLER(NAME, PARAMS, PARAM_FORMAT) \
00628 RC_HANDLER(void, NAME, PARAMS, ;, PARAM_FORMAT, ;, ;,\
00629 (xmlparseobject *)userData)
00630 /* INT_HANDLER: RC_HANDLER for callbacks that return int; the handler result is converted with PyInt_AsLong (rc stays 0 when no handler runs). */
00631 #define INT_HANDLER(NAME, PARAMS, PARAM_FORMAT)\
00632 RC_HANDLER(int, NAME, PARAMS, int rc=0;, PARAM_FORMAT, \
00633 rc = PyInt_AsLong(rv);, rc, \
00634 (xmlparseobject *)userData)
00635 /* Generates my_EndElementHandler: the Python handler receives the interned element name. */
00636 VOID_HANDLER(EndElement,
00637 (void *userData, const XML_Char *name),
00638 ("(N)", string_intern(self, name)))
00639 /* Generates my_ProcessingInstructionHandler: the Python handler receives (target, data); only target is interned. */
00640 VOID_HANDLER(ProcessingInstruction,
00641 (void *userData,
00642 const XML_Char *target,
00643 const XML_Char *data),
00644 ("(NO&)", string_intern(self, target), STRING_CONV_FUNC,data))
00645 /* Generates my_UnparsedEntityDeclHandler: the Python handler receives (entityName, base, systemId, publicId, notationName), all interned. */
00646 VOID_HANDLER(UnparsedEntityDecl,
00647 (void *userData,
00648 const XML_Char *entityName,
00649 const XML_Char *base,
00650 const XML_Char *systemId,
00651 const XML_Char *publicId,
00652 const XML_Char *notationName),
00653 ("(NNNNN)",
00654 string_intern(self, entityName), string_intern(self, base),
00655 string_intern(self, systemId), string_intern(self, publicId),
00656 string_intern(self, notationName)))
00657 /* Generates my_EntityDeclHandler: the Python handler receives (entityName, is_parameter_entity, value, base, systemId, publicId, notationName). The two variants differ only in how value is converted. */
00658 #ifndef Py_USING_UNICODE
00659 VOID_HANDLER(EntityDecl,
00660 (void *userData,
00661 const XML_Char *entityName,
00662 int is_parameter_entity,
00663 const XML_Char *value,
00664 int value_length,
00665 const XML_Char *base,
00666 const XML_Char *systemId,
00667 const XML_Char *publicId,
00668 const XML_Char *notationName),
00669 ("NiNNNNN",
00670 string_intern(self, entityName), is_parameter_entity,
00671 conv_string_len_to_utf8(value, value_length),
00672 string_intern(self, base), string_intern(self, systemId),
00673 string_intern(self, publicId),
00674 string_intern(self, notationName)))
00675 #else
00676 VOID_HANDLER(EntityDecl,
00677 (void *userData,
00678 const XML_Char *entityName,
00679 int is_parameter_entity,
00680 const XML_Char *value,
00681 int value_length,
00682 const XML_Char *base,
00683 const XML_Char *systemId,
00684 const XML_Char *publicId,
00685 const XML_Char *notationName),
00686 ("NiNNNNN",
00687 string_intern(self, entityName), is_parameter_entity,
00688 (self->returns_unicode
00689 ? conv_string_len_to_unicode(value, value_length)
00690 : conv_string_len_to_utf8(value, value_length)),
00691 string_intern(self, base), string_intern(self, systemId),
00692 string_intern(self, publicId),
00693 string_intern(self, notationName)))
00694 #endif
00695 /* Generates my_XmlDeclHandler: the Python handler receives (version, encoding, standalone); the strings are converted but not interned. */
00696 VOID_HANDLER(XmlDecl,
00697 (void *userData,
00698 const XML_Char *version,
00699 const XML_Char *encoding,
00700 int standalone),
00701 ("(O&O&i)",
00702 STRING_CONV_FUNC,version, STRING_CONV_FUNC,encoding,
00703 standalone))
00704
00705 static PyObject *
00706 conv_content_model(XML_Content * const model,
00707 PyObject *(*conv_string)(const XML_Char *))
00708 {
00709 PyObject *result = NULL;
00710 PyObject *children = PyTuple_New(model->numchildren);
00711 int i;
00712
00713 if (children != NULL) {
00714 assert(model->numchildren < INT_MAX);
00715 for (i = 0; i < (int)model->numchildren; ++i) {
00716 PyObject *child = conv_content_model(&model->children[i],
00717 conv_string);
00718 if (child == NULL) {
00719 Py_XDECREF(children);
00720 return NULL;
00721 }
00722 PyTuple_SET_ITEM(children, i, child);
00723 }
00724 result = Py_BuildValue("(iiO&N)",
00725 model->type, model->quant,
00726 conv_string,model->name, children);
00727 }
00728 return result;
00729 }
00730
00731 static void
00732 my_ElementDeclHandler(void *userData,
00733 const XML_Char *name,
00734 XML_Content *model)
00735 {
00736 xmlparseobject *self = (xmlparseobject *)userData;
00737 PyObject *args = NULL;
00738
00739 if (have_handler(self, ElementDecl)) {
00740 PyObject *rv = NULL;
00741 PyObject *modelobj, *nameobj;
00742
00743 if (flush_character_buffer(self) < 0)
00744 goto finally;
00745 #ifdef Py_USING_UNICODE
00746 modelobj = conv_content_model(model,
00747 (self->returns_unicode
00748 ? conv_string_to_unicode
00749 : conv_string_to_utf8));
00750 #else
00751 modelobj = conv_content_model(model, conv_string_to_utf8);
00752 #endif
00753 if (modelobj == NULL) {
00754 flag_error(self);
00755 goto finally;
00756 }
00757 nameobj = string_intern(self, name);
00758 if (nameobj == NULL) {
00759 Py_DECREF(modelobj);
00760 flag_error(self);
00761 goto finally;
00762 }
00763 args = Py_BuildValue("NN", nameobj, modelobj);
00764 if (args == NULL) {
00765 Py_DECREF(modelobj);
00766 flag_error(self);
00767 goto finally;
00768 }
00769 self->in_callback = 1;
00770 rv = call_with_frame(getcode(ElementDecl, "ElementDecl", __LINE__),
00771 self->handlers[ElementDecl], args, self);
00772 self->in_callback = 0;
00773 if (rv == NULL) {
00774 flag_error(self);
00775 goto finally;
00776 }
00777 Py_DECREF(rv);
00778 }
00779 finally:
00780 Py_XDECREF(args);
00781 XML_FreeContentModel(self->itself, model);
00782 return;
00783 }
00784 /* Generates my_AttlistDeclHandler: the Python handler receives (elname, attname, att_type, dflt, isrequired); only the two names are interned. */
00785 VOID_HANDLER(AttlistDecl,
00786 (void *userData,
00787 const XML_Char *elname,
00788 const XML_Char *attname,
00789 const XML_Char *att_type,
00790 const XML_Char *dflt,
00791 int isrequired),
00792 ("(NNO&O&i)",
00793 string_intern(self, elname), string_intern(self, attname),
00794 STRING_CONV_FUNC,att_type, STRING_CONV_FUNC,dflt,
00795 isrequired))
00796 /* Generates my_SkippedEntityHandler (only when XML_COMBINED_VERSION >= 19504): the Python handler receives (entityName, is_parameter_entity). */
00797 #if XML_COMBINED_VERSION >= 19504
00798 VOID_HANDLER(SkippedEntity,
00799 (void *userData,
00800 const XML_Char *entityName,
00801 int is_parameter_entity),
00802 ("Ni",
00803 string_intern(self, entityName), is_parameter_entity))
00804 #endif
00805 /* Generates my_NotationDeclHandler: the Python handler receives (notationName, base, systemId, publicId), all interned. */
00806 VOID_HANDLER(NotationDecl,
00807 (void *userData,
00808 const XML_Char *notationName,
00809 const XML_Char *base,
00810 const XML_Char *systemId,
00811 const XML_Char *publicId),
00812 ("(NNNN)",
00813 string_intern(self, notationName), string_intern(self, base),
00814 string_intern(self, systemId), string_intern(self, publicId)))
00815 /* Generates my_StartNamespaceDeclHandler: the Python handler receives (prefix, uri), both interned. */
00816 VOID_HANDLER(StartNamespaceDecl,
00817 (void *userData,
00818 const XML_Char *prefix,
00819 const XML_Char *uri),
00820 ("(NN)",
00821 string_intern(self, prefix), string_intern(self, uri)))
00822 /* Generates my_EndNamespaceDeclHandler: the Python handler receives the interned prefix. */
00823 VOID_HANDLER(EndNamespaceDecl,
00824 (void *userData,
00825 const XML_Char *prefix),
00826 ("(N)", string_intern(self, prefix)))
00827 /* Generates my_CommentHandler: the Python handler receives the converted comment text. */
00828 VOID_HANDLER(Comment,
00829 (void *userData, const XML_Char *data),
00830 ("(O&)", STRING_CONV_FUNC,data))
00831 /* Generates my_StartCdataSectionHandler: the Python handler is called with no arguments. */
00832 VOID_HANDLER(StartCdataSection,
00833 (void *userData),
00834 ("()"))
00835 /* Generates my_EndCdataSectionHandler: the Python handler is called with no arguments. */
00836 VOID_HANDLER(EndCdataSection,
00837 (void *userData),
00838 ("()"))
00839 /* Generates my_DefaultHandler and my_DefaultHandlerExpandHandler: each passes the len units at s as one string; the variants differ only in whether unicode conversion is available. */
00840 #ifndef Py_USING_UNICODE
00841 VOID_HANDLER(Default,
00842 (void *userData, const XML_Char *s, int len),
00843 ("(N)", conv_string_len_to_utf8(s,len)))
00844
00845 VOID_HANDLER(DefaultHandlerExpand,
00846 (void *userData, const XML_Char *s, int len),
00847 ("(N)", conv_string_len_to_utf8(s,len)))
00848 #else
00849 VOID_HANDLER(Default,
00850 (void *userData, const XML_Char *s, int len),
00851 ("(N)", (self->returns_unicode
00852 ? conv_string_len_to_unicode(s,len)
00853 : conv_string_len_to_utf8(s,len))))
00854
00855 VOID_HANDLER(DefaultHandlerExpand,
00856 (void *userData, const XML_Char *s, int len),
00857 ("(N)", (self->returns_unicode
00858 ? conv_string_len_to_unicode(s,len)
00859 : conv_string_len_to_utf8(s,len))))
00860 #endif
00861 /* Generates my_NotStandaloneHandler: the Python handler is called with no arguments and its integer result is returned to Expat. */
00862 INT_HANDLER(NotStandalone,
00863 (void *userData),
00864 ("()"))
00865 /* Generates my_ExternalEntityRefHandler: this callback is given the XML_Parser rather than userData, so the parser object is fetched with XML_GetUserData(parser); the handler's integer result is returned to Expat. */
00866 RC_HANDLER(int, ExternalEntityRef,
00867 (XML_Parser parser,
00868 const XML_Char *context,
00869 const XML_Char *base,
00870 const XML_Char *systemId,
00871 const XML_Char *publicId),
00872 int rc=0;,
00873 ("(O&NNN)",
00874 STRING_CONV_FUNC,context, string_intern(self, base),
00875 string_intern(self, systemId), string_intern(self, publicId)),
00876 rc = PyInt_AsLong(rv);, rc,
00877 XML_GetUserData(parser))
00878
00879
00880 /* Generates my_StartDoctypeDeclHandler: the Python handler receives (doctypeName, sysid, pubid, has_internal_subset). */
00881 VOID_HANDLER(StartDoctypeDecl,
00882 (void *userData, const XML_Char *doctypeName,
00883 const XML_Char *sysid, const XML_Char *pubid,
00884 int has_internal_subset),
00885 ("(NNNi)", string_intern(self, doctypeName),
00886 string_intern(self, sysid), string_intern(self, pubid),
00887 has_internal_subset))
00888 /* Generates my_EndDoctypeDeclHandler: the Python handler is called with no arguments. */
00889 VOID_HANDLER(EndDoctypeDecl, (void *userData), ("()"))
00890
00891
00892
00893 static PyObject *
00894 get_parse_result(xmlparseobject *self, int rv)
00895 {
00896 if (PyErr_Occurred()) {
00897 return NULL;
00898 }
00899 if (rv == 0) {
00900 return set_error(self, XML_GetErrorCode(self->itself));
00901 }
00902 if (flush_character_buffer(self) < 0) {
00903 return NULL;
00904 }
00905 return PyInt_FromLong(rv);
00906 }
00907
00908 PyDoc_STRVAR(xmlparse_Parse__doc__,
00909 "Parse(data[, isfinal])\n\
00910 Parse XML data. `isfinal' should be true at end of input.");
00911
00912 static PyObject *
00913 xmlparse_Parse(xmlparseobject *self, PyObject *args)
00914 {
00915 char *s;
00916 int slen;
00917 int isFinal = 0;
00918
00919 if (!PyArg_ParseTuple(args, "s#|i:Parse", &s, &slen, &isFinal))
00920 return NULL;
00921
00922 return get_parse_result(self, XML_Parse(self->itself, s, slen, isFinal));
00923 }
00924
00925
00926 /* Number of bytes requested from the file (or read() method) per iteration in xmlparse_ParseFile. */
00927 #define BUF_SIZE 2048
00928
00929 static int
00930 readinst(char *buf, int buf_size, PyObject *meth)
00931 {
00932 PyObject *arg = NULL;
00933 PyObject *bytes = NULL;
00934 PyObject *str = NULL;
00935 int len = -1;
00936
00937 if ((bytes = PyInt_FromLong(buf_size)) == NULL)
00938 goto finally;
00939
00940 if ((arg = PyTuple_New(1)) == NULL) {
00941 Py_DECREF(bytes);
00942 goto finally;
00943 }
00944
00945 PyTuple_SET_ITEM(arg, 0, bytes);
00946
00947 #if PY_VERSION_HEX < 0x02020000
00948 str = PyObject_CallObject(meth, arg);
00949 #else
00950 str = PyObject_Call(meth, arg, NULL);
00951 #endif
00952 if (str == NULL)
00953 goto finally;
00954
00955
00956 if (!PyString_Check(str)) {
00957 PyErr_Format(PyExc_TypeError,
00958 "read() did not return a string object (type=%.400s)",
00959 str->ob_type->tp_name);
00960 goto finally;
00961 }
00962 len = PyString_GET_SIZE(str);
00963 if (len > buf_size) {
00964 PyErr_Format(PyExc_ValueError,
00965 "read() returned too much data: "
00966 "%i bytes requested, %i returned",
00967 buf_size, len);
00968 goto finally;
00969 }
00970 memcpy(buf, PyString_AsString(str), len);
00971 finally:
00972 Py_XDECREF(arg);
00973 Py_XDECREF(str);
00974 return len;
00975 }
00976
00977 PyDoc_STRVAR(xmlparse_ParseFile__doc__,
00978 "ParseFile(file)\n\
00979 Parse XML data from file-like object.");
00980
00981 static PyObject *
00982 xmlparse_ParseFile(xmlparseobject *self, PyObject *args)
00983 {
00984 int rv = 1;
00985 PyObject *f;
00986 FILE *fp;
00987 PyObject *readmethod = NULL;
00988
00989 if (!PyArg_ParseTuple(args, "O:ParseFile", &f))
00990 return NULL;
00991
00992 if (PyFile_Check(f)) {
00993 fp = PyFile_AsFile(f);
00994 }
00995 else{
00996 fp = NULL;
00997 readmethod = PyObject_GetAttrString(f, "read");
00998 if (readmethod == NULL) {
00999 PyErr_Clear();
01000 PyErr_SetString(PyExc_TypeError,
01001 "argument must have 'read' attribute");
01002 return NULL;
01003 }
01004 }
01005 for (;;) {
01006 int bytes_read;
01007 void *buf = XML_GetBuffer(self->itself, BUF_SIZE);
01008 if (buf == NULL) {
01009 Py_XDECREF(readmethod);
01010 return PyErr_NoMemory();
01011 }
01012
01013 if (fp) {
01014 bytes_read = fread(buf, sizeof(char), BUF_SIZE, fp);
01015 if (bytes_read < 0) {
01016 PyErr_SetFromErrno(PyExc_IOError);
01017 return NULL;
01018 }
01019 }
01020 else {
01021 bytes_read = readinst(buf, BUF_SIZE, readmethod);
01022 if (bytes_read < 0) {
01023 Py_DECREF(readmethod);
01024 return NULL;
01025 }
01026 }
01027 rv = XML_ParseBuffer(self->itself, bytes_read, bytes_read == 0);
01028 if (PyErr_Occurred()) {
01029 Py_XDECREF(readmethod);
01030 return NULL;
01031 }
01032
01033 if (!rv || bytes_read == 0)
01034 break;
01035 }
01036 Py_XDECREF(readmethod);
01037 return get_parse_result(self, rv);
01038 }
01039
01040 PyDoc_STRVAR(xmlparse_SetBase__doc__,
01041 "SetBase(base_url)\n\
01042 Set the base URL for the parser.");
01043
01044 static PyObject *
01045 xmlparse_SetBase(xmlparseobject *self, PyObject *args)
01046 {
01047 char *base;
01048
01049 if (!PyArg_ParseTuple(args, "s:SetBase", &base))
01050 return NULL;
01051 if (!XML_SetBase(self->itself, base)) {
01052 return PyErr_NoMemory();
01053 }
01054 Py_INCREF(Py_None);
01055 return Py_None;
01056 }
01057
01058 PyDoc_STRVAR(xmlparse_GetBase__doc__,
01059 "GetBase() -> url\n\
01060 Return base URL string for the parser.");
01061
01062 static PyObject *
01063 xmlparse_GetBase(xmlparseobject *self, PyObject *args)
01064 {
01065 if (!PyArg_ParseTuple(args, ":GetBase"))
01066 return NULL;
01067
01068 return Py_BuildValue("z", XML_GetBase(self->itself));
01069 }
01070
01071 PyDoc_STRVAR(xmlparse_GetInputContext__doc__,
01072 "GetInputContext() -> string\n\
01073 Return the untranslated text of the input that caused the current event.\n\
01074 If the event was generated by a large amount of text (such as a start tag\n\
01075 for an element with many attributes), not all of the text may be available.");
01076
01077 static PyObject *
01078 xmlparse_GetInputContext(xmlparseobject *self, PyObject *args)
01079 {
01080 PyObject *result = NULL;
01081
01082 if (PyArg_ParseTuple(args, ":GetInputContext")) {
01083 if (self->in_callback) {
01084 int offset, size;
01085 const char *buffer
01086 = XML_GetInputContext(self->itself, &offset, &size);
01087
01088 if (buffer != NULL)
01089 result = PyString_FromStringAndSize(buffer + offset, size);
01090 else {
01091 result = Py_None;
01092 Py_INCREF(result);
01093 }
01094 }
01095 else {
01096 result = Py_None;
01097 Py_INCREF(result);
01098 }
01099 }
01100 return result;
01101 }
01102
01103 PyDoc_STRVAR(xmlparse_ExternalEntityParserCreate__doc__,
01104 "ExternalEntityParserCreate(context[, encoding])\n\
01105 Create a parser for parsing an external entity based on the\n\
01106 information passed to the ExternalEntityRefHandler.");
01107 /* Python method: builds a new xmlparseobject around XML_ExternalEntityParserCreate(self->itself, ...), copying settings, the intern dict and the handlers from self. Returns the new object, or NULL with an exception set. */
01108 static PyObject *
01109 xmlparse_ExternalEntityParserCreate(xmlparseobject *self, PyObject *args)
01110 {
01111 char *context;
01112 char *encoding = NULL;
01113 xmlparseobject *new_parser;
01114 int i;
01115
01116 if (!PyArg_ParseTuple(args, "z|s:ExternalEntityParserCreate",
01117 &context, &encoding)) {
01118 return NULL;
01119 }
01120 /* Allocate with PyObject_New or PyObject_GC_New, depending on whether Py_TPFLAGS_HAVE_GC is defined. */
01121 #ifndef Py_TPFLAGS_HAVE_GC
01122
01123 new_parser = PyObject_New(xmlparseobject, &Xmlparsetype);
01124 #else
01125
01126 new_parser = PyObject_GC_New(xmlparseobject, &Xmlparsetype);
01127 #endif
01128
01129 if (new_parser == NULL)
01130 return NULL;
01131 new_parser->buffer_size = self->buffer_size;
01132 new_parser->buffer_used = 0;
01133 if (self->buffer != NULL) { /* the parent buffers character data: give the child its own buffer */
01134 new_parser->buffer = malloc(new_parser->buffer_size);
01135 if (new_parser->buffer == NULL) {
01136 #ifndef Py_TPFLAGS_HAVE_GC
01137
01138 PyObject_Del(new_parser);
01139 #else
01140
01141 PyObject_GC_Del(new_parser);
01142 #endif
01143 return PyErr_NoMemory();
01144 }
01145 }
01146 else
01147 new_parser->buffer = NULL;
01148 new_parser->returns_unicode = self->returns_unicode;
01149 new_parser->ordered_attributes = self->ordered_attributes;
01150 new_parser->specified_attributes = self->specified_attributes;
01151 new_parser->in_callback = 0;
01152 new_parser->ns_prefixes = self->ns_prefixes;
01153 new_parser->itself = XML_ExternalEntityParserCreate(self->itself, context,
01154 encoding);
01155 new_parser->handlers = 0; /* set before the Py_DECREF(new_parser) failure paths below can run */
01156 new_parser->intern = self->intern; /* the child shares the parent's intern dict */
01157 Py_XINCREF(new_parser->intern);
01158 #ifdef Py_TPFLAGS_HAVE_GC
01159 PyObject_GC_Track(new_parser);
01160 #else
01161 PyObject_GC_Init(new_parser);
01162 #endif
01163 /* XML_ExternalEntityParserCreate returned NULL: release the new object and report out-of-memory. */
01164 if (!new_parser->itself) {
01165 Py_DECREF(new_parser);
01166 return PyErr_NoMemory();
01167 }
01168 /* Make the child Expat parser hand the new Python object to its callbacks as userData. */
01169 XML_SetUserData(new_parser->itself, (void *)new_parser);
01170
01171 /* Count the entries of handler_info (terminated by name == NULL) to size the handlers array. */
01172 for (i = 0; handler_info[i].name != NULL; i++)
01173 ;
01174
01175 new_parser->handlers = malloc(sizeof(PyObject *) * i);
01176 if (!new_parser->handlers) {
01177 Py_DECREF(new_parser);
01178 return PyErr_NoMemory();
01179 }
01180 clear_handlers(new_parser, 1);
01181
01182 /* Copy every handler set on the parent and register the matching C callback on the child parser. */
01183 for (i = 0; handler_info[i].name != NULL; i++) {
01184 PyObject *handler = self->handlers[i];
01185 if (handler != NULL) {
01186 Py_INCREF(handler);
01187 new_parser->handlers[i] = handler;
01188 handler_info[i].setter(new_parser->itself,
01189 handler_info[i].handler);
01190 }
01191 }
01192 return (PyObject *)new_parser;
01193 }
01194
01195 PyDoc_STRVAR(xmlparse_SetParamEntityParsing__doc__,
01196 "SetParamEntityParsing(flag) -> success\n\
01197 Controls parsing of parameter entities (including the external DTD\n\
01198 subset). Possible flag values are XML_PARAM_ENTITY_PARSING_NEVER,\n\
01199 XML_PARAM_ENTITY_PARSING_UNLESS_STANDALONE and\n\
01200 XML_PARAM_ENTITY_PARSING_ALWAYS. Returns true if setting the flag\n\
01201 was successful.");
01202
01203 static PyObject*
01204 xmlparse_SetParamEntityParsing(xmlparseobject *p, PyObject* args)
01205 {
01206 int flag;
01207 if (!PyArg_ParseTuple(args, "i", &flag))
01208 return NULL;
01209 flag = XML_SetParamEntityParsing(p->itself, flag);
01210 return PyInt_FromLong(flag);
01211 }
01212
01213
01214 #if XML_COMBINED_VERSION >= 19505
01215 PyDoc_STRVAR(xmlparse_UseForeignDTD__doc__,
01216 "UseForeignDTD([flag])\n\
01217 Allows the application to provide an artificial external subset if one is\n\
01218 not specified as part of the document instance. This readily allows the\n\
01219 use of a 'default' document type controlled by the application, while still\n\
01220 getting the advantage of providing document type information to the parser.\n\
01221 'flag' defaults to True if not provided.");
01222
01223 static PyObject *
01224 xmlparse_UseForeignDTD(xmlparseobject *self, PyObject *args)
01225 {
01226 PyObject *flagobj = NULL;
01227 XML_Bool flag = XML_TRUE;
01228 enum XML_Error rc;
01229 if (!PyArg_ParseTuple(args, "|O:UseForeignDTD", &flagobj))
01230 return NULL;
01231 if (flagobj != NULL)
01232 flag = PyObject_IsTrue(flagobj) ? XML_TRUE : XML_FALSE;
01233 rc = XML_UseForeignDTD(self->itself, flag);
01234 if (rc != XML_ERROR_NONE) {
01235 return set_error(self, rc);
01236 }
01237 Py_INCREF(Py_None);
01238 return Py_None;
01239 }
01240 #endif
01241 /* Method table: Python-visible method names mapped to their C implementations and docstrings; every entry uses METH_VARARGS and the table ends with a NULL sentinel. */
01242 static struct PyMethodDef xmlparse_methods[] = {
01243 {"Parse", (PyCFunction)xmlparse_Parse,
01244 METH_VARARGS, xmlparse_Parse__doc__},
01245 {"ParseFile", (PyCFunction)xmlparse_ParseFile,
01246 METH_VARARGS, xmlparse_ParseFile__doc__},
01247 {"SetBase", (PyCFunction)xmlparse_SetBase,
01248 METH_VARARGS, xmlparse_SetBase__doc__},
01249 {"GetBase", (PyCFunction)xmlparse_GetBase,
01250 METH_VARARGS, xmlparse_GetBase__doc__},
01251 {"ExternalEntityParserCreate", (PyCFunction)xmlparse_ExternalEntityParserCreate,
01252 METH_VARARGS, xmlparse_ExternalEntityParserCreate__doc__},
01253 {"SetParamEntityParsing", (PyCFunction)xmlparse_SetParamEntityParsing,
01254 METH_VARARGS, xmlparse_SetParamEntityParsing__doc__},
01255 {"GetInputContext", (PyCFunction)xmlparse_GetInputContext,
01256 METH_VARARGS, xmlparse_GetInputContext__doc__},
01257 #if XML_COMBINED_VERSION >= 19505
01258 {"UseForeignDTD", (PyCFunction)xmlparse_UseForeignDTD,
01259 METH_VARARGS, xmlparse_UseForeignDTD__doc__},
01260 #endif
01261 {NULL, NULL}
01262 };
01263
01264
01265
01266
01267 #ifdef Py_USING_UNICODE
01268
01269 /* template_buffer holds the byte values 0..255 in order plus a terminating 0 */
01270 /* (filled by init_template_buffer); PyUnknownEncodingHandler decodes it to   */
01271 /* build the byte -> code point map for an encoding.                          */
01272 /* NOTE(review): template_string is not referenced anywhere in this section.  */
01273 static char template_buffer[257];
01274 PyObject *template_string = NULL;
01275
01276 static void
01277 init_template_buffer(void)
01278 {
01279 int i;
01280 for (i = 0; i < 256; i++) {
01281 template_buffer[i] = i;
01282 }
01283 template_buffer[256] = 0;
01284 }
01285
01286 static int
01287 PyUnknownEncodingHandler(void *encodingHandlerData,
01288 const XML_Char *name,
01289 XML_Encoding *info)
01290 {
01291 PyUnicodeObject *_u_string = NULL;
01292 int result = 0;
01293 int i;
01294
01295
01296 _u_string = (PyUnicodeObject *)
01297 PyUnicode_Decode(template_buffer, 256, name, "replace");
01298
01299 if (_u_string == NULL)
01300 return result;
01301
01302 for (i = 0; i < 256; i++) {
01303
01304 Py_UNICODE c = _u_string->str[i];
01305 if (c == Py_UNICODE_REPLACEMENT_CHARACTER)
01306 info->map[i] = -1;
01307 else
01308 info->map[i] = c;
01309 }
01310 info->data = NULL;
01311 info->convert = NULL;
01312 info->release = NULL;
01313 result = 1;
01314 Py_DECREF(_u_string);
01315 return result;
01316 }
01317
01318 #endif
01319
01320 static PyObject *
01321 newxmlparseobject(char *