Main Page | Class List | Directories | File List | Class Members | File Members

pyexpat.c

Go to the documentation of this file.
00001 /* Based on Python's pyexpat.c, see the revision number in
00002  * get_version_string().  After integrating a new version from Python,
00003  * the version string in get_version_string() must be corrected.
00004  */
00005 #include "Python.h"
00006 #include <ctype.h>
00007 
00008 #include "compile.h"
00009 #include "frameobject.h"
00010 #include "expat.h"
00011 
/* Expat's version folded into one integer (major*10000 + minor*100 + micro)
 * so it can be compared in preprocessor conditionals.
 */
#define XML_COMBINED_VERSION \
    (10000*XML_MAJOR_VERSION+100*XML_MINOR_VERSION+XML_MICRO_VERSION)

#ifndef PyDoc_STRVAR

/*
 * Fallback docstring macros, used only when the Python headers do not
 * already define PyDoc_STRVAR.
 *
 * fdrake says:
 * Don't change the PyDoc_STR macro definition to (str), because
 * '''the parentheses cause compile failures
 * ("non-constant static initializer" or something like that)
 * on some platforms (Irix?)'''
 */
#define PyDoc_STR(str)         str
#define PyDoc_VAR(name)        static char name[]
#define PyDoc_STRVAR(name,str) PyDoc_VAR(name) = PyDoc_STR(str)
#endif

#if !(PY_MAJOR_VERSION == 2 && PY_MINOR_VERSION < 2)
/* Everything except Python 2.0/2.1 gets the explicit trace/profile calls. */
#define FIX_TRACE
#else
/* In Python 2.0 and  2.1, disabling Unicode was not possible. */
#define Py_USING_UNICODE
#endif
00034 
/* Slot numbers for the per-parser handler table (xmlparseobject.handlers)
 * and for handler_info[].  The order of the enumerators defines the slot
 * index, so it must not be changed.
 */
enum HandlerTypes {
    /* element and character content */
    StartElement,
    EndElement,
    ProcessingInstruction,
    CharacterData,
    /* declarations and namespaces */
    UnparsedEntityDecl,
    NotationDecl,
    StartNamespaceDecl,
    EndNamespaceDecl,
    /* comments and CDATA sections */
    Comment,
    StartCdataSection,
    EndCdataSection,
    /* default handlers */
    Default,
    DefaultHandlerExpand,
    /* handlers that return a status to Expat */
    NotStandalone,
    ExternalEntityRef,
    /* DTD-related events */
    StartDoctypeDecl,
    EndDoctypeDecl,
    EntityDecl,
    XmlDecl,
    ElementDecl,
    AttlistDecl,
#if XML_COMBINED_VERSION >= 19504
    /* only compiled in for Expat 1.95.4 and later */
    SkippedEntity,
#endif
    /* trailing placeholder; keeps the list free of a dangling comma */
    _DummyDecl
};
00062 
00063 static PyObject *ErrorObject;
00064 
00065 /* ----------------------------------------------------- */
00066 
00067 /* Declarations for objects of type xmlparser */
00068 
00069 typedef struct {
00070     PyObject_HEAD
00071 
00072     XML_Parser itself;
00073     int returns_unicode;        /* True if Unicode strings are returned;
00074                                    if false, UTF-8 strings are returned */
00075     int ordered_attributes;     /* Return attributes as a list. */
00076     int specified_attributes;   /* Report only specified attributes. */
00077     int in_callback;            /* Is a callback active? */
00078     int ns_prefixes;            /* Namespace-triplets mode? */
00079     XML_Char *buffer;           /* Buffer used when accumulating characters */
00080                                 /* NULL if not enabled */
00081     int buffer_size;            /* Size of buffer, in XML_Char units */
00082     int buffer_used;            /* Buffer units in use */
00083     PyObject *intern;           /* Dictionary to intern strings */
00084     PyObject **handlers;
00085 } xmlparseobject;
00086 
00087 #define CHARACTER_DATA_BUFFER_SIZE 8192
00088 
00089 static PyTypeObject Xmlparsetype;
00090 
00091 typedef void (*xmlhandlersetter)(XML_Parser self, void *meth);
00092 typedef void* xmlhandler;
00093 
00094 struct HandlerInfo {
00095     const char *name;
00096     xmlhandlersetter setter;
00097     xmlhandler handler;
00098     PyCodeObject *tb_code;
00099     PyObject *nameobj;
00100 };
00101 
00102 static struct HandlerInfo handler_info[64];
00103 
00104 /* Set an integer attribute on the error object; return true on success,
00105  * false on an exception.
00106  */
00107 static int
00108 set_error_attr(PyObject *err, char *name, int value)
00109 {
00110     PyObject *v = PyInt_FromLong(value);
00111 
00112     if (v != NULL && PyObject_SetAttrString(err, name, v) == -1) {
00113         Py_DECREF(v);
00114         return 0;
00115     }
00116     Py_DECREF(v);
00117     return 1;
00118 }
00119 
00120 /* Build and set an Expat exception, including positioning
00121  * information.  Always returns NULL.
00122  */
00123 static PyObject *
00124 set_error(xmlparseobject *self, enum XML_Error code)
00125 {
00126     PyObject *err;
00127     char buffer[256];
00128     XML_Parser parser = self->itself;
00129     int lineno = XML_GetErrorLineNumber(parser);
00130     int column = XML_GetErrorColumnNumber(parser);
00131 
00132     /* There is no risk of overflowing this buffer, since
00133        even for 64-bit integers, there is sufficient space. */
00134     sprintf(buffer, "%.200s: line %i, column %i",
00135             XML_ErrorString(code), lineno, column);
00136     err = PyObject_CallFunction(ErrorObject, "s", buffer);
00137     if (  err != NULL
00138           && set_error_attr(err, "code", code)
00139           && set_error_attr(err, "offset", column)
00140           && set_error_attr(err, "lineno", lineno)) {
00141         PyErr_SetObject(ErrorObject, err);
00142     }
00143     Py_DECREF(err);
00144     return NULL;
00145 }
00146 
00147 static int
00148 have_handler(xmlparseobject *self, int type)
00149 {
00150     PyObject *handler = self->handlers[type];
00151     return handler != NULL;
00152 }
00153 
00154 static PyObject *
00155 get_handler_name(struct HandlerInfo *hinfo)
00156 {
00157     PyObject *name = hinfo->nameobj;
00158     if (name == NULL) {
00159         name = PyString_FromString(hinfo->name);
00160         hinfo->nameobj = name;
00161     }
00162     Py_XINCREF(name);
00163     return name;
00164 }
00165 
00166 
00167 #ifdef Py_USING_UNICODE
00168 /* Convert a string of XML_Chars into a Unicode string.
00169    Returns None if str is a null pointer. */
00170 
00171 static PyObject *
00172 conv_string_to_unicode(const XML_Char *str)
00173 {
00174     /* XXX currently this code assumes that XML_Char is 8-bit,
00175        and hence in UTF-8.  */
00176     /* UTF-8 from Expat, Unicode desired */
00177     if (str == NULL) {
00178         Py_INCREF(Py_None);
00179         return Py_None;
00180     }
00181     return PyUnicode_DecodeUTF8(str, strlen(str), "strict");
00182 }
00183 
00184 static PyObject *
00185 conv_string_len_to_unicode(const XML_Char *str, int len)
00186 {
00187     /* XXX currently this code assumes that XML_Char is 8-bit,
00188        and hence in UTF-8.  */
00189     /* UTF-8 from Expat, Unicode desired */
00190     if (str == NULL) {
00191         Py_INCREF(Py_None);
00192         return Py_None;
00193     }
00194     return PyUnicode_DecodeUTF8((const char *)str, len, "strict");
00195 }
00196 #endif
00197 
00198 /* Convert a string of XML_Chars into an 8-bit Python string.
00199    Returns None if str is a null pointer. */
00200 
00201 static PyObject *
00202 conv_string_to_utf8(const XML_Char *str)
00203 {
00204     /* XXX currently this code assumes that XML_Char is 8-bit,
00205        and hence in UTF-8.  */
00206     /* UTF-8 from Expat, UTF-8 desired */
00207     if (str == NULL) {
00208         Py_INCREF(Py_None);
00209         return Py_None;
00210     }
00211     return PyString_FromString(str);
00212 }
00213 
00214 static PyObject *
00215 conv_string_len_to_utf8(const XML_Char *str, int len)
00216 {
00217     /* XXX currently this code assumes that XML_Char is 8-bit,
00218        and hence in UTF-8.  */
00219     /* UTF-8 from Expat, UTF-8 desired */
00220     if (str == NULL) {
00221         Py_INCREF(Py_None);
00222         return Py_None;
00223     }
00224     return PyString_FromStringAndSize((const char *)str, len);
00225 }
00226 
00227 /* Callback routines */
00228 
00229 static void clear_handlers(xmlparseobject *self, int initial);
00230 
00231 /* This handler is used when an error has been detected, in the hope
00232    that actual parsing can be terminated early.  This will only help
00233    if an external entity reference is encountered. */
00234 static int
00235 error_external_entity_ref_handler(XML_Parser parser,
00236                                   const XML_Char *context,
00237                                   const XML_Char *base,
00238                                   const XML_Char *systemId,
00239                                   const XML_Char *publicId)
00240 {
00241     return 0;
00242 }
00243 
00244 static void
00245 flag_error(xmlparseobject *self)
00246 {
00247     clear_handlers(self, 0);
00248     XML_SetExternalEntityRefHandler(self->itself,
00249                                     error_external_entity_ref_handler);
00250 }
00251 
00252 static PyCodeObject*
00253 getcode(enum HandlerTypes slot, char* func_name, int lineno)
00254 {
00255     PyObject *code = NULL;
00256     PyObject *name = NULL;
00257     PyObject *nulltuple = NULL;
00258     PyObject *filename = NULL;
00259 
00260     if (handler_info[slot].tb_code == NULL) {
00261         code = PyString_FromString("");
00262         if (code == NULL)
00263             goto failed;
00264         name = PyString_FromString(func_name);
00265         if (name == NULL)
00266             goto failed;
00267         nulltuple = PyTuple_New(0);
00268         if (nulltuple == NULL)
00269             goto failed;
00270         filename = PyString_FromString(__FILE__);
00271         handler_info[slot].tb_code =
00272             PyCode_New(0,               /* argcount */
00273                        0,               /* nlocals */
00274                        0,               /* stacksize */
00275                        0,               /* flags */
00276                        code,            /* code */
00277                        nulltuple,       /* consts */
00278                        nulltuple,       /* names */
00279                        nulltuple,       /* varnames */
00280 #if PYTHON_API_VERSION >= 1010
00281                        nulltuple,       /* freevars */
00282                        nulltuple,       /* cellvars */
00283 #endif
00284                        filename,        /* filename */
00285                        name,            /* name */
00286                        lineno,          /* firstlineno */
00287                        code             /* lnotab */
00288                        );
00289         if (handler_info[slot].tb_code == NULL)
00290             goto failed;
00291         Py_DECREF(code);
00292         Py_DECREF(nulltuple);
00293         Py_DECREF(filename);
00294         Py_DECREF(name);
00295     }
00296     return handler_info[slot].tb_code;
00297  failed:
00298     Py_XDECREF(code);
00299     Py_XDECREF(name);
00300     return NULL;
00301 }
00302 
00303 #ifdef FIX_TRACE
00304 static int
00305 trace_frame(PyThreadState *tstate, PyFrameObject *f, int code, PyObject *val)
00306 {
00307     int result = 0;
00308     if (!tstate->use_tracing || tstate->tracing)
00309         return 0;
00310     if (tstate->c_profilefunc != NULL) {
00311         tstate->tracing++;
00312         result = tstate->c_profilefunc(tstate->c_profileobj,
00313                                        f, code , val);
00314         tstate->use_tracing = ((tstate->c_tracefunc != NULL)
00315                                || (tstate->c_profilefunc != NULL));
00316         tstate->tracing--;
00317         if (result)
00318             return result;
00319     }
00320     if (tstate->c_tracefunc != NULL) {
00321         tstate->tracing++;
00322         result = tstate->c_tracefunc(tstate->c_traceobj,
00323                                      f, code , val);
00324         tstate->use_tracing = ((tstate->c_tracefunc != NULL)
00325                                || (tstate->c_profilefunc != NULL));
00326         tstate->tracing--;
00327     }   
00328     return result;
00329 }
00330 
00331 static int
00332 trace_frame_exc(PyThreadState *tstate, PyFrameObject *f)
00333 {
00334     PyObject *type, *value, *traceback, *arg;
00335     int err;
00336 
00337     if (tstate->c_tracefunc == NULL)
00338         return 0;
00339 
00340     PyErr_Fetch(&type, &value, &traceback);
00341     if (value == NULL) {
00342         value = Py_None;
00343         Py_INCREF(value);
00344     }
00345 #if PY_VERSION_HEX < 0x02040000
00346     arg = Py_BuildValue("(OOO)", type, value, traceback);
00347 #else
00348     arg = PyTuple_Pack(3, type, value, traceback);
00349 #endif
00350     if (arg == NULL) {
00351         PyErr_Restore(type, value, traceback);
00352         return 0;
00353     }
00354     err = trace_frame(tstate, f, PyTrace_EXCEPTION, arg);
00355     Py_DECREF(arg);
00356     if (err == 0)
00357         PyErr_Restore(type, value, traceback);
00358     else {
00359         Py_XDECREF(type);
00360         Py_XDECREF(value);
00361         Py_XDECREF(traceback);
00362     }
00363     return err;
00364 }
00365 #endif
00366 
00367 static PyObject*
00368 call_with_frame(PyCodeObject *c, PyObject* func, PyObject* args,
00369                 xmlparseobject *self)
00370 {
00371     PyThreadState *tstate = PyThreadState_GET();
00372     PyFrameObject *f;
00373     PyObject *res;
00374 
00375     if (c == NULL)
00376         return NULL;
00377     
00378     f = PyFrame_New(tstate, c, PyEval_GetGlobals(), NULL);
00379     if (f == NULL)
00380         return NULL;
00381     tstate->frame = f;
00382 #ifdef FIX_TRACE
00383     if (trace_frame(tstate, f, PyTrace_CALL, Py_None) < 0) {
00384         return NULL;
00385     }
00386 #endif
00387     res = PyEval_CallObject(func, args);
00388     if (res == NULL) {
00389         if (tstate->curexc_traceback == NULL)
00390             PyTraceBack_Here(f);
00391         XML_StopParser(self->itself, XML_FALSE);
00392 #ifdef FIX_TRACE
00393         if (trace_frame_exc(tstate, f) < 0) {
00394             return NULL;
00395         }
00396     }
00397     else {
00398         if (trace_frame(tstate, f, PyTrace_RETURN, res) < 0) {
00399             Py_XDECREF(res);
00400             res = NULL;
00401         }
00402     }
00403 #else
00404     }
00405 #endif
00406     tstate->frame = f->f_back;
00407     Py_DECREF(f);
00408     return res;
00409 }
00410 
/* STRING_CONV_FUNC names the function that converts a NUL-terminated
 * XML_Char string to a Python object.  With Unicode support the choice
 * depends on a variable `self` (an xmlparseobject *) that must be in
 * scope where the macro is used.
 */
#ifdef Py_USING_UNICODE
/* Python 2.0 and later versions, when built with Unicode support */
#define STRING_CONV_FUNC (self->returns_unicode \
                          ? conv_string_to_unicode : conv_string_to_utf8)
#else
#define STRING_CONV_FUNC conv_string_to_utf8
#endif
00418 
00419 static PyObject*
00420 string_intern(xmlparseobject *self, const char* str)
00421 {
00422     PyObject *result = STRING_CONV_FUNC(str);
00423     PyObject *value;
00424     if (!self->intern)
00425         return result;
00426     value = PyDict_GetItem(self->intern, result);
00427     if (!value) {
00428         if (PyDict_SetItem(self->intern, result, result) == 0)
00429             return result;
00430         else
00431             return NULL;
00432     }
00433     Py_INCREF(value);
00434     Py_DECREF(result);
00435     return value;
00436 }
00437 
00438 /* Return 0 on success, -1 on exception.
00439  * flag_error() will be called before return if needed.
00440  */
00441 static int
00442 call_character_handler(xmlparseobject *self, const XML_Char *buffer, int len)
00443 {
00444     PyObject *args;
00445     PyObject *temp;
00446 
00447     args = PyTuple_New(1);
00448     if (args == NULL)
00449         return -1;
00450 #ifdef Py_USING_UNICODE
00451     temp = (self->returns_unicode 
00452             ? conv_string_len_to_unicode(buffer, len) 
00453             : conv_string_len_to_utf8(buffer, len));
00454 #else
00455     temp = conv_string_len_to_utf8(buffer, len);
00456 #endif
00457     if (temp == NULL) {
00458         Py_DECREF(args);
00459         flag_error(self);
00460         return -1;
00461     }
00462     PyTuple_SET_ITEM(args, 0, temp);
00463     /* temp is now a borrowed reference; consider it unused. */
00464     self->in_callback = 1;
00465     temp = call_with_frame(getcode(CharacterData, "CharacterData", __LINE__),
00466                            self->handlers[CharacterData], args, self);
00467     /* temp is an owned reference again, or NULL */
00468     self->in_callback = 0;
00469     Py_DECREF(args);
00470     if (temp == NULL) {
00471         flag_error(self);
00472         return -1;
00473     }
00474     Py_DECREF(temp);
00475     return 0;
00476 }
00477 
00478 static int
00479 flush_character_buffer(xmlparseobject *self)
00480 {
00481     int rc;
00482     if (self->buffer == NULL || self->buffer_used == 0)
00483         return 0;
00484     rc = call_character_handler(self, self->buffer, self->buffer_used);
00485     self->buffer_used = 0;
00486     return rc;
00487 }
00488 
00489 static void
00490 my_CharacterDataHandler(void *userData, const XML_Char *data, int len) 
00491 {
00492     xmlparseobject *self = (xmlparseobject *) userData;
00493     if (self->buffer == NULL)
00494         call_character_handler(self, data, len);
00495     else {
00496         if ((self->buffer_used + len) > self->buffer_size) {
00497             if (flush_character_buffer(self) < 0)
00498                 return;
00499             /* handler might have changed; drop the rest on the floor
00500              * if there isn't a handler anymore
00501              */
00502             if (!have_handler(self, CharacterData))
00503                 return;
00504         }
00505         if (len > self->buffer_size) {
00506             call_character_handler(self, data, len);
00507             self->buffer_used = 0;
00508         }
00509         else {
00510             memcpy(self->buffer + self->buffer_used,
00511                    data, len * sizeof(XML_Char));
00512             self->buffer_used += len;
00513         }
00514     }
00515 }
00516 
00517 static void
00518 my_StartElementHandler(void *userData,
00519                        const XML_Char *name, const XML_Char *atts[])
00520 {
00521     xmlparseobject *self = (xmlparseobject *)userData;
00522 
00523     if (have_handler(self, StartElement)) {
00524         PyObject *container, *rv, *args;
00525         int i, max;
00526 
00527         if (flush_character_buffer(self) < 0)
00528             return;
00529         /* Set max to the number of slots filled in atts[]; max/2 is
00530          * the number of attributes we need to process.
00531          */
00532         if (self->specified_attributes) {
00533             max = XML_GetSpecifiedAttributeCount(self->itself);
00534         }
00535         else {
00536             max = 0;
00537             while (atts[max] != NULL)
00538                 max += 2;
00539         }
00540         /* Build the container. */
00541         if (self->ordered_attributes)
00542             container = PyList_New(max);
00543         else
00544             container = PyDict_New();
00545         if (container == NULL) {
00546             flag_error(self);
00547             return;
00548         }
00549         for (i = 0; i < max; i += 2) {
00550             PyObject *n = string_intern(self, (XML_Char *) atts[i]);
00551             PyObject *v;
00552             if (n == NULL) {
00553                 flag_error(self);
00554                 Py_DECREF(container);
00555                 return;
00556             }
00557             v = STRING_CONV_FUNC((XML_Char *) atts[i+1]);
00558             if (v == NULL) {
00559                 flag_error(self);
00560                 Py_DECREF(container);
00561                 Py_DECREF(n);
00562                 return;
00563             }
00564             if (self->ordered_attributes) {
00565                 PyList_SET_ITEM(container, i, n);
00566                 PyList_SET_ITEM(container, i+1, v);
00567             }
00568             else if (PyDict_SetItem(container, n, v)) {
00569                 flag_error(self);
00570                 Py_DECREF(n);
00571                 Py_DECREF(v);
00572                 return;
00573             }
00574             else {
00575                 Py_DECREF(n);
00576                 Py_DECREF(v);
00577             }
00578         }
00579         args = Py_BuildValue("(NN)", string_intern(self, name), container);
00580         if (args == NULL) {
00581             Py_DECREF(container);
00582             return;
00583         }
00584         /* Container is now a borrowed reference; ignore it. */
00585         self->in_callback = 1;
00586         rv = call_with_frame(getcode(StartElement, "StartElement", __LINE__),
00587                              self->handlers[StartElement], args, self);
00588         self->in_callback = 0;
00589         Py_DECREF(args);
00590         if (rv == NULL) {
00591             flag_error(self);
00592             return;
00593         }
00594         Py_DECREF(rv);
00595     }
00596 }
00597 
/* Generate the Expat callback my_<NAME>Handler.
 *
 *   RC           - return type of the generated function
 *   NAME         - handler slot (enum HandlerTypes) and function name part
 *   PARAMS       - parenthesized parameter list
 *   INIT         - extra declaration/statement placed after the locals
 *   PARAM_FORMAT - parenthesized Py_BuildValue() argument list
 *   CONVERSION   - statement run on the handler's result `rv`
 *   RETURN       - expression placed after every `return`
 *   GETUSERDATA  - expression yielding the xmlparseobject pointer
 *
 * The generated function does nothing if no Python handler is set.
 * Otherwise it flushes buffered character data, builds the argument
 * tuple, calls the handler through call_with_frame() and calls
 * flag_error() if the arguments cannot be built or the handler fails.
 */
#define RC_HANDLER(RC, NAME, PARAMS, INIT, PARAM_FORMAT, CONVERSION, \
                RETURN, GETUSERDATA) \
static RC \
my_##NAME##Handler PARAMS {\
    xmlparseobject *self = GETUSERDATA ; \
    PyObject *args = NULL; \
    PyObject *rv = NULL; \
    INIT \
\
    if (!have_handler(self, NAME)) \
        return RETURN; \
    if (flush_character_buffer(self) < 0) \
        return RETURN; \
    args = Py_BuildValue PARAM_FORMAT ;\
    if (args == NULL) { \
        flag_error(self); \
        return RETURN; \
    } \
    self->in_callback = 1; \
    rv = call_with_frame(getcode(NAME,#NAME,__LINE__), \
                         self->handlers[NAME], args, self); \
    self->in_callback = 0; \
    Py_DECREF(args); \
    if (rv == NULL) { \
        flag_error(self); \
        return RETURN; \
    } \
    CONVERSION \
    Py_DECREF(rv); \
    return RETURN; \
}
00626 
/* Handler returning nothing; the parser object is taken from userData. */
#define VOID_HANDLER(NAME, PARAMS, PARAM_FORMAT) \
        RC_HANDLER(void, NAME, PARAMS, ;, PARAM_FORMAT, ;, ;, \
                   (xmlparseobject *)userData)

/* Handler returning an int: 0 if no handler is set or an error occurs,
 * otherwise the Python handler's result converted with PyInt_AsLong().
 */
#define INT_HANDLER(NAME, PARAMS, PARAM_FORMAT) \
        RC_HANDLER(int, NAME, PARAMS, int rc=0;, PARAM_FORMAT, \
                   rc = PyInt_AsLong(rv);, rc, \
                   (xmlparseobject *)userData)
00635 
00636 VOID_HANDLER(EndElement,
00637              (void *userData, const XML_Char *name),
00638              ("(N)", string_intern(self, name)))
00639 
00640 VOID_HANDLER(ProcessingInstruction,
00641              (void *userData,
00642               const XML_Char *target,
00643               const XML_Char *data),
00644              ("(NO&)", string_intern(self, target), STRING_CONV_FUNC,data))
00645 
00646 VOID_HANDLER(UnparsedEntityDecl,
00647              (void *userData,
00648               const XML_Char *entityName,
00649               const XML_Char *base,
00650               const XML_Char *systemId,
00651               const XML_Char *publicId,
00652               const XML_Char *notationName),
00653              ("(NNNNN)",
00654               string_intern(self, entityName), string_intern(self, base),
00655               string_intern(self, systemId), string_intern(self, publicId),
00656               string_intern(self, notationName)))
00657 
00658 #ifndef Py_USING_UNICODE
00659 VOID_HANDLER(EntityDecl,
00660              (void *userData,
00661               const XML_Char *entityName,
00662               int is_parameter_entity,
00663               const XML_Char *value,
00664               int value_length,
00665               const XML_Char *base,
00666               const XML_Char *systemId,
00667               const XML_Char *publicId,
00668               const XML_Char *notationName),
00669              ("NiNNNNN",
00670               string_intern(self, entityName), is_parameter_entity,
00671               conv_string_len_to_utf8(value, value_length),
00672               string_intern(self, base), string_intern(self, systemId),
00673               string_intern(self, publicId),
00674               string_intern(self, notationName)))
00675 #else
00676 VOID_HANDLER(EntityDecl,
00677              (void *userData,
00678               const XML_Char *entityName,
00679               int is_parameter_entity,
00680               const XML_Char *value,
00681               int value_length,
00682               const XML_Char *base,
00683               const XML_Char *systemId,
00684               const XML_Char *publicId,
00685               const XML_Char *notationName),
00686              ("NiNNNNN",
00687               string_intern(self, entityName), is_parameter_entity,
00688               (self->returns_unicode
00689                ? conv_string_len_to_unicode(value, value_length)
00690                : conv_string_len_to_utf8(value, value_length)),
00691               string_intern(self, base), string_intern(self, systemId),
00692               string_intern(self, publicId),
00693               string_intern(self, notationName)))
00694 #endif
00695 
00696 VOID_HANDLER(XmlDecl,
00697              (void *userData,
00698               const XML_Char *version,
00699               const XML_Char *encoding,
00700               int standalone),
00701              ("(O&O&i)",
00702               STRING_CONV_FUNC,version, STRING_CONV_FUNC,encoding,
00703               standalone))
00704 
00705 static PyObject *
00706 conv_content_model(XML_Content * const model,
00707                    PyObject *(*conv_string)(const XML_Char *))
00708 {
00709     PyObject *result = NULL;
00710     PyObject *children = PyTuple_New(model->numchildren);
00711     int i;
00712 
00713     if (children != NULL) {
00714         assert(model->numchildren < INT_MAX);
00715         for (i = 0; i < (int)model->numchildren; ++i) {
00716             PyObject *child = conv_content_model(&model->children[i],
00717                                                  conv_string);
00718             if (child == NULL) {
00719                 Py_XDECREF(children);
00720                 return NULL;
00721             }
00722             PyTuple_SET_ITEM(children, i, child);
00723         }
00724         result = Py_BuildValue("(iiO&N)",
00725                                model->type, model->quant,
00726                                conv_string,model->name, children);
00727     }
00728     return result;
00729 }
00730 
00731 static void
00732 my_ElementDeclHandler(void *userData,
00733                       const XML_Char *name,
00734                       XML_Content *model)
00735 {
00736     xmlparseobject *self = (xmlparseobject *)userData;
00737     PyObject *args = NULL;
00738 
00739     if (have_handler(self, ElementDecl)) {
00740         PyObject *rv = NULL;
00741         PyObject *modelobj, *nameobj;
00742 
00743         if (flush_character_buffer(self) < 0)
00744             goto finally;
00745 #ifdef Py_USING_UNICODE
00746         modelobj = conv_content_model(model,
00747                                       (self->returns_unicode
00748                                        ? conv_string_to_unicode
00749                                        : conv_string_to_utf8));
00750 #else
00751         modelobj = conv_content_model(model, conv_string_to_utf8);
00752 #endif
00753         if (modelobj == NULL) {
00754             flag_error(self);
00755             goto finally;
00756         }
00757         nameobj = string_intern(self, name);
00758         if (nameobj == NULL) {
00759             Py_DECREF(modelobj);
00760             flag_error(self);
00761             goto finally;
00762         }
00763         args = Py_BuildValue("NN", nameobj, modelobj);
00764         if (args == NULL) {
00765             Py_DECREF(modelobj);
00766             flag_error(self);
00767             goto finally;
00768         }
00769         self->in_callback = 1;
00770         rv = call_with_frame(getcode(ElementDecl, "ElementDecl", __LINE__),
00771                              self->handlers[ElementDecl], args, self);
00772         self->in_callback = 0;
00773         if (rv == NULL) {
00774             flag_error(self);
00775             goto finally;
00776         }
00777         Py_DECREF(rv);
00778     }
00779  finally:
00780     Py_XDECREF(args);
00781     XML_FreeContentModel(self->itself, model);
00782     return;
00783 }
00784 
00785 VOID_HANDLER(AttlistDecl,
00786              (void *userData,
00787               const XML_Char *elname,
00788               const XML_Char *attname,
00789               const XML_Char *att_type,
00790               const XML_Char *dflt,
00791               int isrequired),
00792              ("(NNO&O&i)",
00793               string_intern(self, elname), string_intern(self, attname),
00794               STRING_CONV_FUNC,att_type, STRING_CONV_FUNC,dflt,
00795               isrequired))
00796 
00797 #if XML_COMBINED_VERSION >= 19504
00798 VOID_HANDLER(SkippedEntity,
00799              (void *userData,
00800               const XML_Char *entityName,
00801               int is_parameter_entity),
00802              ("Ni",
00803               string_intern(self, entityName), is_parameter_entity))
00804 #endif
00805 
00806 VOID_HANDLER(NotationDecl,
00807                 (void *userData,
00808                         const XML_Char *notationName,
00809                         const XML_Char *base,
00810                         const XML_Char *systemId,
00811                         const XML_Char *publicId),
00812                 ("(NNNN)",
00813                  string_intern(self, notationName), string_intern(self, base),
00814                  string_intern(self, systemId), string_intern(self, publicId)))
00815 
00816 VOID_HANDLER(StartNamespaceDecl,
00817                 (void *userData,
00818                       const XML_Char *prefix,
00819                       const XML_Char *uri),
00820                 ("(NN)",
00821                  string_intern(self, prefix), string_intern(self, uri)))
00822 
00823 VOID_HANDLER(EndNamespaceDecl,
00824                 (void *userData,
00825                     const XML_Char *prefix),
00826                 ("(N)", string_intern(self, prefix)))
00827 
00828 VOID_HANDLER(Comment,
00829                (void *userData, const XML_Char *data),
00830                 ("(O&)", STRING_CONV_FUNC,data))
00831 
00832 VOID_HANDLER(StartCdataSection,
00833                (void *userData),
00834                 ("()"))
00835 
00836 VOID_HANDLER(EndCdataSection,
00837                (void *userData),
00838                 ("()"))
00839 
00840 #ifndef Py_USING_UNICODE
00841 VOID_HANDLER(Default,
00842               (void *userData, const XML_Char *s, int len),
00843               ("(N)", conv_string_len_to_utf8(s,len)))
00844 
00845 VOID_HANDLER(DefaultHandlerExpand,
00846               (void *userData, const XML_Char *s, int len),
00847               ("(N)", conv_string_len_to_utf8(s,len)))
00848 #else
00849 VOID_HANDLER(Default,
00850               (void *userData, const XML_Char *s, int len),
00851               ("(N)", (self->returns_unicode
00852                        ? conv_string_len_to_unicode(s,len)
00853                        : conv_string_len_to_utf8(s,len))))
00854 
00855 VOID_HANDLER(DefaultHandlerExpand,
00856               (void *userData, const XML_Char *s, int len),
00857               ("(N)", (self->returns_unicode
00858                        ? conv_string_len_to_unicode(s,len)
00859                        : conv_string_len_to_utf8(s,len))))
00860 #endif
00861 
00862 INT_HANDLER(NotStandalone,
00863                 (void *userData),
00864                 ("()"))
00865 
00866 RC_HANDLER(int, ExternalEntityRef,
00867                 (XML_Parser parser,
00868                     const XML_Char *context,
00869                     const XML_Char *base,
00870                     const XML_Char *systemId,
00871                     const XML_Char *publicId),
00872                 int rc=0;,
00873                 ("(O&NNN)",
00874                  STRING_CONV_FUNC,context, string_intern(self, base),
00875                  string_intern(self, systemId), string_intern(self, publicId)),
00876                 rc = PyInt_AsLong(rv);, rc,
00877                 XML_GetUserData(parser))
00878 
00879 /* XXX UnknownEncodingHandler */
00880 
00881 VOID_HANDLER(StartDoctypeDecl,
00882              (void *userData, const XML_Char *doctypeName,
00883               const XML_Char *sysid, const XML_Char *pubid,
00884               int has_internal_subset),
00885              ("(NNNi)", string_intern(self, doctypeName),
00886               string_intern(self, sysid), string_intern(self, pubid),
00887               has_internal_subset))
00888 
00889 VOID_HANDLER(EndDoctypeDecl, (void *userData), ("()"))
00890 
00891 /* ---------------------------------------------------------------- */
00892 
00893 static PyObject *
00894 get_parse_result(xmlparseobject *self, int rv)
00895 {
00896     if (PyErr_Occurred()) {
00897         return NULL;
00898     }
00899     if (rv == 0) {
00900         return set_error(self, XML_GetErrorCode(self->itself));
00901     }
00902     if (flush_character_buffer(self) < 0) {
00903         return NULL;
00904     }
00905     return PyInt_FromLong(rv);
00906 }
00907 
00908 PyDoc_STRVAR(xmlparse_Parse__doc__,
00909 "Parse(data[, isfinal])\n\
00910 Parse XML data.  `isfinal' should be true at end of input.");
00911 
00912 static PyObject *
00913 xmlparse_Parse(xmlparseobject *self, PyObject *args)
00914 {
00915     char *s;
00916     int slen;
00917     int isFinal = 0;
00918 
00919     if (!PyArg_ParseTuple(args, "s#|i:Parse", &s, &slen, &isFinal))
00920         return NULL;
00921 
00922     return get_parse_result(self, XML_Parse(self->itself, s, slen, isFinal));
00923 }
00924 
00925 /* File reading copied from cPickle */
00926 
00927 #define BUF_SIZE 2048
00928 
00929 static int
00930 readinst(char *buf, int buf_size, PyObject *meth)
00931 {
00932     PyObject *arg = NULL;
00933     PyObject *bytes = NULL;
00934     PyObject *str = NULL;
00935     int len = -1;
00936 
00937     if ((bytes = PyInt_FromLong(buf_size)) == NULL)
00938         goto finally;
00939 
00940     if ((arg = PyTuple_New(1)) == NULL) {
00941         Py_DECREF(bytes);
00942         goto finally;
00943     }
00944 
00945     PyTuple_SET_ITEM(arg, 0, bytes);
00946 
00947 #if PY_VERSION_HEX < 0x02020000
00948     str = PyObject_CallObject(meth, arg);
00949 #else
00950     str = PyObject_Call(meth, arg, NULL);
00951 #endif
00952     if (str == NULL)
00953         goto finally;
00954 
00955     /* XXX what to do if it returns a Unicode string? */
00956     if (!PyString_Check(str)) {
00957         PyErr_Format(PyExc_TypeError,
00958                      "read() did not return a string object (type=%.400s)",
00959                      str->ob_type->tp_name);
00960         goto finally;
00961     }
00962     len = PyString_GET_SIZE(str);
00963     if (len > buf_size) {
00964         PyErr_Format(PyExc_ValueError,
00965                      "read() returned too much data: "
00966                      "%i bytes requested, %i returned",
00967                      buf_size, len);
00968         goto finally;
00969     }
00970     memcpy(buf, PyString_AsString(str), len);
00971 finally:
00972     Py_XDECREF(arg);
00973     Py_XDECREF(str);
00974     return len;
00975 }
00976 
00977 PyDoc_STRVAR(xmlparse_ParseFile__doc__,
00978 "ParseFile(file)\n\
00979 Parse XML data from file-like object.");
00980 
00981 static PyObject *
00982 xmlparse_ParseFile(xmlparseobject *self, PyObject *args)
00983 {
00984     int rv = 1;
00985     PyObject *f;
00986     FILE *fp;
00987     PyObject *readmethod = NULL;
00988 
00989     if (!PyArg_ParseTuple(args, "O:ParseFile", &f))
00990         return NULL;
00991 
00992     if (PyFile_Check(f)) {
00993         fp = PyFile_AsFile(f);
00994     }
00995     else{
00996         fp = NULL;
00997         readmethod = PyObject_GetAttrString(f, "read");
00998         if (readmethod == NULL) {
00999             PyErr_Clear();
01000             PyErr_SetString(PyExc_TypeError,
01001                             "argument must have 'read' attribute");
01002             return NULL;
01003         }
01004     }
01005     for (;;) {
01006         int bytes_read;
01007         void *buf = XML_GetBuffer(self->itself, BUF_SIZE);
01008         if (buf == NULL) {
01009             Py_XDECREF(readmethod);
01010             return PyErr_NoMemory();
01011        }
01012 
01013         if (fp) {
01014             bytes_read = fread(buf, sizeof(char), BUF_SIZE, fp);
01015             if (bytes_read < 0) {
01016                 PyErr_SetFromErrno(PyExc_IOError);
01017                 return NULL;
01018             }
01019         }
01020         else {
01021             bytes_read = readinst(buf, BUF_SIZE, readmethod);
01022             if (bytes_read < 0) {
01023                 Py_DECREF(readmethod);
01024                 return NULL;
01025             }
01026         }
01027         rv = XML_ParseBuffer(self->itself, bytes_read, bytes_read == 0);
01028         if (PyErr_Occurred()) {
01029             Py_XDECREF(readmethod);
01030             return NULL;
01031         }
01032 
01033         if (!rv || bytes_read == 0)
01034             break;
01035     }
01036     Py_XDECREF(readmethod);
01037     return get_parse_result(self, rv);
01038 }
01039 
01040 PyDoc_STRVAR(xmlparse_SetBase__doc__,
01041 "SetBase(base_url)\n\
01042 Set the base URL for the parser.");
01043 
01044 static PyObject *
01045 xmlparse_SetBase(xmlparseobject *self, PyObject *args)
01046 {
01047     char *base;
01048 
01049     if (!PyArg_ParseTuple(args, "s:SetBase", &base))
01050         return NULL;
01051     if (!XML_SetBase(self->itself, base)) {
01052         return PyErr_NoMemory();
01053     }
01054     Py_INCREF(Py_None);
01055     return Py_None;
01056 }
01057 
01058 PyDoc_STRVAR(xmlparse_GetBase__doc__,
01059 "GetBase() -> url\n\
01060 Return base URL string for the parser.");
01061 
01062 static PyObject *
01063 xmlparse_GetBase(xmlparseobject *self, PyObject *args)
01064 {
01065     if (!PyArg_ParseTuple(args, ":GetBase"))
01066         return NULL;
01067 
01068     return Py_BuildValue("z", XML_GetBase(self->itself));
01069 }
01070 
01071 PyDoc_STRVAR(xmlparse_GetInputContext__doc__,
01072 "GetInputContext() -> string\n\
01073 Return the untranslated text of the input that caused the current event.\n\
01074 If the event was generated by a large amount of text (such as a start tag\n\
01075 for an element with many attributes), not all of the text may be available.");
01076 
01077 static PyObject *
01078 xmlparse_GetInputContext(xmlparseobject *self, PyObject *args)
01079 {
01080     PyObject *result = NULL;
01081 
01082     if (PyArg_ParseTuple(args, ":GetInputContext")) {
01083         if (self->in_callback) {
01084             int offset, size;
01085             const char *buffer
01086                 = XML_GetInputContext(self->itself, &offset, &size);
01087 
01088             if (buffer != NULL)
01089                 result = PyString_FromStringAndSize(buffer + offset, size);
01090             else {
01091                 result = Py_None;
01092                 Py_INCREF(result);
01093             }
01094         }
01095         else {
01096             result = Py_None;
01097             Py_INCREF(result);
01098         }
01099     }
01100     return result;
01101 }
01102 
01103 PyDoc_STRVAR(xmlparse_ExternalEntityParserCreate__doc__,
01104 "ExternalEntityParserCreate(context[, encoding])\n\
01105 Create a parser for parsing an external entity based on the\n\
01106 information passed to the ExternalEntityRefHandler.");
01107 
01108 static PyObject *
01109 xmlparse_ExternalEntityParserCreate(xmlparseobject *self, PyObject *args)
01110 {
01111     char *context;
01112     char *encoding = NULL;
01113     xmlparseobject *new_parser;
01114     int i;
01115 
01116     if (!PyArg_ParseTuple(args, "z|s:ExternalEntityParserCreate",
01117                           &context, &encoding)) {
01118         return NULL;
01119     }
01120 
01121 #ifndef Py_TPFLAGS_HAVE_GC
01122     /* Python versions 2.0 and 2.1 */
01123     new_parser = PyObject_New(xmlparseobject, &Xmlparsetype);
01124 #else
01125     /* Python versions 2.2 and later */
01126     new_parser = PyObject_GC_New(xmlparseobject, &Xmlparsetype);
01127 #endif
01128 
01129     if (new_parser == NULL)
01130         return NULL;
01131     new_parser->buffer_size = self->buffer_size;
01132     new_parser->buffer_used = 0;
01133     if (self->buffer != NULL) {
01134         new_parser->buffer = malloc(new_parser->buffer_size);
01135         if (new_parser->buffer == NULL) {
01136 #ifndef Py_TPFLAGS_HAVE_GC
01137             /* Code for versions 2.0 and 2.1 */
01138             PyObject_Del(new_parser);
01139 #else
01140             /* Code for versions 2.2 and later. */
01141             PyObject_GC_Del(new_parser);
01142 #endif
01143             return PyErr_NoMemory();
01144         }
01145     }
01146     else
01147         new_parser->buffer = NULL;
01148     new_parser->returns_unicode = self->returns_unicode;
01149     new_parser->ordered_attributes = self->ordered_attributes;
01150     new_parser->specified_attributes = self->specified_attributes;
01151     new_parser->in_callback = 0;
01152     new_parser->ns_prefixes = self->ns_prefixes;
01153     new_parser->itself = XML_ExternalEntityParserCreate(self->itself, context,
01154                                                         encoding);
01155     new_parser->handlers = 0;
01156     new_parser->intern = self->intern;
01157     Py_XINCREF(new_parser->intern);
01158 #ifdef Py_TPFLAGS_HAVE_GC
01159     PyObject_GC_Track(new_parser);
01160 #else
01161     PyObject_GC_Init(new_parser);
01162 #endif
01163 
01164     if (!new_parser->itself) {
01165         Py_DECREF(new_parser);
01166         return PyErr_NoMemory();
01167     }
01168 
01169     XML_SetUserData(new_parser->itself, (void *)new_parser);
01170 
01171     /* allocate and clear handlers first */
01172     for (i = 0; handler_info[i].name != NULL; i++)
01173         /* do nothing */;
01174 
01175     new_parser->handlers = malloc(sizeof(PyObject *) * i);
01176     if (!new_parser->handlers) {
01177         Py_DECREF(new_parser);
01178         return PyErr_NoMemory();
01179     }
01180     clear_handlers(new_parser, 1);
01181 
01182     /* then copy handlers from self */
01183     for (i = 0; handler_info[i].name != NULL; i++) {
01184         PyObject *handler = self->handlers[i];
01185         if (handler != NULL) {
01186             Py_INCREF(handler);
01187             new_parser->handlers[i] = handler;
01188             handler_info[i].setter(new_parser->itself,
01189                                    handler_info[i].handler);
01190         }
01191     }
01192     return (PyObject *)new_parser;
01193 }
01194 
01195 PyDoc_STRVAR(xmlparse_SetParamEntityParsing__doc__,
01196 "SetParamEntityParsing(flag) -> success\n\
01197 Controls parsing of parameter entities (including the external DTD\n\
01198 subset). Possible flag values are XML_PARAM_ENTITY_PARSING_NEVER,\n\
01199 XML_PARAM_ENTITY_PARSING_UNLESS_STANDALONE and\n\
01200 XML_PARAM_ENTITY_PARSING_ALWAYS. Returns true if setting the flag\n\
01201 was successful.");
01202 
01203 static PyObject*
01204 xmlparse_SetParamEntityParsing(xmlparseobject *p, PyObject* args)
01205 {
01206     int flag;
01207     if (!PyArg_ParseTuple(args, "i", &flag))
01208         return NULL;
01209     flag = XML_SetParamEntityParsing(p->itself, flag);
01210     return PyInt_FromLong(flag);
01211 }
01212 
01213 
#if XML_COMBINED_VERSION >= 19505
PyDoc_STRVAR(xmlparse_UseForeignDTD__doc__,
"UseForeignDTD([flag])\n\
Allows the application to provide an artificial external subset if one is\n\
not specified as part of the document instance.  This readily allows the\n\
use of a 'default' document type controlled by the application, while still\n\
getting the advantage of providing document type information to the parser.\n\
'flag' defaults to True if not provided.");

/*
 * Python method UseForeignDTD([flag]).
 *
 * Converts the optional `flag` object to XML_TRUE/XML_FALSE by its truth
 * value (XML_TRUE when omitted) and passes it to XML_UseForeignDTD().
 *
 * Returns None on success.  Returns NULL with an exception set on an
 * argument error or when evaluating the truth value of `flag` raises, and
 * returns the result of set_error() when Expat reports an error code.
 */
static PyObject *
xmlparse_UseForeignDTD(xmlparseobject *self, PyObject *args)
{
    PyObject *flagobj = NULL;
    XML_Bool flag = XML_TRUE;
    enum XML_Error rc;

    if (!PyArg_ParseTuple(args, "|O:UseForeignDTD", &flagobj))
        return NULL;

    if (flagobj != NULL) {
        /* PyObject_IsTrue() returns -1 when the truth test itself raises;
           that must propagate instead of being read as "true". */
        int truth = PyObject_IsTrue(flagobj);

        if (truth < 0)
            return NULL;
        flag = truth ? XML_TRUE : XML_FALSE;
    }

    rc = XML_UseForeignDTD(self->itself, flag);
    if (rc != XML_ERROR_NONE) {
        return set_error(self, rc);
    }
    Py_INCREF(Py_None);
    return Py_None;
}
#endif
01241 
01242 static struct PyMethodDef xmlparse_methods[] = {
01243     {"Parse",     (PyCFunction)xmlparse_Parse,
01244                   METH_VARARGS, xmlparse_Parse__doc__},
01245     {"ParseFile", (PyCFunction)xmlparse_ParseFile,
01246                   METH_VARARGS, xmlparse_ParseFile__doc__},
01247     {"SetBase",   (PyCFunction)xmlparse_SetBase,
01248                   METH_VARARGS, xmlparse_SetBase__doc__},
01249     {"GetBase",   (PyCFunction)xmlparse_GetBase,
01250                   METH_VARARGS, xmlparse_GetBase__doc__},
01251     {"ExternalEntityParserCreate", (PyCFunction)xmlparse_ExternalEntityParserCreate,
01252                   METH_VARARGS, xmlparse_ExternalEntityParserCreate__doc__},
01253     {"SetParamEntityParsing", (PyCFunction)xmlparse_SetParamEntityParsing,
01254                   METH_VARARGS, xmlparse_SetParamEntityParsing__doc__},
01255     {"GetInputContext", (PyCFunction)xmlparse_GetInputContext,
01256                   METH_VARARGS, xmlparse_GetInputContext__doc__},
01257 #if XML_COMBINED_VERSION >= 19505
01258     {"UseForeignDTD", (PyCFunction)xmlparse_UseForeignDTD,
01259                   METH_VARARGS, xmlparse_UseForeignDTD__doc__},
01260 #endif
01261     {NULL,        NULL}         /* sentinel */
01262 };
01263 
01264 /* ---------- */
01265 
01266 
01267 #ifdef Py_USING_UNICODE
01268 
01269 /* pyexpat international encoding support.
01270    Make it as simple as possible.
01271 */
01272 
01273 static char template_buffer[257];
01274 PyObject *template_string = NULL;
01275 
01276 static void
01277 init_template_buffer(void)
01278 {
01279     int i;
01280     for (i = 0; i < 256; i++) {
01281         template_buffer[i] = i;
01282     }
01283     template_buffer[256] = 0;
01284 }
01285 
01286 static int
01287 PyUnknownEncodingHandler(void *encodingHandlerData,
01288                          const XML_Char *name,
01289                          XML_Encoding *info)
01290 {
01291     PyUnicodeObject *_u_string = NULL;
01292     int result = 0;
01293     int i;
01294 
01295     /* Yes, supports only 8bit encodings */
01296     _u_string = (PyUnicodeObject *)
01297         PyUnicode_Decode(template_buffer, 256, name, "replace");
01298 
01299     if (_u_string == NULL)
01300         return result;
01301 
01302     for (i = 0; i < 256; i++) {
01303         /* Stupid to access directly, but fast */
01304         Py_UNICODE c = _u_string->str[i];
01305         if (c == Py_UNICODE_REPLACEMENT_CHARACTER)
01306             info->map[i] = -1;
01307         else
01308             info->map[i] = c;
01309     }
01310     info->data = NULL;
01311     info->convert = NULL;
01312     info->release = NULL;
01313     result = 1;
01314     Py_DECREF(_u_string);
01315     return result;
01316 }
01317 
01318 #endif
01319 
01320 static PyObject *
01321 newxmlparseobject(char *