#include "Python.h"
#include <ctype.h>
Go to the source code of this file.
Classes | |
| struct | FastSGMLParserObject |
| struct | ElementObject |
| struct | TreeBuilderObject |
Defines | |
| #define | CHAR_T char |
| #define | ISALNUM isalnum |
| #define | ISSPACE isspace |
| #define | TOLOWER tolower |
| #define | ALLOC(size, comment) |
| #define | RELEASE(size, comment) |
| #define | MAYBE 1 |
| #define | SURE 2 |
| #define | callHandleData(self, data, len) callWithString((self), (self)->handle_data, (data), (len)) |
| #define | callHandleCData(self, data, len) callWithString((self), (self)->handle_cdata, (data), (len)) |
| #define | callHandleComment(self, data, len) callWithString((self), (self)->handle_comment, (data), (len)) |
| #define | callHandleEntityRef(self, data, len) callWithString((self), (self)->handle_entityref, (data), (len)) |
| #define | callHandleCharRef(self, data, len) callWithString((self), (self)->handle_charref, (data), (len)) |
| #define | callHandleSpecial(self, data, len) callWithString((self), (self)->handle_special, (data), (len)) |
| #define | callHandleProc(self, data1, len1, data2, len2) callWith2Strings((self), (self)->handle_proc, (data1), (len1), (data2), (len2)) |
| #define | callFinishStartTag(self, data, len, obj) callWithStringAndObj((self), (self)->finish_starttag, (data), (len), (obj)) |
| #define | callFinishEndTag(self, data, len) callWithString((self), (self)->finish_endtag, (data), (len)) |
| #define | GETCB(member, name) |
| #define | TAG 0x100 |
| #define | TAG_START 0x101 |
| #define | TAG_END 0x102 |
| #define | TAG_EMPTY 0x103 |
| #define | DIRECTIVE 0x104 |
| #define | DOCTYPE 0x105 |
| #define | PI 0x106 |
| #define | DTD_START 0x107 |
| #define | DTD_END 0x108 |
| #define | DTD_ENTITY 0x109 |
| #define | CDATA 0x200 |
| #define | ENTITYREF 0x400 |
| #define | CHARREF 0x401 |
| #define | COMMENT 0x800 |
Functions | |
| static int | fastfeed (FastSGMLParserObject *self) |
| static PyObject * | attrparse (FastSGMLParserObject *self, const CHAR_T *p, int len) |
| static int | fetchEncoding (FastSGMLParserObject *self, const CHAR_T *data, int len) |
| static PyObject * | stringFromData (FastSGMLParserObject *self, const CHAR_T *data, int len) |
| static int | callWithString (FastSGMLParserObject *self, PyObject *callback, const CHAR_T *data, int len) |
| static int | callWith2Strings (FastSGMLParserObject *self, PyObject *callback, const CHAR_T *data1, int len1, const CHAR_T *data2, int len2) |
| static int | callWithStringAndObj (FastSGMLParserObject *self, PyObject *callback, const CHAR_T *data, int len, PyObject *obj) |
| static PyObject * | _sgmlop_new (int xml, int unicode) |
| static PyObject * | _sgmlop_sgmlparser (PyObject *self, PyObject *args) |
| static PyObject * | _sgmlop_xmlparser (PyObject *self, PyObject *args) |
| static PyObject * | _sgmlop_xmlunicodeparser (PyObject *self, PyObject *args) |
| static void | _sgmlop_dealloc (FastSGMLParserObject *self) |
| static PyObject * | _sgmlop_register (FastSGMLParserObject *self, PyObject *args) |
| static PyObject * | feed (FastSGMLParserObject *self, char *string, int stringlen, int last) |
| static PyObject * | _sgmlop_feed (FastSGMLParserObject *self, PyObject *args) |
| static PyObject * | _sgmlop_close (FastSGMLParserObject *self, PyObject *args) |
| static PyObject * | _sgmlop_parse (FastSGMLParserObject *self, PyObject *args) |
| static PyObject * | _sgmlop_getattr (FastSGMLParserObject *self, char *name) |
| static PyObject * | element_new (PyObject *_self, PyObject *args) |
| static void | element_dealloc (ElementObject *self) |
| static PyObject * | element_append (ElementObject *self, PyObject *args) |
| static PyObject * | element_destroy (ElementObject *self, PyObject *args) |
| static PyObject * | element_get (ElementObject *self, PyObject *args) |
| static PyObject * | element_getitem (ElementObject *self, int index) |
| static int | element_length (ElementObject *self) |
| static PyObject * | element_repr (ElementObject *self) |
| static PyObject * | element_getattr (ElementObject *self, char *name) |
| static int | element_setattr (ElementObject *self, const char *name, PyObject *value) |
| static PyObject * | treebuilder_new (PyObject *_self, PyObject *args) |
| static void | treebuilder_dealloc (TreeBuilderObject *self) |
| static PyObject * | treebuilder_start (TreeBuilderObject *self, PyObject *args) |
| static PyObject * | treebuilder_end (TreeBuilderObject *self, PyObject *args) |
| static PyObject * | treebuilder_data (TreeBuilderObject *self, PyObject *args) |
| static PyObject * | treebuilder_getattr (ElementObject *self, char *name) |
| initsgmlop (void) | |
Variables | |
| staticforward PyTypeObject | FastSGMLParser_Type |
| static PyMethodDef | _sgmlop_methods [] |
| staticforward PyTypeObject | Element_Type |
| static PyMethodDef | element_methods [] |
| static PySequenceMethods | element_as_sequence |
| staticforward PyTypeObject | TreeBuilder_Type |
| static PyMethodDef | treebuilder_methods [] |
| static PyMethodDef | _functions [] |
| static char * | defaultEncoding = "utf-8" |
|
|
Definition at line 91 of file sgmlop.c. Referenced by element_new(). |
|
|
Definition at line 153 of file sgmlop.c. Referenced by fastfeed(). |
|
|
Definition at line 152 of file sgmlop.c. Referenced by fastfeed(). |
|
|
Definition at line 146 of file sgmlop.c. Referenced by fastfeed(). |
|
|
Definition at line 149 of file sgmlop.c. Referenced by fastfeed(). |
|
|
Definition at line 147 of file sgmlop.c. Referenced by fastfeed(). |
|
|
Definition at line 145 of file sgmlop.c. Referenced by fastfeed(). |
|
|
Definition at line 148 of file sgmlop.c. Referenced by fastfeed(). |
|
|
Definition at line 151 of file sgmlop.c. Referenced by fastfeed(). |
|
|
Definition at line 150 of file sgmlop.c. Referenced by fastfeed(). |
|
|
Definition at line 911 of file sgmlop.c. Referenced by fastfeed(). |
|
|
Definition at line 78 of file sgmlop.c. Referenced by attrparse(), and fastfeed(). |
|
|
Definition at line 913 of file sgmlop.c. Referenced by fastfeed(). |
|
|
Definition at line 914 of file sgmlop.c. Referenced by fastfeed(). |
|
|
Definition at line 905 of file sgmlop.c. Referenced by fastfeed(). |
|
|
Definition at line 906 of file sgmlop.c. Referenced by fastfeed(). |
|
|
Definition at line 909 of file sgmlop.c. Referenced by fastfeed(). |
|
|
Definition at line 910 of file sgmlop.c. Referenced by fastfeed(). |
|
|
Definition at line 908 of file sgmlop.c. Referenced by fastfeed(). |
|
|
Definition at line 912 of file sgmlop.c. Referenced by fastfeed(). |
|
|
Value: Py_XDECREF(self->member);\
self->member = PyObject_GetAttrString(item, name);
Definition at line 238 of file sgmlop.c. Referenced by _sgmlop_register(). |
|
|
Definition at line 79 of file sgmlop.c. Referenced by fastfeed(). |
|
|
Definition at line 80 of file sgmlop.c. Referenced by attrparse(), and fastfeed(). |
|
|
Definition at line 99 of file sgmlop.c. Referenced by fastfeed(). |
|
|
Definition at line 907 of file sgmlop.c. Referenced by fastfeed(). |
|
|
Definition at line 92 of file sgmlop.c. Referenced by element_dealloc(). |
|
|
Definition at line 100 of file sgmlop.c. Referenced by fastfeed(). |
|
|
|
|
|
Definition at line 904 of file sgmlop.c. Referenced by fastfeed(). |
|
|
Definition at line 903 of file sgmlop.c. Referenced by fastfeed(). |
|
|
Definition at line 902 of file sgmlop.c. Referenced by fastfeed(). |
|
|
Definition at line 81 of file sgmlop.c. Referenced by fastfeed(). |
|
||||||||||||
|
Definition at line 349 of file sgmlop.c. References feed(). 00350 { 00351 /* flush parser buffers */ 00352 00353 if (!PyArg_NoArgs(args)) 00354 return NULL; 00355 00356 return feed(self, "", 0, 1); 00357 }
|
|
|
Definition at line 220 of file sgmlop.c. References FastSGMLParserObject::buffer, FastSGMLParserObject::encoding, FastSGMLParserObject::finish_endtag, FastSGMLParserObject::finish_starttag, FastSGMLParserObject::handle_cdata, FastSGMLParserObject::handle_charref, FastSGMLParserObject::handle_comment, FastSGMLParserObject::handle_data, FastSGMLParserObject::handle_entityref, FastSGMLParserObject::handle_proc, and FastSGMLParserObject::handle_special. 00221 { 00222 if (self->buffer) 00223 free(self->buffer); 00224 if (self->encoding) 00225 free(self->encoding); 00226 Py_XDECREF(self->finish_starttag); 00227 Py_XDECREF(self->finish_endtag); 00228 Py_XDECREF(self->handle_proc); 00229 Py_XDECREF(self->handle_special); 00230 Py_XDECREF(self->handle_charref); 00231 Py_XDECREF(self->handle_entityref); 00232 Py_XDECREF(self->handle_data); 00233 Py_XDECREF(self->handle_cdata); 00234 Py_XDECREF(self->handle_comment); 00235 PyMem_DEL(self); 00236 }
|
|
||||||||||||
|
Definition at line 336 of file sgmlop.c. References feed(). 00337 { 00338 /* feed a chunk of data to the parser */ 00339 00340 char* string; 00341 int stringlen; 00342 if (!PyArg_ParseTuple(args, "t#", &string, &stringlen)) 00343 return NULL; 00344 00345 return feed(self, string, stringlen, 0); 00346 }
|
|
||||||||||||
|
Definition at line 388 of file sgmlop.c. References _sgmlop_methods. 00389 { 00390 return Py_FindMethod(_sgmlop_methods, (PyObject*) self, name); 00391 }
|
|
||||||||||||
|
Definition at line 159 of file sgmlop.c. References FastSGMLParser_Type. Referenced by _sgmlop_sgmlparser(), _sgmlop_xmlparser(), and _sgmlop_xmlunicodeparser(). 00160 { 00161 FastSGMLParserObject* self; 00162 00163 self = PyObject_NEW(FastSGMLParserObject, &FastSGMLParser_Type); 00164 if (self == NULL) 00165 return NULL; 00166 00167 self->xml = xml; 00168 self->unicode = unicode; 00169 self->encoding = NULL; 00170 00171 self->feed = 0; 00172 self->shorttag = 0; 00173 self->doctype = 0; 00174 00175 self->buffer = NULL; 00176 self->bufferlen = 0; 00177 self->buffertotal = 0; 00178 00179 self->finish_starttag = NULL; 00180 self->finish_endtag = NULL; 00181 self->handle_proc = NULL; 00182 self->handle_special = NULL; 00183 self->handle_charref = NULL; 00184 self->handle_entityref = NULL; 00185 self->handle_data = NULL; 00186 self->handle_cdata = NULL; 00187 self->handle_comment = NULL; 00188 00189 return (PyObject*) self; 00190 }
|
|
||||||||||||
|
Definition at line 360 of file sgmlop.c. References feed(). 00361 { 00362 /* feed a single chunk of data to the parser */ 00363 00364 char* string; 00365 int stringlen; 00366 if (!PyArg_ParseTuple(args, "t#", &string, &stringlen)) 00367 return NULL; 00368 00369 return feed(self, string, stringlen, 1); 00370 }
|
|
||||||||||||
|
Definition at line 243 of file sgmlop.c. References GETCB. 00244 { 00245 /* register a callback object */ 00246 PyObject* item; 00247 if (!PyArg_ParseTuple(args, "O", &item)) 00248 return NULL; 00249 00250 GETCB(finish_starttag, "finish_starttag"); 00251 GETCB(finish_endtag, "finish_endtag"); 00252 GETCB(handle_proc, "handle_proc"); 00253 GETCB(handle_special, "handle_special"); 00254 GETCB(handle_charref, "handle_charref"); 00255 GETCB(handle_entityref, "handle_entityref"); 00256 GETCB(handle_data, "handle_data"); 00257 GETCB(handle_cdata, "handle_cdata"); 00258 GETCB(handle_comment, "handle_comment"); 00259 00260 PyErr_Clear(); 00261 00262 Py_INCREF(Py_None); 00263 return Py_None; 00264 }
|
|
||||||||||||
|
Definition at line 193 of file sgmlop.c. References _sgmlop_new(). 00194 { 00195 if (!PyArg_NoArgs(args)) 00196 return NULL; 00197 00198 return _sgmlop_new(0, 0); 00199 }
|
|
||||||||||||
|
Definition at line 202 of file sgmlop.c. References _sgmlop_new(). 00203 { 00204 if (!PyArg_NoArgs(args)) 00205 return NULL; 00206 00207 return _sgmlop_new(1, 0); 00208 }
|
|
||||||||||||
|
Definition at line 211 of file sgmlop.c. References _sgmlop_new(). 00212 { 00213 if (!PyArg_NoArgs(args)) 00214 return NULL; 00215 00216 return _sgmlop_new(1, 1); 00217 }
|
|
||||||||||||||||
|
Definition at line 1304 of file sgmlop.c. References CHAR_T, ISSPACE, stringFromData(), and FastSGMLParserObject::xml. Referenced by fastfeed(). 01305 { 01306 PyObject* attrs; 01307 PyObject* key = NULL; 01308 PyObject* value = NULL; 01309 const CHAR_T* end = p + len; 01310 const CHAR_T* q; 01311 01312 if (self->xml) 01313 attrs = PyDict_New(); 01314 else 01315 attrs = PyList_New(0); 01316 01317 while (p < end) { 01318 01319 /* skip leading space */ 01320 while (p < end && ISSPACE(*p)) 01321 p++; 01322 if (p >= end) 01323 break; 01324 01325 /* get attribute name (key) */ 01326 q = p; 01327 while (p < end && *p != '=' && !ISSPACE(*p)) 01328 p++; 01329 01330 key = stringFromData(self, q, p-q); 01331 if (key == NULL) 01332 goto err; 01333 01334 if (self->xml) 01335 value = Py_None; 01336 else 01337 value = key; /* in SGML mode, default is same as key */ 01338 01339 Py_INCREF(value); 01340 01341 while (p < end && ISSPACE(*p)) 01342 p++; 01343 01344 if (p < end && *p == '=') { 01345 01346 /* attribute value found */ 01347 Py_DECREF(value); 01348 01349 if (p < end) 01350 p++; 01351 while (p < end && ISSPACE(*p)) 01352 p++; 01353 01354 q = p; 01355 if (p < end && (*p == '"' || *p == '\'')) { 01356 p++; 01357 while (p < end && *p != *q) 01358 p++; 01359 value = stringFromData(self, q+1, p-q-1); 01360 if (p < end && *p == *q) 01361 p++; 01362 } else { 01363 while (p < end && !ISSPACE(*p)) 01364 p++; 01365 value = stringFromData(self, q, p-q); 01366 } 01367 01368 if (value == NULL) 01369 goto err; 01370 01371 } 01372 01373 if (self->xml) { 01374 01375 /* add to dictionary */ 01376 01377 /* PyString_InternInPlace(&key); */ 01378 if (PyDict_SetItem(attrs, key, value) < 0) 01379 goto err; 01380 Py_DECREF(key); 01381 Py_DECREF(value); 01382 01383 } else { 01384 01385 /* add to list */ 01386 01387 PyObject* res; 01388 res = PyTuple_New(2); 01389 if (!res) 01390 goto err; 01391 PyTuple_SET_ITEM(res, 0, key); 01392 PyTuple_SET_ITEM(res, 1, value); 01393 if (PyList_Append(attrs, 
res) < 0) { 01394 Py_DECREF(res); 01395 goto err; 01396 } 01397 Py_DECREF(res); 01398 01399 } 01400 01401 key = NULL; 01402 value = NULL; 01403 01404 } 01405 01406 return attrs; 01407 01408 err: 01409 Py_XDECREF(key); 01410 Py_XDECREF(value); 01411 Py_DECREF(attrs); 01412 return NULL; 01413 }
|
|
||||||||||||||||||||||||||||
|
Definition at line 1532 of file sgmlop.c. References stringFromData(). 01533 { 01534 PyObject* res; 01535 PyObject* str1; 01536 PyObject* str2; 01537 01538 str1 = stringFromData(self, data1, len1); 01539 01540 if (!str1) 01541 return -1; 01542 01543 str2 = stringFromData(self, data2, len2); 01544 01545 if (!str2) { 01546 Py_DECREF(str1); 01547 return -1; 01548 } 01549 01550 res = PyObject_CallFunction(callback, "OO", str1, str2); 01551 Py_DECREF(str1); 01552 Py_DECREF(str2); 01553 if (res) 01554 { 01555 Py_DECREF(res); 01556 return 0; 01557 } 01558 else 01559 return -1; 01560 }
|
|
||||||||||||||||||||
|
Definition at line 1504 of file sgmlop.c. References stringFromData(). 01505 { 01506 PyObject* str = stringFromData(self, data, len); 01507 PyObject* res; 01508 01509 if (!str) 01510 return -1; 01511 01512 res = PyObject_CallFunction(callback, "O", str); 01513 Py_DECREF(str); 01514 01515 if (res) 01516 { 01517 Py_DECREF(res); 01518 return 0; 01519 } 01520 else 01521 return -1; 01522 }
|
|
||||||||||||||||||||||||
|
Definition at line 1571 of file sgmlop.c. References stringFromData(). 01572 { 01573 PyObject* res; 01574 PyObject* str = stringFromData(self, data, len); 01575 01576 if (!str) 01577 return -1; 01578 01579 res = PyObject_CallFunction(callback, "OO", str, obj); 01580 Py_DECREF(str); 01581 if (res) 01582 { 01583 Py_XDECREF(res); 01584 return 0; 01585 } 01586 else 01587 return -1; 01588 }
|
|
||||||||||||
|
Definition at line 519 of file sgmlop.c. References ElementObject::child_count, ElementObject::child_total, ElementObject::children, and Element_Type. 00520 { 00521 int total; 00522 00523 PyObject* element; 00524 if (!PyArg_ParseTuple(args, "O!", &Element_Type, &element)) 00525 return NULL; 00526 00527 if (!self->children) { 00528 total = 10; 00529 self->children = malloc(total * sizeof(PyObject*)); 00530 self->child_total = total; 00531 } else if (self->child_count >= self->child_total) { 00532 total = self->child_total + 10; 00533 self->children = realloc(self->children, total * sizeof(PyObject*)); 00534 self->child_total = total; 00535 } 00536 if (!self->children) { 00537 PyErr_NoMemory(); 00538 return NULL; 00539 } 00540 00541 Py_INCREF(element); 00542 self->children[self->child_count++] = element; 00543 00544 Py_INCREF(Py_None); 00545 return Py_None; 00546 }
|
|
|
Definition at line 487 of file sgmlop.c. References ElementObject::attrib, ElementObject::children, ElementObject::parent, RELEASE, ElementObject::suffix, ElementObject::tag, and ElementObject::text. 00488 { 00489 int i; 00490 00491 /* FIXME: the parent attribute means that a tree will contain 00492 circular references. this will be fixed ("how?" is the big 00493 question...) */ 00494 00495 if (self->children) { 00496 for (i = 0; i < self->child_count; i++) 00497 Py_DECREF(self->children[i]); 00498 free(self->children); 00499 } 00500 00501 /* break the backlink */ 00502 Py_DECREF(self->parent); 00503 00504 /* discard attributes */ 00505 Py_DECREF(self->tag); 00506 Py_XDECREF(self->attrib); 00507 Py_XDECREF(self->text); 00508 Py_XDECREF(self->suffix); 00509 00510 RELEASE(sizeof(ElementObject), "destroy element"); 00511 00512 PyMem_DEL(self); 00513 }
|
|
||||||||||||
|
Definition at line 549 of file sgmlop.c. References ElementObject::children, and ElementObject::parent. 00550 { 00551 int i; 00552 PyObject* res; 00553 00554 if (!PyArg_NoArgs(args)) 00555 return NULL; 00556 00557 /* break the backlink */ 00558 if (self->parent != Py_None) { 00559 Py_DECREF(self->parent); 00560 self->parent = Py_None; 00561 Py_INCREF(self->parent); 00562 } 00563 00564 /* destroy element children */ 00565 if (self->children) { 00566 for (i = 0; i < self->child_count; i++) { 00567 res = element_destroy((ElementObject*) self->children[i], args); 00568 Py_DECREF(res); 00569 Py_DECREF(self->children[i]); 00570 } 00571 self->child_count = 0; 00572 } 00573 00574 /* leave the rest to the garbage collector... */ 00575 00576 Py_INCREF(Py_None); 00577 return Py_None; 00578 }
|
|
||||||||||||
|
Definition at line 581 of file sgmlop.c. References ElementObject::attrib. 00582 { 00583 PyObject* value; 00584 00585 PyObject* key; 00586 PyObject* default_value = Py_None; 00587 if (!PyArg_ParseTuple(args, "O|O", &key, &default_value)) 00588 return NULL; 00589 00590 value = PyDict_GetItem(self->attrib, key); 00591 if (!value) { 00592 value = default_value; 00593 PyErr_Clear(); 00594 } 00595 00596 Py_INCREF(value); 00597 return value; 00598 }
|
|
||||||||||||
|
Definition at line 648 of file sgmlop.c. References element_methods. 00649 { 00650 PyObject* res; 00651 00652 res = Py_FindMethod(element_methods, (PyObject*) self, name); 00653 if (res) 00654 return res; 00655 00656 PyErr_Clear(); 00657 00658 if (strcmp(name, "tag") == 0) 00659 res = self->tag; 00660 else if (strcmp(name, "text") == 0) 00661 res = self->text; 00662 else if (strcmp(name, "suffix") == 0) 00663 res = self->suffix; 00664 else if (strcmp(name, "attrib") == 0) 00665 res = self->attrib; 00666 else if (strcmp(name, "parent") == 0) 00667 res = self->parent; 00668 else { 00669 PyErr_SetString(PyExc_AttributeError, name); 00670 return NULL; 00671 } 00672 00673 Py_INCREF(res); 00674 return res; 00675 }
|
|
||||||||||||
|
Definition at line 601 of file sgmlop.c. References ElementObject::child_count, and ElementObject::children. 00602 { 00603 if (index < 0 || index >= self->child_count) { 00604 PyErr_SetString(PyExc_IndexError, "child index out of range"); 00605 return NULL; 00606 } 00607 00608 Py_INCREF(self->children[index]); 00609 return self->children[index]; 00610 }
|
|
|
Definition at line 613 of file sgmlop.c.
|
|
||||||||||||
|
Definition at line 440 of file sgmlop.c. References ALLOC, and Element_Type. 00441 { 00442 ElementObject* self; 00443 00444 PyObject* parent; 00445 PyObject* tag; 00446 PyObject* attrib = Py_None; 00447 PyObject* text = Py_None; 00448 PyObject* suffix = Py_None; 00449 if (!PyArg_ParseTuple(args, "OO|OOO", &parent, &tag, 00450 &attrib, &text, &suffix)) 00451 |