00001 #include "config.h"
00002
00003 #include <stdio.h>
00004 #include <slang.h>
00005 #include <string.h>
00006 #include <pcre.h>
00007
/* S-Lang module boilerplate for the module named "pcre" (the macro comes
 * from slang.h). */
SLANG_MODULE(pcre);

/* Class id of PCRE_Type.  It stays 0 until register_pcre_type() has
 * registered the class, which is how repeated registration is detected. */
static int PCRE_Type_Id = 0;
00011
00012 typedef struct
00013 {
00014 pcre *p;
00015 pcre_extra *extra;
00016 int *ovector;
00017 unsigned int ovector_len;
00018 unsigned int num_matches;
00019 }
00020 PCRE_Type;
00021
00022 static void free_pcre_type (PCRE_Type *pt)
00023 {
00024 if (pt->ovector != NULL)
00025 SLfree ((char *) pt->ovector);
00026
00027 SLfree ((char *) pt);
00028 }
00029
00030 static SLang_MMT_Type *allocate_pcre_type (pcre *p, pcre_extra *extra)
00031 {
00032 PCRE_Type *pt;
00033 SLang_MMT_Type *mmt;
00034 int ovector_len;
00035
00036 pt = (PCRE_Type *) SLmalloc (sizeof (PCRE_Type));
00037 if (pt == NULL)
00038 return NULL;
00039 memset ((char *) pt, 0, sizeof (PCRE_Type));
00040
00041 pt->p = p;
00042 pt->extra = extra;
00043
00044 if (0 != pcre_fullinfo (p, extra, PCRE_INFO_CAPTURECOUNT, &ovector_len))
00045 {
00046 free_pcre_type (pt);
00047 SLang_verror (SL_INTRINSIC_ERROR, "pcre_fullinfo failed");
00048 return NULL;
00049 }
00050
00051 ovector_len += 1;
00052 ovector_len *= 3;
00053 if (NULL == (pt->ovector = (int *)SLmalloc (ovector_len * sizeof (int))))
00054 {
00055 free_pcre_type (pt);
00056 return NULL;
00057 }
00058 pt->ovector_len = ovector_len;
00059
00060 if (NULL == (mmt = SLang_create_mmt (PCRE_Type_Id, (VOID_STAR) pt)))
00061 {
00062 free_pcre_type (pt);
00063 return NULL;
00064 }
00065 return mmt;
00066 }
00067
00068 static int _pcre_compile_1 (char *pattern, int options)
00069 {
00070 pcre *p;
00071 pcre_extra *extra;
00072 SLCONST char *err;
00073 int erroffset;
00074 unsigned char *table;
00075 SLang_MMT_Type *mmt;
00076
00077 table = NULL;
00078 p = pcre_compile (pattern, options, &err, &erroffset, table);
00079 if (NULL == p)
00080 {
00081 SLang_verror (SL_Parse_Error, "Error compiling pattern '%s' at offset %d: %s",
00082 pattern, erroffset, err);
00083 return -1;
00084 }
00085
00086 extra = pcre_study (p, 0, &err);
00087
00088 if (err != NULL)
00089 {
00090 SLang_verror (SL_INTRINSIC_ERROR, "pcre_study failed: %s", err);
00091 pcre_free (p);
00092 return -1;
00093 }
00094
00095 if (NULL == (mmt = allocate_pcre_type (p, extra)))
00096 {
00097 pcre_free ((char *) p);
00098 pcre_free ((char *) extra);
00099 return -1;
00100 }
00101
00102 if (-1 == SLang_push_mmt (mmt))
00103 {
00104 SLang_free_mmt (mmt);
00105 return -1;
00106 }
00107 return 0;
00108 }
00109
00110 static void _pcre_compile (void)
00111 {
00112 char *pattern;
00113 int options = 0;
00114
00115 switch (SLang_Num_Function_Args)
00116 {
00117 case 2:
00118 if (-1 == SLang_pop_integer (&options))
00119 return;
00120
00121 case 1:
00122 default:
00123 if (-1 == SLang_pop_slstring (&pattern))
00124 return;
00125 }
00126 (void) _pcre_compile_1 (pattern, options);
00127 SLang_free_slstring (pattern);
00128 }
00129
00130
00131
00132
00133 static int _pcre_exec_1 (PCRE_Type *pt, char *str, int pos, int options)
00134 {
00135 int rc;
00136 unsigned int len;
00137
00138 pt->num_matches = 0;
00139 len = strlen (str);
00140 if ((unsigned int) pos > len)
00141 return 0;
00142
00143 rc = pcre_exec (pt->p, pt->extra, str, len, pos,
00144 options, pt->ovector, pt->ovector_len);
00145
00146 if (rc == PCRE_ERROR_NOMATCH)
00147 return 0;
00148
00149 if (rc <= 0)
00150 {
00151 SLang_verror (SL_INTRINSIC_ERROR, "pcre_exec returned %d", rc);
00152 return -1;
00153 }
00154 pt->num_matches = (unsigned int) rc;
00155 return rc;
00156 }
00157
00158 static int _pcre_exec (void)
00159 {
00160 PCRE_Type *p;
00161 SLang_MMT_Type *mmt;
00162 char *str;
00163 int pos = 0;
00164 int options = 0;
00165 int ret = -1;
00166
00167 switch (SLang_Num_Function_Args)
00168 {
00169 case 4:
00170 if (-1 == SLang_pop_integer (&options))
00171 return -1;
00172 case 3:
00173 if (-1 == SLang_pop_integer (&pos))
00174 return -1;
00175 default:
00176 if (-1 == SLang_pop_slstring (&str))
00177 return -1;
00178
00179 if (NULL == (mmt = SLang_pop_mmt (PCRE_Type_Id)))
00180 goto free_and_return;
00181 p = (PCRE_Type *)SLang_object_from_mmt (mmt);
00182 }
00183 ret = _pcre_exec_1 (p, str, pos, options);
00184
00185 free_and_return:
00186 SLang_free_slstring (str);
00187 SLang_free_mmt (mmt);
00188 return ret;
00189 }
00190
00191
00192 static int get_nth_start_stop (PCRE_Type *pt, unsigned int n,
00193 unsigned int *a, unsigned int *b)
00194 {
00195 int start, stop;
00196
00197 if (n >= pt->num_matches)
00198 return -1;
00199
00200 start = pt->ovector[2*n];
00201 stop = pt->ovector[2*n+1];
00202 if ((start < 0) || (stop < start))
00203 return -1;
00204
00205 *a = (unsigned int) start;
00206 *b = (unsigned int) stop;
00207 return 0;
00208 }
00209
00210 static void _pcre_nth_match (PCRE_Type *pt, int *np)
00211 {
00212 unsigned int start, stop;
00213 SLang_Array_Type *at;
00214 SLindex_Type two = 2;
00215 int *data;
00216
00217 if (-1 == get_nth_start_stop (pt, (unsigned int) *np, &start, &stop))
00218 {
00219 SLang_push_null ();
00220 return;
00221 }
00222
00223 if (NULL == (at = SLang_create_array (SLANG_INT_TYPE, 0, NULL, &two, 1)))
00224 return;
00225
00226 data = (int *)at->data;
00227 data[0] = (int)start;
00228 data[1] = (int)stop;
00229 (void) SLang_push_array (at, 1);
00230 }
00231
00232 static void _pcre_nth_substr (PCRE_Type *pt, char *str, int *np)
00233 {
00234 unsigned int start, stop;
00235 unsigned int len;
00236
00237 len = strlen (str);
00238
00239 if ((-1 == get_nth_start_stop (pt, (unsigned int) *np, &start, &stop))
00240 || (start > len) || (stop > len))
00241 {
00242 SLang_push_null ();
00243 return;
00244 }
00245
00246 str = SLang_create_nslstring (str + start, stop - start);
00247 (void) SLang_push_string (str);
00248 SLang_free_slstring (str);
00249 }
00250
00251
00252
00253
00254
00255
00256
00257
00258
00259
00260
00261
00262
00263
00264
00265
00266
00267
00268
/* Translate an S-Lang style regular expression into PCRE syntax.
 *
 * Rules applied while copying the pattern:
 *   - the bare characters { } ( ) # | are escaped with a backslash unless
 *     they appear after an opening '[' that has not been closed by ']';
 *   - \< and \> both become \b;
 *   - \( \) \{ \} lose their backslash;
 *   - \C becomes (?i) and \c becomes (?-i);
 *   - any other backslash sequence is copied unchanged;
 *   - a lone backslash at the very end of the pattern is dropped.
 *
 * Returns a new slstring (to be released with SLang_free_slstring), or
 * NULL if memory could not be allocated. */
static char *_slang_to_pcre (char *slpattern)
{
   char *buf, *out, *result;
   const char *in;
   unsigned int n;
   int inside_class = 0;

   /* 3 bytes per input byte is always enough: a plain character yields at
    * most 2 bytes, and a 2-byte backslash sequence yields at most 5. */
   n = strlen (slpattern);
   buf = SLmalloc (3*n + 1);
   if (NULL == buf)
     return NULL;

   out = buf;
   for (in = slpattern; *in != 0; in++)
     {
        char c = *in;

        if (c != '\\')
          {
             if (c == '[')
               inside_class = 1;
             else if (c == ']')
               inside_class = 0;
             else if ((NULL != strchr ("{}()#|", c)) && (0 == inside_class))
               *out++ = '\\';

             *out++ = c;
             continue;
          }

        /* Backslash sequence: decide based on the character after it. */
        c = in[1];
        if (c == 0)
          continue;             /* trailing backslash: emit nothing */
        in++;                   /* consume the escaped character */

        if ((c == '<') || (c == '>'))
          {
             memcpy (out, "\\b", 2);
             out += 2;
          }
        else if (c == 'C')
          {
             memcpy (out, "(?i)", 4);
             out += 4;
          }
        else if (c == 'c')
          {
             memcpy (out, "(?-i)", 5);
             out += 5;
          }
        else
          {
             /* Grouping/brace characters drop the backslash; everything
              * else keeps it. */
             if (NULL == strchr ("(){}", c))
               *out++ = '\\';
             *out++ = c;
          }
     }
   *out = 0;

   result = SLang_create_slstring (buf);
   SLfree (buf);
   return result;
}
00352
/* Intrinsic: slang_to_pcre (pattern).
 *
 * Pushes the PCRE translation of an S-Lang regular expression, as produced
 * by _slang_to_pcre.  The translated string is released again after it has
 * been pushed. */
static void slang_to_pcre (char *pattern)
{
   char *converted = _slang_to_pcre (pattern);

   (void) SLang_push_string (converted);
   SLang_free_slstring (converted);
}
00360
00361 static void destroy_pcre (SLtype type, VOID_STAR f)
00362 {
00363 PCRE_Type *pt;
00364 (void) type;
00365
00366 pt = (PCRE_Type *) f;
00367 if (pt->extra != NULL)
00368 pcre_free ((char *) pt->extra);
00369 if (pt->p != NULL)
00370 pcre_free ((char *) pt->p);
00371 free_pcre_type (pt);
00372 }
00373
00374 #define DUMMY_PCRE_TYPE ((unsigned int)-1)
00375 #define P DUMMY_PCRE_TYPE
00376 #define I SLANG_INT_TYPE
00377 #define V SLANG_VOID_TYPE
00378 #define S SLANG_STRING_TYPE
00379 static SLang_Intrin_Fun_Type PCRE_Intrinsics [] =
00380 {
00381 MAKE_INTRINSIC_0("pcre_exec", _pcre_exec, I),
00382 MAKE_INTRINSIC_0("pcre_compile", _pcre_compile, V),
00383 MAKE_INTRINSIC_2("pcre_nth_match", _pcre_nth_match, V, P, I),
00384 MAKE_INTRINSIC_3("pcre_nth_substr", _pcre_nth_substr, V, P, S, I),
00385 MAKE_INTRINSIC_1("slang_to_pcre", slang_to_pcre, V, S),
00386 SLANG_END_INTRIN_FUN_TABLE
00387 };
00388
00389 static SLang_IConstant_Type PCRE_Consts [] =
00390 {
00391
00392 MAKE_ICONSTANT("PCRE_ANCHORED", PCRE_ANCHORED),
00393 MAKE_ICONSTANT("PCRE_CASELESS", PCRE_CASELESS),
00394 MAKE_ICONSTANT("PCRE_DOLLAR_ENDONLY", PCRE_DOLLAR_ENDONLY),
00395 MAKE_ICONSTANT("PCRE_DOTALL", PCRE_DOTALL),
00396 MAKE_ICONSTANT("PCRE_EXTENDED", PCRE_EXTENDED),
00397 MAKE_ICONSTANT("PCRE_EXTRA", PCRE_EXTRA),
00398 MAKE_ICONSTANT("PCRE_MULTILINE", PCRE_MULTILINE),
00399 MAKE_ICONSTANT("PCRE_UNGREEDY", PCRE_UNGREEDY),
00400 MAKE_ICONSTANT("PCRE_UTF8", PCRE_UTF8),
00401
00402
00403 MAKE_ICONSTANT("PCRE_NOTBOL", PCRE_NOTBOL),
00404 MAKE_ICONSTANT("PCRE_NOTEOL", PCRE_NOTEOL),
00405 MAKE_ICONSTANT("PCRE_NOTEMPTY", PCRE_NOTEMPTY),
00406 SLANG_END_ICONST_TABLE
00407 };
00408
00409 #undef P
00410 #undef I
00411 #undef V
00412 #undef S
00413
00414 static void patchup_intrinsic_table (SLang_Intrin_Fun_Type *table,
00415 SLtype dummy, SLtype type)
00416 {
00417 while (table->name != NULL)
00418 {
00419 unsigned int i, nargs;
00420 SLtype *args;
00421
00422 nargs = table->num_args;
00423 args = table->arg_types;
00424 for (i = 0; i < nargs; i++)
00425 {
00426 if (args[i] == dummy)
00427 args[i] = type;
00428 }
00429
00430
00431 if (table->return_type == dummy)
00432 table->return_type = type;
00433
00434 table++;
00435 }
00436 }
00437
00438
00439 static int register_pcre_type (void)
00440 {
00441 SLang_Class_Type *cl;
00442
00443 if (PCRE_Type_Id != 0)
00444 return 0;
00445
00446 if (NULL == (cl = SLclass_allocate_class ("PCRE_Type")))
00447 return -1;
00448
00449 if (-1 == SLclass_set_destroy_function (cl, destroy_pcre))
00450 return -1;
00451
00452
00453
00454
00455 if (-1 == SLclass_register_class (cl, SLANG_VOID_TYPE, sizeof (PCRE_Type), SLANG_CLASS_TYPE_MMT))
00456 return -1;
00457
00458 PCRE_Type_Id = SLclass_get_class_id (cl);
00459 patchup_intrinsic_table (PCRE_Intrinsics, DUMMY_PCRE_TYPE, PCRE_Type_Id);
00460
00461 return 0;
00462 }
00463
/* Allocation hook installed as pcre_malloc: routes PCRE's allocations
 * through SLmalloc. */
static void *do_malloc (size_t n)
{
   char *mem = SLmalloc (n);

   return (void *) mem;
}
00468
/* Deallocation hook installed as pcre_free: releases memory through
 * SLfree, matching do_malloc. */
static void do_free (void *x)
{
   char *mem = (char *) x;

   SLfree (mem);
}
00473
00474 int init_pcre_module_ns (char *ns_name)
00475 {
00476 SLang_NameSpace_Type *ns = SLns_create_namespace (ns_name);
00477 if (ns == NULL)
00478 return -1;
00479
00480 if (-1 == register_pcre_type ())
00481 return -1;
00482
00483 pcre_malloc = do_malloc;
00484 pcre_free = do_free;
00485
00486 if ((-1 == SLns_add_intrin_fun_table (ns, PCRE_Intrinsics, "__PCRE__"))
00487 || (-1 == SLns_add_iconstant_table (ns, PCRE_Consts, NULL)))
00488 return -1;
00489
00490 return 0;
00491 }
00492
00493
00494
/* Module teardown hook.  Intentionally empty: this module performs no
 * cleanup when it is unloaded. */
void deinit_pcre_module (void)
{
   /* nothing to do */
}
00498