Main Page | Class List | Directories | File List | Class Members | File Members

pcre-module.c

Go to the documentation of this file.
00001 #include "config.h"
00002 
00003 #include <stdio.h>
00004 #include <slang.h>
00005 #include <string.h>
00006 #include <pcre.h>
00007 
/* S-Lang module declaration for the "pcre" module. */
SLANG_MODULE(pcre);

/* Class id of PCRE_Type.  It is 0 until register_pcre_type stores the id
 * obtained from SLclass_get_class_id; a non-zero value means the class has
 * already been registered.
 */
static int PCRE_Type_Id = 0;
00011 
00012 typedef struct
00013 {
00014    pcre *p;
00015    pcre_extra *extra;
00016    int *ovector;
00017    unsigned int ovector_len;           /* must be a multiple of 3 */
00018    unsigned int num_matches;           /* return value of pcre_exec (>= 1)*/
00019 }
00020 PCRE_Type;
00021 
00022 static void free_pcre_type (PCRE_Type *pt)
00023 {
00024    if (pt->ovector != NULL)
00025      SLfree ((char *) pt->ovector);
00026 
00027    SLfree ((char *) pt);
00028 }
00029 
00030 static SLang_MMT_Type *allocate_pcre_type (pcre *p, pcre_extra *extra)
00031 {
00032    PCRE_Type *pt;
00033    SLang_MMT_Type *mmt;
00034    int ovector_len;
00035 
00036    pt = (PCRE_Type *) SLmalloc (sizeof (PCRE_Type));
00037    if (pt == NULL)
00038      return NULL;
00039    memset ((char *) pt, 0, sizeof (PCRE_Type));
00040 
00041    pt->p = p;
00042    pt->extra = extra;
00043 
00044    if (0 != pcre_fullinfo (p, extra, PCRE_INFO_CAPTURECOUNT, &ovector_len))
00045      {
00046         free_pcre_type (pt);
00047         SLang_verror (SL_INTRINSIC_ERROR, "pcre_fullinfo failed");
00048         return NULL;
00049      }
00050 
00051    ovector_len += 1;                   /* allow for pattern matched */
00052    ovector_len *= 3;                   /* required to be multiple of 3 */
00053    if (NULL == (pt->ovector = (int *)SLmalloc (ovector_len * sizeof (int))))
00054      {
00055         free_pcre_type (pt);
00056         return NULL;
00057      }
00058    pt->ovector_len = ovector_len;
00059 
00060    if (NULL == (mmt = SLang_create_mmt (PCRE_Type_Id, (VOID_STAR) pt)))
00061      {
00062         free_pcre_type (pt);
00063         return NULL;
00064      }   
00065    return mmt;
00066 }
00067 
00068 static int _pcre_compile_1 (char *pattern, int options)
00069 {
00070    pcre *p;
00071    pcre_extra *extra;
00072    SLCONST char *err;
00073    int erroffset;
00074    unsigned char *table;
00075    SLang_MMT_Type *mmt;
00076 
00077    table = NULL;
00078    p = pcre_compile (pattern, options, &err, &erroffset, table);
00079    if (NULL == p)
00080      {
00081         SLang_verror (SL_Parse_Error, "Error compiling pattern '%s' at offset %d: %s", 
00082                       pattern, erroffset, err);
00083         return -1;
00084      }
00085 
00086    extra = pcre_study (p, 0, &err);
00087    /* apparantly, a NULL return is ok */
00088    if (err != NULL)
00089      {
00090         SLang_verror (SL_INTRINSIC_ERROR, "pcre_study failed: %s", err);
00091         pcre_free (p);
00092         return -1;
00093      }
00094    
00095    if (NULL == (mmt = allocate_pcre_type (p, extra)))
00096      {
00097         pcre_free ((char *) p);
00098         pcre_free ((char *) extra);
00099         return -1;
00100      }
00101 
00102    if (-1 == SLang_push_mmt (mmt))
00103      {
00104         SLang_free_mmt (mmt);
00105         return -1;
00106      }
00107    return 0;
00108 }
00109 
00110 static void _pcre_compile (void)
00111 {
00112    char *pattern;
00113    int options = 0;
00114    
00115    switch (SLang_Num_Function_Args)
00116      {
00117       case 2:
00118         if (-1 == SLang_pop_integer (&options))
00119           return;
00120         /* drop */
00121       case 1:
00122       default:
00123         if (-1 == SLang_pop_slstring (&pattern))
00124           return;
00125      }
00126    (void) _pcre_compile_1 (pattern, options);
00127    SLang_free_slstring (pattern);
00128 }
00129 
00130 
00131 
00132 /* returns number of matches */
00133 static int _pcre_exec_1 (PCRE_Type *pt, char *str, int pos, int options)
00134 {
00135    int rc;
00136    unsigned int len;
00137    
00138    pt->num_matches = 0;
00139    len = strlen (str);
00140    if ((unsigned int) pos > len)
00141      return 0;
00142 
00143    rc = pcre_exec (pt->p, pt->extra, str, len, pos,
00144                    options, pt->ovector, pt->ovector_len);
00145 
00146    if (rc == PCRE_ERROR_NOMATCH)
00147      return 0;
00148 
00149    if (rc <= 0)
00150      {
00151         SLang_verror (SL_INTRINSIC_ERROR, "pcre_exec returned %d", rc);
00152         return -1;
00153      }
00154    pt->num_matches = (unsigned int) rc;
00155    return rc;
00156 }
00157 
00158 static int _pcre_exec (void)
00159 {
00160    PCRE_Type *p;
00161    SLang_MMT_Type *mmt;
00162    char *str;
00163    int pos = 0;
00164    int options = 0;
00165    int ret = -1;
00166 
00167    switch (SLang_Num_Function_Args)
00168      {
00169       case 4:
00170         if (-1 == SLang_pop_integer (&options))
00171           return -1;
00172       case 3:
00173         if (-1 == SLang_pop_integer (&pos))
00174           return -1;
00175       default:
00176         if (-1 == SLang_pop_slstring (&str))
00177           return -1;
00178 
00179         if (NULL == (mmt = SLang_pop_mmt (PCRE_Type_Id)))
00180           goto free_and_return;
00181         p = (PCRE_Type *)SLang_object_from_mmt (mmt);
00182      }
00183    ret = _pcre_exec_1 (p, str, pos, options);
00184    
00185    free_and_return:
00186    SLang_free_slstring (str);
00187    SLang_free_mmt (mmt);
00188    return ret;
00189 }
00190 
00191 
00192 static int get_nth_start_stop (PCRE_Type *pt, unsigned int n, 
00193                                unsigned int *a, unsigned int *b)
00194 {
00195    int start, stop;
00196 
00197    if (n >= pt->num_matches) 
00198      return -1;
00199    
00200    start = pt->ovector[2*n];
00201    stop = pt->ovector[2*n+1];
00202    if ((start < 0) || (stop < start))
00203      return -1;
00204    
00205    *a = (unsigned int) start;
00206    *b = (unsigned int) stop;
00207    return 0;
00208 }
00209 
00210 static void _pcre_nth_match (PCRE_Type *pt, int *np)
00211 {
00212    unsigned int start, stop;
00213    SLang_Array_Type *at;
00214    SLindex_Type two = 2;
00215    int *data;
00216 
00217    if (-1 == get_nth_start_stop (pt, (unsigned int) *np, &start, &stop))
00218      {
00219         SLang_push_null ();
00220         return;
00221      }
00222    
00223    if (NULL == (at = SLang_create_array (SLANG_INT_TYPE, 0, NULL, &two, 1)))
00224      return;
00225    
00226    data = (int *)at->data;
00227    data[0] = (int)start;
00228    data[1] = (int)stop;
00229    (void) SLang_push_array (at, 1);
00230 }
00231 
00232 static void _pcre_nth_substr (PCRE_Type *pt, char *str, int *np)
00233 {
00234    unsigned int start, stop;
00235    unsigned int len;
00236    
00237    len = strlen (str);
00238 
00239    if ((-1 == get_nth_start_stop (pt, (unsigned int) *np, &start, &stop))
00240        || (start > len) || (stop > len))
00241      {
00242         SLang_push_null ();
00243         return;
00244      }
00245    
00246    str = SLang_create_nslstring (str + start, stop - start);
00247    (void) SLang_push_string (str);
00248    SLang_free_slstring (str);
00249 }
00250 
/* Convert an S-Lang regular expression into a PCRE pattern.
 *
 * Outside of a [...] bracket expression these characters gain a backslash:
 *    (     -->   \(
 *    )     -->   \)
 *    #     -->   \#
 *    |     -->   \|
 *    {     -->   \{
 *    }     -->   \}
 * Backslash sequences are rewritten as follows:
 *   \<     -->   \b
 *   \>     -->   \b
 *   \C     -->   (?i)
 *   \c     -->   (?-i)
 *   \(     -->   (
 *   \)     -->   )
 *   \{     -->   {
 *   \}     -->   }
 * Any other backslash sequence is copied unchanged, and a backslash that is
 * the last character of the pattern is dropped.
 *
 * Returns the converted pattern as a string created with
 * SLang_create_slstring, or NULL if the work buffer cannot be allocated.
 */
static char *_slang_to_pcre (char *slpattern)
{
   char *buf, *out, *in, *result;
   unsigned int len;
   int inside_class = 0;
   char c;

   /* No input character expands to more than three output characters (the
    * longest case is the two-character "\c" becoming the five-character
    * "(?-i)"), so 3*len + 1 bytes are always sufficient.
    */
   len = strlen (slpattern);
   buf = SLmalloc (3*len + 1);
   if (buf == NULL)
      return NULL;

   out = buf;
   for (in = slpattern; (c = *in) != 0; in++)
   {
      const char *expansion;

      if (c != '\\')
      {
         if (c == '[')
            inside_class = 1;
         else if (c == ']')
            inside_class = 0;
         else if ((inside_class == 0) && (NULL != strchr ("{}()#|", c)))
            *out++ = '\\';

         *out++ = c;
         continue;
      }

      /* Backslash sequence: inspect the escaped character. */
      c = *++in;
      if (c == 0)
         break;                 /* trailing backslash: nothing is emitted */

      if ((c == '<') || (c == '>'))
         expansion = "\\b";
      else if (c == 'C')
         expansion = "(?i)";
      else if (c == 'c')
         expansion = "(?-i)";
      else
         expansion = NULL;

      if (expansion != NULL)
      {
         while (*expansion != 0)
            *out++ = *expansion++;
         continue;
      }

      /* Escaped grouping and interval characters lose their backslash;
       * every other escaped character keeps it.
       */
      if (NULL == strchr ("(){}", c))
         *out++ = '\\';
      *out++ = c;
   }
   *out = 0;

   result = SLang_create_slstring (buf);
   SLfree (buf);
   return result;
}
00352 
/* Intrinsic: slang_to_pcre (pattern)
 *
 * Pushes the PCRE form of an S-Lang regular expression.
 */
static void slang_to_pcre (char *pattern)
{
   char *converted = _slang_to_pcre (pattern);

   /* converted may be NULL if the conversion failed; it is handed to both
    * calls below without a check, which the original code notes is fine.
    */
   (void) SLang_push_string (converted);
   SLang_free_slstring (converted);
}
00360 
00361 static void destroy_pcre (SLtype type, VOID_STAR f)
00362 {
00363    PCRE_Type *pt;
00364    (void) type;
00365    
00366    pt = (PCRE_Type *) f;
00367    if (pt->extra != NULL)
00368      pcre_free ((char *) pt->extra);
00369    if (pt->p != NULL)
00370      pcre_free ((char *) pt->p);
00371    free_pcre_type (pt);
00372 }
00373 
00374 #define DUMMY_PCRE_TYPE ((unsigned int)-1)
00375 #define P DUMMY_PCRE_TYPE
00376 #define I SLANG_INT_TYPE
00377 #define V SLANG_VOID_TYPE
00378 #define S SLANG_STRING_TYPE
00379 static SLang_Intrin_Fun_Type PCRE_Intrinsics [] =
00380 {
00381    MAKE_INTRINSIC_0("pcre_exec", _pcre_exec, I),
00382    MAKE_INTRINSIC_0("pcre_compile", _pcre_compile, V),
00383    MAKE_INTRINSIC_2("pcre_nth_match", _pcre_nth_match, V, P, I),
00384    MAKE_INTRINSIC_3("pcre_nth_substr", _pcre_nth_substr, V, P, S, I),
00385    MAKE_INTRINSIC_1("slang_to_pcre", slang_to_pcre, V, S),
00386    SLANG_END_INTRIN_FUN_TABLE
00387 };
00388 
00389 static SLang_IConstant_Type PCRE_Consts [] =
00390 {
00391    /* compile options */
00392    MAKE_ICONSTANT("PCRE_ANCHORED", PCRE_ANCHORED),
00393    MAKE_ICONSTANT("PCRE_CASELESS", PCRE_CASELESS),
00394    MAKE_ICONSTANT("PCRE_DOLLAR_ENDONLY", PCRE_DOLLAR_ENDONLY),
00395    MAKE_ICONSTANT("PCRE_DOTALL", PCRE_DOTALL),
00396    MAKE_ICONSTANT("PCRE_EXTENDED", PCRE_EXTENDED),
00397    MAKE_ICONSTANT("PCRE_EXTRA", PCRE_EXTRA),
00398    MAKE_ICONSTANT("PCRE_MULTILINE", PCRE_MULTILINE),
00399    MAKE_ICONSTANT("PCRE_UNGREEDY", PCRE_UNGREEDY),
00400    MAKE_ICONSTANT("PCRE_UTF8", PCRE_UTF8),
00401    
00402    /* exec options */
00403    MAKE_ICONSTANT("PCRE_NOTBOL", PCRE_NOTBOL),
00404    MAKE_ICONSTANT("PCRE_NOTEOL", PCRE_NOTEOL),
00405    MAKE_ICONSTANT("PCRE_NOTEMPTY", PCRE_NOTEMPTY),
00406    SLANG_END_ICONST_TABLE
00407 };
00408 
00409 #undef P
00410 #undef I
00411 #undef V
00412 #undef S
00413 
00414 static void patchup_intrinsic_table (SLang_Intrin_Fun_Type *table, 
00415                                      SLtype dummy, SLtype type)
00416 {
00417    while (table->name != NULL)
00418      {
00419         unsigned int i, nargs;
00420         SLtype *args;
00421         
00422         nargs = table->num_args;
00423         args = table->arg_types;
00424         for (i = 0; i < nargs; i++)
00425           {
00426              if (args[i] == dummy)
00427                args[i] = type;
00428           }
00429         
00430         /* For completeness */
00431         if (table->return_type == dummy)
00432           table->return_type = type;
00433 
00434         table++;
00435      }
00436 }
00437 
00438 
00439 static int register_pcre_type (void)
00440 {
00441    SLang_Class_Type *cl;
00442 
00443    if (PCRE_Type_Id != 0)
00444      return 0;
00445 
00446    if (NULL == (cl = SLclass_allocate_class ("PCRE_Type")))
00447      return -1;
00448 
00449    if (-1 == SLclass_set_destroy_function (cl, destroy_pcre))
00450      return -1;
00451 
00452    /* By registering as SLANG_VOID_TYPE, slang will dynamically allocate a
00453     * type.
00454     */
00455    if (-1 == SLclass_register_class (cl, SLANG_VOID_TYPE, sizeof (PCRE_Type), SLANG_CLASS_TYPE_MMT))
00456      return -1;
00457 
00458    PCRE_Type_Id = SLclass_get_class_id (cl);
00459    patchup_intrinsic_table (PCRE_Intrinsics, DUMMY_PCRE_TYPE, PCRE_Type_Id);
00460 
00461    return 0;
00462 }
00463 
/* Allocator installed as pcre_malloc by init_pcre_module_ns; forwards the
 * request to SLmalloc.
 */
static void *do_malloc (size_t n)
{
   char *mem = SLmalloc (n);

   return (void *) mem;
}
00468 
/* Deallocator installed as pcre_free by init_pcre_module_ns; forwards the
 * pointer to SLfree.
 */
static void do_free (void *x)
{
   char *mem = (char *) x;

   SLfree (mem);
}
00473 
00474 int init_pcre_module_ns (char *ns_name)
00475 {
00476    SLang_NameSpace_Type *ns = SLns_create_namespace (ns_name);
00477    if (ns == NULL)
00478      return -1;
00479 
00480    if (-1 == register_pcre_type ())
00481      return -1;
00482 
00483    pcre_malloc = do_malloc;
00484    pcre_free = do_free;
00485 
00486    if ((-1 == SLns_add_intrin_fun_table (ns, PCRE_Intrinsics, "__PCRE__"))
00487        || (-1 == SLns_add_iconstant_table (ns, PCRE_Consts, NULL)))
00488      return -1;
00489 
00490    return 0;
00491 }
00492 
00493 
/* Optional module hook.  This module performs no work here. */
void deinit_pcre_module (void)
{
   /* intentionally empty */
}
00498 

© sourcejam.com 2005-2008