diff options
Diffstat (limited to 'utility/ap_pcre.c')
| -rw-r--r-- | utility/ap_pcre.c | 344 |
1 files changed, 344 insertions, 0 deletions
diff --git a/utility/ap_pcre.c b/utility/ap_pcre.c new file mode 100644 index 0000000..b2b9767 --- /dev/null +++ b/utility/ap_pcre.c | |||
| @@ -0,0 +1,344 @@ | |||
| 1 | /************************************************* | ||
| 2 | * Perl-Compatible Regular Expressions * | ||
| 3 | *************************************************/ | ||
| 4 | |||
| 5 | /* | ||
| 6 | This is a library of functions to support regular expressions whose syntax | ||
| 7 | and semantics are as close as possible to those of the Perl 5 language. See | ||
| 8 | the file Tech.Notes for some information on the internals. | ||
| 9 | |||
| 10 | This module is a wrapper that provides a POSIX API to the underlying PCRE | ||
| 11 | functions. | ||
| 12 | |||
| 13 | Written by: Philip Hazel <ph10@cam.ac.uk> | ||
| 14 | |||
| 15 | Copyright (c) 1997-2004 University of Cambridge | ||
| 16 | |||
| 17 | ----------------------------------------------------------------------------- | ||
| 18 | Redistribution and use in source and binary forms, with or without | ||
| 19 | modification, are permitted provided that the following conditions are met: | ||
| 20 | |||
| 21 | * Redistributions of source code must retain the above copyright notice, | ||
| 22 | this list of conditions and the following disclaimer. | ||
| 23 | |||
| 24 | * Redistributions in binary form must reproduce the above copyright | ||
| 25 | notice, this list of conditions and the following disclaimer in the | ||
| 26 | documentation and/or other materials provided with the distribution. | ||
| 27 | |||
| 28 | * Neither the name of the University of Cambridge nor the names of its | ||
| 29 | contributors may be used to endorse or promote products derived from | ||
| 30 | this software without specific prior written permission. | ||
| 31 | |||
| 32 | THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" | ||
| 33 | AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE | ||
| 34 | IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE | ||
| 35 | ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE | ||
| 36 | LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR | ||
| 37 | CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF | ||
| 38 | SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS | ||
| 39 | INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN | ||
| 40 | CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) | ||
| 41 | ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE | ||
| 42 | POSSIBILITY OF SUCH DAMAGE. | ||
| 43 | ----------------------------------------------------------------------------- | ||
| 44 | */ | ||
| 45 | |||
| 46 | #include "apr_lib.h" | ||
| 47 | #include "apr_strings.h" | ||
| 48 | #include "ap_pcre.h" | ||
| 49 | #include "pcre.h" | ||
| 50 | |||
| 51 | #define APR_WANT_STRFUNC | ||
| 52 | #include "apr_want.h" | ||
| 53 | |||
| 54 | #ifndef POSIX_MALLOC_THRESHOLD | ||
| 55 | #define POSIX_MALLOC_THRESHOLD (10) | ||
| 56 | #endif | ||
| 57 | |||
| 58 | /* Table of error strings corresponding to POSIX error codes; must be | ||
| 59 | * kept in synch with include/ap_regex.h's AP_REG_E* definitions. */ | ||
| 60 | |||
| 61 | static const char *const pstring[] = { | ||
| 62 | "", /* Dummy for value 0 */ | ||
| 63 | "internal error", /* AP_REG_ASSERT */ | ||
| 64 | "failed to get memory", /* AP_REG_ESPACE */ | ||
| 65 | "bad argument", /* AP_REG_INVARG */ | ||
| 66 | "match failed" /* AP_REG_NOMATCH */ | ||
| 67 | }; | ||
| 68 | |||
| 69 | apr_size_t ap_regerror(int errcode, const ap_regex_t *preg, char *errbuf, | ||
| 70 | apr_size_t errbuf_size) | ||
| 71 | { | ||
| 72 | const char *message, *addmessage; | ||
| 73 | apr_size_t length, addlength; | ||
| 74 | |||
| 75 | message | ||
| 76 | = (errcode >= (int)(sizeof(pstring)/sizeof(char *))) ? "unknown error code" | ||
| 77 | : pstring[errcode]; | ||
| 78 | length = strlen(message) + 1; | ||
| 79 | |||
| 80 | addmessage = " at offset "; | ||
| 81 | addlength | ||
| 82 | = (preg != NULL && (int)preg->re_erroffset != -1) ? strlen(addmessage) | ||
| 83 | + 6 | ||
| 84 | : 0; | ||
| 85 | |||
| 86 | if (errbuf_size > 0) { | ||
| 87 | if (addlength > 0 && errbuf_size >= length + addlength) | ||
| 88 | apr_snprintf(errbuf, sizeof errbuf, "%s%s%-6d", message, | ||
| 89 | addmessage, (int)preg->re_erroffset); | ||
| 90 | else { | ||
| 91 | strncpy(errbuf, message, errbuf_size - 1); | ||
| 92 | errbuf[errbuf_size-1] = 0; | ||
| 93 | } | ||
| 94 | } | ||
| 95 | |||
| 96 | return length + addlength; | ||
| 97 | } | ||
| 98 | |||
| 99 | /************************************************* | ||
| 100 | * Free store held by a regex * | ||
| 101 | *************************************************/ | ||
| 102 | |||
| 103 | void ap_regfree(ap_regex_t *preg) | ||
| 104 | { | ||
| 105 | (pcre_free)(preg->re_pcre); | ||
| 106 | } | ||
| 107 | |||
| 108 | /************************************************* | ||
| 109 | * Compile a regular expression * | ||
| 110 | *************************************************/ | ||
| 111 | |||
| 112 | /* | ||
| 113 | Arguments: | ||
| 114 | preg points to a structure for recording the compiled expression | ||
| 115 | pattern the pattern to compile | ||
| 116 | cflags compilation flags | ||
| 117 | |||
| 118 | Returns: 0 on success | ||
| 119 | various non-zero codes on failure | ||
| 120 | */ | ||
| 121 | |||
| 122 | int ap_regcomp(ap_regex_t *preg, const char *pattern, int cflags) | ||
| 123 | { | ||
| 124 | const char *errorptr; | ||
| 125 | int erroffset; | ||
| 126 | int options = 0; | ||
| 127 | |||
| 128 | if ((cflags & AP_REG_ICASE) != 0) | ||
| 129 | options |= PCRE_CASELESS; | ||
| 130 | if ((cflags & AP_REG_NEWLINE) != 0) | ||
| 131 | options |= PCRE_MULTILINE; | ||
| 132 | |||
| 133 | preg->re_pcre = pcre_compile(pattern, options, &errorptr, &erroffset, NULL); | ||
| 134 | preg->re_erroffset = erroffset; | ||
| 135 | |||
| 136 | if (preg->re_pcre == NULL) | ||
| 137 | return AP_REG_INVARG; | ||
| 138 | |||
| 139 | preg->re_nsub = pcre_info((const pcre *)preg->re_pcre, NULL, NULL); | ||
| 140 | return 0; | ||
| 141 | } | ||
| 142 | |||
| 143 | /************************************************* | ||
| 144 | * Match a regular expression * | ||
| 145 | *************************************************/ | ||
| 146 | |||
| 147 | /* Unfortunately, PCRE requires 3 ints of working space for each captured | ||
| 148 | substring, so we have to get and release working store instead of just using | ||
| 149 | the POSIX structures as was done in earlier releases when PCRE needed only 2 | ||
| 150 | ints. However, if the number of possible capturing brackets is small, use a | ||
| 151 | block of store on the stack, to reduce the use of malloc/free. The threshold is | ||
| 152 | in a macro that can be changed at configure time. */ | ||
| 153 | |||
| 154 | int ap_regexec(const ap_regex_t *preg, const char *string, apr_size_t nmatch, | ||
| 155 | ap_regmatch_t pmatch[], int eflags) | ||
| 156 | { | ||
| 157 | int rc; | ||
| 158 | int options = 0; | ||
| 159 | int *ovector= NULL; | ||
| 160 | int small_ovector[POSIX_MALLOC_THRESHOLD * 3]; | ||
| 161 | int allocated_ovector = 0; | ||
| 162 | |||
| 163 | if ((eflags & AP_REG_NOTBOL) != 0) | ||
| 164 | options |= PCRE_NOTBOL; | ||
| 165 | if ((eflags & AP_REG_NOTEOL) != 0) | ||
| 166 | options |= PCRE_NOTEOL; | ||
| 167 | |||
| 168 | ((ap_regex_t *)preg)->re_erroffset = (apr_size_t)(-1); /* Only has meaning after compile */ | ||
| 169 | |||
| 170 | if (nmatch > 0) { | ||
| 171 | if (nmatch <= POSIX_MALLOC_THRESHOLD) { | ||
| 172 | ovector = &(small_ovector[0]); | ||
| 173 | } else { | ||
| 174 | ovector = (int *)malloc(sizeof(int) * nmatch * 3); | ||
| 175 | if (ovector == NULL) | ||
| 176 | return AP_REG_ESPACE; | ||
| 177 | allocated_ovector = 1; | ||
| 178 | } | ||
| 179 | } | ||
| 180 | |||
| 181 | rc = pcre_exec((const pcre *)preg->re_pcre, NULL, string, | ||
| 182 | (int)strlen(string), 0, options, ovector, nmatch * 3); | ||
| 183 | |||
| 184 | if (rc == 0) | ||
| 185 | rc = nmatch; /* All captured slots were filled in */ | ||
| 186 | |||
| 187 | if (rc >= 0) { | ||
| 188 | apr_size_t i; | ||
| 189 | for (i = 0; i < (apr_size_t)rc; i++) { | ||
| 190 | pmatch[i].rm_so = ovector[i*2]; | ||
| 191 | pmatch[i].rm_eo = ovector[i*2+1]; | ||
| 192 | } | ||
| 193 | if (allocated_ovector) | ||
| 194 | free(ovector); | ||
| 195 | for (; i < nmatch; i++) | ||
| 196 | pmatch[i].rm_so = pmatch[i].rm_eo = -1; | ||
| 197 | return 0; | ||
| 198 | } | ||
| 199 | |||
| 200 | else { | ||
| 201 | if (allocated_ovector) | ||
| 202 | free(ovector); | ||
| 203 | switch (rc) { | ||
| 204 | case PCRE_ERROR_NOMATCH: | ||
| 205 | return AP_REG_NOMATCH; | ||
| 206 | case PCRE_ERROR_NULL: | ||
| 207 | return AP_REG_INVARG; | ||
| 208 | case PCRE_ERROR_BADOPTION: | ||
| 209 | return AP_REG_INVARG; | ||
| 210 | case PCRE_ERROR_BADMAGIC: | ||
| 211 | return AP_REG_INVARG; | ||
| 212 | case PCRE_ERROR_UNKNOWN_NODE: | ||
| 213 | return AP_REG_ASSERT; | ||
| 214 | case PCRE_ERROR_NOMEMORY: | ||
| 215 | return AP_REG_ESPACE; | ||
| 216 | #ifdef PCRE_ERROR_MATCHLIMIT | ||
| 217 | case PCRE_ERROR_MATCHLIMIT: return AP_REG_ESPACE; | ||
| 218 | #endif | ||
| 219 | #ifdef PCRE_ERROR_BADUTF8 | ||
| 220 | case PCRE_ERROR_BADUTF8: return AP_REG_INVARG; | ||
| 221 | #endif | ||
| 222 | #ifdef PCRE_ERROR_BADUTF8_OFFSET | ||
| 223 | case PCRE_ERROR_BADUTF8_OFFSET: return AP_REG_INVARG; | ||
| 224 | #endif | ||
| 225 | default: | ||
| 226 | return AP_REG_ASSERT; | ||
| 227 | } | ||
| 228 | } | ||
| 229 | } | ||
| 230 | |||
| 231 | /* | ||
| 232 | * Here's a pool-based interface to the POSIX-esque ap_regcomp(). | ||
| 233 | * Note that we return ap_regex_t instead of being passed one. | ||
| 234 | * The reason is that if you use an already-used ap_regex_t structure, | ||
| 235 | * the memory that you've already allocated gets forgotten, and | ||
| 236 | * regfree() doesn't clear it. So we don't allow it. | ||
| 237 | */ | ||
| 238 | |||
| 239 | static apr_status_t regex_cleanup(void *preg) | ||
| 240 | { | ||
| 241 | ap_regfree((ap_regex_t *) preg); | ||
| 242 | return APR_SUCCESS; | ||
| 243 | } | ||
| 244 | |||
| 245 | ap_regex_t *ap_pregcomp(apr_pool_t *p, const char *pattern, int cflags) | ||
| 246 | { | ||
| 247 | ap_regex_t *preg = apr_palloc(p, sizeof *preg); | ||
| 248 | |||
| 249 | if (ap_regcomp(preg, pattern, cflags)) { | ||
| 250 | return NULL; | ||
| 251 | } | ||
| 252 | |||
| 253 | apr_pool_cleanup_register(p, (void *) preg, regex_cleanup, | ||
| 254 | apr_pool_cleanup_null); | ||
| 255 | |||
| 256 | return preg; | ||
| 257 | } | ||
| 258 | |||
| 259 | void ap_pregfree(apr_pool_t *p, ap_regex_t *reg) | ||
| 260 | { | ||
| 261 | ap_regfree(reg); | ||
| 262 | apr_pool_cleanup_kill(p, (void *) reg, regex_cleanup); | ||
| 263 | } | ||
| 264 | |||
| 265 | /* This function substitutes for $0-$9, filling in regular expression | ||
| 266 | * submatches. Pass it the same nmatch and pmatch arguments that you | ||
| 267 | * passed ap_regexec(). pmatch should not be greater than the maximum number | ||
| 268 | * of subexpressions - i.e. one more than the re_nsub member of ap_regex_t. | ||
| 269 | * | ||
| 270 | * input should be the string with the $-expressions, source should be the | ||
| 271 | * string that was matched against. | ||
| 272 | * | ||
| 273 | * It returns the substituted string, or NULL on error. | ||
| 274 | * | ||
| 275 | * Parts of this code are based on Henry Spencer's regsub(), from his | ||
| 276 | * AT&T V8 regexp package. | ||
| 277 | */ | ||
| 278 | |||
| 279 | char * ap_pregsub(apr_pool_t *p, const char *input, const char *source, | ||
| 280 | size_t nmatch, ap_regmatch_t pmatch[]) | ||
| 281 | { | ||
| 282 | const char *src = input; | ||
| 283 | char *dest, *dst; | ||
| 284 | char c; | ||
| 285 | size_t no; | ||
| 286 | int len; | ||
| 287 | |||
| 288 | if (!source) | ||
| 289 | return NULL; | ||
| 290 | if (!nmatch) | ||
| 291 | return apr_pstrdup(p, src); | ||
| 292 | |||
| 293 | /* First pass, find the size */ | ||
| 294 | |||
| 295 | len = 0; | ||
| 296 | |||
| 297 | while ((c = *src++) != '\0') { | ||
| 298 | if (c == '&') | ||
| 299 | no = 0; | ||
| 300 | else if (c == '$' && apr_isdigit(*src)) | ||
| 301 | no = *src++ - '0'; | ||
| 302 | else | ||
| 303 | no = 10; | ||
| 304 | |||
| 305 | if (no> 9) { /* Ordinary character. */ | ||
| 306 | if (c == '\\' && (*src == '$' || *src == '&')) | ||
| 307 | c = *src++; | ||
| 308 | len++; | ||
| 309 | } else if (no < nmatch && pmatch[no].rm_so < pmatch[no].rm_eo) { | ||
| 310 | len += pmatch[no].rm_eo - pmatch[no].rm_so; | ||
| 311 | } | ||
| 312 | |||
| 313 | } | ||
| 314 | |||
| 315 | dest = dst = apr_pcalloc(p, len + 1); | ||
| 316 | |||
| 317 | /* Now actually fill in the string */ | ||
| 318 | |||
| 319 | src = input; | ||
| 320 | |||
| 321 | while ((c = *src++) != '\0') { | ||
| 322 | if (c == '&') | ||
| 323 | no = 0; | ||
| 324 | else if (c == '$' && apr_isdigit(*src)) | ||
| 325 | no = *src++ - '0'; | ||
| 326 | else | ||
| 327 | no = 10; | ||
| 328 | |||
| 329 | if (no> 9) { /* Ordinary character. */ | ||
| 330 | if (c == '\\' && (*src == '$' || *src == '&')) | ||
| 331 | c = *src++; | ||
| 332 | *dst++ = c; | ||
| 333 | } else if (no < nmatch && pmatch[no].rm_so < pmatch[no].rm_eo) { | ||
| 334 | len = pmatch[no].rm_eo - pmatch[no].rm_so; | ||
| 335 | memcpy(dst, source + pmatch[no].rm_so, len); | ||
| 336 | dst += len; | ||
| 337 | } | ||
| 338 | |||
| 339 | } | ||
| 340 | *dst = '\0'; | ||
| 341 | |||
| 342 | return dest; | ||
| 343 | } | ||
| 344 | /* End of pcreposix.c */ | ||
