diff options
Diffstat (limited to 'utility/ap_pcre.c')
-rw-r--r-- | utility/ap_pcre.c | 344 |
1 files changed, 344 insertions, 0 deletions
diff --git a/utility/ap_pcre.c b/utility/ap_pcre.c new file mode 100644 index 0000000..b2b9767 --- /dev/null +++ b/utility/ap_pcre.c | |||
@@ -0,0 +1,344 @@ | |||
1 | /************************************************* | ||
2 | * Perl-Compatible Regular Expressions * | ||
3 | *************************************************/ | ||
4 | |||
5 | /* | ||
6 | This is a library of functions to support regular expressions whose syntax | ||
7 | and semantics are as close as possible to those of the Perl 5 language. See | ||
8 | the file Tech.Notes for some information on the internals. | ||
9 | |||
10 | This module is a wrapper that provides a POSIX API to the underlying PCRE | ||
11 | functions. | ||
12 | |||
13 | Written by: Philip Hazel <ph10@cam.ac.uk> | ||
14 | |||
15 | Copyright (c) 1997-2004 University of Cambridge | ||
16 | |||
17 | ----------------------------------------------------------------------------- | ||
18 | Redistribution and use in source and binary forms, with or without | ||
19 | modification, are permitted provided that the following conditions are met: | ||
20 | |||
21 | * Redistributions of source code must retain the above copyright notice, | ||
22 | this list of conditions and the following disclaimer. | ||
23 | |||
24 | * Redistributions in binary form must reproduce the above copyright | ||
25 | notice, this list of conditions and the following disclaimer in the | ||
26 | documentation and/or other materials provided with the distribution. | ||
27 | |||
28 | * Neither the name of the University of Cambridge nor the names of its | ||
29 | contributors may be used to endorse or promote products derived from | ||
30 | this software without specific prior written permission. | ||
31 | |||
32 | THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" | ||
33 | AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE | ||
34 | IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE | ||
35 | ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE | ||
36 | LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR | ||
37 | CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF | ||
38 | SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS | ||
39 | INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN | ||
40 | CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) | ||
41 | ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE | ||
42 | POSSIBILITY OF SUCH DAMAGE. | ||
43 | ----------------------------------------------------------------------------- | ||
44 | */ | ||
45 | |||
46 | #include "apr_lib.h" | ||
47 | #include "apr_strings.h" | ||
48 | #include "ap_pcre.h" | ||
49 | #include "pcre.h" | ||
50 | |||
51 | #define APR_WANT_STRFUNC | ||
52 | #include "apr_want.h" | ||
53 | |||
54 | #ifndef POSIX_MALLOC_THRESHOLD | ||
55 | #define POSIX_MALLOC_THRESHOLD (10) | ||
56 | #endif | ||
57 | |||
58 | /* Table of error strings corresponding to POSIX error codes; must be | ||
59 | * kept in synch with include/ap_regex.h's AP_REG_E* definitions. */ | ||
60 | |||
61 | static const char *const pstring[] = { | ||
62 | "", /* Dummy for value 0 */ | ||
63 | "internal error", /* AP_REG_ASSERT */ | ||
64 | "failed to get memory", /* AP_REG_ESPACE */ | ||
65 | "bad argument", /* AP_REG_INVARG */ | ||
66 | "match failed" /* AP_REG_NOMATCH */ | ||
67 | }; | ||
68 | |||
69 | apr_size_t ap_regerror(int errcode, const ap_regex_t *preg, char *errbuf, | ||
70 | apr_size_t errbuf_size) | ||
71 | { | ||
72 | const char *message, *addmessage; | ||
73 | apr_size_t length, addlength; | ||
74 | |||
75 | message | ||
76 | = (errcode >= (int)(sizeof(pstring)/sizeof(char *))) ? "unknown error code" | ||
77 | : pstring[errcode]; | ||
78 | length = strlen(message) + 1; | ||
79 | |||
80 | addmessage = " at offset "; | ||
81 | addlength | ||
82 | = (preg != NULL && (int)preg->re_erroffset != -1) ? strlen(addmessage) | ||
83 | + 6 | ||
84 | : 0; | ||
85 | |||
86 | if (errbuf_size > 0) { | ||
87 | if (addlength > 0 && errbuf_size >= length + addlength) | ||
88 | apr_snprintf(errbuf, sizeof errbuf, "%s%s%-6d", message, | ||
89 | addmessage, (int)preg->re_erroffset); | ||
90 | else { | ||
91 | strncpy(errbuf, message, errbuf_size - 1); | ||
92 | errbuf[errbuf_size-1] = 0; | ||
93 | } | ||
94 | } | ||
95 | |||
96 | return length + addlength; | ||
97 | } | ||
98 | |||
99 | /************************************************* | ||
100 | * Free store held by a regex * | ||
101 | *************************************************/ | ||
102 | |||
103 | void ap_regfree(ap_regex_t *preg) | ||
104 | { | ||
105 | (pcre_free)(preg->re_pcre); | ||
106 | } | ||
107 | |||
108 | /************************************************* | ||
109 | * Compile a regular expression * | ||
110 | *************************************************/ | ||
111 | |||
112 | /* | ||
113 | Arguments: | ||
114 | preg points to a structure for recording the compiled expression | ||
115 | pattern the pattern to compile | ||
116 | cflags compilation flags | ||
117 | |||
118 | Returns: 0 on success | ||
119 | various non-zero codes on failure | ||
120 | */ | ||
121 | |||
122 | int ap_regcomp(ap_regex_t *preg, const char *pattern, int cflags) | ||
123 | { | ||
124 | const char *errorptr; | ||
125 | int erroffset; | ||
126 | int options = 0; | ||
127 | |||
128 | if ((cflags & AP_REG_ICASE) != 0) | ||
129 | options |= PCRE_CASELESS; | ||
130 | if ((cflags & AP_REG_NEWLINE) != 0) | ||
131 | options |= PCRE_MULTILINE; | ||
132 | |||
133 | preg->re_pcre = pcre_compile(pattern, options, &errorptr, &erroffset, NULL); | ||
134 | preg->re_erroffset = erroffset; | ||
135 | |||
136 | if (preg->re_pcre == NULL) | ||
137 | return AP_REG_INVARG; | ||
138 | |||
139 | preg->re_nsub = pcre_info((const pcre *)preg->re_pcre, NULL, NULL); | ||
140 | return 0; | ||
141 | } | ||
142 | |||
143 | /************************************************* | ||
144 | * Match a regular expression * | ||
145 | *************************************************/ | ||
146 | |||
147 | /* Unfortunately, PCRE requires 3 ints of working space for each captured | ||
148 | substring, so we have to get and release working store instead of just using | ||
149 | the POSIX structures as was done in earlier releases when PCRE needed only 2 | ||
150 | ints. However, if the number of possible capturing brackets is small, use a | ||
151 | block of store on the stack, to reduce the use of malloc/free. The threshold is | ||
152 | in a macro that can be changed at configure time. */ | ||
153 | |||
154 | int ap_regexec(const ap_regex_t *preg, const char *string, apr_size_t nmatch, | ||
155 | ap_regmatch_t pmatch[], int eflags) | ||
156 | { | ||
157 | int rc; | ||
158 | int options = 0; | ||
159 | int *ovector= NULL; | ||
160 | int small_ovector[POSIX_MALLOC_THRESHOLD * 3]; | ||
161 | int allocated_ovector = 0; | ||
162 | |||
163 | if ((eflags & AP_REG_NOTBOL) != 0) | ||
164 | options |= PCRE_NOTBOL; | ||
165 | if ((eflags & AP_REG_NOTEOL) != 0) | ||
166 | options |= PCRE_NOTEOL; | ||
167 | |||
168 | ((ap_regex_t *)preg)->re_erroffset = (apr_size_t)(-1); /* Only has meaning after compile */ | ||
169 | |||
170 | if (nmatch > 0) { | ||
171 | if (nmatch <= POSIX_MALLOC_THRESHOLD) { | ||
172 | ovector = &(small_ovector[0]); | ||
173 | } else { | ||
174 | ovector = (int *)malloc(sizeof(int) * nmatch * 3); | ||
175 | if (ovector == NULL) | ||
176 | return AP_REG_ESPACE; | ||
177 | allocated_ovector = 1; | ||
178 | } | ||
179 | } | ||
180 | |||
181 | rc = pcre_exec((const pcre *)preg->re_pcre, NULL, string, | ||
182 | (int)strlen(string), 0, options, ovector, nmatch * 3); | ||
183 | |||
184 | if (rc == 0) | ||
185 | rc = nmatch; /* All captured slots were filled in */ | ||
186 | |||
187 | if (rc >= 0) { | ||
188 | apr_size_t i; | ||
189 | for (i = 0; i < (apr_size_t)rc; i++) { | ||
190 | pmatch[i].rm_so = ovector[i*2]; | ||
191 | pmatch[i].rm_eo = ovector[i*2+1]; | ||
192 | } | ||
193 | if (allocated_ovector) | ||
194 | free(ovector); | ||
195 | for (; i < nmatch; i++) | ||
196 | pmatch[i].rm_so = pmatch[i].rm_eo = -1; | ||
197 | return 0; | ||
198 | } | ||
199 | |||
200 | else { | ||
201 | if (allocated_ovector) | ||
202 | free(ovector); | ||
203 | switch (rc) { | ||
204 | case PCRE_ERROR_NOMATCH: | ||
205 | return AP_REG_NOMATCH; | ||
206 | case PCRE_ERROR_NULL: | ||
207 | return AP_REG_INVARG; | ||
208 | case PCRE_ERROR_BADOPTION: | ||
209 | return AP_REG_INVARG; | ||
210 | case PCRE_ERROR_BADMAGIC: | ||
211 | return AP_REG_INVARG; | ||
212 | case PCRE_ERROR_UNKNOWN_NODE: | ||
213 | return AP_REG_ASSERT; | ||
214 | case PCRE_ERROR_NOMEMORY: | ||
215 | return AP_REG_ESPACE; | ||
216 | #ifdef PCRE_ERROR_MATCHLIMIT | ||
217 | case PCRE_ERROR_MATCHLIMIT: return AP_REG_ESPACE; | ||
218 | #endif | ||
219 | #ifdef PCRE_ERROR_BADUTF8 | ||
220 | case PCRE_ERROR_BADUTF8: return AP_REG_INVARG; | ||
221 | #endif | ||
222 | #ifdef PCRE_ERROR_BADUTF8_OFFSET | ||
223 | case PCRE_ERROR_BADUTF8_OFFSET: return AP_REG_INVARG; | ||
224 | #endif | ||
225 | default: | ||
226 | return AP_REG_ASSERT; | ||
227 | } | ||
228 | } | ||
229 | } | ||
230 | |||
231 | /* | ||
232 | * Here's a pool-based interface to the POSIX-esque ap_regcomp(). | ||
233 | * Note that we return ap_regex_t instead of being passed one. | ||
234 | * The reason is that if you use an already-used ap_regex_t structure, | ||
235 | * the memory that you've already allocated gets forgotten, and | ||
236 | * regfree() doesn't clear it. So we don't allow it. | ||
237 | */ | ||
238 | |||
239 | static apr_status_t regex_cleanup(void *preg) | ||
240 | { | ||
241 | ap_regfree((ap_regex_t *) preg); | ||
242 | return APR_SUCCESS; | ||
243 | } | ||
244 | |||
245 | ap_regex_t *ap_pregcomp(apr_pool_t *p, const char *pattern, int cflags) | ||
246 | { | ||
247 | ap_regex_t *preg = apr_palloc(p, sizeof *preg); | ||
248 | |||
249 | if (ap_regcomp(preg, pattern, cflags)) { | ||
250 | return NULL; | ||
251 | } | ||
252 | |||
253 | apr_pool_cleanup_register(p, (void *) preg, regex_cleanup, | ||
254 | apr_pool_cleanup_null); | ||
255 | |||
256 | return preg; | ||
257 | } | ||
258 | |||
259 | void ap_pregfree(apr_pool_t *p, ap_regex_t *reg) | ||
260 | { | ||
261 | ap_regfree(reg); | ||
262 | apr_pool_cleanup_kill(p, (void *) reg, regex_cleanup); | ||
263 | } | ||
264 | |||
265 | /* This function substitutes for $0-$9, filling in regular expression | ||
266 | * submatches. Pass it the same nmatch and pmatch arguments that you | ||
267 | * passed ap_regexec(). pmatch should not be greater than the maximum number | ||
268 | * of subexpressions - i.e. one more than the re_nsub member of ap_regex_t. | ||
269 | * | ||
270 | * input should be the string with the $-expressions, source should be the | ||
271 | * string that was matched against. | ||
272 | * | ||
273 | * It returns the substituted string, or NULL on error. | ||
274 | * | ||
275 | * Parts of this code are based on Henry Spencer's regsub(), from his | ||
276 | * AT&T V8 regexp package. | ||
277 | */ | ||
278 | |||
279 | char * ap_pregsub(apr_pool_t *p, const char *input, const char *source, | ||
280 | size_t nmatch, ap_regmatch_t pmatch[]) | ||
281 | { | ||
282 | const char *src = input; | ||
283 | char *dest, *dst; | ||
284 | char c; | ||
285 | size_t no; | ||
286 | int len; | ||
287 | |||
288 | if (!source) | ||
289 | return NULL; | ||
290 | if (!nmatch) | ||
291 | return apr_pstrdup(p, src); | ||
292 | |||
293 | /* First pass, find the size */ | ||
294 | |||
295 | len = 0; | ||
296 | |||
297 | while ((c = *src++) != '\0') { | ||
298 | if (c == '&') | ||
299 | no = 0; | ||
300 | else if (c == '$' && apr_isdigit(*src)) | ||
301 | no = *src++ - '0'; | ||
302 | else | ||
303 | no = 10; | ||
304 | |||
305 | if (no> 9) { /* Ordinary character. */ | ||
306 | if (c == '\\' && (*src == '$' || *src == '&')) | ||
307 | c = *src++; | ||
308 | len++; | ||
309 | } else if (no < nmatch && pmatch[no].rm_so < pmatch[no].rm_eo) { | ||
310 | len += pmatch[no].rm_eo - pmatch[no].rm_so; | ||
311 | } | ||
312 | |||
313 | } | ||
314 | |||
315 | dest = dst = apr_pcalloc(p, len + 1); | ||
316 | |||
317 | /* Now actually fill in the string */ | ||
318 | |||
319 | src = input; | ||
320 | |||
321 | while ((c = *src++) != '\0') { | ||
322 | if (c == '&') | ||
323 | no = 0; | ||
324 | else if (c == '$' && apr_isdigit(*src)) | ||
325 | no = *src++ - '0'; | ||
326 | else | ||
327 | no = 10; | ||
328 | |||
329 | if (no> 9) { /* Ordinary character. */ | ||
330 | if (c == '\\' && (*src == '$' || *src == '&')) | ||
331 | c = *src++; | ||
332 | *dst++ = c; | ||
333 | } else if (no < nmatch && pmatch[no].rm_so < pmatch[no].rm_eo) { | ||
334 | len = pmatch[no].rm_eo - pmatch[no].rm_so; | ||
335 | memcpy(dst, source + pmatch[no].rm_so, len); | ||
336 | dst += len; | ||
337 | } | ||
338 | |||
339 | } | ||
340 | *dst = '\0'; | ||
341 | |||
342 | return dest; | ||
343 | } | ||
344 | /* End of pcreposix.c */ | ||