summaryrefslogtreecommitdiffstats
path: root/utility/ap_pcre.c
diff options
context:
space:
mode:
Diffstat (limited to 'utility/ap_pcre.c')
-rw-r--r--utility/ap_pcre.c344
1 files changed, 344 insertions, 0 deletions
diff --git a/utility/ap_pcre.c b/utility/ap_pcre.c
new file mode 100644
index 0000000..b2b9767
--- /dev/null
+++ b/utility/ap_pcre.c
@@ -0,0 +1,344 @@
1/*************************************************
2 * Perl-Compatible Regular Expressions *
3 *************************************************/
4
5/*
6 This is a library of functions to support regular expressions whose syntax
7 and semantics are as close as possible to those of the Perl 5 language. See
8 the file Tech.Notes for some information on the internals.
9
10 This module is a wrapper that provides a POSIX API to the underlying PCRE
11 functions.
12
13 Written by: Philip Hazel <ph10@cam.ac.uk>
14
15 Copyright (c) 1997-2004 University of Cambridge
16
17 -----------------------------------------------------------------------------
18 Redistribution and use in source and binary forms, with or without
19 modification, are permitted provided that the following conditions are met:
20
21 * Redistributions of source code must retain the above copyright notice,
22 this list of conditions and the following disclaimer.
23
24 * Redistributions in binary form must reproduce the above copyright
25 notice, this list of conditions and the following disclaimer in the
26 documentation and/or other materials provided with the distribution.
27
28 * Neither the name of the University of Cambridge nor the names of its
29 contributors may be used to endorse or promote products derived from
30 this software without specific prior written permission.
31
32 THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
33 AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
34 IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
35 ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
36 LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
37 CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
38 SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
39 INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
40 CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
41 ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
42 POSSIBILITY OF SUCH DAMAGE.
43 -----------------------------------------------------------------------------
44 */
45
46#include "apr_lib.h"
47#include "apr_strings.h"
48#include "ap_pcre.h"
49#include "pcre.h"
50
51#define APR_WANT_STRFUNC
52#include "apr_want.h"
53
54#ifndef POSIX_MALLOC_THRESHOLD
55#define POSIX_MALLOC_THRESHOLD (10)
56#endif
57
/* Human-readable text for each POSIX-style error code, indexed by the
 * code's numeric value.  The order must stay in sync with the AP_REG_E*
 * definitions in include/ap_regex.h. */

static const char *const pstring[] = {
    /* 0: dummy entry  */ "",
    /* AP_REG_ASSERT   */ "internal error",
    /* AP_REG_ESPACE   */ "failed to get memory",
    /* AP_REG_INVARG   */ "bad argument",
    /* AP_REG_NOMATCH  */ "match failed"
};
68
69apr_size_t ap_regerror(int errcode, const ap_regex_t *preg, char *errbuf,
70 apr_size_t errbuf_size)
71{
72 const char *message, *addmessage;
73 apr_size_t length, addlength;
74
75 message
76 = (errcode >= (int)(sizeof(pstring)/sizeof(char *))) ? "unknown error code"
77 : pstring[errcode];
78 length = strlen(message) + 1;
79
80 addmessage = " at offset ";
81 addlength
82 = (preg != NULL && (int)preg->re_erroffset != -1) ? strlen(addmessage)
83 + 6
84 : 0;
85
86 if (errbuf_size > 0) {
87 if (addlength > 0 && errbuf_size >= length + addlength)
88 apr_snprintf(errbuf, sizeof errbuf, "%s%s%-6d", message,
89 addmessage, (int)preg->re_erroffset);
90 else {
91 strncpy(errbuf, message, errbuf_size - 1);
92 errbuf[errbuf_size-1] = 0;
93 }
94 }
95
96 return length + addlength;
97}
98
99/*************************************************
100 * Free store held by a regex *
101 *************************************************/
102
103void ap_regfree(ap_regex_t *preg)
104{
105 (pcre_free)(preg->re_pcre);
106}
107
108/*************************************************
109 * Compile a regular expression *
110 *************************************************/
111
112/*
113 Arguments:
114 preg points to a structure for recording the compiled expression
115 pattern the pattern to compile
116 cflags compilation flags
117
118 Returns: 0 on success
119 various non-zero codes on failure
120 */
121
122int ap_regcomp(ap_regex_t *preg, const char *pattern, int cflags)
123{
124 const char *errorptr;
125 int erroffset;
126 int options = 0;
127
128 if ((cflags & AP_REG_ICASE) != 0)
129 options |= PCRE_CASELESS;
130 if ((cflags & AP_REG_NEWLINE) != 0)
131 options |= PCRE_MULTILINE;
132
133 preg->re_pcre = pcre_compile(pattern, options, &errorptr, &erroffset, NULL);
134 preg->re_erroffset = erroffset;
135
136 if (preg->re_pcre == NULL)
137 return AP_REG_INVARG;
138
139 preg->re_nsub = pcre_info((const pcre *)preg->re_pcre, NULL, NULL);
140 return 0;
141}
142
143/*************************************************
144 * Match a regular expression *
145 *************************************************/
146
147/* Unfortunately, PCRE requires 3 ints of working space for each captured
148 substring, so we have to get and release working store instead of just using
149 the POSIX structures as was done in earlier releases when PCRE needed only 2
150 ints. However, if the number of possible capturing brackets is small, use a
151 block of store on the stack, to reduce the use of malloc/free. The threshold is
152 in a macro that can be changed at configure time. */
153
154int ap_regexec(const ap_regex_t *preg, const char *string, apr_size_t nmatch,
155 ap_regmatch_t pmatch[], int eflags)
156{
157 int rc;
158 int options = 0;
159 int *ovector= NULL;
160 int small_ovector[POSIX_MALLOC_THRESHOLD * 3];
161 int allocated_ovector = 0;
162
163 if ((eflags & AP_REG_NOTBOL) != 0)
164 options |= PCRE_NOTBOL;
165 if ((eflags & AP_REG_NOTEOL) != 0)
166 options |= PCRE_NOTEOL;
167
168 ((ap_regex_t *)preg)->re_erroffset = (apr_size_t)(-1); /* Only has meaning after compile */
169
170 if (nmatch > 0) {
171 if (nmatch <= POSIX_MALLOC_THRESHOLD) {
172 ovector = &(small_ovector[0]);
173 } else {
174 ovector = (int *)malloc(sizeof(int) * nmatch * 3);
175 if (ovector == NULL)
176 return AP_REG_ESPACE;
177 allocated_ovector = 1;
178 }
179 }
180
181 rc = pcre_exec((const pcre *)preg->re_pcre, NULL, string,
182 (int)strlen(string), 0, options, ovector, nmatch * 3);
183
184 if (rc == 0)
185 rc = nmatch; /* All captured slots were filled in */
186
187 if (rc >= 0) {
188 apr_size_t i;
189 for (i = 0; i < (apr_size_t)rc; i++) {
190 pmatch[i].rm_so = ovector[i*2];
191 pmatch[i].rm_eo = ovector[i*2+1];
192 }
193 if (allocated_ovector)
194 free(ovector);
195 for (; i < nmatch; i++)
196 pmatch[i].rm_so = pmatch[i].rm_eo = -1;
197 return 0;
198 }
199
200 else {
201 if (allocated_ovector)
202 free(ovector);
203 switch (rc) {
204 case PCRE_ERROR_NOMATCH:
205 return AP_REG_NOMATCH;
206 case PCRE_ERROR_NULL:
207 return AP_REG_INVARG;
208 case PCRE_ERROR_BADOPTION:
209 return AP_REG_INVARG;
210 case PCRE_ERROR_BADMAGIC:
211 return AP_REG_INVARG;
212 case PCRE_ERROR_UNKNOWN_NODE:
213 return AP_REG_ASSERT;
214 case PCRE_ERROR_NOMEMORY:
215 return AP_REG_ESPACE;
216#ifdef PCRE_ERROR_MATCHLIMIT
217 case PCRE_ERROR_MATCHLIMIT: return AP_REG_ESPACE;
218#endif
219#ifdef PCRE_ERROR_BADUTF8
220 case PCRE_ERROR_BADUTF8: return AP_REG_INVARG;
221#endif
222#ifdef PCRE_ERROR_BADUTF8_OFFSET
223 case PCRE_ERROR_BADUTF8_OFFSET: return AP_REG_INVARG;
224#endif
225 default:
226 return AP_REG_ASSERT;
227 }
228 }
229}
230
231/*
232 * Here's a pool-based interface to the POSIX-esque ap_regcomp().
233 * Note that we return ap_regex_t instead of being passed one.
234 * The reason is that if you use an already-used ap_regex_t structure,
235 * the memory that you've already allocated gets forgotten, and
236 * regfree() doesn't clear it. So we don't allow it.
237 */
238
239static apr_status_t regex_cleanup(void *preg)
240{
241 ap_regfree((ap_regex_t *) preg);
242 return APR_SUCCESS;
243}
244
245ap_regex_t *ap_pregcomp(apr_pool_t *p, const char *pattern, int cflags)
246{
247 ap_regex_t *preg = apr_palloc(p, sizeof *preg);
248
249 if (ap_regcomp(preg, pattern, cflags)) {
250 return NULL;
251 }
252
253 apr_pool_cleanup_register(p, (void *) preg, regex_cleanup,
254 apr_pool_cleanup_null);
255
256 return preg;
257}
258
259void ap_pregfree(apr_pool_t *p, ap_regex_t *reg)
260{
261 ap_regfree(reg);
262 apr_pool_cleanup_kill(p, (void *) reg, regex_cleanup);
263}
264
265/* This function substitutes for $0-$9, filling in regular expression
266 * submatches. Pass it the same nmatch and pmatch arguments that you
267 * passed ap_regexec(). pmatch should not be greater than the maximum number
268 * of subexpressions - i.e. one more than the re_nsub member of ap_regex_t.
269 *
270 * input should be the string with the $-expressions, source should be the
271 * string that was matched against.
272 *
273 * It returns the substituted string, or NULL on error.
274 *
275 * Parts of this code are based on Henry Spencer's regsub(), from his
276 * AT&T V8 regexp package.
277 */
278
279char * ap_pregsub(apr_pool_t *p, const char *input, const char *source,
280 size_t nmatch, ap_regmatch_t pmatch[])
281{
282 const char *src = input;
283 char *dest, *dst;
284 char c;
285 size_t no;
286 int len;
287
288 if (!source)
289 return NULL;
290 if (!nmatch)
291 return apr_pstrdup(p, src);
292
293 /* First pass, find the size */
294
295 len = 0;
296
297 while ((c = *src++) != '\0') {
298 if (c == '&')
299 no = 0;
300 else if (c == '$' && apr_isdigit(*src))
301 no = *src++ - '0';
302 else
303 no = 10;
304
305 if (no> 9) { /* Ordinary character. */
306 if (c == '\\' && (*src == '$' || *src == '&'))
307 c = *src++;
308 len++;
309 } else if (no < nmatch && pmatch[no].rm_so < pmatch[no].rm_eo) {
310 len += pmatch[no].rm_eo - pmatch[no].rm_so;
311 }
312
313 }
314
315 dest = dst = apr_pcalloc(p, len + 1);
316
317 /* Now actually fill in the string */
318
319 src = input;
320
321 while ((c = *src++) != '\0') {
322 if (c == '&')
323 no = 0;
324 else if (c == '$' && apr_isdigit(*src))
325 no = *src++ - '0';
326 else
327 no = 10;
328
329 if (no> 9) { /* Ordinary character. */
330 if (c == '\\' && (*src == '$' || *src == '&'))
331 c = *src++;
332 *dst++ = c;
333 } else if (no < nmatch && pmatch[no].rm_so < pmatch[no].rm_eo) {
334 len = pmatch[no].rm_eo - pmatch[no].rm_so;
335 memcpy(dst, source + pmatch[no].rm_so, len);
336 dst += len;
337 }
338
339 }
340 *dst = '\0';
341
342 return dest;
343}
344/* End of pcreposix.c */