summaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorGravatar Edward Rudd 2008-10-22 12:40:58 +0000
committerGravatar Edward Rudd 2008-10-22 12:40:58 +0000
commit0ddd719a72469f732a881c93d4c804e9aca787fe (patch)
treee05821ff5a6ad0f00d63f23090ce4f2ec19bef75
parentcc75ebf7e8560a69a6847f0260cce4772fff440a (diff)
added more config options
included PCRE wrapper from httpd more complete log parser code. fixed NASTY bug with setting values in the hash tables (Need to DUP the strings before setting the keys)
-rw-r--r--Makefile.in2
-rw-r--r--configure.ac2
-rw-r--r--utility/Makefile.in23
-rw-r--r--utility/ap_pcre.c344
-rw-r--r--utility/ap_pcre.h176
-rw-r--r--utility/config.c263
-rw-r--r--utility/config.h59
-rw-r--r--utility/database.c2
-rw-r--r--utility/database.h4
-rw-r--r--utility/logparse.c93
-rw-r--r--utility/logparse.h11
-rw-r--r--utility/mod_log_sql.conf24
-rw-r--r--utility/shell.c15
-rw-r--r--utility/util.c28
-rw-r--r--utility/util.h13
15 files changed, 949 insertions, 110 deletions
diff --git a/Makefile.in b/Makefile.in
index 7d71505..4c60869 100644
--- a/Makefile.in
+++ b/Makefile.in
@@ -1,7 +1,7 @@
1# @configure_input@ 1# @configure_input@
2 2
3# Modify these top variables. 3# Modify these top variables.
4SUBDIRS = docs contrib src parser utility 4SUBDIRS = docs contrib src utility
5 5
6EXTRA_DIST = AUTHORS INSTALL TODO LICENSE CHANGELOG \ 6EXTRA_DIST = AUTHORS INSTALL TODO LICENSE CHANGELOG \
7 build-apache13.bat build-apache2.bat \ 7 build-apache13.bat build-apache2.bat \
diff --git a/configure.ac b/configure.ac
index 43633e3..e706103 100644
--- a/configure.ac
+++ b/configure.ac
@@ -11,6 +11,8 @@ dnl Add a test for a compiler.
11AC_PROG_CC 11AC_PROG_CC
12AC_PROG_LIBTOOL 12AC_PROG_LIBTOOL
13 13
14PKG_CHECK_MODULES(PCRE,libpcre)
15
14APACHE20_VERSION=2.0.40 16APACHE20_VERSION=2.0.40
15APACHE13_VERSION=1.3.20 17APACHE13_VERSION=1.3.20
16CHECK_APACHE($APACHE13_VERSION,$APACHE20_VERSION, 18CHECK_APACHE($APACHE13_VERSION,$APACHE20_VERSION,
diff --git a/utility/Makefile.in b/utility/Makefile.in
index d397b96..69a746c 100644
--- a/utility/Makefile.in
+++ b/utility/Makefile.in
@@ -4,9 +4,11 @@ top_srcdir = @top_srcdir@
4srcdir = @abs_srcdir@ 4srcdir = @abs_srcdir@
5builddir = @abs_builddir@ 5builddir = @abs_builddir@
6 6
7CFLAGS = -g -Wall -fno-strict-aliasing @APR_CFLAGS@ @APR_INCLUDES@ @APU_INCLUDES@ 7#@APR_CFLAGS@
8CFLAGS = -g3 -Wall -fno-strict-aliasing \
9 @APR_INCLUDES@ @APU_INCLUDES@ @PCRE_CFLAGS@
8CPPFLAGS = @APR_CPPFLAGS@ 10CPPFLAGS = @APR_CPPFLAGS@
9LDFLAGS = @APR_LDFLAGS@ @APU_LDFLAGS@ 11LDFLAGS = @APR_LDFLAGS@ @APU_LDFLAGS@ @PCRE_LIBS@
10 12
11ifeq (@OOO_MAINTAIN@,1) 13ifeq (@OOO_MAINTAIN@,1)
12CFLAGS += -Werror 14CFLAGS += -Werror
@@ -16,10 +18,10 @@ STD_DIST = Makefile.in
16 18
17DISTFILES = $(STD_DIST) $(EXTRA_DIST) $(SOURCES) $(HEADERS) 19DISTFILES = $(STD_DIST) $(EXTRA_DIST) $(SOURCES) $(HEADERS)
18 20
19SOURCES = shell.c config.c logparse.c 21SOURCES = shell.c config.c logparse.c ap_pcre.c util.c
20HEADERS = shell.h config.h logparse.h 22HEADERS = shell.h config.h logparse.h ap_pcre.h util.h
21OBJECTS = $(patsubst %.c,%.o,$(SOURCES)) 23OBJECTS = $(SOURCES:.c=.o)
22 24DEPS = $(SOURCES:.c=.d)
23TARGETS = mod_log_sql 25TARGETS = mod_log_sql
24 26
25all: $(TARGETS) 27all: $(TARGETS)
@@ -27,8 +29,15 @@ all: $(TARGETS)
27mod_log_sql: $(OBJECTS) $(HEADERS) 29mod_log_sql: $(OBJECTS) $(HEADERS)
28 $(CC) -o $@ $(OBJECTS) $(LDFLAGS) 30 $(CC) -o $@ $(OBJECTS) $(LDFLAGS)
29 31
# Compile each object with the compiler chosen by configure, the same
# $(CC) the link rule uses, instead of a hard-coded gcc.
%.o: %.c %.d
	$(CC) -c $(CFLAGS) $(CPPFLAGS) $< -o $@
# Generate the per-source dependency fragment that is -include'd below.
%.d: %.c
	$(CC) -MM $(CFLAGS) $(CPPFLAGS) $< -o $@
36
37-include $(DEPS)
38
30clean: 39clean:
31 $(RM) $(OBJECTS) $(TARGETS) 40 $(RM) $(OBJECTS) $(TARGETS) $(DEPS)
32 41
33local-dist: $(DISTFILES) 42local-dist: $(DISTFILES)
34 mkdir -p $(DESTDIR) 43 mkdir -p $(DESTDIR)
diff --git a/utility/ap_pcre.c b/utility/ap_pcre.c
new file mode 100644
index 0000000..b2b9767
--- /dev/null
+++ b/utility/ap_pcre.c
@@ -0,0 +1,344 @@
1/*************************************************
2 * Perl-Compatible Regular Expressions *
3 *************************************************/
4
5/*
6 This is a library of functions to support regular expressions whose syntax
7 and semantics are as close as possible to those of the Perl 5 language. See
8 the file Tech.Notes for some information on the internals.
9
10 This module is a wrapper that provides a POSIX API to the underlying PCRE
11 functions.
12
13 Written by: Philip Hazel <ph10@cam.ac.uk>
14
15 Copyright (c) 1997-2004 University of Cambridge
16
17 -----------------------------------------------------------------------------
18 Redistribution and use in source and binary forms, with or without
19 modification, are permitted provided that the following conditions are met:
20
21 * Redistributions of source code must retain the above copyright notice,
22 this list of conditions and the following disclaimer.
23
24 * Redistributions in binary form must reproduce the above copyright
25 notice, this list of conditions and the following disclaimer in the
26 documentation and/or other materials provided with the distribution.
27
28 * Neither the name of the University of Cambridge nor the names of its
29 contributors may be used to endorse or promote products derived from
30 this software without specific prior written permission.
31
32 THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
33 AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
34 IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
35 ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
36 LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
37 CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
38 SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
39 INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
40 CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
41 ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
42 POSSIBILITY OF SUCH DAMAGE.
43 -----------------------------------------------------------------------------
44 */
45
46#include "apr_lib.h"
47#include "apr_strings.h"
48#include "ap_pcre.h"
49#include "pcre.h"
50
51#define APR_WANT_STRFUNC
52#include "apr_want.h"
53
54#ifndef POSIX_MALLOC_THRESHOLD
55#define POSIX_MALLOC_THRESHOLD (10)
56#endif
57
58/* Table of error strings indexed by the AP_REG_* error codes; must be
59 * kept in sync with the error enum declared in ap_pcre.h. */
60
/* Message text for ap_regerror(), indexed directly by error code.
 * Slot 0 is a placeholder; the remaining slots follow the order noted
 * in the per-entry comments. */
static const char *const pstring[] = {
    "", /* Dummy for value 0 */
    "internal error", /* AP_REG_ASSERT */
    "failed to get memory", /* AP_REG_ESPACE */
    "bad argument", /* AP_REG_INVARG */
    "match failed" /* AP_REG_NOMATCH */
};
68
69apr_size_t ap_regerror(int errcode, const ap_regex_t *preg, char *errbuf,
70 apr_size_t errbuf_size)
71{
72 const char *message, *addmessage;
73 apr_size_t length, addlength;
74
75 message
76 = (errcode >= (int)(sizeof(pstring)/sizeof(char *))) ? "unknown error code"
77 : pstring[errcode];
78 length = strlen(message) + 1;
79
80 addmessage = " at offset ";
81 addlength
82 = (preg != NULL && (int)preg->re_erroffset != -1) ? strlen(addmessage)
83 + 6
84 : 0;
85
86 if (errbuf_size > 0) {
87 if (addlength > 0 && errbuf_size >= length + addlength)
88 apr_snprintf(errbuf, sizeof errbuf, "%s%s%-6d", message,
89 addmessage, (int)preg->re_erroffset);
90 else {
91 strncpy(errbuf, message, errbuf_size - 1);
92 errbuf[errbuf_size-1] = 0;
93 }
94 }
95
96 return length + addlength;
97}
98
99/*************************************************
100 * Free store held by a regex *
101 *************************************************/
102
103void ap_regfree(ap_regex_t *preg)
104{
105 (pcre_free)(preg->re_pcre);
106}
107
108/*************************************************
109 * Compile a regular expression *
110 *************************************************/
111
112/*
113 Arguments:
114 preg points to a structure for recording the compiled expression
115 pattern the pattern to compile
116 cflags compilation flags
117
118 Returns: 0 on success
119 various non-zero codes on failure
120 */
121
122int ap_regcomp(ap_regex_t *preg, const char *pattern, int cflags)
123{
124 const char *errorptr;
125 int erroffset;
126 int options = 0;
127
128 if ((cflags & AP_REG_ICASE) != 0)
129 options |= PCRE_CASELESS;
130 if ((cflags & AP_REG_NEWLINE) != 0)
131 options |= PCRE_MULTILINE;
132
133 preg->re_pcre = pcre_compile(pattern, options, &errorptr, &erroffset, NULL);
134 preg->re_erroffset = erroffset;
135
136 if (preg->re_pcre == NULL)
137 return AP_REG_INVARG;
138
139 preg->re_nsub = pcre_info((const pcre *)preg->re_pcre, NULL, NULL);
140 return 0;
141}
142
143/*************************************************
144 * Match a regular expression *
145 *************************************************/
146
147/* Unfortunately, PCRE requires 3 ints of working space for each captured
148 substring, so we have to get and release working store instead of just using
149 the POSIX structures as was done in earlier releases when PCRE needed only 2
150 ints. However, if the number of possible capturing brackets is small, use a
151 block of store on the stack, to reduce the use of malloc/free. The threshold is
152 in a macro that can be changed at configure time. */
153
154int ap_regexec(const ap_regex_t *preg, const char *string, apr_size_t nmatch,
155 ap_regmatch_t pmatch[], int eflags)
156{
157 int rc;
158 int options = 0;
159 int *ovector= NULL;
160 int small_ovector[POSIX_MALLOC_THRESHOLD * 3];
161 int allocated_ovector = 0;
162
163 if ((eflags & AP_REG_NOTBOL) != 0)
164 options |= PCRE_NOTBOL;
165 if ((eflags & AP_REG_NOTEOL) != 0)
166 options |= PCRE_NOTEOL;
167
168 ((ap_regex_t *)preg)->re_erroffset = (apr_size_t)(-1); /* Only has meaning after compile */
169
170 if (nmatch > 0) {
171 if (nmatch <= POSIX_MALLOC_THRESHOLD) {
172 ovector = &(small_ovector[0]);
173 } else {
174 ovector = (int *)malloc(sizeof(int) * nmatch * 3);
175 if (ovector == NULL)
176 return AP_REG_ESPACE;
177 allocated_ovector = 1;
178 }
179 }
180
181 rc = pcre_exec((const pcre *)preg->re_pcre, NULL, string,
182 (int)strlen(string), 0, options, ovector, nmatch * 3);
183
184 if (rc == 0)
185 rc = nmatch; /* All captured slots were filled in */
186
187 if (rc >= 0) {
188 apr_size_t i;
189 for (i = 0; i < (apr_size_t)rc; i++) {
190 pmatch[i].rm_so = ovector[i*2];
191 pmatch[i].rm_eo = ovector[i*2+1];
192 }
193 if (allocated_ovector)
194 free(ovector);
195 for (; i < nmatch; i++)
196 pmatch[i].rm_so = pmatch[i].rm_eo = -1;
197 return 0;
198 }
199
200 else {
201 if (allocated_ovector)
202 free(ovector);
203 switch (rc) {
204 case PCRE_ERROR_NOMATCH:
205 return AP_REG_NOMATCH;
206 case PCRE_ERROR_NULL:
207 return AP_REG_INVARG;
208 case PCRE_ERROR_BADOPTION:
209 return AP_REG_INVARG;
210 case PCRE_ERROR_BADMAGIC:
211 return AP_REG_INVARG;
212 case PCRE_ERROR_UNKNOWN_NODE:
213 return AP_REG_ASSERT;
214 case PCRE_ERROR_NOMEMORY:
215 return AP_REG_ESPACE;
216#ifdef PCRE_ERROR_MATCHLIMIT
217 case PCRE_ERROR_MATCHLIMIT: return AP_REG_ESPACE;
218#endif
219#ifdef PCRE_ERROR_BADUTF8
220 case PCRE_ERROR_BADUTF8: return AP_REG_INVARG;
221#endif
222#ifdef PCRE_ERROR_BADUTF8_OFFSET
223 case PCRE_ERROR_BADUTF8_OFFSET: return AP_REG_INVARG;
224#endif
225 default:
226 return AP_REG_ASSERT;
227 }
228 }
229}
230
231/*
232 * Here's a pool-based interface to the POSIX-esque ap_regcomp().
233 * Note that we return ap_regex_t instead of being passed one.
234 * The reason is that if you use an already-used ap_regex_t structure,
235 * the memory that you've already allocated gets forgotten, and
236 * regfree() doesn't clear it. So we don't allow it.
237 */
238
239static apr_status_t regex_cleanup(void *preg)
240{
241 ap_regfree((ap_regex_t *) preg);
242 return APR_SUCCESS;
243}
244
245ap_regex_t *ap_pregcomp(apr_pool_t *p, const char *pattern, int cflags)
246{
247 ap_regex_t *preg = apr_palloc(p, sizeof *preg);
248
249 if (ap_regcomp(preg, pattern, cflags)) {
250 return NULL;
251 }
252
253 apr_pool_cleanup_register(p, (void *) preg, regex_cleanup,
254 apr_pool_cleanup_null);
255
256 return preg;
257}
258
259void ap_pregfree(apr_pool_t *p, ap_regex_t *reg)
260{
261 ap_regfree(reg);
262 apr_pool_cleanup_kill(p, (void *) reg, regex_cleanup);
263}
264
265/* This function substitutes for $0-$9, filling in regular expression
266 * submatches. Pass it the same nmatch and pmatch arguments that you
267 * passed ap_regexec(). nmatch should not be greater than the maximum number
268 * of subexpressions - i.e. one more than the re_nsub member of ap_regex_t.
269 *
270 * input should be the string with the $-expressions, source should be the
271 * string that was matched against.
272 *
273 * It returns the substituted string, or NULL on error.
274 *
275 * Parts of this code are based on Henry Spencer's regsub(), from his
276 * AT&T V8 regexp package.
277 */
278
279char * ap_pregsub(apr_pool_t *p, const char *input, const char *source,
280 size_t nmatch, ap_regmatch_t pmatch[])
281{
282 const char *src = input;
283 char *dest, *dst;
284 char c;
285 size_t no;
286 int len;
287
288 if (!source)
289 return NULL;
290 if (!nmatch)
291 return apr_pstrdup(p, src);
292
293 /* First pass, find the size */
294
295 len = 0;
296
297 while ((c = *src++) != '\0') {
298 if (c == '&')
299 no = 0;
300 else if (c == '$' && apr_isdigit(*src))
301 no = *src++ - '0';
302 else
303 no = 10;
304
305 if (no> 9) { /* Ordinary character. */
306 if (c == '\\' && (*src == '$' || *src == '&'))
307 c = *src++;
308 len++;
309 } else if (no < nmatch && pmatch[no].rm_so < pmatch[no].rm_eo) {
310 len += pmatch[no].rm_eo - pmatch[no].rm_so;
311 }
312
313 }
314
315 dest = dst = apr_pcalloc(p, len + 1);
316
317 /* Now actually fill in the string */
318
319 src = input;
320
321 while ((c = *src++) != '\0') {
322 if (c == '&')
323 no = 0;
324 else if (c == '$' && apr_isdigit(*src))
325 no = *src++ - '0';
326 else
327 no = 10;
328
329 if (no> 9) { /* Ordinary character. */
330 if (c == '\\' && (*src == '$' || *src == '&'))
331 c = *src++;
332 *dst++ = c;
333 } else if (no < nmatch && pmatch[no].rm_so < pmatch[no].rm_eo) {
334 len = pmatch[no].rm_eo - pmatch[no].rm_so;
335 memcpy(dst, source + pmatch[no].rm_so, len);
336 dst += len;
337 }
338
339 }
340 *dst = '\0';
341
342 return dest;
343}
344/* End of ap_pcre.c (derived from PCRE's pcreposix.c) */
diff --git a/utility/ap_pcre.h b/utility/ap_pcre.h
new file mode 100644
index 0000000..a851d29
--- /dev/null
+++ b/utility/ap_pcre.h
@@ -0,0 +1,176 @@
1/* Licensed to the Apache Software Foundation (ASF) under one or more
2 * contributor license agreements. See the NOTICE file distributed with
3 * this work for additional information regarding copyright ownership.
4 * The ASF licenses this file to You under the Apache License, Version 2.0
5 * (the "License"); you may not use this file except in compliance with
6 * the License. You may obtain a copy of the License at
7 *
8 * http://www.apache.org/licenses/LICENSE-2.0
9 *
10 * Unless required by applicable law or agreed to in writing, software
11 * distributed under the License is distributed on an "AS IS" BASIS,
12 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 * See the License for the specific language governing permissions and
14 * limitations under the License.
15 */
16
17/* Derived from PCRE's pcreposix.h.
18
19 Copyright (c) 1997-2004 University of Cambridge
20
21-----------------------------------------------------------------------------
22Redistribution and use in source and binary forms, with or without
23modification, are permitted provided that the following conditions are met:
24
25 * Redistributions of source code must retain the above copyright notice,
26 this list of conditions and the following disclaimer.
27
28 * Redistributions in binary form must reproduce the above copyright
29 notice, this list of conditions and the following disclaimer in the
30 documentation and/or other materials provided with the distribution.
31
32 * Neither the name of the University of Cambridge nor the names of its
33 contributors may be used to endorse or promote products derived from
34 this software without specific prior written permission.
35
36THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
37AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
38IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
39ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
40LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
41CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
42SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
43INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
44CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
45ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
46POSSIBILITY OF SUCH DAMAGE.
47-----------------------------------------------------------------------------
48*/
49
50/**
51 * @file ap_pcre.h
52 * @brief Apache Regex defines
53 */
54
55#ifndef AP_REGEX_H
56#define AP_REGEX_H
57
58#include "apr.h"
59
60/* Allow for C++ users */
61
62#ifdef __cplusplus
63extern "C" {
64#endif
65
66/* Options for ap_regcomp (ICASE, NEWLINE) and ap_regexec (NOTBOL, NOTEOL): */
67
68#define AP_REG_ICASE 0x01 /** use a case-insensitive match */
69#define AP_REG_NEWLINE 0x02 /** don't match newlines against '.' etc */
70#define AP_REG_NOTBOL 0x04 /** ^ will not match against start-of-string */
71#define AP_REG_NOTEOL 0x08 /** $ will not match against end-of-string */
72
73#define AP_REG_EXTENDED (0) /** unused */
74#define AP_REG_NOSUB (0) /** unused */
75
76/* Error values: */
/* Codes returned by ap_regcomp() and ap_regexec(). ap_regerror() uses the
 * code as an index into its message table, so values start at 1 and the
 * order here must match that table. */
enum {
    AP_REG_ASSERT = 1, /** internal error ? */
    AP_REG_ESPACE, /** failed to get memory */
    AP_REG_INVARG, /** invalid argument */
    AP_REG_NOMATCH /** match failed */
};
83
84/* The structure representing a compiled regular expression. */
typedef struct {
    void *re_pcre;           /* compiled program from pcre_compile(); released by ap_regfree() */
    apr_size_t re_nsub;      /* set by ap_regcomp() after a successful compile */
    apr_size_t re_erroffset; /* compile error offset; ap_regexec() resets it to (apr_size_t)-1 */
} ap_regex_t;
90
91/* The structure in which a captured offset is returned. */
typedef struct {
    int rm_so; /* start offset of the capture in the subject string; -1 if the slot is unused */
    int rm_eo; /* end offset of the capture as reported by PCRE; -1 if the slot is unused */
} ap_regmatch_t;
96
97/* The functions */
98
99/**
100 * Compile a regular expression.
101 * @param preg Returned compiled regex
102 * @param regex The regular expression string
103 * @param cflags Bitwise OR of AP_REG_ICASE and/or AP_REG_NEWLINE, or 0.
104 * @return Zero on success or non-zero on error
105 */
106int ap_regcomp(ap_regex_t *preg, const char *regex, int cflags);
107
108/**
109 * Match a NUL-terminated string against a pre-compiled regex.
110 * @param preg The pre-compiled regex
111 * @param string The string to match
112 * @param nmatch Provide information regarding the location of any matches
113 * @param pmatch Provide information regarding the location of any matches
114 * @param eflags Bitwise OR of any of AP_REG_* flags
115 * @return 0 for successful match, AP_REG_NOMATCH or another AP_REG_* error code otherwise
116 */
117int ap_regexec(const ap_regex_t *preg, const char *string,
118 apr_size_t nmatch, ap_regmatch_t *pmatch, int eflags);
119
120/**
121 * Convert the error code returned by regcomp or regexec into an error message
122 * @param errcode the error code returned by regexec or regcomp
123 * @param preg The precompiled regex
124 * @param errbuf A buffer to store the error in
125 * @param errbuf_size The size of the buffer
126 */
127apr_size_t ap_regerror(int errcode, const ap_regex_t *preg,
128 char *errbuf, apr_size_t errbuf_size);
129
130/** Destroy a pre-compiled regex.
131 * @param preg The pre-compiled regex to free.
132 */
133void ap_regfree(ap_regex_t *preg);
134
135/**
136 * Compile a regular expression to be used later
137 * @param p The pool to allocate from
138 * @param pattern the regular expression to compile
139 * @param cflags The bitwise or of one or more of the following:
140 * @li AP_REG_EXTENDED - Use POSIX extended Regular Expressions (defined as 0; no effect)
141 * @li AP_REG_ICASE - Ignore case
142 * @li AP_REG_NOSUB - Support for substring addressing of matches
143 * not required (defined as 0; no effect)
144 * @li AP_REG_NEWLINE - Match-any-character operators don't match new-line
145 * @return The compiled regular expression
146 */
147ap_regex_t * ap_pregcomp(apr_pool_t *p, const char *pattern,
148 int cflags);
149
150/**
151 * Free the memory associated with a compiled regular expression
152 * @param p The pool the regex was allocated from
153 * @param reg The regular expression to free
154 */
155void ap_pregfree(apr_pool_t *p, ap_regex_t *reg);
156
157/**
158 * After performing a successful regex match, you may use this function to
159 * perform a series of string substitutions based on subexpressions that were
160 * matched during the call to ap_regexec
161 * @param p The pool to allocate from
162 * @param input An arbitrary string containing $1 through $9. These are
163 * replaced with the corresponding matched sub-expressions
164 * @param source The string that was originally matched to the regex
165 * @param nmatch the nmatch value that was passed to ap_regexec
166 * @param pmatch the pmatch array filled in by ap_regexec
167 */
168char * ap_pregsub(apr_pool_t *p, const char *input, const char *source,
169 size_t nmatch, ap_regmatch_t pmatch[]);
170
171#ifdef __cplusplus
172} /* extern "C" */
173#endif
174
175#endif /* AP_REGEX_H */
176
diff --git a/utility/config.c b/utility/config.c
index 6b3dce1..847d474 100644
--- a/utility/config.c
+++ b/utility/config.c
@@ -3,53 +3,61 @@
3#include "apr_file_io.h" 3#include "apr_file_io.h"
4#include "apr_strings.h" 4#include "apr_strings.h"
5#include "apr_hash.h" 5#include "apr_hash.h"
6#include "apr_lib.h" 6
7#include "shell.h" 7#include "shell.h"
8#include "config.h" 8#include "config.h"
9#include "util.h"
10#include "logparse.h"
9 11
10apr_hash_t *g_config_opts; 12apr_hash_t *g_config_opts;
11 13
12apr_status_t config_set_string(config_t *cfg, config_opt_t *opt, int argc, 14static apr_status_t config_set_string(config_t *cfg, config_opt_t *opt,
13 const char **argv) 15 int argc, const char **argv)
14{ 16{
15 int offset = (int)(long)opt->data; 17 int offset = (int)(long)opt->data;
16 char **data = (char **)((void *)cfg + offset); 18 char **data = (char **)((void *)cfg + offset);
17 if (argc != 2) return APR_EINVAL; 19 if (argc != 2)
20 return APR_EINVAL;
18 *data = apr_pstrdup(cfg->pool, argv[1]); 21 *data = apr_pstrdup(cfg->pool, argv[1]);
19 return APR_SUCCESS; 22 return APR_SUCCESS;
20} 23}
21 24
22apr_status_t config_set_int(config_t *cfg, config_opt_t *opt, int argc, 25static apr_status_t config_set_int(config_t *cfg, config_opt_t *opt, int argc,
26 const char **argv) __attribute__ ((__unused__));
27static apr_status_t config_set_int(config_t *cfg, config_opt_t *opt, int argc,
23 const char **argv) 28 const char **argv)
24{ 29{
25 int offset = (int)(long)opt->data; 30 int offset = (int)(long)opt->data;
26 int *data = (int *)((void *)cfg + offset); 31 int *data = (int *)((void *)cfg + offset);
27 if (argc != 2) return APR_EINVAL; 32 if (argc != 2)
33 return APR_EINVAL;
28 *data = apr_atoi64(argv[1]); 34 *data = apr_atoi64(argv[1]);
29 return APR_SUCCESS; 35 return APR_SUCCESS;
30} 36}
31 37
32apr_status_t config_set_flag(config_t *cfg, config_opt_t *opt, int argc, 38static apr_status_t config_set_flag(config_t *cfg, config_opt_t *opt, int argc,
33 const char **argv) 39 const char **argv)
34{ 40{
35 int offset = (int)(long)opt->data; 41 int offset = (int)(long)opt->data;
36 int *data = (int *)((void *)cfg + offset); 42 int *data = (int *)((void *)cfg + offset);
37 if (argc != 2) return APR_EINVAL; 43 if (argc != 2)
44 return APR_EINVAL;
38 *data = CHECK_YESNO(argv[1]); 45 *data = CHECK_YESNO(argv[1]);
39 return APR_SUCCESS; 46 return APR_SUCCESS;
40} 47}
41 48
42apr_status_t config_set_loglevel(config_t *cfg, config_opt_t *opt, int argc, 49static apr_status_t config_set_loglevel(config_t *cfg, config_opt_t *opt,
43 const char **argv) 50 int argc, const char **argv)
44{ 51{
45 if (argc != 2) return APR_EINVAL; 52 if (argc != 2)
46 if (!strcasecmp(argv[1],"error")) { 53 return APR_EINVAL;
54 if (!strcasecmp(argv[1], "error")) {
47 cfg->loglevel = LOGLEVEL_ERROR; 55 cfg->loglevel = LOGLEVEL_ERROR;
48 } else if (!strcasecmp(argv[1],"warn")) { 56 } else if (!strcasecmp(argv[1], "warn")) {
49 cfg->loglevel = LOGLEVEL_WARN; 57 cfg->loglevel = LOGLEVEL_WARN;
50 } else if (!strcasecmp(argv[1],"debug")) { 58 } else if (!strcasecmp(argv[1], "debug")) {
51 cfg->loglevel = LOGLEVEL_DEBUG; 59 cfg->loglevel = LOGLEVEL_DEBUG;
52 } else if (!strcasecmp(argv[1],"quiet")) { 60 } else if (!strcasecmp(argv[1], "quiet")) {
53 cfg->loglevel = LOGLEVEL_QUIET; 61 cfg->loglevel = LOGLEVEL_QUIET;
54 } else { 62 } else {
55 cfg->loglevel = LOGLEVEL_ERROR; 63 cfg->loglevel = LOGLEVEL_ERROR;
@@ -57,49 +65,51 @@ apr_status_t config_set_loglevel(config_t *cfg, config_opt_t *opt, int argc,
57 return APR_SUCCESS; 65 return APR_SUCCESS;
58} 66}
59 67
60apr_status_t config_set_dbconnect(config_t *cfg, config_opt_t *opt, int argc, 68static apr_status_t config_set_dbconnect(config_t *cfg, config_opt_t *opt,
61 const char **argv) 69 int argc, const char **argv)
62{ 70{
63 return APR_SUCCESS; 71 return APR_SUCCESS;
64} 72}
65 73
66apr_status_t config_set_dbparam(config_t *cfg, config_opt_t *opt, int argc, 74static apr_status_t config_set_dbparam(config_t *cfg, config_opt_t *opt,
67 const char **argv) 75 int argc, const char **argv)
68{ 76{
69 return APR_SUCCESS; 77 return APR_SUCCESS;
70} 78}
71 79
72apr_status_t config_set_inputfile(config_t *cfg, config_opt_t *opt, int argc, 80static apr_status_t config_set_inputfile(config_t *cfg, config_opt_t *opt,
73 const char **argv) 81 int argc, const char **argv)
74{ 82{
75 char **newp; 83 char **newp;
76 if (argc != 2) return APR_EINVAL; 84 if (argc != 2)
85 return APR_EINVAL;
77 newp = (char **)apr_array_push(cfg->input_files); 86 newp = (char **)apr_array_push(cfg->input_files);
78 *newp = apr_pstrdup(cfg->pool, argv[1]); 87 *newp = apr_pstrdup(cfg->pool, argv[1]);
79 return APR_SUCCESS; 88 return APR_SUCCESS;
80} 89}
81 90
82apr_status_t config_set_dummy(config_t *cfg, config_opt_t *opt, int argc, 91static apr_status_t config_set_dummy(config_t *cfg, config_opt_t *opt,
83 const char **argv) 92 int argc, const char **argv)
84{ 93{
85 return APR_SUCCESS; 94 return APR_SUCCESS;
86} 95}
87 96
88apr_status_t config_set_logformat(config_t *cfg, config_opt_t *opt, int argc, 97static apr_status_t config_set_logformat(config_t *cfg, config_opt_t *opt,
89 const char **argv) 98 int argc, const char **argv)
90{ 99{
91 config_logformat_t *format; 100 config_logformat_t *format;
92 config_logformat_field_t *field; 101 config_logformat_field_t *field;
93 102
94 if (argc != 4) return APR_EINVAL; 103 if (argc != 4)
104 return APR_EINVAL;
95 105
96 format = apr_hash_get(cfg->log_formats,argv[1],APR_HASH_KEY_STRING); 106 format = apr_hash_get(cfg->log_formats, argv[1], APR_HASH_KEY_STRING);
97 if (!format) { 107 if (!format) {
98 format = apr_palloc(cfg->pool, sizeof(config_logformat_t)); 108 format = apr_palloc(cfg->pool, sizeof(config_logformat_t));
99 format->name = apr_pstrdup(cfg->pool, argv[1]); 109 format->name = apr_pstrdup(cfg->pool, argv[1]);
100 format->fields = apr_array_make(cfg->pool, 5, 110 format->fields = apr_array_make(cfg->pool, 5,
101 sizeof(config_logformat_field_t)); 111 sizeof(config_logformat_field_t));
102 apr_hash_set(cfg->log_formats, argv[1], APR_HASH_KEY_STRING, format); 112 apr_hash_set(cfg->log_formats, apr_pstrdup(cfg->pool,argv[1]), APR_HASH_KEY_STRING, format);
103 } 113 }
104 field = (config_logformat_field_t *)apr_array_push(format->fields); 114 field = (config_logformat_field_t *)apr_array_push(format->fields);
105 field->name = apr_pstrdup(cfg->pool, argv[2]); 115 field->name = apr_pstrdup(cfg->pool, argv[2]);
@@ -107,18 +117,100 @@ apr_status_t config_set_logformat(config_t *cfg, config_opt_t *opt, int argc,
107 return APR_SUCCESS; 117 return APR_SUCCESS;
108} 118}
109 119
120static apr_status_t config_set_output_field(config_t *cfg, config_opt_t *opt,
121 int argc, const char **argv)
122{
123 config_output_field_t *field;
124 char *type, *size, *temp;
125
126 if (argc < 4)
127 return APR_EINVAL;
128 field = (config_output_field_t *)apr_array_push(cfg->output_fields);
129 field->field = apr_pstrdup(cfg->pool, argv[1]);
130 field->source = apr_pstrdup(cfg->pool, argv[3]);
131
132 type = size = apr_pstrdup(cfg->pool, argv[2]);
133 while (*size!='\0' && *size!='(')
134 size++;
135 if (*size == '(') {
136 *size = '\0';
137 size++;
138 temp = size;
139 while (*temp != '\0' && *temp != ')')
140 temp++;
141 *temp = '\0';
142 field->size = apr_atoi64(size);
143 }
144 if (strcasecmp("VARCHAR", type)==0) {
145 field->datatype = LOGSQL_DATATYPE_VARCHAR;
146 } else if (strcasecmp("INT", type)==0) {
147 field->datatype = LOGSQL_DATATYPE_INT;
148 } else if (strcasecmp("CHAR", type)==0) {
149 field->datatype = LOGSQL_DATATYPE_CHAR;
150 } else if (strcasecmp("SMALLINT", type)==0) {
151 field->datatype = LOGSQL_DATATYPE_SMALLINT;
152 } else if (strcasecmp("BIGINT", type)==0) {
153 field->datatype = LOGSQL_DATATYPE_BIGINT;
154 } else {
155 return APR_EINVAL;
156 }
157
158 // Has a function
159 if (argc > 4) {
160 int i;
161 field->fname = apr_pstrdup(cfg->pool, argv[4]);
162 field->func = parser_get_func(field->fname);
163 field->args = apr_pcalloc(cfg->pool, sizeof(char *) * (argc-4+1));
164 for (i=5; i<=argc; i++) {
165 field->args[i-5] = apr_pstrdup(cfg->pool, argv[i]);
166 }
167 }
168
169 return APR_SUCCESS;
170}
171
172static apr_status_t config_set_filter(config_t *cfg, config_opt_t *opt,
173 int argc, const char **argv)
174{
175 int argn = 1;
176 config_filter_t *filter;
177 filter = apr_pcalloc(cfg->pool, sizeof(config_filter_t));
178
179 if (opt->name[0]!='L') { // Pre or post 2-3 args
180 if (argc == 1)
181 return APR_EINVAL;
182 filter->field = apr_pstrdup(cfg->pool, argv[1]);
183 argn++;
184 } // Otherwise Line based only 1-2 args (no field)
185 if (argc <= argn)
186 return APR_EINVAL;
187 if (argv[argn][0] == '+')
188 argn++;
189 if (argv[argn][0] == '-') {
190 filter->negative = 1;
191 argn++;
192 }
193 if (argc <= argn)
194 return APR_EINVAL;
195 filter->filter = apr_pstrdup(cfg->pool, argv[argn]);
196 filter->regex = ap_pregcomp(cfg->pool, filter->filter, AP_REG_ICASE);
197 return APR_SUCCESS;
198}
199
110void config_dump(config_t *cfg) 200void config_dump(config_t *cfg)
111{ 201{
112 apr_hash_index_t *hi; 202 apr_hash_index_t *hi;
203 int i;
204 config_output_field_t *fields;
113 205
114 printf("ErrorLog: %s\n",cfg->errorlog); 206 printf("ErrorLog: %s\n", cfg->errorlog);
115 printf("LogLevel: %d\n",cfg->loglevel); 207 printf("LogLevel: %d\n", cfg->loglevel);
116 208
117 printf("InputDir: %s\n",cfg->input_dir); 209 printf("InputDir: %s\n", cfg->input_dir);
118 210
119 printf("Table: %s\n",cfg->table); 211 printf("Table: %s\n", cfg->table);
120 printf("Transactions: %d\n",cfg->transactions); 212 printf("Transactions: %d\n", cfg->transactions);
121 printf("MachineID: %s\n",cfg->machineid); 213 printf("MachineID: %s\n", cfg->machineid);
122 214
123 printf("Log formats:\n"); 215 printf("Log formats:\n");
124 for (hi = apr_hash_first(cfg->pool, cfg->log_formats); hi; hi 216 for (hi = apr_hash_first(cfg->pool, cfg->log_formats); hi; hi
@@ -128,31 +220,39 @@ void config_dump(config_t *cfg)
128 int i; 220 int i;
129 221
130 apr_hash_this(hi, NULL, NULL, (void **)&format); 222 apr_hash_this(hi, NULL, NULL, (void **)&format);
131 printf(">> %s\n",format->name); 223 printf(">> '%s'\n", format->name);
132 fields = (config_logformat_field_t *)format->fields->elts; 224 fields = (config_logformat_field_t *)format->fields->elts;
133 for (i=0; i<format->fields->nelts; i++) { 225 for (i=0; i<format->fields->nelts; i++) {
134 printf(">>>> %s:%s\n", fields[i].name, fields[i].datatype); 226 printf(">>>> %s:%s\n", fields[i].name, fields[i].datatype);
135 } 227 }
136 } 228 }
137 printf("Log Format: %s\n",cfg->logformat); 229 printf("Log Format: '%s'\n", cfg->logformat);
138 230
139 printf("DryRun: %d\n",cfg->dryrun); 231 printf("Output Fields:\n");
140 printf("Summary: %d\n",cfg->summary); 232 fields = (config_output_field_t *)cfg->output_fields->elts;
141} 233 for (i=0; i<cfg->output_fields->nelts; i++) {
142 234 printf(">> %s %s(%d): %s", fields[i].field, logsql_field_datatyeName(fields[i].datatype), fields[i].size, fields[i].source);
143static char *lowerstr(apr_pool_t *pool, const char *input) { 235 if (fields[i].func) {
144 char *temp; 236 printf(" :: %s(", fields[i].fname);
145 char *itr; 237 if (fields[i].args) {
146 temp = apr_pstrdup(pool, input); 238 int a = 0;
147 for (itr=temp; *itr!='\0'; itr++) { 239 while (fields[i].args[a]) {
148 *itr = apr_tolower(*itr); 240 printf("%s,", fields[i].args[a]);
241 a++;
242 }
243 }
244 printf(")");
149 } 245 }
150 return temp; 246 printf("\n");
247 }
248
249 printf("DryRun: %d\n", cfg->dryrun);
250 printf("Summary: %d\n", cfg->summary);
151} 251}
152 252
153#define config_get_option(name) apr_hash_get(g_config_opts, name, APR_HASH_KEY_STRING) 253#define config_get_option(name) apr_hash_get(g_config_opts, name, APR_HASH_KEY_STRING)
154 254
155void config_add_option(apr_pool_t *p, const char *const name, 255static void config_add_option(apr_pool_t *p, const char *const name,
156 const char *const help, config_func_t func, void *data) 256 const char *const help, config_func_t func, void *data)
157{ 257{
158 config_opt_t *opt; 258 config_opt_t *opt;
@@ -164,22 +264,24 @@ void config_add_option(apr_pool_t *p, const char *const name,
164 opt->help = help; 264 opt->help = help;
165 opt->func = func; 265 opt->func = func;
166 opt->data = data; 266 opt->data = data;
167 apr_hash_set(g_config_opts, lowerstr(p,name), APR_HASH_KEY_STRING, opt); 267 apr_hash_set(g_config_opts, lowerstr(p, name), APR_HASH_KEY_STRING, opt);
168} 268}
169 269
170void config_init(apr_pool_t *p) 270void config_init(apr_pool_t *p)
171{ 271{
172 config_add_option(p, "ErrorLog", "File to log errors", 272 config_add_option(p, "ErrorLog", "File to log errors", config_set_string,
173 config_set_string, (void *)APR_OFFSETOF(config_t, errorlog)); 273 (void *)APR_OFFSETOF(config_t, errorlog));
174 config_add_option(p, "LogLevel", "Set Log Level (error, warn, debug, quiet)", 274 config_add_option(p, "LogLevel",
175 config_set_loglevel, NULL); 275 "Set Log Level (error, warn, debug, quiet)", config_set_loglevel,
276 NULL);
176 277
177 config_add_option(p, "InputDirectory", "Directory to scan for log files", 278 config_add_option(p, "InputDirectory", "Directory to scan for log files",
178 config_set_string, (void *)APR_OFFSETOF(config_t, input_dir)); 279 config_set_string, (void *)APR_OFFSETOF(config_t, input_dir));
179 config_add_option(p, "InputFile", "Parse only this file", 280 config_add_option(p, "InputFile", "Parse only this file",
180 config_set_inputfile, NULL); 281 config_set_inputfile, NULL);
181 282
182 config_add_option(p, "DBConnect", "DB Connection information type://user:pass@hostname/database", 283 config_add_option(p, "DBConnect",
284 "DB Connection information type://user:pass@hostname/database",
183 config_set_dbconnect, NULL); 285 config_set_dbconnect, NULL);
184 config_add_option(p, "DBParam", "DB Connection Parameter", 286 config_add_option(p, "DBParam", "DB Connection Parameter",
185 config_set_dbparam, NULL); 287 config_set_dbparam, NULL);
@@ -187,14 +289,28 @@ void config_init(apr_pool_t *p)
187 config_set_string, (void *)APR_OFFSETOF(config_t, table)); 289 config_set_string, (void *)APR_OFFSETOF(config_t, table));
188 config_add_option(p, "UseTransactions", "Enable Transactions?", 290 config_add_option(p, "UseTransactions", "Enable Transactions?",
189 config_set_flag, (void *)APR_OFFSETOF(config_t, transactions)); 291 config_set_flag, (void *)APR_OFFSETOF(config_t, transactions));
190 config_add_option(p, "MachineID", "Machine ID to set", 292 config_add_option(p, "MachineID", "Machine ID to set", config_set_string,
191 config_set_string, (void *)APR_OFFSETOF(config_t, machineid)); 293 (void *)APR_OFFSETOF(config_t, machineid));
192 294
193 config_add_option(p, "LogFormatConfig", "Define input log formats", 295 config_add_option(p, "LogFormatConfig", "Define input log formats",
194 config_set_logformat, NULL); 296 config_set_logformat, NULL);
195 config_add_option(p, "LogFormat", "Use this logformat when parsing files", 297 config_add_option(p, "LogFormat", "Use this logformat when parsing files",
196 config_set_string, (void *)APR_OFFSETOF(config_t, logformat)); 298 config_set_string, (void *)APR_OFFSETOF(config_t, logformat));
197 299
300 config_add_option(p, "LineFilter",
301 "A regular expression to apply to the input line",
302 config_set_filter, (void *)APR_OFFSETOF(config_t, linefilters));
303 config_add_option(p, "PreFilter",
304 "A regular expression to apply to a specific input field",
305 config_set_filter, (void *)APR_OFFSETOF(config_t, prefilters));
306 config_add_option(p, "PostFilter",
307 "A regular expression to apply to a specific SQL output field",
308 config_set_filter, (void *)APR_OFFSETOF(config_t, postfilters));
309
310 config_add_option(p, "OutputField",
311 "Define output fields: field datatype source optfunc optarg...",
312 config_set_output_field, NULL);
313
198 config_add_option(p, "DryRun", "Don't perform any actual database changes", 314 config_add_option(p, "DryRun", "Don't perform any actual database changes",
199 config_set_flag, (void *)APR_OFFSETOF(config_t, dryrun)); 315 config_set_flag, (void *)APR_OFFSETOF(config_t, dryrun));
200 config_add_option(p, "Config", "Dummy to handle config directive", 316 config_add_option(p, "Config", "Dummy to handle config directive",
@@ -216,16 +332,20 @@ config_t *config_create(apr_pool_t *p)
216 cfg->input_files = apr_array_make(cfg->pool, 10, sizeof(char *)); 332 cfg->input_files = apr_array_make(cfg->pool, 10, sizeof(char *));
217 cfg->dbconfig = apr_table_make(cfg->pool, 5); 333 cfg->dbconfig = apr_table_make(cfg->pool, 5);
218 cfg->log_formats = apr_hash_make(cfg->pool); 334 cfg->log_formats = apr_hash_make(cfg->pool);
219 335 cfg->output_fields = apr_array_make(cfg->pool, 10,
336 sizeof(config_output_field_t));
220 return cfg; 337 return cfg;
221} 338}
222 339
223int config_merge(void *rec, const char *key, const char *value) { 340static int config_merge(void *rec, const char *key, const char *value)
341{
224 config_t *cfg = (config_t *)rec; 342 config_t *cfg = (config_t *)rec;
225 343
226 config_opt_t *opt = config_get_option(key); 344 config_opt_t *opt= config_get_option(key);
227 if (opt) { 345 if (opt) {
228 const char *args[] = {key, value}; 346 const char *args[] = {
347 key,
348 value };
229 opt->func(cfg, opt, 2, args); 349 opt->func(cfg, opt, 2, args);
230 } else { 350 } else {
231 printf("Unhandled: %s\n", key); 351 printf("Unhandled: %s\n", key);
@@ -238,11 +358,11 @@ apr_status_t config_read(config_t *cfg, const char *filename,
238{ 358{
239 apr_finfo_t finfo; 359 apr_finfo_t finfo;
240 apr_file_t *file; 360 apr_file_t *file;
241 apr_status_t rv, ret = APR_SUCCESS; 361 apr_status_t rv, ret= APR_SUCCESS;
242 apr_pool_t *tp, *targp; 362 apr_pool_t *tp, *targp;
243 config_opt_t *opt; 363 config_opt_t *opt;
244 char buff[1024]; 364 char buff[1024];
245 char *ptr, *ptr2; 365 char *ptr;
246 char **targv; 366 char **targv;
247 int targc; 367 int targc;
248 int line; 368 int line;
@@ -254,7 +374,7 @@ apr_status_t config_read(config_t *cfg, const char *filename,
254 return APR_ENOENT; 374 return APR_ENOENT;
255 } 375 }
256 rv = apr_file_open(&file, filename, APR_FOPEN_READ | APR_BUFFERED, 376 rv = apr_file_open(&file, filename, APR_FOPEN_READ | APR_BUFFERED,
257 APR_OS_DEFAULT, tp); 377 APR_OS_DEFAULT, tp);
258 if (rv != APR_SUCCESS) 378 if (rv != APR_SUCCESS)
259 return rv; 379 return rv;
260 380
@@ -267,10 +387,7 @@ apr_status_t config_read(config_t *cfg, const char *filename,
267 // skip leading white space 387 // skip leading white space
268 for (ptr = buff; *ptr == ' ' || *ptr == '\t'; ptr++) 388 for (ptr = buff; *ptr == ' ' || *ptr == '\t'; ptr++)
269 ; 389 ;
270 // chomp off newline 390 line_chomp(ptr);
271 for (ptr2 = ptr + strlen(ptr); *ptr2 != '\r' && *ptr2 != '\n'; ptr2--)
272 ;
273 *ptr2 = '\0';
274 391
275 // skip comments 392 // skip comments
276 if (*ptr == '#') 393 if (*ptr == '#')
@@ -278,9 +395,10 @@ apr_status_t config_read(config_t *cfg, const char *filename,
278 if (*ptr == '\0') 395 if (*ptr == '\0')
279 continue; 396 continue;
280 apr_pool_clear(targp); 397 apr_pool_clear(targp);
281 apr_tokenize_to_argv(buff, &targv, targp); 398 apr_tokenize_to_argv(ptr, &targv, targp);
282 targc = 0; 399 targc = 0;
283 while (targv[targc]) targc++; 400 while (targv[targc])
401 targc++;
284 opt = config_get_option(lowerstr(targp,targv[0])); 402 opt = config_get_option(lowerstr(targp,targv[0]));
285 if (opt) { 403 if (opt) {
286 rv = opt->func(cfg, opt, targc, (const char **)targv); 404 rv = opt->func(cfg, opt, targc, (const char **)targv);
@@ -296,10 +414,9 @@ apr_status_t config_read(config_t *cfg, const char *filename,
296 } while (rv == APR_SUCCESS); 414 } while (rv == APR_SUCCESS);
297 415
298 // Apply merges 416 // Apply merges
299 apr_table_do(config_merge,(void *)cfg,merge,NULL); 417 apr_table_do(config_merge, (void *)cfg, merge, NULL);
300 418
301 apr_file_close(file); 419 apr_file_close(file);
302 apr_pool_destroy(tp); 420 apr_pool_destroy(tp);
303 return ret; 421 return ret;
304} 422}
305
diff --git a/utility/config.h b/utility/config.h
index e1827fe..67f8ea5 100644
--- a/utility/config.h
+++ b/utility/config.h
@@ -5,6 +5,8 @@
5#include "apr_hash.h" 5#include "apr_hash.h"
6#include "apr_file_io.h" 6#include "apr_file_io.h"
7 7
8#include "ap_pcre.h"
9
8typedef enum { 10typedef enum {
9 LOGLEVEL_QUIET = 0, 11 LOGLEVEL_QUIET = 0,
10 LOGLEVEL_ERROR = 1, 12 LOGLEVEL_ERROR = 1,
@@ -12,7 +14,8 @@ typedef enum {
12 LOGLEVEL_DEBUG = 3, 14 LOGLEVEL_DEBUG = 3,
13} loglevel_e; 15} loglevel_e;
14 16
15typedef struct { 17typedef struct config_t config_t;
18struct config_t {
16 /** the structures pool (to ease function arguments) */ 19 /** the structures pool (to ease function arguments) */
17 apr_pool_t *pool; 20 apr_pool_t *pool;
18 21
@@ -46,29 +49,33 @@ typedef struct {
46 apr_array_header_t *output_fields; 49 apr_array_header_t *output_fields;
47 50
48 /** filter configuration */ 51 /** filter configuration */
49 apr_array_header_t *filters; 52 apr_array_header_t *linefilters;
53 apr_array_header_t *prefilters;
54 apr_array_header_t *postfilters;
50 55
51 /** Dry Run */ 56 /** Dry Run */
52 int dryrun; 57 int dryrun;
53 58
54 /* Show the summary */ 59 /* Show the summary */
55 int summary; 60 int summary;
56} config_t; 61};
62
57 63
58typedef struct { 64typedef struct config_logformat_t config_logformat_t;
65struct config_logformat_t {
59 const char *name; 66 const char *name;
60 apr_array_header_t *fields; 67 apr_array_header_t *fields;
61} config_logformat_t; 68};
62 69
63typedef struct { 70typedef struct config_logformat_field_t config_logformat_field_t;
71struct config_logformat_field_t {
64 const char *name; 72 const char *name;
65 const char *datatype; 73 const char *datatype;
66} config_logformat_field_t; 74};
67 75
68typedef struct config_opt_t config_opt_t; 76typedef struct config_opt_t config_opt_t;
69typedef apr_status_t (*config_func_t)(config_t *cfg, config_opt_t *opt, 77typedef apr_status_t (*config_func_t)(config_t *cfg, config_opt_t *opt,
70 int argc, const char **argv); 78 int argc, const char **argv);
71
72struct config_opt_t { 79struct config_opt_t {
73 const char *name; 80 const char *name;
74 const char *help; 81 const char *help;
@@ -76,6 +83,40 @@ struct config_opt_t {
76 void *data; 83 void *data;
77}; 84};
78 85
/**
 * One filter directive as built by the LineFilter / PreFilter / PostFilter
 * config handler.
 */
86typedef struct config_filter_t config_filter_t;
87struct config_filter_t {
/** field the filter applies to; left NULL for line filters */
88 const char *field;
/** the regular expression source text */
89 const char *filter;
/** set to 1 when the directive carried a '-' argument */
90 int negative;
/** compiled form of filter (compiled with AP_REG_ICASE) */
91 ap_regex_t *regex;
92};
93
/** SQL datatypes an output field can be declared with */
typedef enum {
    LOGSQL_DATATYPE_INT = 0,
    LOGSQL_DATATYPE_SMALLINT,
    LOGSQL_DATATYPE_VARCHAR,
    LOGSQL_DATATYPE_CHAR,
    LOGSQL_DATATYPE_BIGINT
} logsql_field_datatype;

/**
 * Map a logsql_field_datatype value to its printable name, or "ERR" for a
 * value outside the enum. The argument is parenthesized so that expression
 * arguments compare correctly; note it is still evaluated more than once,
 * so it must not have side effects.
 */
#define logsql_field_datatyeName(x) \
    ((x) == LOGSQL_DATATYPE_INT ? "INT" \
    : ((x) == LOGSQL_DATATYPE_SMALLINT ? "SMALLINT" \
    : ((x) == LOGSQL_DATATYPE_VARCHAR ? "VARCHAR" \
    : ((x) == LOGSQL_DATATYPE_CHAR ? "CHAR" \
    : ((x) == LOGSQL_DATATYPE_BIGINT ? "BIGINT" : "ERR")))))
107
/**
 * One OutputField directive
 * (usage: field datatype(size) source [function [param]...]).
 */
108typedef struct config_output_field_t config_output_field_t;
109
110struct config_output_field_t {
/** name of the output field */
111 const char *field;
/** declared SQL datatype */
112 logsql_field_datatype datatype;
/** declared size, from the "(size)" part of the datatype argument */
113 apr_size_t size;
/** name of the input field the value is taken from */
114 const char *source;
/** name of the optional parser function */
115 const char *fname;
/** result of parser_get_func(fname); NULL when no function applies */
116 void *func;
/** NULL-terminated list of arguments for the parser function */
117 const char **args;
118};
119
79#define CHECK_YESNO(c) ((!strcasecmp(c,"on") || !strcasecmp(c,"yes")) ? 1 : 0) 120#define CHECK_YESNO(c) ((!strcasecmp(c,"on") || !strcasecmp(c,"yes")) ? 1 : 0)
80 121
81/** 122/**
@@ -99,6 +140,4 @@ config_t *config_create(apr_pool_t *p);
99apr_status_t config_read(config_t *cfg, const char *filename, 140apr_status_t config_read(config_t *cfg, const char *filename,
100 apr_table_t *merge); 141 apr_table_t *merge);
101 142
102void config_generate(const char *filename);
103
104#endif /*CONFIG_H_*/ 143#endif /*CONFIG_H_*/
diff --git a/utility/database.c b/utility/database.c
new file mode 100644
index 0000000..5fece50
--- /dev/null
+++ b/utility/database.c
@@ -0,0 +1,2 @@
1#include "database.h"
2
diff --git a/utility/database.h b/utility/database.h
new file mode 100644
index 0000000..9fc4844
--- /dev/null
+++ b/utility/database.h
@@ -0,0 +1,4 @@
1#ifndef DATABASE_H_
2#define DATABASE_H_
3
4#endif /*DATABASE_H_*/
diff --git a/utility/logparse.c b/utility/logparse.c
index 2940534..4d823ce 100644
--- a/utility/logparse.c
+++ b/utility/logparse.c
@@ -3,7 +3,35 @@
3#include "apr_file_io.h" 3#include "apr_file_io.h"
4#include "apr_strings.h" 4#include "apr_strings.h"
5 5
6void find_log_files(config_t *cfg) 6#include "util.h"
7
8apr_hash_t *g_parser_funcs;
9
/**
 * Parser function registered under the name "regexmatch".
 * Placeholder: ignores all of its arguments and always returns APR_SUCCESS.
 */
10static apr_status_t parser_func_regexmatch(config_t *cfg, const char *data,
11 int argc, const char **argv)
12{
13 return APR_SUCCESS;
14}
15parser_func_t parser_get_func(const char *name)
16{
17 return apr_hash_get(g_parser_funcs, name, APR_HASH_KEY_STRING);
18}
19
20static void parser_add_func(apr_pool_t *p, const char *const name,
21 parser_func_t func)
22{
23 if (!g_parser_funcs) {
24 g_parser_funcs = apr_hash_make(p);
25 }
26 apr_hash_set(g_parser_funcs, lowerstr(p, name), APR_HASH_KEY_STRING, func);
27}
28
/**
 * Register the built-in parser functions (currently only "regexmatch"),
 * using pool p for the registry.
 */
29void parser_init(apr_pool_t *p)
30{
31 parser_add_func(p, "regexmatch", parser_func_regexmatch);
32}
33
34void parser_find_logs(config_t *cfg)
7{ 35{
8 apr_pool_t *tp; 36 apr_pool_t *tp;
9 apr_dir_t *dir; 37 apr_dir_t *dir;
@@ -39,7 +67,7 @@ void find_log_files(config_t *cfg)
39 * found during parsing of the arg_str. 67 * found during parsing of the arg_str.
40 * keepquotes: Keep the quotes instead of stripping them 68 * keepquotes: Keep the quotes instead of stripping them
41 */ 69 */
42apr_status_t tokenize_logline(const char *arg_str, char ***argv_out, 70static apr_status_t tokenize_logline(const char *arg_str, char ***argv_out,
43 apr_pool_t *token_context, int keepquotes) 71 apr_pool_t *token_context, int keepquotes)
44{ 72{
45 const char *cp; 73 const char *cp;
@@ -157,7 +185,7 @@ apr_status_t tokenize_logline(const char *arg_str, char ***argv_out,
157 185
158apr_status_t parse_logfile(config_t *cfg, const char *filename) 186apr_status_t parse_logfile(config_t *cfg, const char *filename)
159{ 187{
160 apr_pool_t *tp, *argp; 188 apr_pool_t *tp, *targp;
161 apr_file_t *file; 189 apr_file_t *file;
162 apr_status_t rv; 190 apr_status_t rv;
163 char buff[2048]; 191 char buff[2048];
@@ -166,7 +194,7 @@ apr_status_t parse_logfile(config_t *cfg, const char *filename)
166 int line; 194 int line;
167 195
168 apr_pool_create(&tp, cfg->pool); 196 apr_pool_create(&tp, cfg->pool);
169 apr_pool_create(&argp, tp); 197 apr_pool_create(&targp, tp);
170 198
171 rv = apr_file_open(&file, filename, APR_FOPEN_READ | APR_BUFFERED, 199 rv = apr_file_open(&file, filename, APR_FOPEN_READ | APR_BUFFERED,
172 APR_OS_DEFAULT, tp); 200 APR_OS_DEFAULT, tp);
@@ -180,16 +208,16 @@ apr_status_t parse_logfile(config_t *cfg, const char *filename)
180 rv = apr_file_gets(buff, 1024, file); 208 rv = apr_file_gets(buff, 1024, file);
181 if (rv == APR_SUCCESS) { 209 if (rv == APR_SUCCESS) {
182 line++; 210 line++;
183 char *ptr;
184 // chomp off newline 211 // chomp off newline
185 for (ptr = buff + strlen(buff); *ptr != '\r' && *ptr != '\n'; ptr--) 212 line_chomp(buff);
186 ; 213
187 *ptr = '\0'; 214 apr_pool_clear(targp);
188 apr_pool_clear(argp); 215 tokenize_logline(buff, &targv, targp, 1);
189 tokenize_logline(buff, &targv, argp, 1);
190 targc = 0; 216 targc = 0;
191 while (targv[targc]) targc++; 217 while (targv[targc]) targc++;
192 if (targc != 9) { 218 /** @todo Run Line Filters here */
219 rv = parse_processline(targp, cfg, targv, targc);
220 if (rv != APR_SUCCESS) {
193 int i; 221 int i;
194 printf("Line %d(%d): %s\n",line, targc, buff); 222 printf("Line %d(%d): %s\n",line, targc, buff);
195 for (i = 0; targv[i]; i++) { 223 for (i = 0; targv[i]; i++) {
@@ -203,3 +231,46 @@ apr_status_t parse_logfile(config_t *cfg, const char *filename)
203 apr_pool_destroy(tp); 231 apr_pool_destroy(tp);
204 return APR_SUCCESS; 232 return APR_SUCCESS;
205} 233}
234
235apr_status_t parse_processline(apr_pool_t *ptemp, config_t *cfg, char **argv, int argc)
236{
237 config_logformat_t *fmt;
238 config_logformat_field_t *ifields;
239 config_output_field_t *ofields;
240 apr_table_t *datain;
241 apr_table_t *dataout;
242 int i;
243
244 fmt = apr_hash_get(cfg->log_formats, cfg->logformat, APR_HASH_KEY_STRING);
245 if (!fmt) return APR_EINVAL;
246 if (fmt->fields->nelts != argc) return APR_EINVAL;
247
248 datain = apr_table_make(ptemp, fmt->fields->nelts);
249 dataout = apr_table_make(ptemp, cfg->output_fields->nelts);
250
251 ifields = (config_logformat_field_t *)fmt->fields->elts;
252 for (i=0; i<fmt->fields->nelts; i++) {
253 apr_table_setn(datain,ifields[i].name,argv[i]);
254 }
255 /** @todo Run Pre Filters here */
256
257 // Convert input fields to output fields
258 ofields = (config_output_field_t *)cfg->output_fields->elts;
259 for (i=0; i<cfg->output_fields->nelts; i++) {
260 const char *t;
261 if (!ofields[i].func) {
262 t = apr_table_get(datain, ofields[i].source);
263 if (!t) {
264 return APR_EINVAL;
265 }
266 apr_table_setn(dataout,ofields[i].field, t);
267 printf("S: %s = %s\n",ofields[i].source, t);
268 } else {
269 printf("S: %s, F: %p\n",ofields[i].source, ofields[i].func);
270 }
271 }
272
273 /** @todo Run Post Filters here */
274
275 return APR_SUCCESS;
276}
diff --git a/utility/logparse.h b/utility/logparse.h
index 540a9e0..ebabf56 100644
--- a/utility/logparse.h
+++ b/utility/logparse.h
@@ -3,8 +3,17 @@
3 3
4#include "config.h" 4#include "config.h"
5 5
6void find_log_files(config_t *cfg); 6typedef apr_status_t (*parser_func_t)(config_t *cfg, const char *data,
7 int argc, const char **argv);
8
9parser_func_t parser_get_func(const char *name);
10
11void parser_init(apr_pool_t *p);
12
13void parser_find_logs(config_t *cfg);
7 14
8apr_status_t parse_logfile(config_t *cfg, const char *filename); 15apr_status_t parse_logfile(config_t *cfg, const char *filename);
9 16
17apr_status_t parse_processline(apr_pool_t *ptemp, config_t *cfg, char **argv, int argc);
18
10#endif /*LOGPARSE_H_*/ 19#endif /*LOGPARSE_H_*/
diff --git a/utility/mod_log_sql.conf b/utility/mod_log_sql.conf
index 6cfae61..72f0205 100644
--- a/utility/mod_log_sql.conf
+++ b/utility/mod_log_sql.conf
@@ -9,7 +9,7 @@ LogLevel notice
9DryRun on 9DryRun on
10Summary on 10Summary on
11 11
12LogFormatConfig CLF host String 12LogFormatConfig CLF remhost String
13LogFormatConfig CLF ident String 13LogFormatConfig CLF ident String
14LogFormatConfig CLF user String 14LogFormatConfig CLF user String
15LogFormatConfig CLF date Date 15LogFormatConfig CLF date Date
@@ -17,7 +17,7 @@ LogFormatConfig CLF request String
17LogFormatConfig CLF status Number 17LogFormatConfig CLF status Number
18LogFormatConfig CLF bytes_sent Number 18LogFormatConfig CLF bytes_sent Number
19 19
20LogFormatConfig Combined host String 20LogFormatConfig Combined remhost String
21LogFormatConfig Combined ident String 21LogFormatConfig Combined ident String
22LogFormatConfig Combined user String 22LogFormatConfig Combined user String
23LogFormatConfig Combined date Date 23LogFormatConfig Combined date Date
@@ -28,3 +28,23 @@ LogFormatConfig Combined referer String
28LogFormatConfig Combined agent String 28LogFormatConfig Combined agent String
29 29
30LogFormat Combined 30LogFormat Combined
31
32# not yet implemented
33Linefilter - "BAD"
34PreFilter request - "GET \/images"
35PostFilter request_method "GET"
36
37# Usage field datatype(size) source [function [param]...]
38OutputField bytes_sent int bytes_sent
39OutputField request_protocol varchar(10) request regexmatch "(HTTP\/[\d\.]+)$"
40OutputField remote_host varchar(50) remhost
41OutputField request_method varchar(25) request regexmatch "^(\w+)"
42OutputField time_stamp int date totimestamp
43OutputField status smallint status
44OutputField request_uri varchar(255) request regexmatch "^\w+ (.+) \w+\.[\d\.]+$"
45OutputField remote_user varchar(50) user
46OutputField remote_logname varchar(50) ident
47OutputField remote_time char(28) date
48#Only used for Combined log input, if standard CLF input, they are ignored
49OutputField agent varchar(255) agent
50OutputField referer varchar(255) referer
diff --git a/utility/shell.c b/utility/shell.c
index 07d9da1..eaa7098 100644
--- a/utility/shell.c
+++ b/utility/shell.c
@@ -54,7 +54,7 @@ void show_help(const char *prog, const apr_getopt_option_t *opts, FILE *output)
54 54
55int main(int argc, const char *const argv[]) 55int main(int argc, const char *const argv[])
56{ 56{
57 apr_pool_t *pool; 57 apr_pool_t *pool, *ptemp;
58 apr_getopt_t *opts; 58 apr_getopt_t *opts;
59 int opt; 59 int opt;
60 const char *opt_arg; 60 const char *opt_arg;
@@ -69,12 +69,13 @@ int main(int argc, const char *const argv[])
69 fprintf(stderr, "Failed to create memory pool!\n"); 69 fprintf(stderr, "Failed to create memory pool!\n");
70 exit(1); 70 exit(1);
71 } 71 }
72 apr_pool_create(&ptemp, NULL);
72 73
73 /** Iterate over command line arguments 74 /** Iterate over command line arguments
74 * shoving args in a apr_table for processing later*/ 75 * shoving args in a apr_table for processing later*/
75 args = apr_table_make(pool, 5); 76 args = apr_table_make(ptemp, 5);
76 apr_table_setn(args, "config", "mod_log_sql.conf"); 77 apr_table_setn(args, "config", "mod_log_sql.conf");
77 apr_getopt_init(&opts, pool, argc, argv); 78 apr_getopt_init(&opts, ptemp, argc, argv);
78 while ((rv = apr_getopt_long(opts, _opt_config, &opt, &opt_arg)) == APR_SUCCESS) { 79 while ((rv = apr_getopt_long(opts, _opt_config, &opt, &opt_arg)) == APR_SUCCESS) {
79 switch (opt) { 80 switch (opt) {
80 case 'c': 81 case 'c':
@@ -122,17 +123,21 @@ int main(int argc, const char *const argv[])
122 } 123 }
123 124
124 // Process configuration file 125 // Process configuration file
126 parser_init(pool);
125 config_init(pool); 127 config_init(pool);
126 base = config_create(pool); 128 base = config_create(pool);
127 rv = config_read(base, apr_table_get(args,"Config"), args); 129 rv = config_read(base, apr_table_get(args,"Config"), args);
130 apr_pool_destroy(ptemp);
131
128 if (APR_STATUS_IS_ENOENT(rv)) { 132 if (APR_STATUS_IS_ENOENT(rv)) {
129 fprintf(stderr,"Could not load configuration file: %s\n",apr_table_get(args,"config")); 133 fprintf(stderr,"Could not load configuration file: %s\n",apr_table_get(args,"config"));
130 } else if (rv) { 134 } else if (rv) {
131 exit(1); 135 exit(1);
132 } 136 }
133 config_dump(base); 137 config_dump(base);
134 // Apply overrides from command line 138
135 find_log_files(base); 139 // Find files and parse
140 parser_find_logs(base);
136 if (!apr_is_empty_array(base->input_files)) { 141 if (!apr_is_empty_array(base->input_files)) {
137 char **filelist; 142 char **filelist;
138 int f, l; 143 int f, l;
diff --git a/utility/util.c b/utility/util.c
new file mode 100644
index 0000000..ef3ea68
--- /dev/null
+++ b/utility/util.c
@@ -0,0 +1,28 @@
1#include "util.h"
2#include "apr_strings.h"
3#include "apr_lib.h"
4
5
6char *lowerstr(apr_pool_t *pool, const char *input)
7{
8 char *temp;
9 char *itr;
10 temp = apr_pstrdup(pool, input);
11 for (itr=temp; *itr!='\0'; itr++) {
12 *itr = apr_tolower(*itr);
13 }
14 return temp;
15}
16
/**
 * Strip all trailing '\r' and '\n' characters from str, in place.
 *
 * The length is checked on every iteration, so a string made up only of
 * newline characters is reduced to "" without reading before its start.
 */
void line_chomp(char *str)
{
    size_t len = strlen(str);

    while (len > 0 && (str[len-1] == '\r' || str[len-1] == '\n')) {
        str[len-1] = '\0';
        len--;
    }
}
diff --git a/utility/util.h b/utility/util.h
new file mode 100644
index 0000000..8c48474
--- /dev/null
+++ b/utility/util.h
@@ -0,0 +1,13 @@
1#ifndef UTIL_H_
2#define UTIL_H_
3
4#include "apr_pools.h"
5
/**
 * Return a lower-cased copy of input, allocated from pool
 */
6char *lowerstr(apr_pool_t *pool, const char *input);
7
8/**
9 * Chomp new line characters off the end of the line
10 */
11void line_chomp(char *str);
12
13#endif /*UTIL_H_*/