diff options
author | Edward Rudd | 2008-10-25 16:44:17 +0000 |
---|---|---|
committer | Edward Rudd | 2008-10-25 16:44:17 +0000 |
commit | 19e09e15d96fa891d18fd07bb0f751faa0a7fc7a (patch) | |
tree | 77da4c55a5c91713184a1344cca6270ae8fd0afe | |
parent | caae8dcfed1462cb19c82f99087e6fe2ba3d407c (diff) |
implement query arg extractor
"merge in" ap_unescape_url from HTTPD
updated logging statements
add per-line func data and per-func data
-rw-r--r-- | utility/config.c | 10 | ||||
-rw-r--r-- | utility/config.h | 4 | ||||
-rw-r--r-- | utility/logparse.c | 105 | ||||
-rw-r--r-- | utility/logparse.h | 15 | ||||
-rw-r--r-- | utility/mod_log_sql.conf | 3 | ||||
-rw-r--r-- | utility/util.c | 96 | ||||
-rw-r--r-- | utility/util.h | 2 |
7 files changed, 206 insertions, 29 deletions
diff --git a/utility/config.c b/utility/config.c index b1e7585..fca2f77 100644 --- a/utility/config.c +++ b/utility/config.c | |||
@@ -320,15 +320,19 @@ apr_status_t config_check(config_t *cfg) | |||
320 | { | 320 | { |
321 | apr_status_t ret = APR_SUCCESS; | 321 | apr_status_t ret = APR_SUCCESS; |
322 | if (!cfg->dbdriver || !cfg->dbparams) { | 322 | if (!cfg->dbdriver || !cfg->dbparams) { |
323 | logging_log(cfg, LOGLEVEL_NOISE, "Database configuration is missing\n"); | 323 | logging_log(cfg, LOGLEVEL_NOISE, "CONFIG: Database configuration is missing"); |
324 | ret = APR_EINVAL; | 324 | ret = APR_EINVAL; |
325 | } | 325 | } |
326 | if (!cfg->table) { | 326 | if (!cfg->table) { |
327 | logging_log(cfg, LOGLEVEL_NOISE, "No Log Table defined\n"); | 327 | logging_log(cfg, LOGLEVEL_NOISE, "CONFIG: No Log Table defined"); |
328 | ret = APR_EINVAL; | 328 | ret = APR_EINVAL; |
329 | } | 329 | } |
330 | if (apr_is_empty_array(cfg->output_fields)) { | 330 | if (apr_is_empty_array(cfg->output_fields)) { |
331 | logging_log(cfg, LOGLEVEL_NOISE, "No Output Fields Defined\n"); | 331 | logging_log(cfg, LOGLEVEL_NOISE, "CONFIG: No Output Fields Defined"); |
332 | ret = APR_EINVAL; | ||
333 | } | ||
334 | if (apr_hash_count(cfg->log_formats)==0) { | ||
335 | logging_log(cfg, LOGLEVEL_NOISE, "CONFIG: No Input Log Formats Defined"); | ||
332 | ret = APR_EINVAL; | 336 | ret = APR_EINVAL; |
333 | } | 337 | } |
334 | return ret; | 338 | return ret; |
diff --git a/utility/config.h b/utility/config.h index 763ef5d..26a4e99 100644 --- a/utility/config.h +++ b/utility/config.h | |||
@@ -115,6 +115,8 @@ typedef enum { | |||
115 | 115 | ||
116 | typedef struct config_output_field_t config_output_field_t; | 116 | typedef struct config_output_field_t config_output_field_t; |
117 | 117 | ||
118 | typedef struct parser_func_t parser_func_t; | ||
119 | |||
118 | struct config_output_field_t { | 120 | struct config_output_field_t { |
119 | const char *field; | 121 | const char *field; |
120 | logsql_field_datatype datatype; | 122 | logsql_field_datatype datatype; |
@@ -122,7 +124,7 @@ struct config_output_field_t { | |||
122 | const char *def; | 124 | const char *def; |
123 | const char *source; | 125 | const char *source; |
124 | const char *fname; | 126 | const char *fname; |
125 | void *func; | 127 | parser_func_t *func; |
126 | const char **args; | 128 | const char **args; |
127 | void *data; | 129 | void *data; |
128 | }; | 130 | }; |
diff --git a/utility/logparse.c b/utility/logparse.c index f4afb52..7ea6bc1 100644 --- a/utility/logparse.c +++ b/utility/logparse.c | |||
@@ -12,6 +12,7 @@ | |||
12 | #include "database.h" | 12 | #include "database.h" |
13 | 13 | ||
14 | apr_hash_t *g_parser_funcs; | 14 | apr_hash_t *g_parser_funcs; |
15 | void **g_parser_linedata; | ||
15 | 16 | ||
16 | static apr_status_t parser_func_regexmatch(apr_pool_t *p, config_t *cfg, | 17 | static apr_status_t parser_func_regexmatch(apr_pool_t *p, config_t *cfg, |
17 | config_output_field_t *field, const char *value, const char **ret) | 18 | config_output_field_t *field, const char *value, const char **ret) |
@@ -19,33 +20,34 @@ static apr_status_t parser_func_regexmatch(apr_pool_t *p, config_t *cfg, | |||
19 | struct { | 20 | struct { |
20 | ap_regex_t *rx; | 21 | ap_regex_t *rx; |
21 | const char *substr; | 22 | const char *substr; |
22 | }*data; | 23 | }*_data; |
23 | ap_regmatch_t regm[AP_MAX_REG_MATCH]; | 24 | ap_regmatch_t regm[AP_MAX_REG_MATCH]; |
24 | // Check if a regular expression configured | 25 | // Check if a regular expression configured |
25 | if (!field->args[0]) | 26 | if (!field->args[0]) |
26 | return APR_EINVAL; | 27 | return APR_EINVAL; |
27 | if (!field->data) { | 28 | if (!field->data) { |
28 | // pre compile the regex | 29 | // pre compile the regex |
29 | data = apr_palloc(cfg->pool, sizeof(ap_regex_t)+sizeof(const char *)); | 30 | _data = apr_palloc(cfg->pool, sizeof(ap_regex_t)+sizeof(const char *)); |
30 | data->rx = ap_pregcomp(cfg->pool, field->args[0], | 31 | _data->rx = ap_pregcomp(cfg->pool, field->args[0], |
31 | AP_REG_EXTENDED|AP_REG_ICASE); | 32 | AP_REG_EXTENDED|AP_REG_ICASE); |
32 | if (field->args[1]) { | 33 | if (field->args[1]) { |
33 | data->substr = field->args[1]; | 34 | _data->substr = field->args[1]; |
34 | } else { | 35 | } else { |
35 | data->substr = "$1"; | 36 | _data->substr = "$1"; |
36 | } | 37 | } |
37 | if (!data->rx) | 38 | if (!_data->rx) |
38 | return APR_EINVAL; | 39 | return APR_EINVAL; |
39 | field->data = data; | 40 | field->data = _data; |
40 | } else | 41 | } else |
41 | data = field->data; | 42 | _data = field->data; |
42 | 43 | ||
43 | if (!ap_regexec(data->rx, value, AP_MAX_REG_MATCH, regm, 0)) { | 44 | if (!ap_regexec(_data->rx, value, AP_MAX_REG_MATCH, regm, 0)) { |
44 | *ret = ap_pregsub(p, data->substr, value, AP_MAX_REG_MATCH, regm); | 45 | *ret = ap_pregsub(p, _data->substr, value, AP_MAX_REG_MATCH, regm); |
45 | } else { | 46 | } else { |
46 | *ret = field->def; | 47 | *ret = field->def; |
47 | } | 48 | } |
48 | //printf("We matched %s against %s to %s\n",value, field->args[0], *ret); | 49 | logging_log(cfg, LOGLEVEL_DEBUG, "REGEX: matched %s against %s to %s", value, |
50 | field->args[0], *ret); | ||
49 | return APR_SUCCESS; | 51 | return APR_SUCCESS; |
50 | } | 52 | } |
51 | 53 | ||
@@ -76,26 +78,87 @@ static apr_status_t parser_func_machineid(apr_pool_t *p, config_t *cfg, | |||
76 | } | 78 | } |
77 | 79 | ||
78 | /** @todo Implement Query arg ripping function */ | 80 | /** @todo Implement Query arg ripping function */ |
81 | static apr_status_t parser_func_queryarg(apr_pool_t *p, config_t *cfg, | ||
82 | config_output_field_t *field, const char *value, const char **ret) | ||
83 | { | ||
84 | apr_table_t *query = parser_get_linedata(field->func); | ||
85 | |||
86 | if (!field->args[0]) | ||
87 | return APR_EINVAL; | ||
88 | |||
89 | if (!query) { | ||
90 | char *query_beg; | ||
91 | |||
92 | query = apr_table_make(p,3); | ||
93 | |||
94 | query_beg = strchr(value, '?'); | ||
95 | // if we have a query string, rip it apart | ||
96 | if (query_beg) { | ||
97 | char *key; | ||
98 | char *value; | ||
99 | char *query_string; | ||
100 | char *strtok_state; | ||
101 | char *query_end = strrchr(++query_beg,' '); | ||
102 | |||
103 | query_string = apr_pstrndup(p, query_beg, query_end-query_beg); | ||
104 | logging_log(cfg, LOGLEVEL_DEBUG, "QUERY: Found String %pp, %pp, %s", | ||
105 | query_beg, query_end, query_string); | ||
106 | |||
107 | key = apr_strtok(query_string, "&", &strtok_state); | ||
108 | while (key) { | ||
109 | value = strchr(key, '='); | ||
110 | if (value) { | ||
111 | *value = '\0'; /* Split the string in two */ | ||
112 | value++; /* Skip past the = */ | ||
113 | } | ||
114 | else { | ||
115 | value = "1"; | ||
116 | } | ||
117 | ap_unescape_url(key); | ||
118 | ap_unescape_url(value); | ||
119 | apr_table_set(query, key, value); | ||
120 | |||
121 | logging_log(cfg, LOGLEVEL_DEBUG, | ||
122 | "QUERY: Found arg: %s = %s", key, value); | ||
79 | 123 | ||
80 | parser_func_t parser_get_func(const char *name) | 124 | key = apr_strtok(NULL, "&", &strtok_state); |
125 | } | ||
126 | } | ||
127 | parser_set_linedata(field->func,query); | ||
128 | } | ||
129 | *ret = apr_table_get(query, field->args[0]); | ||
130 | if (*ret == NULL) *ret = field->def; | ||
131 | return APR_SUCCESS; | ||
132 | } | ||
133 | |||
134 | parser_func_t *parser_get_func(const char *name) | ||
81 | { | 135 | { |
82 | return apr_hash_get(g_parser_funcs, name, APR_HASH_KEY_STRING); | 136 | return apr_hash_get(g_parser_funcs, name, APR_HASH_KEY_STRING); |
83 | } | 137 | } |
84 | 138 | ||
85 | static void parser_add_func(apr_pool_t *p, const char *const name, | 139 | static void parser_add_func(apr_pool_t *p, const char *const name, |
86 | parser_func_t func) | 140 | parser_func_f func, int id) |
87 | { | 141 | { |
142 | parser_func_t *s; | ||
88 | if (!g_parser_funcs) { | 143 | if (!g_parser_funcs) { |
89 | g_parser_funcs = apr_hash_make(p); | 144 | g_parser_funcs = apr_hash_make(p); |
90 | } | 145 | } |
91 | apr_hash_set(g_parser_funcs, lowerstr(p, name), APR_HASH_KEY_STRING, func); | 146 | s = apr_palloc(p, sizeof(parser_func_t)); |
147 | s->func = func; | ||
148 | s->pos = id; | ||
149 | s->linedata = &g_parser_linedata; | ||
150 | apr_hash_set(g_parser_funcs, lowerstr(p, name), APR_HASH_KEY_STRING, s); | ||
92 | } | 151 | } |
93 | 152 | ||
94 | void parser_init(apr_pool_t *p) | 153 | void parser_init(apr_pool_t *p) |
95 | { | 154 | { |
96 | parser_add_func(p, "regexmatch", parser_func_regexmatch); | 155 | int i = 0; |
97 | parser_add_func(p, "totimestamp", parser_func_totimestamp); | 156 | parser_add_func(p, "regexmatch", parser_func_regexmatch, ++i); |
98 | parser_add_func(p, "machineid", parser_func_machineid); | 157 | parser_add_func(p, "totimestamp", parser_func_totimestamp, ++i); |
158 | parser_add_func(p, "machineid", parser_func_machineid, ++i); | ||
159 | parser_add_func(p, "queryarg", parser_func_queryarg, ++i); | ||
160 | g_parser_linedata = apr_pcalloc(p, sizeof(void *) * (i+1)); | ||
161 | g_parser_linedata[0] = (void *)i; | ||
99 | } | 162 | } |
100 | 163 | ||
101 | void parser_find_logs(config_t *cfg) | 164 | void parser_find_logs(config_t *cfg) |
@@ -333,8 +396,10 @@ apr_status_t parse_processline(apr_pool_t *ptemp, config_t *cfg, char **argv, | |||
333 | } | 396 | } |
334 | /** @todo Run Pre Filters here */ | 397 | /** @todo Run Pre Filters here */ |
335 | 398 | ||
336 | // Convert input fields to output fields | ||
337 | ofields = (config_output_field_t *)cfg->output_fields->elts; | 399 | ofields = (config_output_field_t *)cfg->output_fields->elts; |
400 | // clear out ofield function per-line data | ||
401 | memset(&g_parser_linedata[1],0,sizeof(void *)*(int)g_parser_linedata[0]); | ||
402 | // Convert input fields to output fields | ||
338 | for (i=0; i<cfg->output_fields->nelts; i++) { | 403 | for (i=0; i<cfg->output_fields->nelts; i++) { |
339 | const char *val; | 404 | const char *val; |
340 | val = apr_table_get(datain, ofields[i].source); | 405 | val = apr_table_get(datain, ofields[i].source); |
@@ -347,8 +412,8 @@ apr_status_t parse_processline(apr_pool_t *ptemp, config_t *cfg, char **argv, | |||
347 | apr_table_setn(dataout, ofields[i].field, val); | 412 | apr_table_setn(dataout, ofields[i].field, val); |
348 | } else { | 413 | } else { |
349 | const char *ret= NULL; | 414 | const char *ret= NULL; |
350 | rv = ((parser_func_t)ofields[i].func)(ptemp, cfg, &ofields[i], val, | 415 | rv = ((parser_func_t *)ofields[i].func)->func(ptemp, cfg, |
351 | &ret); | 416 | &ofields[i], val, &ret); |
352 | if (rv) | 417 | if (rv) |
353 | return rv; | 418 | return rv; |
354 | apr_table_setn(dataout, ofields[i].field, ret); | 419 | apr_table_setn(dataout, ofields[i].field, ret); |
diff --git a/utility/logparse.h b/utility/logparse.h index 816624a..bc39cb1 100644 --- a/utility/logparse.h +++ b/utility/logparse.h | |||
@@ -3,10 +3,21 @@ | |||
3 | 3 | ||
4 | #include "config.h" | 4 | #include "config.h" |
5 | 5 | ||
6 | typedef apr_status_t (*parser_func_t)(apr_pool_t *p, config_t *cfg, | 6 | typedef apr_status_t (*parser_func_f)(apr_pool_t *p, config_t *cfg, |
7 | config_output_field_t *field, const char *value, const char **ret); | 7 | config_output_field_t *field, const char *value, const char **ret); |
8 | 8 | ||
9 | parser_func_t parser_get_func(const char *name); | 9 | struct parser_func_t { |
10 | parser_func_f func; | ||
11 | int pos; | ||
12 | void *data; | ||
13 | void ***linedata; | ||
14 | }; | ||
15 | |||
16 | #define parser_get_linedata(f) (*f->linedata)[f->pos] | ||
17 | |||
18 | #define parser_set_linedata(f, v) (*f->linedata)[f->pos] = v | ||
19 | |||
20 | parser_func_t *parser_get_func(const char *name); | ||
10 | 21 | ||
11 | void parser_init(apr_pool_t *p); | 22 | void parser_init(apr_pool_t *p); |
12 | 23 | ||
diff --git a/utility/mod_log_sql.conf b/utility/mod_log_sql.conf index 771f7c7..8822bb5 100644 --- a/utility/mod_log_sql.conf +++ b/utility/mod_log_sql.conf | |||
@@ -49,3 +49,6 @@ OutputField request_time char(28) "" date regexmatch ".+" | |||
49 | OutputField agent varchar(255) "" agent | 49 | OutputField agent varchar(255) "" agent |
50 | OutputField referer varchar(255) "" referer | 50 | OutputField referer varchar(255) "" referer |
51 | OutputField machine_id varchar(25) "" "" machineid | 51 | OutputField machine_id varchar(25) "" "" machineid |
52 | #VIZU Fields | ||
53 | OutputField poll_id int 0 request queryarg n | ||
54 | |||
diff --git a/utility/util.c b/utility/util.c index 99bb046..7ecb902 100644 --- a/utility/util.c +++ b/utility/util.c | |||
@@ -31,6 +31,99 @@ void line_chomp(char *str) | |||
31 | } | 31 | } |
32 | } | 32 | } |
33 | 33 | ||
34 | /* | ||
35 | * *** Ripped from HTTPD util.c (why are so many PORTABLE things not in APR UTIL?) | ||
36 | */ | ||
37 | static char x2c(const char *what) | ||
38 | { | ||
39 | register char digit; | ||
40 | |||
41 | digit = ((what[0] >= 'A') ? ((what[0] & 0xdf) - 'A') + 10 | ||
42 | : (what[0] - '0')); | ||
43 | digit *= 16; | ||
44 | digit += (what[1] >= 'A' ? ((what[1] & 0xdf) - 'A') + 10 | ||
45 | : (what[1] - '0')); | ||
46 | return (digit); | ||
47 | } | ||
48 | |||
49 | /* | ||
50 | * *** Ripped from HTTPD util.c (why are so many PORTABLE things not in APR UTIL?) | ||
51 | * | ||
52 | * Unescapes a URL, leaving reserved characters intact. | ||
53 | * Returns 0 on success, non-zero on error | ||
54 | * Failure is due to | ||
55 | * bad % escape returns HTTP_BAD_REQUEST | ||
56 | * | ||
57 | * decoding %00 or a forbidden character returns HTTP_NOT_FOUND | ||
58 | */ | ||
59 | static int unescape_url(char *url, const char *forbid, const char *reserved) | ||
60 | { | ||
61 | register int badesc, badpath; | ||
62 | char *x, *y; | ||
63 | |||
64 | badesc = 0; | ||
65 | badpath = 0; | ||
66 | /* Initial scan for first '%'. Don't bother writing values before | ||
67 | * seeing a '%' */ | ||
68 | y = strchr(url, '%'); | ||
69 | if (y == NULL) { | ||
70 | return APR_SUCCESS; | ||
71 | } | ||
72 | for (x = y; *y; ++x, ++y) { | ||
73 | if (*y != '%') { | ||
74 | *x = *y; | ||
75 | } | ||
76 | else { | ||
77 | if (!apr_isxdigit(*(y + 1)) || !apr_isxdigit(*(y + 2))) { | ||
78 | badesc = 1; | ||
79 | *x = '%'; | ||
80 | } | ||
81 | else { | ||
82 | char decoded; | ||
83 | decoded = x2c(y + 1); | ||
84 | if ((decoded == '\0') | ||
85 | || (forbid && strchr(forbid, decoded))) { | ||
86 | badpath = 1; | ||
87 | *x = decoded; | ||
88 | y += 2; | ||
89 | } | ||
90 | else if (reserved && strchr(reserved, decoded)) { | ||
91 | *x++ = *y++; | ||
92 | *x++ = *y++; | ||
93 | *x = *y; | ||
94 | } | ||
95 | else { | ||
96 | *x = decoded; | ||
97 | y += 2; | ||
98 | } | ||
99 | } | ||
100 | } | ||
101 | } | ||
102 | *x = '\0'; | ||
103 | if (badesc) { | ||
104 | return APR_EINVAL; | ||
105 | } | ||
106 | else if (badpath) { | ||
107 | return APR_EINVAL; | ||
108 | } | ||
109 | else { | ||
110 | return APR_SUCCESS; | ||
111 | } | ||
112 | } | ||
113 | |||
114 | /* | ||
115 | * *** Ripped from HTTPD util.c (why are so many PORTABLE things not in APR UTIL?) | ||
116 | */ | ||
117 | int ap_unescape_url(char *url) | ||
118 | { | ||
119 | /* Traditional */ | ||
120 | #ifdef CASE_BLIND_FILESYSTEM | ||
121 | return unescape_url(url, "/\\", NULL); | ||
122 | #else | ||
123 | return unescape_url(url, "/", NULL); | ||
124 | #endif | ||
125 | } | ||
126 | |||
34 | void logging_init(config_t *cfg) | 127 | void logging_init(config_t *cfg) |
35 | { | 128 | { |
36 | apr_status_t rv; | 129 | apr_status_t rv; |
@@ -58,9 +151,6 @@ const char *logging_strerror(apr_status_t rv) | |||
58 | return apr_strerror(rv, buff, 256); | 151 | return apr_strerror(rv, buff, 256); |
59 | } | 152 | } |
60 | 153 | ||
61 | /** | ||
62 | * @todo implement logging | ||
63 | */ | ||
64 | void logging_log(config_t *cfg, loglevel_e level, const char *fmt, ...) | 154 | void logging_log(config_t *cfg, loglevel_e level, const char *fmt, ...) |
65 | { | 155 | { |
66 | va_list ap; | 156 | va_list ap; |
diff --git a/utility/util.h b/utility/util.h index c67cf9c..99f93aa 100644 --- a/utility/util.h +++ b/utility/util.h | |||
@@ -12,6 +12,8 @@ char *lowerstr(apr_pool_t *pool, const char *input); | |||
12 | */ | 12 | */ |
13 | void line_chomp(char *str); | 13 | void line_chomp(char *str); |
14 | 14 | ||
15 | int ap_unescape_url(char *url); | ||
16 | |||
15 | void logging_init(config_t *cfg); | 17 | void logging_init(config_t *cfg); |
16 | 18 | ||
17 | void logging_log(config_t *cfg, loglevel_e level, const char *fmt, ...) | 19 | void logging_log(config_t *cfg, loglevel_e level, const char *fmt, ...) |