summaryrefslogtreecommitdiffstatsabout
diff options
context:
space:
mode:
author Edward Rudd <urkle@outoforder.cc> 2008-10-25 16:44:17 (GMT)
committer Edward Rudd <urkle@outoforder.cc> 2008-10-25 16:44:17 (GMT)
commit 19e09e15d96fa891d18fd07bb0f751faa0a7fc7a (patch)
tree 77da4c55a5c91713184a1344cca6270ae8fd0afe
parent caae8dcfed1462cb19c82f99087e6fe2ba3d407c (diff)
Implement query arg extractor
"Merge in" ap_unescape_url from HTTPD; update logging statements; add per-line func data and per-func data.
-rw-r--r-- utility/config.c 10
-rw-r--r-- utility/config.h 4
-rw-r--r-- utility/logparse.c 105
-rw-r--r-- utility/logparse.h 15
-rw-r--r-- utility/mod_log_sql.conf 3
-rw-r--r-- utility/util.c 96
-rw-r--r-- utility/util.h 2
7 files changed, 206 insertions, 29 deletions
diff --git a/utility/config.c b/utility/config.c
index b1e7585..fca2f77 100644
--- a/utility/config.c
+++ b/utility/config.c
@@ -320,15 +320,19 @@ apr_status_t config_check(config_t *cfg)
320{ 320{
321 apr_status_t ret = APR_SUCCESS; 321 apr_status_t ret = APR_SUCCESS;
322 if (!cfg->dbdriver || !cfg->dbparams) { 322 if (!cfg->dbdriver || !cfg->dbparams) {
323 logging_log(cfg, LOGLEVEL_NOISE, "Database configuration is missing\n"); 323 logging_log(cfg, LOGLEVEL_NOISE, "CONFIG: Database configuration is missing");
324 ret = APR_EINVAL; 324 ret = APR_EINVAL;
325 } 325 }
326 if (!cfg->table) { 326 if (!cfg->table) {
327 logging_log(cfg, LOGLEVEL_NOISE, "No Log Table defined\n"); 327 logging_log(cfg, LOGLEVEL_NOISE, "CONFIG: No Log Table defined");
328 ret = APR_EINVAL; 328 ret = APR_EINVAL;
329 } 329 }
330 if (apr_is_empty_array(cfg->output_fields)) { 330 if (apr_is_empty_array(cfg->output_fields)) {
331 logging_log(cfg, LOGLEVEL_NOISE, "No Output Fields Defined\n"); 331 logging_log(cfg, LOGLEVEL_NOISE, "CONFIG: No Output Fields Defined");
332 ret = APR_EINVAL;
333 }
334 if (apr_hash_count(cfg->log_formats)==0) {
335 logging_log(cfg, LOGLEVEL_NOISE, "CONFIG: No Input Log Formats Defined");
332 ret = APR_EINVAL; 336 ret = APR_EINVAL;
333 } 337 }
334 return ret; 338 return ret;
diff --git a/utility/config.h b/utility/config.h
index 763ef5d..26a4e99 100644
--- a/utility/config.h
+++ b/utility/config.h
@@ -115,6 +115,8 @@ typedef enum {
115 115
116typedef struct config_output_field_t config_output_field_t; 116typedef struct config_output_field_t config_output_field_t;
117 117
118typedef struct parser_func_t parser_func_t;
119
118struct config_output_field_t { 120struct config_output_field_t {
119 const char *field; 121 const char *field;
120 logsql_field_datatype datatype; 122 logsql_field_datatype datatype;
@@ -122,7 +124,7 @@ struct config_output_field_t {
122 const char *def; 124 const char *def;
123 const char *source; 125 const char *source;
124 const char *fname; 126 const char *fname;
125 void *func; 127 parser_func_t *func;
126 const char **args; 128 const char **args;
127 void *data; 129 void *data;
128}; 130};
diff --git a/utility/logparse.c b/utility/logparse.c
index f4afb52..7ea6bc1 100644
--- a/utility/logparse.c
+++ b/utility/logparse.c
@@ -12,6 +12,7 @@
12#include "database.h" 12#include "database.h"
13 13
14apr_hash_t *g_parser_funcs; 14apr_hash_t *g_parser_funcs;
15void **g_parser_linedata;
15 16
16static apr_status_t parser_func_regexmatch(apr_pool_t *p, config_t *cfg, 17static apr_status_t parser_func_regexmatch(apr_pool_t *p, config_t *cfg,
17 config_output_field_t *field, const char *value, const char **ret) 18 config_output_field_t *field, const char *value, const char **ret)
@@ -19,33 +20,34 @@ static apr_status_t parser_func_regexmatch(apr_pool_t *p, config_t *cfg,
19 struct { 20 struct {
20 ap_regex_t *rx; 21 ap_regex_t *rx;
21 const char *substr; 22 const char *substr;
22 }*data; 23 }*_data;
23 ap_regmatch_t regm[AP_MAX_REG_MATCH]; 24 ap_regmatch_t regm[AP_MAX_REG_MATCH];
24 // Check if a regular expression configured 25 // Check if a regular expression configured
25 if (!field->args[0]) 26 if (!field->args[0])
26 return APR_EINVAL; 27 return APR_EINVAL;
27 if (!field->data) { 28 if (!field->data) {
28 // pre compile the regex 29 // pre compile the regex
29 data = apr_palloc(cfg->pool, sizeof(ap_regex_t)+sizeof(const char *)); 30 _data = apr_palloc(cfg->pool, sizeof(ap_regex_t)+sizeof(const char *));
30 data->rx = ap_pregcomp(cfg->pool, field->args[0], 31 _data->rx = ap_pregcomp(cfg->pool, field->args[0],
31 AP_REG_EXTENDED|AP_REG_ICASE); 32 AP_REG_EXTENDED|AP_REG_ICASE);
32 if (field->args[1]) { 33 if (field->args[1]) {
33 data->substr = field->args[1]; 34 _data->substr = field->args[1];
34 } else { 35 } else {
35 data->substr = "$1"; 36 _data->substr = "$1";
36 } 37 }
37 if (!data->rx) 38 if (!_data->rx)
38 return APR_EINVAL; 39 return APR_EINVAL;
39 field->data = data; 40 field->data = _data;
40 } else 41 } else
41 data = field->data; 42 _data = field->data;
42 43
43 if (!ap_regexec(data->rx, value, AP_MAX_REG_MATCH, regm, 0)) { 44 if (!ap_regexec(_data->rx, value, AP_MAX_REG_MATCH, regm, 0)) {
44 *ret = ap_pregsub(p, data->substr, value, AP_MAX_REG_MATCH, regm); 45 *ret = ap_pregsub(p, _data->substr, value, AP_MAX_REG_MATCH, regm);
45 } else { 46 } else {
46 *ret = field->def; 47 *ret = field->def;
47 } 48 }
48 //printf("We matched %s against %s to %s\n",value, field->args[0], *ret); 49 logging_log(cfg, LOGLEVEL_DEBUG, "REGEX: matched %s against %s to %s", value,
50 field->args[0], *ret);
49 return APR_SUCCESS; 51 return APR_SUCCESS;
50} 52}
51 53
@@ -76,26 +78,87 @@ static apr_status_t parser_func_machineid(apr_pool_t *p, config_t *cfg,
76} 78}
77 79
78/** @todo Implement Query arg ripping function */ 80/** @todo Implement Query arg ripping function */
81static apr_status_t parser_func_queryarg(apr_pool_t *p, config_t *cfg,
82 config_output_field_t *field, const char *value, const char **ret)
83{
84 apr_table_t *query = parser_get_linedata(field->func);
85
86 if (!field->args[0])
87 return APR_EINVAL;
88
89 if (!query) {
90 char *query_beg;
91
92 query = apr_table_make(p,3);
93
94 query_beg = strchr(value, '?');
95 // if we have a query string, rip it apart
96 if (query_beg) {
97 char *key;
98 char *value;
99 char *query_string;
100 char *strtok_state;
101 char *query_end = strrchr(++query_beg,' ');
102
103 query_string = apr_pstrndup(p, query_beg, query_end-query_beg);
104 logging_log(cfg, LOGLEVEL_DEBUG, "QUERY: Found String %pp, %pp, %s",
105 query_beg, query_end, query_string);
106
107 key = apr_strtok(query_string, "&", &strtok_state);
108 while (key) {
109 value = strchr(key, '=');
110 if (value) {
111 *value = '\0'; /* Split the string in two */
112 value++; /* Skip past the = */
113 }
114 else {
115 value = "1";
116 }
117 ap_unescape_url(key);
118 ap_unescape_url(value);
119 apr_table_set(query, key, value);
120
121 logging_log(cfg, LOGLEVEL_DEBUG,
122 "QUERY: Found arg: %s = %s", key, value);
79 123
80parser_func_t parser_get_func(const char *name) 124 key = apr_strtok(NULL, "&", &strtok_state);
125 }
126 }
127 parser_set_linedata(field->func,query);
128 }
129 *ret = apr_table_get(query, field->args[0]);
130 if (*ret == NULL) *ret = field->def;
131 return APR_SUCCESS;
132}
133
134parser_func_t *parser_get_func(const char *name)
81{ 135{
82 return apr_hash_get(g_parser_funcs, name, APR_HASH_KEY_STRING); 136 return apr_hash_get(g_parser_funcs, name, APR_HASH_KEY_STRING);
83} 137}
84 138
85static void parser_add_func(apr_pool_t *p, const char *const name, 139static void parser_add_func(apr_pool_t *p, const char *const name,
86 parser_func_t func) 140 parser_func_f func, int id)
87{ 141{
142 parser_func_t *s;
88 if (!g_parser_funcs) { 143 if (!g_parser_funcs) {
89 g_parser_funcs = apr_hash_make(p); 144 g_parser_funcs = apr_hash_make(p);
90 } 145 }
91 apr_hash_set(g_parser_funcs, lowerstr(p, name), APR_HASH_KEY_STRING, func); 146 s = apr_palloc(p, sizeof(parser_func_t));
147 s->func = func;
148 s->pos = id;
149 s->linedata = &g_parser_linedata;
150 apr_hash_set(g_parser_funcs, lowerstr(p, name), APR_HASH_KEY_STRING, s);
92} 151}
93 152
94void parser_init(apr_pool_t *p) 153void parser_init(apr_pool_t *p)
95{ 154{
96 parser_add_func(p, "regexmatch", parser_func_regexmatch); 155 int i = 0;
97 parser_add_func(p, "totimestamp", parser_func_totimestamp); 156 parser_add_func(p, "regexmatch", parser_func_regexmatch, ++i);
98 parser_add_func(p, "machineid", parser_func_machineid); 157 parser_add_func(p, "totimestamp", parser_func_totimestamp, ++i);
158 parser_add_func(p, "machineid", parser_func_machineid, ++i);
159 parser_add_func(p, "queryarg", parser_func_queryarg, ++i);
160 g_parser_linedata = apr_pcalloc(p, sizeof(void *) * (i+1));
161 g_parser_linedata[0] = (void *)i;
99} 162}
100 163
101void parser_find_logs(config_t *cfg) 164void parser_find_logs(config_t *cfg)
@@ -333,8 +396,10 @@ apr_status_t parse_processline(apr_pool_t *ptemp, config_t *cfg, char **argv,
333 } 396 }
334 /** @todo Run Pre Filters here */ 397 /** @todo Run Pre Filters here */
335 398
336 // Convert input fields to output fields
337 ofields = (config_output_field_t *)cfg->output_fields->elts; 399 ofields = (config_output_field_t *)cfg->output_fields->elts;
400 // clear out ofield function per-line data
401 memset(&g_parser_linedata[1],0,sizeof(void *)*(int)g_parser_linedata[0]);
402 // Convert input fields to output fields
338 for (i=0; i<cfg->output_fields->nelts; i++) { 403 for (i=0; i<cfg->output_fields->nelts; i++) {
339 const char *val; 404 const char *val;
340 val = apr_table_get(datain, ofields[i].source); 405 val = apr_table_get(datain, ofields[i].source);
@@ -347,8 +412,8 @@ apr_status_t parse_processline(apr_pool_t *ptemp, config_t *cfg, char **argv,
347 apr_table_setn(dataout, ofields[i].field, val); 412 apr_table_setn(dataout, ofields[i].field, val);
348 } else { 413 } else {
349 const char *ret= NULL; 414 const char *ret= NULL;
350 rv = ((parser_func_t)ofields[i].func)(ptemp, cfg, &ofields[i], val, 415 rv = ((parser_func_t *)ofields[i].func)->func(ptemp, cfg,
351 &ret); 416 &ofields[i], val, &ret);
352 if (rv) 417 if (rv)
353 return rv; 418 return rv;
354 apr_table_setn(dataout, ofields[i].field, ret); 419 apr_table_setn(dataout, ofields[i].field, ret);
diff --git a/utility/logparse.h b/utility/logparse.h
index 816624a..bc39cb1 100644
--- a/utility/logparse.h
+++ b/utility/logparse.h
@@ -3,10 +3,21 @@
3 3
4#include "config.h" 4#include "config.h"
5 5
6typedef apr_status_t (*parser_func_t)(apr_pool_t *p, config_t *cfg, 6typedef apr_status_t (*parser_func_f)(apr_pool_t *p, config_t *cfg,
7 config_output_field_t *field, const char *value, const char **ret); 7 config_output_field_t *field, const char *value, const char **ret);
8 8
9parser_func_t parser_get_func(const char *name); 9struct parser_func_t {
10 parser_func_f func;
11 int pos;
12 void *data;
13 void ***linedata;
14};
15
16#define parser_get_linedata(f) (*f->linedata)[f->pos]
17
18#define parser_set_linedata(f, v) (*f->linedata)[f->pos] = v
19
20parser_func_t *parser_get_func(const char *name);
10 21
11void parser_init(apr_pool_t *p); 22void parser_init(apr_pool_t *p);
12 23
diff --git a/utility/mod_log_sql.conf b/utility/mod_log_sql.conf
index 771f7c7..8822bb5 100644
--- a/utility/mod_log_sql.conf
+++ b/utility/mod_log_sql.conf
@@ -49,3 +49,6 @@ OutputField request_time char(28) "" date regexmatch ".+"
49OutputField agent varchar(255) "" agent 49OutputField agent varchar(255) "" agent
50OutputField referer varchar(255) "" referer 50OutputField referer varchar(255) "" referer
51OutputField machine_id varchar(25) "" "" machineid 51OutputField machine_id varchar(25) "" "" machineid
52#VIZU Fields
53OutputField poll_id int 0 request queryarg n
54
diff --git a/utility/util.c b/utility/util.c
index 99bb046..7ecb902 100644
--- a/utility/util.c
+++ b/utility/util.c
@@ -31,6 +31,99 @@ void line_chomp(char *str)
31 } 31 }
32} 32}
33 33
34/*
35 * *** Ripped from HTTPD util.c (why are so many PORTABLE things not in APR UTIL?)
36 */
37static char x2c(const char *what)
38{
39 register char digit;
40
41 digit = ((what[0] >= 'A') ? ((what[0] & 0xdf) - 'A') + 10
42 : (what[0] - '0'));
43 digit *= 16;
44 digit += (what[1] >= 'A' ? ((what[1] & 0xdf) - 'A') + 10
45 : (what[1] - '0'));
46 return (digit);
47}
48
49/*
50 * *** Ripped from HTTPD util.c (why are so many PORTABLE things not in APR UTIL?)
51 *
52 * Unescapes a URL, leaving reserved characters intact.
53 * Returns 0 on success, non-zero on error
54 * Failure is due to
55 * bad % escape returns APR_EINVAL
56 *
57 * decoding %00 or a forbidden character returns APR_EINVAL
58 */
59static int unescape_url(char *url, const char *forbid, const char *reserved)
60{
61 register int badesc, badpath;
62 char *x, *y;
63
64 badesc = 0;
65 badpath = 0;
66 /* Initial scan for first '%'. Don't bother writing values before
67 * seeing a '%' */
68 y = strchr(url, '%');
69 if (y == NULL) {
70 return APR_SUCCESS;
71 }
72 for (x = y; *y; ++x, ++y) {
73 if (*y != '%') {
74 *x = *y;
75 }
76 else {
77 if (!apr_isxdigit(*(y + 1)) || !apr_isxdigit(*(y + 2))) {
78 badesc = 1;
79 *x = '%';
80 }
81 else {
82 char decoded;
83 decoded = x2c(y + 1);
84 if ((decoded == '\0')
85 || (forbid && strchr(forbid, decoded))) {
86 badpath = 1;
87 *x = decoded;
88 y += 2;
89 }
90 else if (reserved && strchr(reserved, decoded)) {
91 *x++ = *y++;
92 *x++ = *y++;
93 *x = *y;
94 }
95 else {
96 *x = decoded;
97 y += 2;
98 }
99 }
100 }
101 }
102 *x = '\0';
103 if (badesc) {
104 return APR_EINVAL;
105 }
106 else if (badpath) {
107 return APR_EINVAL;
108 }
109 else {
110 return APR_SUCCESS;
111 }
112}
113
114/*
115 * *** Ripped from HTTPD util.c (why are so many PORTABLE things not in APR UTIL?)
116 */
117int ap_unescape_url(char *url)
118{
119 /* Traditional */
120#ifdef CASE_BLIND_FILESYSTEM
121 return unescape_url(url, "/\\", NULL);
122#else
123 return unescape_url(url, "/", NULL);
124#endif
125}
126
34void logging_init(config_t *cfg) 127void logging_init(config_t *cfg)
35{ 128{
36 apr_status_t rv; 129 apr_status_t rv;
@@ -58,9 +151,6 @@ const char *logging_strerror(apr_status_t rv)
58 return apr_strerror(rv, buff, 256); 151 return apr_strerror(rv, buff, 256);
59} 152}
60 153
61/**
62 * @todo implement logging
63 */
64void logging_log(config_t *cfg, loglevel_e level, const char *fmt, ...) 154void logging_log(config_t *cfg, loglevel_e level, const char *fmt, ...)
65{ 155{
66 va_list ap; 156 va_list ap;
diff --git a/utility/util.h b/utility/util.h
index c67cf9c..99f93aa 100644
--- a/utility/util.h
+++ b/utility/util.h
@@ -12,6 +12,8 @@ char *lowerstr(apr_pool_t *pool, const char *input);
12 */ 12 */
13void line_chomp(char *str); 13void line_chomp(char *str);
14 14
15int ap_unescape_url(char *url);
16
15void logging_init(config_t *cfg); 17void logging_init(config_t *cfg);
16 18
17void logging_log(config_t *cfg, loglevel_e level, const char *fmt, ...) 19void logging_log(config_t *cfg, loglevel_e level, const char *fmt, ...)