From 19e09e15d96fa891d18fd07bb0f751faa0a7fc7a Mon Sep 17 00:00:00 2001 From: Edward Rudd Date: Sat, 25 Oct 2008 16:44:17 +0000 Subject: implement query arg extracter "merge in" ap_unescape_url from HTTPD updated logging statements add per-line func data and per-func data --- utility/config.c | 10 +++-- utility/config.h | 4 +- utility/logparse.c | 105 ++++++++++++++++++++++++++++++++++++++--------- utility/logparse.h | 15 ++++++- utility/mod_log_sql.conf | 3 ++ utility/util.c | 96 +++++++++++++++++++++++++++++++++++++++++-- utility/util.h | 2 + 7 files changed, 206 insertions(+), 29 deletions(-) diff --git a/utility/config.c b/utility/config.c index b1e7585..fca2f77 100644 --- a/utility/config.c +++ b/utility/config.c @@ -320,15 +320,19 @@ apr_status_t config_check(config_t *cfg) { apr_status_t ret = APR_SUCCESS; if (!cfg->dbdriver || !cfg->dbparams) { - logging_log(cfg, LOGLEVEL_NOISE, "Database configuration is missing\n"); + logging_log(cfg, LOGLEVEL_NOISE, "CONFIG: Database configuration is missing"); ret = APR_EINVAL; } if (!cfg->table) { - logging_log(cfg, LOGLEVEL_NOISE, "No Log Table defined\n"); + logging_log(cfg, LOGLEVEL_NOISE, "CONFIG: No Log Table defined"); ret = APR_EINVAL; } if (apr_is_empty_array(cfg->output_fields)) { - logging_log(cfg, LOGLEVEL_NOISE, "No Output Fields Defined\n"); + logging_log(cfg, LOGLEVEL_NOISE, "CONFIG: No Output Fields Defined"); + ret = APR_EINVAL; + } + if (apr_hash_count(cfg->log_formats)==0) { + logging_log(cfg, LOGLEVEL_NOISE, "CONFIG: No Input Log Formats Defined"); ret = APR_EINVAL; } return ret; diff --git a/utility/config.h b/utility/config.h index 763ef5d..26a4e99 100644 --- a/utility/config.h +++ b/utility/config.h @@ -115,6 +115,8 @@ typedef enum { typedef struct config_output_field_t config_output_field_t; +typedef struct parser_func_t parser_func_t; + struct config_output_field_t { const char *field; logsql_field_datatype datatype; @@ -122,7 +124,7 @@ struct config_output_field_t { const char *def; const char 
*source; const char *fname; - void *func; + parser_func_t *func; const char **args; void *data; }; diff --git a/utility/logparse.c b/utility/logparse.c index f4afb52..7ea6bc1 100644 --- a/utility/logparse.c +++ b/utility/logparse.c @@ -12,6 +12,7 @@ #include "database.h" apr_hash_t *g_parser_funcs; +void **g_parser_linedata; static apr_status_t parser_func_regexmatch(apr_pool_t *p, config_t *cfg, config_output_field_t *field, const char *value, const char **ret) @@ -19,33 +20,34 @@ static apr_status_t parser_func_regexmatch(apr_pool_t *p, config_t *cfg, struct { ap_regex_t *rx; const char *substr; - }*data; + }*_data; ap_regmatch_t regm[AP_MAX_REG_MATCH]; // Check if a regular expression configured if (!field->args[0]) return APR_EINVAL; if (!field->data) { // pre compile the regex - data = apr_palloc(cfg->pool, sizeof(ap_regex_t)+sizeof(const char *)); - data->rx = ap_pregcomp(cfg->pool, field->args[0], + _data = apr_palloc(cfg->pool, sizeof(ap_regex_t)+sizeof(const char *)); + _data->rx = ap_pregcomp(cfg->pool, field->args[0], AP_REG_EXTENDED|AP_REG_ICASE); if (field->args[1]) { - data->substr = field->args[1]; + _data->substr = field->args[1]; } else { - data->substr = "$1"; + _data->substr = "$1"; } - if (!data->rx) + if (!_data->rx) return APR_EINVAL; - field->data = data; + field->data = _data; } else - data = field->data; + _data = field->data; - if (!ap_regexec(data->rx, value, AP_MAX_REG_MATCH, regm, 0)) { - *ret = ap_pregsub(p, data->substr, value, AP_MAX_REG_MATCH, regm); + if (!ap_regexec(_data->rx, value, AP_MAX_REG_MATCH, regm, 0)) { + *ret = ap_pregsub(p, _data->substr, value, AP_MAX_REG_MATCH, regm); } else { *ret = field->def; } - //printf("We matched %s against %s to %s\n",value, field->args[0], *ret); + logging_log(cfg, LOGLEVEL_DEBUG, "REGEX: matched %s against %s to %s", value, + field->args[0], *ret); return APR_SUCCESS; } @@ -76,26 +78,87 @@ static apr_status_t parser_func_machineid(apr_pool_t *p, config_t *cfg, } /** @todo Implement Query 
arg ripping function */ +static apr_status_t parser_func_queryarg(apr_pool_t *p, config_t *cfg, + config_output_field_t *field, const char *value, const char **ret) +{ + apr_table_t *query = parser_get_linedata(field->func); + + if (!field->args[0]) + return APR_EINVAL; + + if (!query) { + char *query_beg; + + query = apr_table_make(p,3); + + query_beg = strchr(value, '?'); + // if we have a query string, rip it apart + if (query_beg) { + char *key; + char *value; + char *query_string; + char *strtok_state; + char *query_end = strrchr(++query_beg,' '); + + query_string = apr_pstrndup(p, query_beg, query_end-query_beg); + logging_log(cfg, LOGLEVEL_DEBUG, "QUERY: Found String %pp, %pp, %s", + query_beg, query_end, query_string); + + key = apr_strtok(query_string, "&", &strtok_state); + while (key) { + value = strchr(key, '='); + if (value) { + *value = '\0'; /* Split the string in two */ + value++; /* Skip past the = */ + } + else { + value = "1"; + } + ap_unescape_url(key); + ap_unescape_url(value); + apr_table_set(query, key, value); + + logging_log(cfg, LOGLEVEL_DEBUG, + "QUERY: Found arg: %s = %s", key, value); -parser_func_t parser_get_func(const char *name) + key = apr_strtok(NULL, "&", &strtok_state); + } + } + parser_set_linedata(field->func,query); + } + *ret = apr_table_get(query, field->args[0]); + if (*ret == NULL) *ret = field->def; + return APR_SUCCESS; +} + +parser_func_t *parser_get_func(const char *name) { return apr_hash_get(g_parser_funcs, name, APR_HASH_KEY_STRING); } static void parser_add_func(apr_pool_t *p, const char *const name, - parser_func_t func) + parser_func_f func, int id) { + parser_func_t *s; if (!g_parser_funcs) { g_parser_funcs = apr_hash_make(p); } - apr_hash_set(g_parser_funcs, lowerstr(p, name), APR_HASH_KEY_STRING, func); + s = apr_palloc(p, sizeof(parser_func_t)); + s->func = func; + s->pos = id; + s->linedata = &g_parser_linedata; + apr_hash_set(g_parser_funcs, lowerstr(p, name), APR_HASH_KEY_STRING, s); } void 
parser_init(apr_pool_t *p) { - parser_add_func(p, "regexmatch", parser_func_regexmatch); - parser_add_func(p, "totimestamp", parser_func_totimestamp); - parser_add_func(p, "machineid", parser_func_machineid); + int i = 0; + parser_add_func(p, "regexmatch", parser_func_regexmatch, ++i); + parser_add_func(p, "totimestamp", parser_func_totimestamp, ++i); + parser_add_func(p, "machineid", parser_func_machineid, ++i); + parser_add_func(p, "queryarg", parser_func_queryarg, ++i); + g_parser_linedata = apr_pcalloc(p, sizeof(void *) * (i+1)); + g_parser_linedata[0] = (void *)i; } void parser_find_logs(config_t *cfg) @@ -333,8 +396,10 @@ apr_status_t parse_processline(apr_pool_t *ptemp, config_t *cfg, char **argv, } /** @todo Run Pre Filters here */ - // Convert input fields to output fields ofields = (config_output_field_t *)cfg->output_fields->elts; + // clear out ofield function per-line data + memset(&g_parser_linedata[1],0,sizeof(void *)*(int)g_parser_linedata[0]); + // Convert input fields to output fields for (i=0; i<cfg->output_fields->nelts; i++) { const char *val; val = apr_table_get(datain, ofields[i].source); @@ -347,8 +412,8 @@ apr_status_t parse_processline(apr_pool_t *ptemp, config_t *cfg, char **argv, apr_table_setn(dataout, ofields[i].field, val); } else { const char *ret= NULL; - rv = ((parser_func_t)ofields[i].func)(ptemp, cfg, &ofields[i], val, - &ret); + rv = ((parser_func_t *)ofields[i].func)->func(ptemp, cfg, + &ofields[i], val, &ret); if (rv) return rv; apr_table_setn(dataout, ofields[i].field, ret); diff --git a/utility/logparse.h b/utility/logparse.h index 816624a..bc39cb1 100644 --- a/utility/logparse.h +++ b/utility/logparse.h @@ -3,10 +3,21 @@ #include "config.h" -typedef apr_status_t (*parser_func_t)(apr_pool_t *p, config_t *cfg, +typedef apr_status_t (*parser_func_f)(apr_pool_t *p, config_t *cfg, config_output_field_t *field, const char *value, const char **ret); -parser_func_t parser_get_func(const char *name); +struct parser_func_t { +
parser_func_f func; + int pos; + void *data; + void ***linedata; +}; + +#define parser_get_linedata(f) (*f->linedata)[f->pos] + +#define parser_set_linedata(f, v) (*f->linedata)[f->pos] = v + +parser_func_t *parser_get_func(const char *name); void parser_init(apr_pool_t *p); diff --git a/utility/mod_log_sql.conf b/utility/mod_log_sql.conf index 771f7c7..8822bb5 100644 --- a/utility/mod_log_sql.conf +++ b/utility/mod_log_sql.conf @@ -49,3 +49,6 @@ OutputField request_time char(28) "" date regexmatch ".+" OutputField agent varchar(255) "" agent OutputField referer varchar(255) "" referer OutputField machine_id varchar(25) "" "" machineid +#VIZU Fields +OutputField poll_id int 0 request queryarg n + diff --git a/utility/util.c b/utility/util.c index 99bb046..7ecb902 100644 --- a/utility/util.c +++ b/utility/util.c @@ -31,6 +31,99 @@ void line_chomp(char *str) } } +/* + * *** Ripped from HTTPD util.c (why are so many PORTABLE things not in APR UTIL?) + */ +static char x2c(const char *what) +{ + register char digit; + + digit = ((what[0] >= 'A') ? ((what[0] & 0xdf) - 'A') + 10 + : (what[0] - '0')); + digit *= 16; + digit += (what[1] >= 'A' ? ((what[1] & 0xdf) - 'A') + 10 + : (what[1] - '0')); + return (digit); +} + +/* + * *** Ripped from HTTPD util.c (why are so many PORTABLE things not in APR UTIL?) + * + * Unescapes a URL, leaving reserved characters intact. + * Returns 0 on success, non-zero on error + * Failure is due to + * bad % escape returns HTTP_BAD_REQUEST + * + * decoding %00 or a forbidden character returns HTTP_NOT_FOUND + */ +static int unescape_url(char *url, const char *forbid, const char *reserved) +{ + register int badesc, badpath; + char *x, *y; + + badesc = 0; + badpath = 0; + /* Initial scan for first '%'. 
Don't bother writing values before + * seeing a '%' */ + y = strchr(url, '%'); + if (y == NULL) { + return APR_SUCCESS; + } + for (x = y; *y; ++x, ++y) { + if (*y != '%') { + *x = *y; + } + else { + if (!apr_isxdigit(*(y + 1)) || !apr_isxdigit(*(y + 2))) { + badesc = 1; + *x = '%'; + } + else { + char decoded; + decoded = x2c(y + 1); + if ((decoded == '\0') + || (forbid && strchr(forbid, decoded))) { + badpath = 1; + *x = decoded; + y += 2; + } + else if (reserved && strchr(reserved, decoded)) { + *x++ = *y++; + *x++ = *y++; + *x = *y; + } + else { + *x = decoded; + y += 2; + } + } + } + } + *x = '\0'; + if (badesc) { + return APR_EINVAL; + } + else if (badpath) { + return APR_EINVAL; + } + else { + return APR_SUCCESS; + } +} + +/* + * *** Ripped from HTTPD util.c (why are so many PORTABLE things not in APR UTIL?) + */ +int ap_unescape_url(char *url) +{ + /* Traditional */ +#ifdef CASE_BLIND_FILESYSTEM + return unescape_url(url, "/\\", NULL); +#else + return unescape_url(url, "/", NULL); +#endif +} + void logging_init(config_t *cfg) { apr_status_t rv; @@ -58,9 +151,6 @@ const char *logging_strerror(apr_status_t rv) return apr_strerror(rv, buff, 256); } -/** - * @todo implement logging - */ void logging_log(config_t *cfg, loglevel_e level, const char *fmt, ...) { va_list ap; diff --git a/utility/util.h b/utility/util.h index c67cf9c..99f93aa 100644 --- a/utility/util.h +++ b/utility/util.h @@ -12,6 +12,8 @@ char *lowerstr(apr_pool_t *pool, const char *input); */ void line_chomp(char *str); +int ap_unescape_url(char *url); + void logging_init(config_t *cfg); void logging_log(config_t *cfg, loglevel_e level, const char *fmt, ...) -- cgit