From aed9f10440d8789f99919c3a6f91e8d76bbc44dc Mon Sep 17 00:00:00 2001 From: Edward Rudd Date: Thu, 23 Oct 2008 06:29:36 +0000 Subject: more parsing implementation (Custom functions) initial DB inserting (w/ prepared query) --- utility/Makefile.in | 4 +- utility/ap_pcre.h | 2 + utility/config.c | 46 ++++++++++------------- utility/config.h | 9 ++++- utility/database.c | 95 ++++++++++++++++++++++++++++++++++++++++++++++++ utility/database.h | 12 ++++++ utility/logparse.c | 89 +++++++++++++++++++++++++++++++++++++++------ utility/logparse.h | 7 ++-- utility/mod_log_sql.conf | 32 ++++++++-------- utility/shell.c | 10 ++++- 10 files changed, 245 insertions(+), 61 deletions(-) (limited to 'utility') diff --git a/utility/Makefile.in b/utility/Makefile.in index 69a746c..eeb26b8 100644 --- a/utility/Makefile.in +++ b/utility/Makefile.in @@ -18,8 +18,8 @@ STD_DIST = Makefile.in DISTFILES = $(STD_DIST) $(EXTRA_DIST) $(SOURCES) $(HEADERS) -SOURCES = shell.c config.c logparse.c ap_pcre.c util.c -HEADERS = shell.h config.h logparse.h ap_pcre.h util.h +SOURCES = shell.c config.c logparse.c ap_pcre.c util.c database.c +HEADERS = shell.h config.h logparse.h ap_pcre.h util.h database.h OBJECTS = $(SOURCES:.c=.o) DEPS = $(SOURCES:.c=.d) TARGETS = mod_log_sql diff --git a/utility/ap_pcre.h b/utility/ap_pcre.h index a851d29..e817dc2 100644 --- a/utility/ap_pcre.h +++ b/utility/ap_pcre.h @@ -73,6 +73,8 @@ extern "C" { #define AP_REG_EXTENDED (0) /** unused */ #define AP_REG_NOSUB (0) /** unused */ +#define AP_MAX_REG_MATCH 10 + /* Error values: */ enum { AP_REG_ASSERT = 1, /** internal error ? */ diff --git a/utility/config.c b/utility/config.c index 847d474..d3a6ca5 100644 --- a/utility/config.c +++ b/utility/config.c @@ -3,6 +3,7 @@ #include "apr_file_io.h" #include "apr_strings.h" #include "apr_hash.h" +#include "apr_uri.h" #include "shell.h" #include "config.h" @@ -65,18 +66,6 @@ static apr_status_t config_set_loglevel(config_t *cfg, config_opt_t *opt, return APR_SUCCESS; } -static apr_status_t config_set_dbconnect(config_t *cfg, config_opt_t *opt, - int argc, const char **argv) -{ - return APR_SUCCESS; -} - -static apr_status_t config_set_dbparam(config_t *cfg, config_opt_t *opt, - int argc, const char **argv) -{ - return APR_SUCCESS; -} - static apr_status_t config_set_inputfile(config_t *cfg, config_opt_t *opt, int argc, const char **argv) { @@ -123,12 +112,12 @@ static apr_status_t config_set_output_field(config_t *cfg, config_opt_t *opt, config_output_field_t *field; char *type, *size, *temp; - if (argc < 4) + if (argc < 5) return APR_EINVAL; field = (config_output_field_t *)apr_array_push(cfg->output_fields); field->field = apr_pstrdup(cfg->pool, argv[1]); - field->source = apr_pstrdup(cfg->pool, argv[3]); - + field->source = apr_pstrdup(cfg->pool, argv[4]); + field->def = apr_pstrdup(cfg->pool, argv[3]); type = size = apr_pstrdup(cfg->pool, argv[2]); while (*size!='\0' && *size!='(') size++; @@ -156,13 +145,13 @@ static apr_status_t config_set_output_field(config_t *cfg, config_opt_t *opt, } // Has a function - if (argc > 4) { + if (argc > 5) { int i; - field->fname = apr_pstrdup(cfg->pool, argv[4]); + field->fname = apr_pstrdup(cfg->pool, argv[5]); field->func = parser_get_func(field->fname); - field->args = apr_pcalloc(cfg->pool, sizeof(char *) * (argc-4+1)); - for (i=5; i<=argc; i++) { - field->args[i-5] = apr_pstrdup(cfg->pool, argv[i]); + field->args = apr_pcalloc(cfg->pool, sizeof(char *) * (argc-5+1)); + for (i=6; i<=argc; i++) { + field->args[i-6] = apr_pstrdup(cfg->pool, argv[i]); } } @@ -208,6 +197,9 @@ void config_dump(config_t *cfg) printf("InputDir: %s\n", cfg->input_dir); + printf("DB Driver: %s\n", cfg->dbdriver); + printf("DB Params: %s\n", cfg->dbparams); + printf("Table: %s\n", cfg->table); printf("Transactions: %d\n", cfg->transactions); printf("MachineID: %s\n", cfg->machineid); @@ -231,7 +223,9 @@ void config_dump(config_t *cfg) printf("Output Fields:\n"); fields = (config_output_field_t *)cfg->output_fields->elts; for (i=0; ioutput_fields->nelts; i++) { - printf(">> %s %s(%d): %s", fields[i].field, logsql_field_datatyeName(fields[i].datatype), fields[i].size, fields[i].source); + printf(">> %s %s(%d) DEFAULT '%s': %s", fields[i].field, + logsql_field_datatyeName(fields[i].datatype), + fields[i].size, fields[i].def, fields[i].source); if (fields[i].func) { printf(" :: %s(", fields[i].fname); if (fields[i].args) { @@ -280,11 +274,10 @@ void config_init(apr_pool_t *p) config_add_option(p, "InputFile", "Parse only this file", config_set_inputfile, NULL); - config_add_option(p, "DBConnect", - "DB Connection information type://user:pass@hostname/database", - config_set_dbconnect, NULL); - config_add_option(p, "DBParam", "DB Connection Parameter", - config_set_dbparam, NULL); + config_add_option(p, "DBDDriver", "DBD Driver to use", + config_set_string, (void *)APR_OFFSETOF(config_t, dbdriver)); + config_add_option(p, "DBDParams", "DBD Connection Parameters", + config_set_string, (void *)APR_OFFSETOF(config_t, dbparams)); config_add_option(p, "Table", "Table to import the log to", config_set_string, (void *)APR_OFFSETOF(config_t, table)); config_add_option(p, "UseTransactions", "Enable Transactions?", @@ -330,7 +323,6 @@ config_t *config_create(apr_pool_t *p) cfg->summary = 1; cfg->transactions = 1; cfg->input_files = apr_array_make(cfg->pool, 10, sizeof(char *)); - cfg->dbconfig = apr_table_make(cfg->pool, 5); cfg->log_formats = apr_hash_make(cfg->pool); cfg->output_fields = apr_array_make(cfg->pool, 10, sizeof(config_output_field_t)); diff --git a/utility/config.h b/utility/config.h index 67f8ea5..0e80856 100644 --- a/utility/config.h +++ b/utility/config.h @@ -4,7 +4,6 @@ #include "apr_tables.h" #include "apr_hash.h" #include "apr_file_io.h" - #include "ap_pcre.h" typedef enum { @@ -14,6 +13,7 @@ typedef enum { LOGLEVEL_DEBUG = 3, } loglevel_e; +typedef struct config_dbd_t config_dbd_t; typedef struct config_t config_t; struct config_t { /** the structures pool (to ease function arguments) */ @@ -32,7 +32,10 @@ struct config_t { apr_array_header_t *input_files; /** db connection configuration */ - apr_table_t *dbconfig; + const char *dbdriver; + const char *dbparams; + config_dbd_t *dbconn; + /** Logging table */ const char *table; /** Use transactons */ @@ -111,10 +114,12 @@ struct config_output_field_t { const char *field; logsql_field_datatype datatype; apr_size_t size; + const char *def; const char *source; const char *fname; void *func; const char **args; + void *data; }; #define CHECK_YESNO(c) ((!strcasecmp(c,"on") || !strcasecmp(c,"yes")) ? 1 : 0) diff --git a/utility/database.c b/utility/database.c index 5fece50..e7650aa 100644 --- a/utility/database.c +++ b/utility/database.c @@ -1,2 +1,97 @@ #include "database.h" +#include "apu.h" +#include "apr_dbd.h" +#include "apr_strings.h" +struct config_dbd_t { + const apr_dbd_driver_t *driver; + apr_dbd_t *dbd; + apr_dbd_prepared_t *stmt; + const char **args; +}; + +void database_init(apr_pool_t *p) +{ + apr_dbd_init(p); +} + +apr_status_t database_connect(config_t *cfg) +{ + apr_status_t rv; + if (!cfg->dbconn) { + cfg->dbconn = apr_palloc(cfg->pool, sizeof(config_dbd_t)); + } + rv = apr_dbd_get_driver(cfg->pool, cfg->dbdriver, &(cfg->dbconn->driver)); + if (rv) + return rv; + + rv = apr_dbd_open(cfg->dbconn->driver, cfg->pool, cfg->dbparams, + &(cfg->dbconn->dbd)); + if (rv) + return rv; + + return APR_SUCCESS; +} + +apr_status_t database_disconnect(config_t *cfg) +{ + return apr_dbd_close(cfg->dbconn->driver, cfg->dbconn->dbd); +} + +apr_status_t database_insert(config_t *cfg, apr_pool_t *p, apr_table_t *data) +{ + apr_status_t rv; + int f, nfs; + config_output_field_t *ofields; + ofields = (config_output_field_t *)cfg->output_fields->elts; + nfs = cfg->output_fields->nelts; + // Prepare statement + if (!cfg->dbconn->stmt) { + char *sql; + int i; + struct iovec *vec; + vec = apr_palloc(p, (nfs*2 + 5) * sizeof(struct iovec)); + sql = apr_palloc(p, (nfs*3)); + vec[0].iov_base = "INSERT INTO "; + vec[0].iov_len = 12; + vec[1].iov_base = (void *)cfg->table; + vec[1].iov_len = strlen(cfg->table); + vec[2].iov_base = " ("; + vec[2].iov_len = 2; + for (i=3, f=0; fdbconn->args = apr_palloc(cfg->pool, nfs * sizeof(char *)); + rv = apr_dbd_prepare(cfg->dbconn->driver, cfg->pool, cfg->dbconn->dbd, + sql, "INSERT", &(cfg->dbconn->stmt)); + if (rv) { + printf("DB Error: %s\n", apr_dbd_error(cfg->dbconn->driver, + cfg->dbconn->dbd, rv)); + return rv; + } + } + for (f=0; fdbconn->args[f] = apr_table_get(data, ofields[f].field); + } + rv = apr_dbd_pquery(cfg->dbconn->driver, p, cfg->dbconn->dbd, &f, + cfg->dbconn->stmt, nfs, cfg->dbconn->args); + if (rv) { + printf("DB Error: %s\n", apr_dbd_error(cfg->dbconn->driver, + cfg->dbconn->dbd, rv)); + return rv; + } + return APR_SUCCESS; +} diff --git a/utility/database.h b/utility/database.h index 9fc4844..fd24994 100644 --- a/utility/database.h +++ b/utility/database.h @@ -1,4 +1,16 @@ #ifndef DATABASE_H_ #define DATABASE_H_ +#include "apr_pools.h" + +#include "config.h" + +void database_init(apr_pool_t *p); + +apr_status_t database_connect(config_t *cfg); + +apr_status_t database_disconnect(config_t *cfg); + +apr_status_t database_insert(config_t *cfg, apr_pool_t *p, apr_table_t *data); + #endif /*DATABASE_H_*/ diff --git a/utility/logparse.c b/utility/logparse.c index 4d823ce..e9ca340 100644 --- a/utility/logparse.c +++ b/utility/logparse.c @@ -2,16 +2,70 @@ #include "apr_file_info.h" #include "apr_file_io.h" #include "apr_strings.h" +#include "apr_time.h" + +#include "time.h" +#include "stdlib.h" #include "util.h" +#include "ap_pcre.h" +#include "database.h" + apr_hash_t *g_parser_funcs; -static apr_status_t parser_func_regexmatch(config_t *cfg, const char *data, - int argc, const char **argv) +static apr_status_t parser_func_regexmatch(apr_pool_t *p, config_t *cfg, + config_output_field_t *field, const char *value, char **ret) { + struct { + ap_regex_t *rx; + const char *substr; + } *data; + ap_regmatch_t regm[AP_MAX_REG_MATCH]; + // Check if a regular expression configured + if (!field->args[0]) return APR_EINVAL; + if (!field->data) { + // pre compile the regex + data = apr_palloc(cfg->pool, sizeof(ap_regex_t)+sizeof(const char *)); + data->rx = ap_pregcomp(cfg->pool, field->args[0], + AP_REG_EXTENDED|AP_REG_ICASE); + if (field->args[1]) { + data->substr = field->args[1]; + } else { + data->substr = "$1"; + } + if (!data->rx) return APR_EINVAL; + field->data = data; + } else data = field->data; + + if (!ap_regexec(data->rx, value, AP_MAX_REG_MATCH, regm, 0)) { + *ret = ap_pregsub(p, data->substr, value, AP_MAX_REG_MATCH, regm); + } + //printf("We matched %s against %s to %s\n",value, field->args[0], *ret); return APR_SUCCESS; } + +static apr_status_t parser_func_totimestamp(apr_pool_t *p, config_t *cfg, + config_output_field_t *field, const char *value, char **ret) +{ + time_t time; + struct tm ts; + + + strptime(value, "%d/%b/%Y:%H:%M:%S %z", &ts); + time = mktime(&ts); + + *ret = apr_itoa(p, time); + return APR_SUCCESS; +} + +static apr_status_t parser_func_machineid(apr_pool_t *p, config_t *cfg, + config_output_field_t *field, const char *value, char **ret) +{ + *ret = apr_pstrdup(p,cfg->machineid); + return APR_SUCCESS; +} + parser_func_t parser_get_func(const char *name) { return apr_hash_get(g_parser_funcs, name, APR_HASH_KEY_STRING); @@ -29,6 +83,8 @@ static void parser_add_func(apr_pool_t *p, const char *const name, void parser_init(apr_pool_t *p) { parser_add_func(p, "regexmatch", parser_func_regexmatch); + parser_add_func(p, "totimestamp", parser_func_totimestamp); + parser_add_func(p, "machineid", parser_func_machineid); } void parser_find_logs(config_t *cfg) @@ -212,7 +268,7 @@ apr_status_t parse_logfile(config_t *cfg, const char *filename) line_chomp(buff); apr_pool_clear(targp); - tokenize_logline(buff, &targv, targp, 1); + tokenize_logline(buff, &targv, targp, 0); targc = 0; while (targv[targc]) targc++; /** @todo Run Line Filters here */ @@ -239,6 +295,7 @@ apr_status_t parse_processline(apr_pool_t *ptemp, config_t *cfg, char **argv, in config_output_field_t *ofields; apr_table_t *datain; apr_table_t *dataout; + apr_status_t rv; int i; fmt = apr_hash_get(cfg->log_formats, cfg->logformat, APR_HASH_KEY_STRING); @@ -257,20 +314,30 @@ apr_status_t parse_processline(apr_pool_t *ptemp, config_t *cfg, char **argv, in // Convert input fields to output fields ofields = (config_output_field_t *)cfg->output_fields->elts; for (i=0; ioutput_fields->nelts; i++) { - const char *t; + const char *val; + val = apr_table_get(datain, ofields[i].source); + // If we can't find the source field just continue + if (!val && !(ofields[i].source[0]=='\0' && ofields[i].func)) { + apr_table_setn(dataout, ofields[i].field, ofields[i].def); + continue; + } if (!ofields[i].func) { - t = apr_table_get(datain, ofields[i].source); - if (!t) { - return APR_EINVAL; - } - apr_table_setn(dataout,ofields[i].field, t); - printf("S: %s = %s\n",ofields[i].source, t); + apr_table_setn(dataout,ofields[i].field, val); + //printf("S: %s = %s\n",ofields[i].field, val); } else { - printf("S: %s, F: %p\n",ofields[i].source, ofields[i].func); + char *ret = NULL; + rv = ((parser_func_t)ofields[i].func)(ptemp, cfg, &ofields[i], val, + &ret); + if (rv) return rv; + apr_table_setn(dataout, ofields[i].field, ret); + //printf("S: %s = %s\n",ofields[i].field, ret); } } /** @todo Run Post Filters here */ + // Process DB Query + rv = database_insert(cfg, ptemp, dataout); + if (rv) return rv; return APR_SUCCESS; } diff --git a/utility/logparse.h b/utility/logparse.h index ebabf56..53b376b 100644 --- a/utility/logparse.h +++ b/utility/logparse.h @@ -3,8 +3,8 @@ #include "config.h" -typedef apr_status_t (*parser_func_t)(config_t *cfg, const char *data, - int argc, const char **argv); +typedef apr_status_t (*parser_func_t)(apr_pool_t *p, config_t *cfg, + config_output_field_t *field, const char *value, char **ret); parser_func_t parser_get_func(const char *name); @@ -14,6 +14,7 @@ void parser_find_logs(config_t *cfg); apr_status_t parse_logfile(config_t *cfg, const char *filename); -apr_status_t parse_processline(apr_pool_t *ptemp, config_t *cfg, char **argv, int argc); +apr_status_t parse_processline(apr_pool_t *ptemp, config_t *cfg, char **argv, + int argc); #endif /*LOGPARSE_H_*/ diff --git a/utility/mod_log_sql.conf b/utility/mod_log_sql.conf index 72f0205..9730983 100644 --- a/utility/mod_log_sql.conf +++ b/utility/mod_log_sql.conf @@ -1,8 +1,9 @@ InputDirectory ./logs ErrorLog ./error_log -DBConnect mysql://username:host@server/database +DBDDriver mysql +DBDParams "host=localhost;user=root;dbname=apache_log" DBParam socketfile /tmp/mysql.sock -Table apache_logs +Table access_log MachineID 7of9 UseTransactions on LogLevel notice @@ -34,17 +35,18 @@ Linefilter - "BAD" PreFilter request - "GET \/images" PostFilter request_method "GET" -# Usage field datatype(size) source [function [param]...] -OutputField bytes_sent int bytes_sent -OutputField request_protocol varchar(10) request regexmatch "(HTTP\/[\d\.]+)$" -OutputField remote_host varchar(50) remhost -OutputField request_method varchar(25) request regexmatch "^(\w+)" -OutputField time_stamp int date totimestamp -OutputField status smallint status -OutputField request_uri varchar(255) request regexmatch "^\w+ (.+) \w+\.[\d\.]+$" -OutputField remote_user varchar(50) user -OutputField remote_logname varchar(50) ident -OutputField remote_time char(28) date +# Usage field datatype(size) default source [function [param]...] +OutputField bytes_sent int 0 bytes_sent +OutputField request_protocol varchar(10) "" request regexmatch "(HTTP/[\\d\\.]+)$" +OutputField remote_host varchar(50) "" remhost +OutputField request_method varchar(25) "" request regexmatch "^(\\w+)" +OutputField time_stamp int 0 date totimestamp +OutputField status smallint 0 status +OutputField request_uri varchar(255) "" request regexmatch "^\\w+ (.+) \\w+/[\\d\\.]+$" +OutputField remote_user varchar(50) "" user +OutputField remote_logname varchar(50) "" ident +OutputField request_time char(28) "" date regexmatch ".+" "[$0]" #Only used for Combined log input, if standard CLF input, they are ignored -OutputField agent varchar(255) agent -OutputField referer varchar(255) referer +OutputField agent varchar(255) "" agent +OutputField referer varchar(255) "" referer +OutputField machine_id varchar(25) "" "" machineid diff --git a/utility/shell.c b/utility/shell.c index eaa7098..94cca35 100644 --- a/utility/shell.c +++ b/utility/shell.c @@ -9,6 +9,7 @@ #include "shell.h" #include "config.h" #include "logparse.h" +#include "database.h" const apr_getopt_option_t _opt_config[] = { {"machineid", 'm', 1, "Machine ID for the log file"}, @@ -122,9 +123,11 @@ int main(int argc, const char *const argv[]) exit(1); } - // Process configuration file + // Initialize sub systems parser_init(pool); config_init(pool); + database_init(pool); + // Process configuration file base = config_create(pool); rv = config_read(base, apr_table_get(args,"Config"), args); apr_pool_destroy(ptemp); @@ -138,6 +141,10 @@ int main(int argc, const char *const argv[]) // Find files and parse parser_find_logs(base); + if ((rv = database_connect(base))) { + printf("Error Connecting to Database: %d\n",rv); + exit(1); + } if (!apr_is_empty_array(base->input_files)) { char **filelist; int f, l; @@ -149,5 +156,6 @@ int main(int argc, const char *const argv[]) } else { printf("No input files\n"); } + database_disconnect(base); return 0; } -- cgit