diff options
Diffstat (limited to 'utility/logparse.c')
-rw-r--r-- | utility/logparse.c | 768 |
1 files changed, 768 insertions, 0 deletions
diff --git a/utility/logparse.c b/utility/logparse.c new file mode 100644 index 0000000..1b3cc97 --- /dev/null +++ b/utility/logparse.c | |||
@@ -0,0 +1,768 @@ | |||
1 | #include "logparse.h" | ||
2 | #include "apr_file_info.h" | ||
3 | #include "apr_file_io.h" | ||
4 | #include "apr_strings.h" | ||
5 | #include "apr_time.h" | ||
6 | |||
7 | #include "time.h" | ||
8 | #include "stdlib.h" | ||
9 | |||
10 | #include "util.h" | ||
11 | #include "ap_pcre.h" | ||
12 | #include "database.h" | ||
13 | |||
14 | apr_hash_t *g_parser_funcs; | ||
15 | void **g_parser_linedata; | ||
16 | |||
17 | static apr_status_t parser_func_wrap(apr_pool_t *p, config_t *cfg, | ||
18 | config_output_field_t *field, const char *value, const char **ret) | ||
19 | { | ||
20 | if (field->args[0] && field->args[1]) { | ||
21 | struct iovec vec[3]; | ||
22 | apr_size_t len; | ||
23 | |||
24 | vec[0].iov_base = (void *)field->args[0]; | ||
25 | vec[0].iov_len = strlen(field->args[0]); | ||
26 | vec[1].iov_base = (void *)value; | ||
27 | vec[1].iov_len = strlen(value); | ||
28 | vec[2].iov_base = (void *)field->args[1]; | ||
29 | vec[2].iov_len = strlen(field->args[1]); | ||
30 | |||
31 | *ret = apr_pstrcatv(p, vec, 3, &len); | ||
32 | } else { | ||
33 | logging_log(cfg, LOGLEVEL_NOISE, "wrap requires before and after strings"); | ||
34 | return APR_EINVAL; | ||
35 | } | ||
36 | return APR_SUCCESS; | ||
37 | } | ||
38 | |||
39 | static apr_status_t parser_func_regexmatch(apr_pool_t *p, config_t *cfg, | ||
40 | config_output_field_t *field, const char *value, const char **ret) | ||
41 | { | ||
42 | struct { | ||
43 | ap_regex_t *rx; | ||
44 | const char *substr; | ||
45 | }*_data; | ||
46 | ap_regmatch_t regm[AP_MAX_REG_MATCH]; | ||
47 | // Check if a regular expression configured | ||
48 | if (!field->args[0]) | ||
49 | return APR_EINVAL; | ||
50 | if (!field->data) { | ||
51 | // pre compile the regex | ||
52 | _data = apr_palloc(cfg->pool, sizeof(ap_regex_t)+sizeof(const char *)); | ||
53 | _data->rx = ap_pregcomp(cfg->pool, field->args[0], | ||
54 | AP_REG_EXTENDED|AP_REG_ICASE); | ||
55 | if (field->args[1]) { | ||
56 | _data->substr = field->args[1]; | ||
57 | } else { | ||
58 | _data->substr = "$1"; | ||
59 | } | ||
60 | if (!_data->rx) { | ||
61 | logging_log(cfg, LOGLEVEL_NOISE, "Failed to compile regular expression"); | ||
62 | return APR_EINVAL; | ||
63 | } | ||
64 | field->data = _data; | ||
65 | } else | ||
66 | _data = field->data; | ||
67 | |||
68 | if (!ap_regexec(_data->rx, value, AP_MAX_REG_MATCH, regm, 0)) { | ||
69 | *ret = ap_pregsub(p, _data->substr, value, AP_MAX_REG_MATCH, regm); | ||
70 | } | ||
71 | logging_log(cfg, LOGLEVEL_DEBUG, "REGEX: matched %s against %s to %s", value, | ||
72 | field->args[0], *ret); | ||
73 | return APR_SUCCESS; | ||
74 | } | ||
75 | |||
76 | static apr_status_t parser_func_totimestamp(apr_pool_t *p, config_t *cfg, | ||
77 | config_output_field_t *field, const char *value, const char **ret) | ||
78 | { | ||
79 | time_t time; | ||
80 | struct tm ts; | ||
81 | |||
82 | //memset(&ts,0,sizeof(struct tm)); | ||
83 | |||
84 | strptime(value, "%d/%b/%Y:%H:%M:%S %z", &ts); | ||
85 | time = mktime(&ts); | ||
86 | |||
87 | *ret = apr_itoa(p, time); | ||
88 | return APR_SUCCESS; | ||
89 | } | ||
90 | |||
91 | static apr_status_t parser_func_machineid(apr_pool_t *p, config_t *cfg, | ||
92 | config_output_field_t *field, const char *value, const char **ret) | ||
93 | { | ||
94 | if (cfg->machineid) { | ||
95 | *ret = apr_pstrdup(p, cfg->machineid); | ||
96 | } | ||
97 | return APR_SUCCESS; | ||
98 | } | ||
99 | |||
100 | static apr_status_t parser_func_queryarg(apr_pool_t *p, config_t *cfg, | ||
101 | config_output_field_t *field, const char *value, const char **ret) | ||
102 | { | ||
103 | apr_table_t *query = parser_get_linedata(field->func); | ||
104 | |||
105 | if (!field->args[0]) { | ||
106 | logging_log(cfg, LOGLEVEL_NOISE, "queryarg requires name of query arg"); | ||
107 | return APR_EINVAL; | ||
108 | } | ||
109 | |||
110 | if (!query) { | ||
111 | char *query_beg; | ||
112 | |||
113 | query = apr_table_make(p,3); | ||
114 | |||
115 | query_beg = strchr(value, '?'); | ||
116 | // if we have a query string, rip it apart | ||
117 | if (query_beg) { | ||
118 | char *key; | ||
119 | char *value; | ||
120 | const char *delim = "&"; | ||
121 | char *query_string; | ||
122 | char *strtok_state; | ||
123 | char *query_end = strrchr(++query_beg,' '); | ||
124 | |||
125 | query_string = apr_pstrndup(p, query_beg, query_end-query_beg); | ||
126 | logging_log(cfg, LOGLEVEL_DEBUG, "QUERY: Found String %pp, %pp, %s", | ||
127 | query_beg, query_end, query_string); | ||
128 | if (field->args[1]) { | ||
129 | delim = field->args[1]; | ||
130 | } | ||
131 | key = apr_strtok(query_string, delim, &strtok_state); | ||
132 | while (key) { | ||
133 | value = strchr(key, '='); | ||
134 | if (value) { | ||
135 | *value = '\0'; /* Split the string in two */ | ||
136 | value++; /* Skip past the = */ | ||
137 | } | ||
138 | else { | ||
139 | value = "1"; | ||
140 | } | ||
141 | ap_unescape_url(key); | ||
142 | ap_unescape_url(value); | ||
143 | apr_table_set(query, key, value); | ||
144 | |||
145 | logging_log(cfg, LOGLEVEL_DEBUG, | ||
146 | "QUERY: Found arg: %s = %s", key, value); | ||
147 | |||
148 | key = apr_strtok(NULL, delim, &strtok_state); | ||
149 | } | ||
150 | } | ||
151 | parser_set_linedata(field->func,query); | ||
152 | } | ||
153 | *ret = apr_table_get(query, field->args[0]); | ||
154 | return APR_SUCCESS; | ||
155 | } | ||
156 | |||
157 | parser_func_t *parser_get_func(const char *name) | ||
158 | { | ||
159 | return apr_hash_get(g_parser_funcs, name, APR_HASH_KEY_STRING); | ||
160 | } | ||
161 | |||
162 | static void parser_add_func(apr_pool_t *p, const char *const name, | ||
163 | parser_func_f func, int id) | ||
164 | { | ||
165 | parser_func_t *s; | ||
166 | if (!g_parser_funcs) { | ||
167 | g_parser_funcs = apr_hash_make(p); | ||
168 | } | ||
169 | s = apr_palloc(p, sizeof(parser_func_t)); | ||
170 | s->func = func; | ||
171 | s->pos = id; | ||
172 | s->data = NULL; | ||
173 | s->linedata = &g_parser_linedata; | ||
174 | apr_hash_set(g_parser_funcs, lowerstr(p, name), APR_HASH_KEY_STRING, s); | ||
175 | } | ||
176 | |||
177 | void parser_init(apr_pool_t *p) | ||
178 | { | ||
179 | int i = 0; | ||
180 | parser_add_func(p, "regexmatch", parser_func_regexmatch, ++i); | ||
181 | parser_add_func(p, "totimestamp", parser_func_totimestamp, ++i); | ||
182 | parser_add_func(p, "machineid", parser_func_machineid, ++i); | ||
183 | parser_add_func(p, "queryarg", parser_func_queryarg, ++i); | ||
184 | parser_add_func(p, "wrap", parser_func_wrap, ++i); | ||
185 | g_parser_linedata = apr_pcalloc(p, sizeof(void *) * (i+1)); | ||
186 | g_parser_linedata[0] = (void *)i; | ||
187 | } | ||
188 | |||
189 | void parser_find_logs(config_t *cfg) | ||
190 | { | ||
191 | apr_pool_t *tp; | ||
192 | apr_dir_t *dir; | ||
193 | apr_finfo_t finfo; | ||
194 | config_filestat_t *newp; | ||
195 | |||
196 | logging_log(cfg, LOGLEVEL_NOTICE, "Find Log files"); | ||
197 | if (!cfg->input_dir) | ||
198 | return; | ||
199 | apr_pool_create(&tp, cfg->pool); | ||
200 | if (apr_dir_open(&dir, cfg->input_dir, tp)==APR_SUCCESS) { | ||
201 | while (apr_dir_read(&finfo, APR_FINFO_NAME | APR_FINFO_TYPE, dir) | ||
202 | == APR_SUCCESS) { | ||
203 | char *temp; | ||
204 | if (finfo.filetype == APR_DIR) | ||
205 | continue; | ||
206 | newp = (config_filestat_t *)apr_array_push(cfg->input_files); | ||
207 | newp->result = "Not Parsed"; | ||
208 | apr_filepath_merge(&temp, cfg->input_dir, finfo.name, | ||
209 | APR_FILEPATH_TRUENAME, cfg->pool); | ||
210 | newp->fname = temp; | ||
211 | } | ||
212 | apr_dir_close(dir); | ||
213 | } | ||
214 | apr_pool_destroy(tp); | ||
215 | } | ||
216 | |||
217 | #define BUFFER_SIZE (16 * 1024) | ||
218 | |||
219 | void parser_split_logs(config_t *cfg) | ||
220 | { | ||
221 | apr_pool_t *tp, *tfp; | ||
222 | apr_array_header_t *foundfiles; | ||
223 | config_filestat_t *filelist; | ||
224 | config_filestat_t *newfile; | ||
225 | apr_file_t *infile; | ||
226 | int f, l; | ||
227 | apr_status_t rv; | ||
228 | apr_finfo_t finfo; | ||
229 | char buff[BUFFER_SIZE]; | ||
230 | int linecount; | ||
231 | int piecesize; | ||
232 | |||
233 | if (!cfg->split_enabled) return; | ||
234 | if (!cfg->split_dir) { | ||
235 | logging_log(cfg, LOGLEVEL_NOISE, "SPLITTER: Missing Split Output directory"); | ||
236 | return; | ||
237 | } | ||
238 | apr_pool_create(&tp, cfg->pool); | ||
239 | apr_pool_create(&tfp, tp); | ||
240 | |||
241 | if (APR_SUCCESS != apr_stat(&finfo, cfg->split_dir, APR_FINFO_MIN, tp)) { | ||
242 | logging_log(cfg, LOGLEVEL_NOISE, "SPLITTER: Directory %s does not exist", cfg->split_dir); | ||
243 | return; | ||
244 | } | ||
245 | foundfiles = apr_array_copy(tp, cfg->input_files); | ||
246 | apr_array_clear(cfg->input_files); | ||
247 | |||
248 | filelist = (config_filestat_t *)foundfiles->elts; | ||
249 | for (f=0, l=foundfiles->nelts; f < l; f++) { | ||
250 | apr_pool_clear(tfp); | ||
251 | logging_log(cfg, LOGLEVEL_NOTICE, "SPLITTER: Begin Splitting Log File '%s'", filelist[f].fname); | ||
252 | rv = apr_file_open(&infile, filelist[f].fname, APR_FOPEN_READ, APR_OS_DEFAULT, tfp); | ||
253 | |||
254 | if (rv != APR_SUCCESS) { | ||
255 | logging_log(cfg, LOGLEVEL_NOISE, "SPLITTER: Could not open %s", filelist[f].fname); | ||
256 | return; | ||
257 | } | ||
258 | linecount = 0; | ||
259 | while (apr_file_eof(infile) == APR_SUCCESS) { | ||
260 | apr_size_t read = BUFFER_SIZE; | ||
261 | char *p; | ||
262 | apr_file_read(infile, buff, &read); | ||
263 | p = buff; | ||
264 | while ((p = memchr(p, '\n', (buff + read) - p))) { | ||
265 | ++p; | ||
266 | ++linecount; | ||
267 | } | ||
268 | } | ||
269 | // now we know how long it is. Lets split up the file | ||
270 | piecesize = linecount / cfg->split_count; | ||
271 | if (piecesize < cfg->split_minimum) | ||
272 | piecesize = cfg->split_minimum; | ||
273 | if (piecesize > cfg->split_maximum && cfg->split_maximum > 0) | ||
274 | piecesize = cfg->split_maximum; | ||
275 | if (piecesize > linecount) { | ||
276 | // File is smaller than piece size just add it back in as is | ||
277 | newfile = (config_filestat_t *)apr_array_push(cfg->input_files); | ||
278 | newfile->result = "Not Parsed"; | ||
279 | newfile->fname = filelist[f].fname; | ||
280 | } else { | ||
281 | //split apart the files | ||
282 | int cur_line = 0; | ||
283 | int file_count = 1; | ||
284 | int out_lines = 0; | ||
285 | const char *basefile, *file; | ||
286 | apr_file_t *outfile; | ||
287 | char trail[2048]; | ||
288 | apr_size_t trail_size = 0; | ||
289 | apr_size_t write; | ||
290 | apr_off_t off = 0; | ||
291 | |||
292 | apr_file_seek(infile, APR_SET, &off); | ||
293 | |||
294 | basefile = apr_pstrdup(tfp, basename(apr_pstrdup(tfp, filelist[f].fname))); | ||
295 | |||
296 | file = apr_psprintf(tfp, "%s/%s-%d", cfg->split_dir, basefile, file_count++); | ||
297 | logging_log(cfg, LOGLEVEL_NOTICE, "SPLITTER: Creating output file %s", file); | ||
298 | rv = apr_file_open(&outfile, file, APR_FOPEN_WRITE | APR_FOPEN_CREATE | APR_FOPEN_TRUNCATE, APR_OS_DEFAULT, tfp); | ||
299 | if (rv != APR_SUCCESS) { | ||
300 | logging_log(cfg, LOGLEVEL_NOISE, "SPLITTER: Could not open %s (%d)", file, rv); | ||
301 | return; | ||
302 | } | ||
303 | newfile = (config_filestat_t *)apr_array_push(cfg->input_files); | ||
304 | newfile->result = "Not Parsed"; | ||
305 | newfile->fname = apr_pstrdup(cfg->pool, file); | ||
306 | |||
307 | while (apr_file_eof(infile) == APR_SUCCESS) { | ||
308 | apr_size_t read = BUFFER_SIZE; | ||
309 | char *p, *pp, *buff_start; | ||
310 | apr_file_read(infile, buff, &read); | ||
311 | buff_start = p = pp = buff; | ||
312 | if (trail_size) { | ||
313 | p = memchr(p, '\n', (buff + read) - p); | ||
314 | if (p) { | ||
315 | //printf("Trail Line: %p, %p, %d\n", pp, p, (p - pp) + trail_size); | ||
316 | ++p; | ||
317 | pp = p; | ||
318 | ++cur_line; | ||
319 | ++out_lines; | ||
320 | // write out to file | ||
321 | apr_file_write(outfile, trail, &trail_size); | ||
322 | trail_size = 0; | ||
323 | } else { | ||
324 | if ((read + trail_size) > 2048) { | ||
325 | logging_log(cfg, LOGLEVEL_NOISE, "SPLITTER: Excessively long line %d in file %s", cur_line, filelist[f].fname); | ||
326 | exit(1); | ||
327 | } else { | ||
328 | memcpy(trail+trail_size, buff, read); | ||
329 | trail_size += read; | ||
330 | } | ||
331 | } | ||
332 | } | ||
333 | while ((p = memchr(p, '\n', (buff + read) - p))) { | ||
334 | //printf("Line: %p, %p, %d\n", pp, p, (p - pp)); | ||
335 | if (out_lines == piecesize) { | ||
336 | // Write out to file | ||
337 | write = pp - buff_start; | ||
338 | apr_file_write(outfile, buff_start, &write); | ||
339 | buff_start = pp; | ||
340 | out_lines = 0; | ||
341 | // Open new file | ||
342 | file = apr_psprintf(tfp, "%s/%s-%d", cfg->split_dir, basefile, file_count++); | ||
343 | logging_log(cfg, LOGLEVEL_NOTICE, "SPLITTER: Creating output file %s", file); | ||
344 | rv = apr_file_open(&outfile, file, APR_FOPEN_WRITE | APR_FOPEN_CREATE | APR_FOPEN_TRUNCATE, APR_OS_DEFAULT, tfp); | ||
345 | if (rv != APR_SUCCESS) { | ||
346 | logging_log(cfg, LOGLEVEL_NOISE, "SPLITTER: Could not open %s (%d)", file, rv); | ||
347 | return; | ||
348 | } | ||
349 | newfile = (config_filestat_t *)apr_array_push(cfg->input_files); | ||
350 | newfile->result = "Not Parsed"; | ||
351 | newfile->fname = apr_pstrdup(cfg->pool, file); | ||
352 | } | ||
353 | ++p; | ||
354 | pp = p; | ||
355 | ++cur_line; | ||
356 | ++out_lines; | ||
357 | } | ||
358 | // Write out to file | ||
359 | write = pp - buff_start; | ||
360 | apr_file_write(outfile, buff_start, &write); | ||
361 | |||
362 | trail_size = (buff+read) - pp; | ||
363 | if (trail_size) { | ||
364 | memcpy(trail, pp, trail_size); | ||
365 | } | ||
366 | } | ||
367 | } | ||
368 | } | ||
369 | apr_pool_destroy(tfp); | ||
370 | apr_pool_destroy(tp); | ||
371 | } | ||
372 | |||
373 | apr_status_t parser_logbadline(config_t *cfg, const char *filename, | ||
374 | const char *badline) | ||
375 | { | ||
376 | apr_status_t rv = APR_SUCCESS; | ||
377 | apr_size_t len; | ||
378 | struct iovec vec[5]; | ||
379 | |||
380 | if (cfg->badlinefile) { | ||
381 | if (!cfg->badline_fp) { | ||
382 | rv = apr_file_open(&cfg->badline_fp, cfg->badlinefile, | ||
383 | APR_FOPEN_CREATE | APR_FOPEN_WRITE | APR_FOPEN_APPEND, | ||
384 | APR_OS_DEFAULT, cfg->pool); | ||
385 | if (rv) { | ||
386 | logging_log(cfg, LOGLEVEL_NOISE, | ||
387 | "Error opening badline file %s\n", cfg->badlinefile); | ||
388 | cfg->badlinefile = NULL; | ||
389 | } | ||
390 | } | ||
391 | if (!rv) { | ||
392 | if (filename != cfg->badlastfile){ | ||
393 | char date[APR_RFC822_DATE_LEN]; | ||
394 | vec[0].iov_base = "Starting BadLines for \""; | ||
395 | vec[0].iov_len = sizeof("Starting BadLines for \"")-1; | ||
396 | vec[1].iov_base = (void *)filename; | ||
397 | vec[1].iov_len = strlen(filename); | ||
398 | vec[2].iov_base = "\" on "; | ||
399 | vec[2].iov_len = sizeof("\" on ")-1; | ||
400 | apr_rfc822_date(date, apr_time_now()); | ||
401 | vec[3].iov_base = date; | ||
402 | vec[3].iov_len = APR_RFC822_DATE_LEN-1; | ||
403 | vec[4].iov_base = "\n"; | ||
404 | vec[4].iov_len = 1; | ||
405 | apr_file_writev(cfg->badline_fp, vec,5, &len); | ||
406 | cfg->badlastfile = filename; | ||
407 | } | ||
408 | |||
409 | if ((++cfg->badline_count) > cfg->badlinemax) { | ||
410 | logging_log(cfg, LOGLEVEL_NOISE, | ||
411 | "Found more than %d bad lines (found %d)", | ||
412 | cfg->badlinemax, cfg->badline_count); | ||
413 | rv = APR_EINVAL; | ||
414 | } else { | ||
415 | vec[0].iov_base = (void *)badline; | ||
416 | vec[0].iov_len = strlen(badline); | ||
417 | vec[1].iov_base = "\n"; | ||
418 | vec[1].iov_len = 1; | ||
419 | apr_file_writev(cfg->badline_fp, vec,2, &len); | ||
420 | } | ||
421 | } | ||
422 | } | ||
423 | return rv; | ||
424 | } | ||
425 | |||
426 | /* | ||
427 | * Modified version of apr_tokenize_to_argv to add [] as quoting characters | ||
428 | * | ||
429 | * token_context: Context from which pool allocations will occur. | ||
430 | * arg_str: Input string for conversion to argv[]. | ||
431 | * argv_out: Output location. This is a pointer to an array | ||
432 | * of pointers to strings (ie. &(char *argv[]). | ||
433 | * This value will be allocated from the contexts | ||
434 | * pool and filled in with copies of the tokens | ||
435 | * found during parsing of the arg_str. | ||
436 | */ | ||
437 | apr_status_t parser_tokenize_line(const char *arg_str, char ***argv_out, | ||
438 | apr_pool_t *token_context) | ||
439 | { | ||
440 | const char *cp; | ||
441 | const char *ct; | ||
442 | char *cleaned, *dirty; | ||
443 | int escaped; | ||
444 | int isquoted, numargs = 0, argnum; | ||
445 | |||
446 | #define SKIP_WHITESPACE(cp) \ | ||
447 | for ( ; *cp == ' ' || *cp == '\t'; ) { \ | ||
448 | cp++; \ | ||
449 | }; | ||
450 | |||
451 | #define CHECK_QUOTATION(cp,isquoted) \ | ||
452 | isquoted = 0; \ | ||
453 | if (*cp == '"') { \ | ||
454 | isquoted = 1; \ | ||
455 | cp++; \ | ||
456 | } \ | ||
457 | else if (*cp == '\'') { \ | ||
458 | isquoted = 2; \ | ||
459 | cp++; \ | ||
460 | } \ | ||
461 | else if (*cp == '[') { \ | ||
462 | isquoted = 3; \ | ||
463 | cp++; \ | ||
464 | } | ||
465 | |||
466 | /* DETERMINE_NEXTSTRING: | ||
467 | * At exit, cp will point to one of the following: NULL, SPACE, TAB or QUOTE. | ||
468 | * NULL implies the argument string has been fully traversed. | ||
469 | */ | ||
470 | #define DETERMINE_NEXTSTRING(cp,isquoted) \ | ||
471 | for ( ; *cp != '\0'; cp++) { \ | ||
472 | if ( (*cp == '\\' && (*(cp+1) == ' ' || *(cp+1) == '\t' || \ | ||
473 | *(cp+1) == '"' || *(cp+1) == '\'' || \ | ||
474 | *(cp+1) == '[' || *(cp+1) == ']'))) { \ | ||
475 | cp++; \ | ||
476 | continue; \ | ||
477 | } \ | ||
478 | if ( (!isquoted && (*cp == ' ' || *cp == '\t')) \ | ||
479 | || (isquoted == 1 && *cp == '"') \ | ||
480 | || (isquoted == 2 && *cp == '\'') \ | ||
481 | || (isquoted == 3 && *cp == ']') \ | ||
482 | ) { \ | ||
483 | break; \ | ||
484 | } \ | ||
485 | } | ||
486 | |||
487 | /* REMOVE_ESCAPE_CHARS: | ||
488 | * Compresses the arg string to remove all of the '\' escape chars. | ||
489 | * The final argv strings should not have any extra escape chars in it. | ||
490 | */ | ||
491 | #define REMOVE_ESCAPE_CHARS(cleaned, dirty, escaped) \ | ||
492 | escaped = 0; \ | ||
493 | while(*dirty) { \ | ||
494 | if (!escaped && *dirty == '\\') { \ | ||
495 | escaped = 1; \ | ||
496 | } \ | ||
497 | else { \ | ||
498 | escaped = 0; \ | ||
499 | *cleaned++ = *dirty; \ | ||
500 | } \ | ||
501 | ++dirty; \ | ||
502 | } \ | ||
503 | *cleaned = 0; /* last line of macro... */ | ||
504 | |||
505 | cp = arg_str; | ||
506 | SKIP_WHITESPACE(cp); | ||
507 | ct = cp; | ||
508 | |||
509 | /* This is ugly and expensive, but if anyone wants to figure a | ||
510 | * way to support any number of args without counting and | ||
511 | * allocating, please go ahead and change the code. | ||
512 | * | ||
513 | * Must account for the trailing NULL arg. | ||
514 | */ | ||
515 | numargs = 1; | ||
516 | while (*ct != '\0') { | ||
517 | CHECK_QUOTATION(ct, isquoted) | ||
518 | ; | ||
519 | DETERMINE_NEXTSTRING(ct, isquoted); | ||
520 | if (*ct != '\0') { | ||
521 | ct++; | ||
522 | } | ||
523 | numargs++; | ||
524 | SKIP_WHITESPACE(ct); | ||
525 | } | ||
526 | *argv_out = apr_palloc(token_context, numargs * sizeof(char*)); | ||
527 | |||
528 | /* determine first argument */ | ||
529 | for (argnum = 0; argnum < (numargs-1); argnum++) { | ||
530 | SKIP_WHITESPACE(cp); | ||
531 | CHECK_QUOTATION(cp, isquoted) | ||
532 | ; | ||
533 | ct = cp; | ||
534 | DETERMINE_NEXTSTRING(cp, isquoted); | ||
535 | cp++; | ||
536 | (*argv_out)[argnum] = apr_palloc(token_context, cp - ct); | ||
537 | apr_cpystrn((*argv_out)[argnum], ct, cp - ct); | ||
538 | cleaned = dirty = (*argv_out)[argnum]; | ||
539 | REMOVE_ESCAPE_CHARS(cleaned, dirty, escaped) | ||
540 | ; | ||
541 | } | ||
542 | (*argv_out)[argnum] = NULL; | ||
543 | |||
544 | return APR_SUCCESS; | ||
545 | } | ||
546 | |||
547 | apr_status_t parser_parsefile(config_t *cfg, config_dbd_t *dbconn, | ||
548 | config_filestat_t *fstat) | ||
549 | { | ||
550 | apr_pool_t *tp, *targp; | ||
551 | apr_file_t *file; | ||
552 | apr_status_t rv; | ||
553 | char buff[2048]; | ||
554 | char readbuff[BUFFER_SIZE]; | ||
555 | char **targv; | ||
556 | int targc; | ||
557 | |||
558 | apr_pool_create(&tp, cfg->pool); | ||
559 | apr_pool_create(&targp, tp); | ||
560 | |||
561 | logging_log(cfg, LOGLEVEL_NOTICE, "PARSER: Begin Parsing Log File '%s'", fstat->fname); | ||
562 | |||
563 | rv = apr_file_open(&file, fstat->fname, APR_FOPEN_READ, APR_OS_DEFAULT, tp); | ||
564 | apr_file_buffer_set(file, readbuff, BUFFER_SIZE); | ||
565 | if (rv != APR_SUCCESS) { | ||
566 | logging_log(cfg, LOGLEVEL_NOISE, "PARSER: Could not open %s", fstat->fname); | ||
567 | return rv; | ||
568 | } | ||
569 | |||
570 | fstat->linesparsed = 0; | ||
571 | // Start Transaction | ||
572 | fstat->start = apr_time_now(); | ||
573 | if (!cfg->dryrun && database_trans_start(cfg, dbconn, tp)) { | ||
574 | fstat->result = "Database Transaction Error"; | ||
575 | fstat->stop = apr_time_now(); | ||
576 | return rv; | ||
577 | } | ||
578 | |||
579 | do { | ||
580 | rv = apr_file_gets(buff, 2048, file); | ||
581 | if (rv == APR_SUCCESS) { | ||
582 | int i,m, cont = 0; | ||
583 | config_filter_t *filters; | ||
584 | |||
585 | fstat->linesparsed++; | ||
586 | // chomp off newline | ||
587 | line_chomp(buff); | ||
588 | // Run line filters | ||
589 | for (i=0, m=cfg->linefilters->nelts, | ||
590 | filters = (config_filter_t *)cfg->linefilters->elts; | ||
591 | i<m; i++) { | ||
592 | if (!filters[i].regex || ap_regexec(filters[i].regex, buff, 0, NULL,0)==0) { | ||
593 | if (filters[i].negative) { | ||
594 | logging_log(cfg, LOGLEVEL_DEBUG, | ||
595 | "PARSER: LINEFILTER: Skipping Line %d due to Filter (%d)%s", | ||
596 | fstat->linesparsed, i, filters[i].filter); | ||
597 | fstat->lineskipped++; | ||
598 | cont = 1; | ||
599 | } else { | ||
600 | logging_log(cfg, LOGLEVEL_DEBUG, | ||
601 | "PARSER: LINEFILTER: Force Parsing Line %d due to Filter (%d)%s", | ||
602 | fstat->linesparsed, i, filters[i].filter); | ||
603 | } | ||
604 | break; | ||
605 | } | ||
606 | } | ||
607 | if (cont) continue; | ||
608 | |||
609 | apr_pool_clear(targp); | ||
610 | parser_tokenize_line(buff, &targv, targp); | ||
611 | targc = 0; | ||
612 | while (targv[targc]) | ||
613 | targc++; | ||
614 | rv = parser_processline(targp, cfg, dbconn, fstat, targv, targc); | ||
615 | if (rv != APR_SUCCESS) { | ||
616 | int i; | ||
617 | |||
618 | fstat->linesbad++; | ||
619 | rv = parser_logbadline(cfg, fstat->fname, buff); | ||
620 | if (rv) { | ||
621 | if (!cfg->dryrun) database_trans_abort(cfg, dbconn); | ||
622 | logging_log(cfg, LOGLEVEL_ERROR, "Line %d(%d): %s", fstat->linesparsed, | ||
623 | targc, buff); | ||
624 | for (i = 0; targv[i]; i++) { | ||
625 | logging_log(cfg, LOGLEVEL_ERROR, "Arg (%d): '%s'", i, | ||
626 | targv[i]); | ||
627 | } | ||
628 | } | ||
629 | } | ||
630 | } else { | ||
631 | rv = APR_SUCCESS; | ||
632 | break; | ||
633 | } | ||
634 | } while (rv == APR_SUCCESS); | ||
635 | apr_file_close(file); | ||
636 | // Finish Transaction | ||
637 | if (!cfg->dryrun && database_trans_stop(cfg, dbconn, tp)) { | ||
638 | fstat->result = apr_psprintf(cfg->pool, | ||
639 | "Input line %d, Database Transaction Error", | ||
640 | fstat->linesparsed); | ||
641 | } | ||
642 | |||
643 | apr_pool_destroy(tp); | ||
644 | logging_log(cfg, LOGLEVEL_NOTICE, | ||
645 | "PARSER: Finish Parsing Log File '%s'. Lines: (%d/%d)", | ||
646 | fstat->fname, fstat->linesparsed - fstat->lineskipped, fstat->linesparsed); | ||
647 | if (!rv) { | ||
648 | fstat->result = "File Parsed Succesfully"; | ||
649 | } | ||
650 | fstat->stop = apr_time_now(); | ||
651 | return rv; | ||
652 | } | ||
653 | |||
654 | apr_status_t parser_processline(apr_pool_t *ptemp, config_t *cfg, | ||
655 | config_dbd_t *dbconn, config_filestat_t *fstat, char **argv, int argc) | ||
656 | { | ||
657 | config_logformat_t *fmt; | ||
658 | config_logformat_field_t *ifields; | ||
659 | config_output_field_t *ofields; | ||
660 | config_filter_t *filters; | ||
661 | apr_table_t *datain; | ||
662 | apr_table_t *dataout; | ||
663 | apr_status_t rv= APR_SUCCESS; | ||
664 | int i,m; | ||
665 | |||
666 | fmt = apr_hash_get(cfg->log_formats, cfg->logformat, APR_HASH_KEY_STRING); | ||
667 | if (!fmt) { | ||
668 | logging_log(cfg, LOGLEVEL_NOISE, "PARSER: No Input Log format"); | ||
669 | return APR_EINVAL; | ||
670 | } | ||
671 | if (fmt->fields->nelts != argc) { | ||
672 | logging_log(cfg, LOGLEVEL_NOISE, | ||
673 | "PARSER: Input line field number differs from expected. Expected %d got %d.", | ||
674 | fmt->fields->nelts, argc); | ||
675 | fstat->result = apr_psprintf(cfg->pool, | ||
676 | "Input line %d is badly formatted (wrong number of fields)", | ||
677 | fstat->linesparsed); | ||
678 | return APR_EINVAL; | ||
679 | } | ||
680 | |||
681 | datain = apr_table_make(ptemp, fmt->fields->nelts); | ||
682 | dataout = apr_table_make(ptemp, cfg->output_fields->nelts); | ||
683 | |||
684 | ifields = (config_logformat_field_t *)fmt->fields->elts; | ||
685 | for (i=0; i<fmt->fields->nelts; i++) { | ||
686 | apr_table_setn(datain, ifields[i].name, argv[i]); | ||
687 | } | ||
688 | // Run Pre Filters | ||
689 | for (i=0, m=cfg->prefilters->nelts, | ||
690 | filters = (config_filter_t *)cfg->prefilters->elts; | ||
691 | i<m; i++) { | ||
692 | const char *temp = apr_table_get(datain, filters[i].field); | ||
693 | if (temp && (!filters[i].regex || ap_regexec(filters[i].regex, temp, 0, NULL,0)==0)) { | ||
694 | if (filters[i].negative) { | ||
695 | logging_log(cfg, LOGLEVEL_DEBUG, | ||
696 | "PARSER: PREFILTER: Skipping Line %d due to Filter (%d)%s", | ||
697 | fstat->linesparsed, i, filters[i].filter); | ||
698 | fstat->lineskipped++; | ||
699 | return APR_SUCCESS; | ||
700 | } else { | ||
701 | logging_log(cfg, LOGLEVEL_DEBUG, | ||
702 | "PARSER: PREFILTER: Force Parsing Line %d due to Filter (%d)%s", | ||
703 | fstat->linesparsed, i, filters[i].filter); | ||
704 | } | ||
705 | break; | ||
706 | } | ||
707 | } | ||
708 | |||
709 | ofields = (config_output_field_t *)cfg->output_fields->elts; | ||
710 | // clear out ofield function per-line data | ||
711 | memset(&g_parser_linedata[1],0,sizeof(void *)*(int)g_parser_linedata[0]); | ||
712 | // Convert input fields to output fields | ||
713 | for (i=0,m=cfg->output_fields->nelts; i<m; i++) { | ||
714 | const char *val; | ||
715 | val = apr_table_get(datain, ofields[i].source); | ||
716 | // If we can't find the source field just continue | ||
717 | if (!val && !(ofields[i].source[0]=='\0' && ofields[i].func)) { | ||
718 | apr_table_setn(dataout, ofields[i].field, ofields[i].def); | ||
719 | continue; | ||
720 | } | ||
721 | if (!ofields[i].func) { | ||
722 | apr_table_setn(dataout, ofields[i].field, val); | ||
723 | } else { | ||
724 | const char *ret= NULL; | ||
725 | rv = ((parser_func_t *)ofields[i].func)->func(ptemp, cfg, | ||
726 | &ofields[i], val, &ret); | ||
727 | if (rv) { | ||
728 | fstat->result = apr_psprintf(cfg->pool, | ||
729 | "Input line %d, Parser function %s returned error (%d)%s", | ||
730 | fstat->linesparsed, ofields[i].fname, rv, logging_strerror(rv)); | ||
731 | return rv; | ||
732 | } | ||
733 | apr_table_setn(dataout, ofields[i].field, ret ? ret : ofields[i].def); | ||
734 | } | ||
735 | } | ||
736 | |||
737 | // Run Post filters | ||
738 | for (i=0, m=cfg->postfilters->nelts, | ||
739 | filters = (config_filter_t *)cfg->postfilters->elts; | ||
740 | i<m; i++) { | ||
741 | const char *temp = apr_table_get(dataout, filters[i].field); | ||
742 | if (temp && (!filters[i].regex || ap_regexec(filters[i].regex, temp, 0, NULL,0)==0)) { | ||
743 | if (filters[i].negative) { | ||
744 | logging_log(cfg, LOGLEVEL_DEBUG, | ||
745 | "PARSER: POSTFILTER: Skipping Line %d due to Filter (%d)%s", | ||
746 | fstat->linesparsed, i, filters[i].filter); | ||
747 | fstat->lineskipped++; | ||
748 | return APR_SUCCESS; | ||
749 | } else { | ||
750 | logging_log(cfg, LOGLEVEL_DEBUG, | ||
751 | "PARSER: POSTFILTER: Force Parsing Line %d due to Filter (%d)%s", | ||
752 | fstat->linesparsed, i, filters[i].filter); | ||
753 | } | ||
754 | break; | ||
755 | } | ||
756 | } | ||
757 | |||
758 | // Process DB Query | ||
759 | if (!cfg->dryrun) { | ||
760 | rv = database_insert(cfg, dbconn, ptemp, dataout); | ||
761 | if (rv) { | ||
762 | fstat->result = apr_psprintf(cfg->pool, | ||
763 | "Input line %d, Database Error", | ||
764 | fstat->linesparsed); | ||
765 | } | ||
766 | } | ||
767 | return rv; | ||
768 | } | ||