summaryrefslogtreecommitdiffstats
path: root/utility/logparse.c
diff options
context:
space:
mode:
author Edward Rudd 2008-10-01 03:57:58 +0000
committer Edward Rudd 2008-10-01 03:57:58 +0000
commit cc75ebf7e8560a69a6847f0260cce4772fff440a (patch)
tree 0af45d26c6781995f1d643e599fe36be481d9ee5 /utility/logparse.c
parent ba30ceeb705e9b4d40ce0d98f6a4e047d47ce919 (diff)
Initial revision of command line importer.
Diffstat (limited to 'utility/logparse.c')
-rw-r--r-- utility/logparse.c 205
1 files changed, 205 insertions, 0 deletions
diff --git a/utility/logparse.c b/utility/logparse.c
new file mode 100644
index 0000000..2940534
--- /dev/null
+++ b/utility/logparse.c
@@ -0,0 +1,205 @@
1#include "logparse.h"
2#include "apr_file_info.h"
3#include "apr_file_io.h"
4#include "apr_strings.h"
5
6void find_log_files(config_t *cfg)
7{
8 apr_pool_t *tp;
9 apr_dir_t *dir;
10 apr_finfo_t finfo;
11 char **newp;
12
13 if (!cfg->input_dir)
14 return;
15 apr_pool_create(&tp, cfg->pool);
16 if (apr_dir_open(&dir, cfg->input_dir, tp)==APR_SUCCESS) {
17 while (apr_dir_read(&finfo, APR_FINFO_NAME | APR_FINFO_TYPE, dir)
18 == APR_SUCCESS) {
19 if (finfo.filetype == APR_DIR)
20 continue;
21 newp = (char **)apr_array_push(cfg->input_files);
22 apr_filepath_merge(newp, cfg->input_dir, finfo.name,
23 APR_FILEPATH_TRUENAME, cfg->pool);
24 }
25 apr_dir_close(dir);
26 }
27 apr_pool_destroy(tp);
28}
29
30/*
31 * Modified version of apr_tokenize_to_argv to add [] as quoting characters
32 *
33 * token_context: Context from which pool allocations will occur.
34 * arg_str: Input string for conversion to argv[].
35 * argv_out: Output location. This is a pointer to an array
36 * of pointers to strings (ie. &(char *argv[]).
37 * This value will be allocated from the contexts
38 * pool and filled in with copies of the tokens
39 * found during parsing of the arg_str.
40 * keepquotes: Keep the quotes instead of stripping them
41 */
42apr_status_t tokenize_logline(const char *arg_str, char ***argv_out,
43 apr_pool_t *token_context, int keepquotes)
44{
45 const char *cp;
46 const char *ct;
47 char *cleaned, *dirty;
48 int escaped;
49 int isquoted, numargs = 0, argnum;
50
51#define SKIP_WHITESPACE(cp) \
52 for ( ; *cp == ' ' || *cp == '\t'; ) { \
53 cp++; \
54 };
55
56#define CHECK_QUOTATION(cp,isquoted) \
57 isquoted = 0; \
58 if (*cp == '"') { \
59 isquoted = 1; \
60 cp++; \
61 } \
62 else if (*cp == '\'') { \
63 isquoted = 2; \
64 cp++; \
65 } \
66 else if (*cp == '[') { \
67 isquoted = 3; \
68 cp++; \
69 }
70
71 /* DETERMINE_NEXTSTRING:
72 * At exit, cp will point to one of the following: NULL, SPACE, TAB or QUOTE.
73 * NULL implies the argument string has been fully traversed.
74 */
75#define DETERMINE_NEXTSTRING(cp,isquoted) \
76 for ( ; *cp != '\0'; cp++) { \
77 if ( (isquoted && (*cp == ' ' || *cp == '\t')) \
78 || (*cp == '\\' && (*(cp+1) == ' ' || *(cp+1) == '\t' || \
79 *(cp+1) == '"' || *(cp+1) == '\'' || \
80 *(cp+1) == '[' || *(cp+1) == ']'))) { \
81 cp++; \
82 continue; \
83 } \
84 if ( (!isquoted && (*cp == ' ' || *cp == '\t')) \
85 || (isquoted == 1 && *cp == '"') \
86 || (isquoted == 2 && *cp == '\'') \
87 || (isquoted == 3 && *cp == ']') \
88 ) { \
89 break; \
90 } \
91 }
92
93 /* REMOVE_ESCAPE_CHARS:
94 * Compresses the arg string to remove all of the '\' escape chars.
95 * The final argv strings should not have any extra escape chars in it.
96 */
97#define REMOVE_ESCAPE_CHARS(cleaned, dirty, escaped) \
98 escaped = 0; \
99 while(*dirty) { \
100 if (!escaped && *dirty == '\\') { \
101 escaped = 1; \
102 } \
103 else { \
104 escaped = 0; \
105 *cleaned++ = *dirty; \
106 } \
107 ++dirty; \
108 } \
109 *cleaned = 0; /* last line of macro... */
110
111 cp = arg_str;
112 SKIP_WHITESPACE(cp);
113 ct = cp;
114
115 /* This is ugly and expensive, but if anyone wants to figure a
116 * way to support any number of args without counting and
117 * allocating, please go ahead and change the code.
118 *
119 * Must account for the trailing NULL arg.
120 */
121 numargs = 1;
122 while (*ct != '\0') {
123 CHECK_QUOTATION(ct, isquoted)
124 ;
125 DETERMINE_NEXTSTRING(ct, isquoted);
126 if (*ct != '\0') {
127 ct++;
128 }
129 numargs++;
130 SKIP_WHITESPACE(ct);
131 }
132 *argv_out = apr_palloc(token_context, numargs * sizeof(char*));
133
134 /* determine first argument */
135 for (argnum = 0; argnum < (numargs-1); argnum++) {
136 SKIP_WHITESPACE(cp);
137 CHECK_QUOTATION(cp, isquoted)
138 ;
139 ct = cp;
140 DETERMINE_NEXTSTRING(cp, isquoted);
141 cp++;
142 if (isquoted && keepquotes) {
143 (*argv_out)[argnum] = apr_palloc(token_context, cp - ct + 2);
144 apr_cpystrn((*argv_out)[argnum], ct -1, cp - ct + 2);
145 } else {
146 (*argv_out)[argnum] = apr_palloc(token_context, cp - ct);
147 apr_cpystrn((*argv_out)[argnum], ct, cp - ct);
148 }
149 cleaned = dirty = (*argv_out)[argnum];
150 REMOVE_ESCAPE_CHARS(cleaned, dirty, escaped)
151 ;
152 }
153 (*argv_out)[argnum] = NULL;
154
155 return APR_SUCCESS;
156}
157
158apr_status_t parse_logfile(config_t *cfg, const char *filename)
159{
160 apr_pool_t *tp, *argp;
161 apr_file_t *file;
162 apr_status_t rv;
163 char buff[2048];
164 char **targv;
165 int targc;
166 int line;
167
168 apr_pool_create(&tp, cfg->pool);
169 apr_pool_create(&argp, tp);
170
171 rv = apr_file_open(&file, filename, APR_FOPEN_READ | APR_BUFFERED,
172 APR_OS_DEFAULT, tp);
173 if (rv != APR_SUCCESS) {
174 printf("Could not open %s\n", filename);
175 return rv;
176 }
177
178 line = 0;
179 do {
180 rv = apr_file_gets(buff, 1024, file);
181 if (rv == APR_SUCCESS) {
182 line++;
183 char *ptr;
184 // chomp off newline
185 for (ptr = buff + strlen(buff); *ptr != '\r' && *ptr != '\n'; ptr--)
186 ;
187 *ptr = '\0';
188 apr_pool_clear(argp);
189 tokenize_logline(buff, &targv, argp, 1);
190 targc = 0;
191 while (targv[targc]) targc++;
192 if (targc != 9) {
193 int i;
194 printf("Line %d(%d): %s\n",line, targc, buff);
195 for (i = 0; targv[i]; i++) {
196 printf("Arg (%d): '%s'\n", i, targv[i]);
197 }
198 }
199 }
200 } while (rv == APR_SUCCESS);
201 printf("Total Lines: %d\n", line);
202 apr_file_close(file);
203 apr_pool_destroy(tp);
204 return APR_SUCCESS;
205}