diff options
author | Christopher Powell | 2001-11-28 05:26:53 +0000 |
---|---|---|
committer | Christopher Powell | 2001-11-28 05:26:53 +0000 |
commit | 92d85f793b1a41bbbde1811004ae2708a47a44aa (patch) | |
tree | 69ecadaf47ae83197b8c92ff3f8b7c2002b15b19 |
Initial revision1.09
-rw-r--r-- | CHANGELOG | 99 | ||||
-rw-r--r-- | CONFIGURATION | 8 | ||||
-rw-r--r-- | INSTALL | 180 | ||||
-rw-r--r-- | LICENSE | 58 | ||||
-rw-r--r-- | Makefile | 28 | ||||
-rw-r--r-- | README | 204 | ||||
-rw-r--r-- | access_log.sql | 13 | ||||
-rwxr-xr-x | make_combined_log.pl | 133 | ||||
-rw-r--r-- | mod_log_sql.c | 786 |
9 files changed, 1509 insertions, 0 deletions
diff --git a/CHANGELOG b/CHANGELOG new file mode 100644 index 0000000..674fbec --- /dev/null +++ b/CHANGELOG | |||
@@ -0,0 +1,99 @@ | |||
1 | $Id: CHANGELOG,v 1.1 2001/11/28 05:26:54 helios Exp $ | ||
2 | |||
3 | |||
4 | TODO: | ||
5 | * Full commenting of the code. | ||
6 | |||
7 | |||
8 | CHANGES: | ||
9 | |||
10 | 1.09: | ||
11 | |||
12 | * If the MySQL INSERT causes an error, we re-try that INSERT after | ||
13 | a short 1/2-second sleep just to make sure it wasn't due to a | ||
14 | network glitch or other gremlin. | ||
15 | * Made the default log format: huSUsbTvRA. This provides everything | ||
16 | required to reproduce Combined Log Format data. | ||
17 | |||
18 | |||
19 | 1.08: | ||
20 | |||
21 | * Now log a single '-' (instead of a zero-length string) when | ||
22 | User-Agent is blank. This is similar to what Apache does in its own | ||
23 | logs. (Should have caught this when I did the same thing for | ||
24 | Referer.) | ||
25 | * Separated documentation into README, INSTALL, CHANGELOG, etc. as | ||
26 | appropriate. | ||
27 | |||
28 | |||
29 | 1.07: | ||
30 | |||
31 | * Renamed TransferIgnore directive to RequestIgnore, since that's | ||
32 | really a more specific and accurate description of what that | ||
33 | directive means. | ||
34 | * Now log a single '-' (instead of a zero-length string) when Referer | ||
35 | is blank. This is similar to what Apache does in its own logs. | ||
36 | |||
37 | |||
38 | 1.06: | ||
39 | |||
40 | * Added 'R' and 'A' options to TransferLogMySQLFormat so that we now | ||
41 | can log Referer and Agent respectively. | ||
42 | * Code cleanup: all compilation warnings are now gone, even with -Wall. | ||
43 | (They were mainly "const" issues that needed straightening up.) | ||
44 | * Added RemhostIgnore configuration directive to permit non-logging of | ||
45 | any request coming from a specific host, e.g. a local network | ||
46 | machine, etc. | ||
47 | * Now use the non-obsolete ap_compat.h headerfile instead of | ||
48 | compat.h. This simply gets rid of a compilation warning, nothing | ||
49 | more. | ||
50 | * Now include a headerfile (http_log.h) that was missing. Its | ||
51 | absence was giving us this warning message: "implicit declaration of | ||
52 | function `ap_log_error_old'." | ||
53 | * For numerics that Apache customarily logs as a "-" we now log a zero | ||
54 | instead of a -1. This seems to be more intuitive, e.g. in the | ||
55 | "bytes_sent" column. | ||
56 | * We now have a Makefile and a full "make" process that does all | ||
57 | the real work. | ||
58 | * New maintainer. | ||
59 | |||
60 | |||
61 | 1.05: | ||
62 | |||
63 | * Removed some redundant code, after being noted by Vivek Khera that | ||
64 | this code doesn't even get called with the current apache code. It | ||
65 | can be done in apache 1.3, but it works ok without it anyway. | ||
66 | * Added the necessary include file to make the module compile under | ||
67 | Apache 1.3b6. I haven't actually tested that it works, though. | ||
68 | indent'd the code. | ||
69 | |||
70 | |||
71 | 1.04: | ||
72 | |||
73 | * Rearranged some code to allow for a successful apache 1.3beta | ||
74 | compilation. Please note that this is *untested*, I only got it to | ||
75 | compile, I haven't actually tried to run apache 1.3. | ||
76 | |||
77 | |||
78 | 1.03: | ||
79 | |||
80 | * Changed the check for 'mysql server has gone away' to be case | ||
81 | insensitive, so that it works with MySQL 3.21 | ||
82 | * Changed the behavior so that a link isn't established until it's | ||
83 | necessary (e.g., if SQL logging is used for one virtual IP, a link | ||
84 | won't be opened until there's an access to that IP). | ||
85 | |||
86 | |||
87 | 1.02: | ||
88 | |||
89 | * Managed to track down that segmentation fault that occured once, and | ||
90 | fixed it. No known bugs now exist. | ||
91 | |||
92 | |||
93 | 1.01: | ||
94 | |||
95 | * Segmentation fault in case of certain parameters lacking fixed. | ||
96 | * Worked around the SIGPIPE signal that's sent in certain events from | ||
97 | * mysql_query(). Minor modifications | ||
98 | |||
99 | |||
diff --git a/CONFIGURATION b/CONFIGURATION new file mode 100644 index 0000000..75117c3 --- /dev/null +++ b/CONFIGURATION | |||
@@ -0,0 +1,8 @@ | |||
1 | $Id: CONFIGURATION,v 1.1 2001/11/28 05:26:54 helios Exp $ | ||
2 | |||
3 | |||
4 | Run-time configuration directives are fully documented on the | ||
5 | mod_log_mysql homepage: | ||
6 | |||
7 | http://www.grubbybaby.com/mod_log_mysql/directives.html | ||
8 | |||
@@ -0,0 +1,180 @@ | |||
1 | $Id: INSTALL,v 1.1 2001/11/28 05:26:54 helios Exp $ | ||
2 | |||
3 | |||
4 | Requirements | ||
5 | ============ | ||
6 | |||
7 | 0) I run a Red Hat 6.2 system, but these instructions should easily | ||
8 | adapt to any modern distro. | ||
9 | |||
10 | 1) Apache 1.2.x or higher installed. (I run 1.3.22 and it works fine). | ||
11 | You should have already successfully compiled Apache and know what | ||
12 | you're doing there. In fact, you should already have any other | ||
13 | modules and add-ons like mod_ssl or PHP configured and installed | ||
14 | before you start this process. | ||
15 | |||
16 | 2) The MySQL development headers. (I run MySQL-devel-3.23.44-1.i386.rpm). | ||
17 | |||
18 | 3) MySQL configured, installed and running on either localhost or an | ||
19 | accessible networked machine. You should already have a basic | ||
20 | understanding of MySQL and how it functions. | ||
21 | |||
22 | 4) Again, basic administrative skills with Apache and MySQL. I try to | ||
23 | make things as easy as possible in this README, but its purpose is | ||
24 | not to be an administrative tutorial. | ||
25 | |||
26 | Installation | ||
27 | ============ | ||
28 | |||
29 | 0) Perform all the following steps as root so that you have install | ||
30 | privs, etc. | ||
31 | |||
32 | 1) Unpack the archive into a working directory. | ||
33 | |||
34 | # tar zxf mod_log_mysql.tar.gz -C /usr/local/src | ||
35 | # cd /usr/local/src/mod_log_mysql | ||
36 | |||
37 | 2) Edit Makefile and make any adjustments for your system: make sure | ||
38 | that APACHEDIR points to the location of your Apache source code, | ||
39 | and that CFLAGS points to the location of your Apache installation | ||
40 | directory (where your httpd binary lives). | ||
41 | |||
42 | 3) # make all | ||
43 | (You should receive NO warnings or errors of any kind. | ||
44 | If you see messages like this: "mod_log_mysql.c:69: httpd.h: No such | ||
45 | file or directory" then you do not have your CFLAGS correctly | ||
46 | pointing to the right include directory.) | ||
47 | |||
48 | 4) # make install | ||
49 | |||
50 | 5) Change to your Apache source dir. | ||
51 | |||
52 | # cd /usr/local/src/apache-1.3.22/src | ||
53 | |||
54 | 6) Re-make your httpd binary as follows. | ||
55 | |||
56 | 6a) Edit Configuration.apaci as follows... | ||
57 | |||
58 | * Append the following string to the EXTRA_LIBS= line. (/usr/lib/mysql is where your libmysqlclient.a file lives): | ||
59 | -L/usr/lib/mysql -lmysqlclient -lm | ||
60 | |||
61 | * Add this line at the end of the file: | ||
62 | Module mysql_log_module mod_log_mysql.o | ||
63 | |||
64 | 6b) # cp Configuration.apaci Configuration | ||
65 | |||
66 | 6c) # ./Configure | ||
67 | |||
68 | 6d) # make | ||
69 | |||
70 | 6e) # strip httpd | ||
71 | |||
72 | 7) Test your new apache binary: | ||
73 | |||
74 | # ./httpd -l | ||
75 | |||
76 | You should see something like: | ||
77 | |||
78 | Compiled-in modules: | ||
79 | http_core.c | ||
80 | mod_log_mysql.c <-- That's the line you're looking for. | ||
81 | mod_env.c | ||
82 | mod_log_config.c | ||
83 | mod_mime.c | ||
84 | mod_negotiation.c | ||
85 | ...etc... | ||
86 | |||
87 | 8) Install your httpd binary. Copy it over your old httpd binary, | ||
88 | wherever it lives. You can and should rename your old httpd first so | ||
89 | that you can easily revert to that working version in case of bugs or | ||
90 | whatever. | ||
91 | |||
92 | # /etc/rc.d/init.d/httpd stop | ||
93 | # cp -f ./httpd /usr/local/Apache/bin/ | ||
94 | |||
95 | 9) Configure your apache daemon to log to your database. Here's a very | ||
96 | basic set of config lines to start you off. Full docs on them are | ||
97 | included after this section. | ||
98 | |||
99 | EXAMPLE: Connect to the MySQL database called "apache" running | ||
100 | on "dbmachine.foo.com". The module uses username "loguser" and | ||
101 | password "l0gger" to authenticate to the database; this user must, | ||
102 | of course, exist in the MySQL user table and have the proper | ||
103 | permissions -- more on that in step 11. The log entries will be | ||
104 | INSERTed into the table called "access_log". | ||
105 | |||
106 | |||
107 | LogMySQLInfo dbmachine.foo.com loguser l0gger | ||
108 | LogMySQLDB apache | ||
109 | |||
110 | <VirtualHost 1.2.3.4> | ||
111 | [snip] | ||
112 | |||
113 | TransferLogMySQLTable access_log | ||
114 | TransferLogMySQLFormat huSUsbTvRA | ||
115 | |||
116 | [snip] | ||
117 | </VirtualHost> | ||
118 | |||
119 | |||
120 | 10) Create a database and table to hold the new log data. I log the | ||
121 | same data as the regular "combined log" plus a little extra information | ||
122 | that can be useful. | ||
123 | |||
124 | The order that the fields appear in the table is irrelevant | ||
125 | because you can SELECT them in any order you choose. To create | ||
126 | this table I first created a new database called "apache": | ||
127 | |||
128 | # mysql -uadmin -pmypassword | ||
129 | mysql> create database apache; | ||
130 | |||
131 | Then I created the table called "access_log". You should use the | ||
132 | enclosed SQL file to do this for you. | ||
133 | |||
134 | mysql> source access_log.sql | ||
135 | |||
136 | |||
137 | 11) Create a specific mysql userid that httpd will use to authenticate | ||
138 | and enter data. This userid need not be an actual Unix user. It | ||
139 | is a userid specific to mysql with specific privileges. To create a | ||
140 | user called "loguser" with the password "l0gger" with only the | ||
141 | capability of INSERT to "access_log": | ||
142 | |||
143 | mysql> grant insert on apache.access_log to loguser@my.apachemachine.com identified by 'l0gger'; | ||
144 | |||
145 | |||
146 | 12) Enable full logging of your MySQL daemon (at least temporarily | ||
147 | for debugging purposes) if you don't do this already: | ||
148 | |||
149 | Edit /etc/my.cnf and add the following line to your [mysqld] section: | ||
150 | |||
151 | log=/var/log/mysql-messages | ||
152 | |||
153 | Then restart MySQL. | ||
154 | |||
155 | 13) Restart apache. | ||
156 | |||
157 | # /etc/rc.d/init.d/httpd start | ||
158 | |||
159 | 13) Load your web site in a browser to trigger some hits, then confirm that | ||
160 | the entries are being successfully logged: | ||
161 | |||
162 | # mysql -hmysql.host.com -umysqladmin -p -e "select * from access_log" apache; | ||
163 | Enter password: | ||
164 | |||
165 | +---------------------------------------------------+-------------+-------------+------------------+------------------+------------+--------+------------+------------------------------------+ | ||
166 | | remote_host | remote_user | request_uri | request_duration | virtual_host | time_stamp | status | bytes_sent | referer | | ||
167 | +---------------------------------------------------+-------------+-------------+------------------+------------------+------------+--------+------------+------------------------------------+ | ||
168 | [snipped lines] | ||
169 | . | ||
170 | . | ||
171 | . | ||
172 | +---------------------------------------------------+-------------+-------------+------------------+------------------+------------+--------+------------+------------------------------------+ | ||
173 | |||
174 | 14) You have basic functionality. Don't disable your regular Apache logs until | ||
175 | you feel comfortable that the database is behaving as you'd like and that | ||
176 | things are going well. | ||
177 | |||
178 | |||
179 | |||
180 | |||
@@ -0,0 +1,58 @@ | |||
1 | $Id: LICENSE,v 1.1 2001/11/28 05:26:54 helios Exp $ | ||
2 | |||
3 | |||
4 | In compliance with the requirements set forth by The Apache Group, | ||
5 | the original license to this code is hereby reproduced: | ||
6 | |||
7 | |||
8 | ==================================================================== | ||
9 | Copyright (c) 1995-1997 The Apache Group. All rights reserved. | ||
10 | |||
11 | Redistribution and use in source and binary forms, with or without | ||
12 | modification, are permitted provided that the following conditions | ||
13 | are met: | ||
14 | |||
15 | 1. Redistributions of source code must retain the above copyright | ||
16 | notice, this list of conditions and the following disclaimer. | ||
17 | |||
18 | 2. Redistributions in binary form must reproduce the above copyright | ||
19 | notice, this list of conditions and the following disclaimer in | ||
20 | the documentation and/or other materials provided with the | ||
21 | distribution. | ||
22 | |||
23 | 3. All advertising materials mentioning features or use of this | ||
24 | software must display the following acknowledgment: | ||
25 | "This product includes software developed by the Apache Group | ||
26 | for use in the Apache HTTP server project (http://www.apache.org/)." | ||
27 | |||
28 | 4. The names "Apache Server" and "Apache Group" must not be used to | ||
29 | endorse or promote products derived from this software without | ||
30 | prior written permission. | ||
31 | |||
32 | 5. Redistributions of any form whatsoever must retain the following | ||
33 | acknowledgment: | ||
34 | "This product includes software developed by the Apache Group | ||
35 | for use in the Apache HTTP server project (http://www.apache.org/)." | ||
36 | |||
37 | THIS SOFTWARE IS PROVIDED BY THE APACHE GROUP ``AS IS'' AND ANY | ||
38 | EXPRESSED OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE | ||
39 | IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR | ||
40 | PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE APACHE GROUP OR | ||
41 | ITS CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, | ||
42 | SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT | ||
43 | NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; | ||
44 | LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) | ||
45 | HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, | ||
46 | STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) | ||
47 | ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED | ||
48 | OF THE POSSIBILITY OF SUCH DAMAGE. | ||
49 | ==================================================================== | ||
50 | |||
51 | This software consists of voluntary contributions made by many | ||
52 | individuals on behalf of the Apache Group and was originally based | ||
53 | on public domain software written at the National Center for | ||
54 | Supercomputing Applications, University of Illinois, Urbana-Champaign. | ||
55 | For more information on the Apache Group and the Apache HTTP server | ||
56 | project, please see <http://www.apache.org/>. | ||
57 | |||
58 | \ No newline at end of file | ||
diff --git a/Makefile b/Makefile new file mode 100644 index 0000000..cd349d0 --- /dev/null +++ b/Makefile | |||
@@ -0,0 +1,28 @@ | |||
1 | # $Id: Makefile,v 1.1 2001/11/28 05:26:54 helios Exp $ | ||
2 | |||
3 | # Verify that this points to the right place... | ||
4 | APACHEDIR = /usr/local/src/apache_1.3.22/src | ||
5 | |||
6 | # Verify that this include directory is correct for you... | ||
7 | CFLAGS = -fpic -O2 -Wall -I/usr/local/Apache/include | ||
8 | |||
9 | # --------------------------------------------------------- | ||
10 | # You shouldn't have to touch below here! | ||
11 | |||
12 | CC = gcc | ||
13 | DEFS = -DSHARED_MODULE | ||
14 | INSTALL = /usr/bin/install -m 664 | ||
15 | |||
16 | all: mod_log_mysql.o | ||
17 | |||
18 | mod_log_mysql.o: mod_log_mysql.c | ||
19 | $(CC) ${CFLAGS} ${DEFS} -c mod_log_mysql.c | ||
20 | |||
21 | install: all | ||
22 | $(INSTALL) mod_log_mysql.o ${APACHEDIR}/mod_log_mysql.o | ||
23 | |||
24 | distro: all | ||
25 | cd ..; tar zcf mod_log_mysql.tar.gz mod_log_mysql/; $(INSTALL) mod_log_mysql.tar.gz /usr/local/Apache/html/mod_log_mysql/; rm -f mod_log_mysql.tar.gz | ||
26 | |||
27 | clean: | ||
28 | rm -f *.o *~ | ||
@@ -0,0 +1,204 @@ | |||
1 | $Id: README,v 1.1 2001/11/28 05:26:54 helios Exp $ | ||
2 | |||
3 | |||
4 | Homepage | ||
5 | -------- | ||
6 | http://www.grubbybaby.com/mod_log_mysql/ | ||
7 | |||
8 | |||
9 | |||
10 | Approach | ||
11 | -------- | ||
12 | |||
13 | In order to save speed and overhead, links are kept alive in between | ||
14 | queries. This module uses one SQL link per httpd process. Among other | ||
15 | things, this means that this module supports logging into only one | ||
16 | MySQL server, and for now, also, only one SQL database (although the | ||
17 | latter limitation can be relatively easily removed). | ||
18 | |||
19 | Different data can be sent to different tables. i.e., it's possible to | ||
20 | define one table for TransferLog, one for RefererLog, and a 3rd for | ||
21 | AgentLog. [ Note: this is now deprecated behavior. Please consider | ||
22 | logging Agent and Referer to the same table as your transfers. ] | ||
23 | |||
24 | Virtual hosts are supported in the same manner they are in the regular | ||
25 | logging modules. If you specify a different table for a virtual | ||
26 | host it will be used, otherwise the 'general' would be used. Note: | ||
27 | since all 3 types of logs are implemented within the same module, if | ||
28 | you specify an overriding table for a virtual host for one type of log, | ||
29 | it'll ignore any previous 'general' defaults (see the example in the | ||
30 | end). | ||
31 | |||
32 | SQL links are opened on demand (i.e., the first time each httpd needs | ||
33 | to log something to SQL, the link is opened). In case the SQL server | ||
34 | is down when trying to connect to it, the module remains silent and | ||
35 | logs no error (I didn't want thousands of error messages in the | ||
36 | logfile). In case the SQL link is broken ("mysql server has gone | ||
37 | away") a proper error message is kept to the error log (textual :), and | ||
38 | the module tries to reestablish the concact (and reports whether it | ||
39 | succeeded or not in the error log). If the link cannot be | ||
40 | reestablished, the module will, again, remain silent. Technical note: | ||
41 | The SQL link is registered using apache's pool mechanism, so SQL links | ||
42 | are properly closed on any normal shutdown, kill -HUP or kill -TERM. | ||
43 | This also means that if you restart the MySQL daemon for any reason you | ||
44 | should restart Apache. | ||
45 | |||
46 | |||
47 | |||
48 | Supported directives | ||
49 | -------------------- | ||
50 | |||
51 | Please see the web-based documentation for full explanation of all | ||
52 | supported run-time directives. | ||
53 | |||
54 | http://www.grubbybaby.com/mod_log_mysql/directives.html | ||
55 | |||
56 | |||
57 | |||
58 | What gets logged by default? | ||
59 | ---------------------------- | ||
60 | |||
61 | All the data that would be contained in the "Combined Log Format" | ||
62 | is logged by default, plus a little extra. Your best bet is to | ||
63 | accept this default and employ the enclosed access_log.sql to | ||
64 | format your table. Customize your logging format after you've | ||
65 | had a chance to experiment with the default first. | ||
66 | |||
67 | The MySQL table looks like this if you use the enclosed access_log.sql: | ||
68 | |||
69 | +------------------+------------------+ | ||
70 | | Field | Type | | ||
71 | +------------------+------------------+ | ||
72 | | remote_host | varchar(50) | | ||
73 | | remote_user | varchar(50) | | ||
74 | | request_uri | varchar(50) | | ||
75 | | request_duration | smallint(6) | | ||
76 | | virtual_host | varchar(50) | | ||
77 | | time_stamp | int(10) unsigned | | ||
78 | | status | smallint(6) | | ||
79 | | bytes_sent | int(11) | | ||
80 | | referer | varchar(255) | | ||
81 | | agent | varchar(255) | | ||
82 | +------------------+------------------+ | ||
83 | |||
84 | remote_host: corresponds to the Apache %h directive. Contains the remote | ||
85 | hostname or IP of the machine accessing your server. | ||
86 | Example: si4002.inktomi.com | ||
87 | |||
88 | remote_user: corresponds to the Apache %u directive. Contains the | ||
89 | userid of people who have authenticated to your server, if applicable. | ||
90 | Example: freddy | ||
91 | |||
92 | request_uri: corresponds to the Apache %U directive. Contains the | ||
93 | URL path requested, excluding any query string. This is different than | ||
94 | the %r information you might be used to seeing: | ||
95 | |||
96 | %r: GET /cgi-bin/neomail.pl?sessionid=freddy-session-0.742143231719&sort=date_rev HTTP/1.1 | ||
97 | %U: /cgi-bin/neomail.pl | ||
98 | |||
99 | We log %U because it contains the real meat of the information that is | ||
100 | needed for log analysis, and saves the database a LOT of wasted growth | ||
101 | on unneeded bytes. | ||
102 | |||
103 | request_duration: corresponds to the Apache %T directive. Contains the | ||
104 | time in seconds that it took to serve the request. | ||
105 | Example: 2 | ||
106 | |||
107 | virtual_host: contains the VirtualHost that is making the log entry. This | ||
108 | allows you to log multiple VirtualHosts to a single MySQL database and | ||
109 | yet still be able to extract them for separate analysis. | ||
110 | Example: www.grubbybaby.com | ||
111 | |||
112 | time_stamp: contains the time that the request was logged. Please see | ||
113 | "Notes" below to get a better understanding of this. | ||
114 | Example: 1014249231 | ||
115 | |||
116 | status: corresponds to the Apache %t directive. Contains the HTTP status | ||
117 | of the request. | ||
118 | Example: 404 | ||
119 | |||
120 | bytes_sent: corresponds to the Apache %b directive. Contains the number | ||
121 | of bytes sent to service the request. | ||
122 | Example: 23123 | ||
123 | |||
124 | referer: corresponds to the Apache "%{Referer}i" directive. Contains the | ||
125 | referring HTML page's URL, if applicable. | ||
126 | Example: http://www.foobar.com/links.html | ||
127 | |||
128 | agent: corresponds to the Apache "%{User-Agent}" directive. Contains the | ||
129 | broswer type (user agent) of the software that made the request. | ||
130 | Example: Mozilla/3.0 (Slurp/si; slurp@inktomi.com; http://www.inktomi.com/slurp.html) | ||
131 | |||
132 | |||
133 | Notes | ||
134 | ----- | ||
135 | |||
136 | * The 'time_stamp' field is stored in an UNSIGNED INTEGER column, in the | ||
137 | standard unix "seconds since 1/1/1970 12:00:00" format. This is | ||
138 | superior to storing the access time as a string due to size | ||
139 | requirements: an UNSIGNED INT type fits in 4 bytes. The Apache date | ||
140 | string (e.g. "18/Nov/2001:13:59:52 -0800") requires 26 bytes -- | ||
141 | significantly larger, and those extra 22 bytes will add up over the | ||
142 | thousands of accesses that a busy server will experience. Besides, | ||
143 | an INT type is far more flexible for comparisons, etc. | ||
144 | |||
145 | In MySQL 3.21 and above you can easily convert this to a human | ||
146 | readable format using from_unixtime(), e.g.: | ||
147 | |||
148 | select remote_host,request_uri,from_unixtime(time_stamp) from access_log; | ||
149 | |||
150 | The enclosed perl program make_combined_log.pl shows how you can | ||
151 | extract your access records in a format that is completely Combined | ||
152 | Log Format compliant. You can then feed this to your favorite web | ||
153 | log analysis tool. | ||
154 | |||
155 | |||
156 | * The table's string values can be CHAR or VARCHAR, at a length of your choice. | ||
157 | VARCHAR is superior because it truncates long strings; CHAR types are | ||
158 | fixed-length and will be padded with spaces. Just like the | ||
159 | time_stamp described above, that kind of space waste will add up over | ||
160 | thousands of records. | ||
161 | |||
162 | |||
163 | * Most fields should probably be set to NOT NULL. The only ones that | ||
164 | shouldn't are extra fields that you don't intend the logging module | ||
165 | to update. (You can have other fields in the logging tables if you'd | ||
166 | like, but if they're set to NOT NULL then the logging module won't be | ||
167 | able to insert rows to these tables.) | ||
168 | |||
169 | |||
170 | * Apache normally logs numeric fields with a '-' character to mean "not | ||
171 | applicable," e.g. bytes_sent on a request with a 304 response code. | ||
172 | Since '-' is an illegal character in an SQL numeric field, such | ||
173 | fields are assigned the value 0 instead of '-' which, of course, | ||
174 | makes perfect sense anyway. | ||
175 | |||
176 | |||
177 | Disclaimer | ||
178 | ---------- | ||
179 | |||
180 | It works for me (I've tested it on my '2 hits/busy day' home Linux box, | ||
181 | and afterwards on our pretty busy tucows mirror (>100K hits a day) and | ||
182 | it appears to be working fine. | ||
183 | |||
184 | If it doesn't, and causes you damage of any sort, including but not | ||
185 | limited to losing logs, losing money or your girlfriend leaving you | ||
186 | (read 'boyfriend' where applicable), I'm not liable to anything. Bug | ||
187 | reports and constructive flame mail are ok, though (both about the code | ||
188 | and this quickly-written README file). | ||
189 | |||
190 | |||
191 | Author / Maintainer | ||
192 | ------------------- | ||
193 | |||
194 | The actual logging code was taken from the already existing flat file | ||
195 | text modules, so all that credit goes to the Apache Server group. | ||
196 | |||
197 | The MySQL routines and directives was added in by Zeev Suraski | ||
198 | <bourbon@netvision.net.il> | ||
199 | |||
200 | Changes from 1.06 on and the new documentation were added by | ||
201 | Chris Powell <chris@grubbybaby.com>. It seems that the module had fallen | ||
202 | into the "unmaintained" category -- it hadn't been updated since 1998 -- | ||
203 | so I've adopted it as the new maintainer. | ||
204 | |||
diff --git a/access_log.sql b/access_log.sql new file mode 100644 index 0000000..c38cb50 --- /dev/null +++ b/access_log.sql | |||
@@ -0,0 +1,13 @@ | |||
1 | create table access_log ( | ||
2 | remote_host varchar(50) not null, | ||
3 | remote_user varchar(50) not null, | ||
4 | request_uri varchar(50) not null, | ||
5 | request_duration smallint not null, | ||
6 | virtual_host varchar(50) not null, | ||
7 | time_stamp int unsigned not null, | ||
8 | status smallint not null, | ||
9 | bytes_sent int not null, | ||
10 | referer varchar(255) not null, | ||
11 | agent varchar(255) not null | ||
12 | ) | ||
13 | |||
diff --git a/make_combined_log.pl b/make_combined_log.pl new file mode 100755 index 0000000..d375f3a --- /dev/null +++ b/make_combined_log.pl | |||
@@ -0,0 +1,133 @@ | |||
1 | #!/usr/bin/perl | ||
2 | |||
3 | # $Id: make_combined_log.pl,v 1.1 2001/11/28 05:26:54 helios Exp $ | ||
4 | # | ||
5 | # make_combined_log.pl | ||
6 | # | ||
7 | # Usage: make_combined_log <days> <virtual host> | ||
8 | # | ||
9 | # This perl script extracts the httpd access data from a MySQL database | ||
10 | # and formats it properly for parsing by 3rd-party log analysis tools. | ||
11 | # | ||
12 | # The script is intended to be run out by cron. Its commandline arguments tell | ||
13 | # it how many days' worth of access records to extract, and which virtual_host | ||
14 | # you are interested in (because many people log several virthosts to one MySQL | ||
15 | # db.) This permits you to run it daily, weekly, every 9 days -- whatever you | ||
16 | # decide. | ||
17 | # | ||
18 | # Note: By "days" I mean "chunks of 24 hours prior to the moment this script is | ||
19 | # run." So if you run it at 4:34 p.m. on the 12th, it will go back through 4:34 | ||
20 | # p.m. on the 11th. | ||
21 | # | ||
22 | # Known issues: | ||
23 | # * Because GET and POST are not discriminated in the MySQL log, we'll just | ||
24 | # assume that all requests are GETs. This should have negligible effect | ||
25 | # on any analysis software. This could be remedied IF you stored the full | ||
26 | # HTTP request in your database instead of just the URI, but that's going to | ||
27 | # cost you a LOT of space really quickly... | ||
28 | # | ||
29 | # * Because this is somewhat of a quick hack it doesn't do the most robust | ||
30 | # error checking in the world. Run it by hand to confirm your usage before | ||
31 | # putting it in crontab. | ||
32 | |||
33 | $| = 1; | ||
34 | |||
35 | use DBI; | ||
36 | |||
37 | # Remember, $#ARGV is parameters minus one... | ||
38 | if ($#ARGV != 1) { | ||
39 | die "Incorrect usage, please read the perl source code for correct usage." | ||
40 | } | ||
41 | |||
42 | $days = $ARGV[0]; | ||
43 | $virthost = $ARGV[1]; | ||
44 | |||
45 | # | ||
46 | # Set up the proper variables to permit database access | ||
47 | # | ||
48 | $serverName = "your.dbmachine.com"; | ||
49 | $serverPort = "3306"; | ||
50 | $serverUser = "someuser"; | ||
51 | $serverPass = "somepass"; | ||
52 | $serverTbl = "acc_log_tbl"; | ||
53 | $serverDb = "apache"; | ||
54 | |||
55 | # | ||
56 | # Other constants | ||
57 | # | ||
58 | $st_tz = "-0800"; | ||
59 | $dt_tz = "-0700"; | ||
60 | $type = "GET"; | ||
61 | $http = "HTTP/1.1"; | ||
62 | |||
63 | $now = time(); | ||
64 | $start = $now - (86400 * $days); | ||
65 | |||
66 | # | ||
67 | # Connect and fetch the records | ||
68 | # | ||
69 | $dbh = DBI->connect("DBI:mysql:database=$serverDb;host=$serverName;port=$serverPort",$serverUser,$serverPass); | ||
70 | if (not $dbh) { | ||
71 | die "Unable to connect to the database. Please check your connection variables. (Bad password? Incorrect perms?)"; | ||
72 | } | ||
73 | |||
74 | $records = $dbh->prepare("select remote_host,remote_user,request_uri,request_duration,time_stamp,status,bytes_sent,referer,agent from $serverTbl where virtual_host='$virthost' and time_stamp >= $start"); | ||
75 | $records->execute; | ||
76 | if (not $records) { | ||
77 | die "No such table or the select returned no records." | ||
78 | } | ||
79 | |||
80 | #Right | ||
81 | #ariston.netcraft.com - - [14/Nov/2001:05:13:39 -0800] "GET / HTTP/1.0" 200 502 "-" "Mozilla/4.08 [en] (Win98; I)" | ||
82 | #ariston.netcraft.com - - [14/Nov/2001:05:13:39 -0800] "GET / HTTP/1.0" 200 502 "-" "Mozilla/4.08 [en] (Win98; I)" | ||
83 | |||
84 | #Bad | ||
85 | #ariston.netcraft.com - - [2001-11-14 05:13:39 -0800] "GET / HTTP/1.1" 200 502 "-" "Mozilla/4.08 [en] (Win98; I)" | ||
86 | #ariston.netcraft.com - - [2001-11-14 05:13:39 -0800] "GET / HTTP/1.1" 200 502 "-" "Mozilla/4.08 [en] (Win98; I)" | ||
87 | |||
88 | |||
89 | # | ||
90 | # Pull out the data row by row and format it | ||
91 | # | ||
92 | while (@data = $records->fetchrow_array) { | ||
93 | ($sec,$min,$hour,$mday,$mon,$year,$wday,$yday,$isdst) = localtime($data[4]); | ||
94 | $year=$year+1900; | ||
95 | |||
96 | # Create format for leading-zero formatting | ||
97 | if ($day < 10) { $day = "0$day"; } | ||
98 | if ($month < 10) { $month = "0$month"; } | ||
99 | if ($hour < 10) { $hour = "0$hour"; } | ||
100 | if ($min < 10) { $min = "0$min"; } | ||
101 | if ($sec < 10) { $sec = "0$sec"; } | ||
102 | |||
103 | # Convert numeric month to string month | ||
104 | for ($mon) { | ||
105 | if (/00/) { $mon = "Jan";} | ||
106 | elsif (/01/) { $mon = "Feb";} | ||
107 | elsif (/02/) { $mon = "Mar";} | ||
108 | elsif (/03/) { $mon = "Apr";} | ||
109 | elsif (/04/) { $mon = "May";} | ||
110 | elsif (/05/) { $mon = "Jun";} | ||
111 | elsif (/06/) { $mon = "Jul";} | ||
112 | elsif (/07/) { $mon = "Aug";} | ||
113 | elsif (/08/) { $mon = "Sep";} | ||
114 | elsif (/09/) { $mon = "Oct";} | ||
115 | elsif (/10/) { $mon = "Nov";} | ||
116 | elsif (/11/) { $mon = "Dec";} | ||
117 | } | ||
118 | |||
119 | # Create the output | ||
120 | print "$data[0] $data[1] - [$mday/$mon/$year:$hour:$min:$sec "; | ||
121 | if ($isdst) { | ||
122 | print "$dt_tz\] "; | ||
123 | } else { | ||
124 | print "$st_tz\] "; | ||
125 | } | ||
126 | print "\"$type $data[2] $http\" $data[5] $data[6] \"$data[7]\" \"$data[8]\"\n"; | ||
127 | } | ||
128 | |||
129 | # | ||
130 | # Done | ||
131 | # | ||
132 | $records->finish; | ||
133 | |||
diff --git a/mod_log_sql.c b/mod_log_sql.c new file mode 100644 index 0000000..1493e41 --- /dev/null +++ b/mod_log_sql.c | |||
@@ -0,0 +1,786 @@ | |||
1 | /* $Id: mod_log_sql.c,v 1.1 2001/11/28 05:26:55 helios Exp $ | ||
2 | * | ||
3 | * mod_log_mysql.c | ||
4 | * | ||
5 | * Hi, I'm the new maintainer of this code. If you have any questions, | ||
6 | * comments or suggestions (which are always welcome), please contact Chris | ||
7 | * Powell <chris@grubbybaby.com>. This code still falls under the rules of | ||
8 | * the Apache license, and all credit for the code up to my changes is still | ||
9 | * preserved below. | ||
10 | * | ||
11 | * ==================================================================== | ||
12 | * | ||
13 | * The original preface from version 1.05: This module was patched, wrapped | ||
14 | * and coded by Zeev Suraski <bourbon@netvision.net.il> | ||
15 | * | ||
16 | * It may be used freely, with the same restrictions as its predecessors | ||
17 | * (specified below). This module is based on code from standard apache | ||
18 | * modules. Their copyright notice follows. | ||
19 | * | ||
20 | * ==================================================================== | ||
21 | * Copyright (c) 1995-1997 The Apache Group. All rights reserved. | ||
22 | * | ||
23 | * Redistribution and use in source and binary forms, with or without | ||
24 | * modification, are permitted provided that the following conditions | ||
25 | * are met: | ||
26 | * | ||
27 | * 1. Redistributions of source code must retain the above copyright | ||
28 | * notice, this list of conditions and the following disclaimer. | ||
29 | * | ||
30 | * 2. Redistributions in binary form must reproduce the above copyright | ||
31 | * notice, this list of conditions and the following disclaimer in | ||
32 | * the documentation and/or other materials provided with the | ||
33 | * distribution. | ||
34 | * | ||
35 | * 3. All advertising materials mentioning features or use of this | ||
36 | * software must display the following acknowledgment: | ||
37 | * "This product includes software developed by the Apache Group | ||
38 | * for use in the Apache HTTP server project (http://www.apache.org/)." | ||
39 | * | ||
40 | * 4. The names "Apache Server" and "Apache Group" must not be used to | ||
41 | * endorse or promote products derived from this software without | ||
42 | * prior written permission. | ||
43 | * | ||
44 | * 5. Redistributions of any form whatsoever must retain the following | ||
45 | * acknowledgment: | ||
46 | * "This product includes software developed by the Apache Group | ||
47 | * for use in the Apache HTTP server project (http://www.apache.org/)." | ||
48 | * | ||
49 | * THIS SOFTWARE IS PROVIDED BY THE APACHE GROUP ``AS IS'' AND ANY | ||
50 | * EXPRESSED OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE | ||
51 | * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR | ||
52 | * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE APACHE GROUP OR | ||
53 | * ITS CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, | ||
54 | * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT | ||
55 | * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; | ||
56 | * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) | ||
57 | * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, | ||
58 | * STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) | ||
59 | * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED | ||
60 | * OF THE POSSIBILITY OF SUCH DAMAGE. | ||
61 | * ==================================================================== | ||
62 | * | ||
63 | * This software consists of voluntary contributions made by many | ||
64 | * individuals on behalf of the Apache Group and was originally based | ||
65 | * on public domain software written at the National Center for | ||
66 | * Supercomputing Applications, University of Illinois, Urbana-Champaign. | ||
67 | * For more information on the Apache Group and the Apache HTTP server | ||
68 | * project, please see <http://www.apache.org/>. | ||
69 | * | ||
70 | */ | ||
71 | |||
72 | |||
73 | #include "httpd.h" | ||
74 | #include "http_config.h" | ||
75 | #include "http_log.h" | ||
76 | #if MODULE_MAGIC_NUMBER >= 19980324 | ||
77 | #include "ap_compat.h" | ||
78 | #endif | ||
79 | #include "http_core.h" | ||
80 | #include <time.h> | ||
81 | #include <mysql/mysql.h> | ||
82 | |||
83 | module mysql_log_module; | ||
84 | MYSQL log_sql_server, *mysql_log = NULL; | ||
85 | char *log_db_name = NULL, *db_host = NULL, *db_user = NULL, *db_pwd = NULL; | ||
86 | |||
87 | #define MYSQL_ERROR(mysql) ((mysql)?(mysql_error(mysql)):"MySQL server has gone away") | ||
88 | |||
89 | typedef const char *(*item_key_func) (request_rec *, char *); | ||
90 | typedef struct { | ||
91 | char *referer_table_name, *agent_table_name, *transfer_table_name; | ||
92 | array_header *referer_ignore_list; | ||
93 | array_header *transfer_ignore_list; | ||
94 | array_header *remhost_ignore_list; | ||
95 | char *transfer_log_format; | ||
96 | } log_mysql_state; | ||
97 | |||
98 | |||
99 | #if MODULE_MAGIC_NUMBER < 19970103 | ||
100 | extern const char *log_remote_host(request_rec * r, char *a); | ||
101 | extern const char *log_remote_logname(request_rec * r, char *a); | ||
102 | extern const char *log_remote_user(request_rec * r, char *a); | ||
103 | extern const char *log_request_time(request_rec * r, char *a); | ||
104 | extern const char *log_request_timestamp(request_rec * r, char *a); | ||
105 | extern const char *log_request_duration(request_rec * r, char *a); | ||
106 | extern const char *log_request_line(request_rec * r, char *a); | ||
107 | extern const char *log_request_file(request_rec * r, char *a); | ||
108 | extern const char *log_request_uri(request_rec * r, char *a); | ||
109 | extern const char *log_status(request_rec * r, char *a); | ||
110 | extern const char *log_bytes_sent(request_rec * r, char *a); | ||
111 | extern const char *log_header_in(request_rec * r, char *a); | ||
112 | extern const char *log_header_out(request_rec * r, char *a); | ||
113 | extern const char *log_note(request_rec * r, char *a); | ||
114 | extern const char *log_env_var(request_rec * r, char *a); | ||
115 | extern const char *log_virtual_host(request_rec * r, char *a); | ||
116 | extern const char *log_server_port(request_rec * r, char *a); | ||
117 | extern const char *log_child_pid(request_rec * r, char *a); | ||
118 | #else | ||
119 | static char *format_integer(pool *p, int i) | ||
120 | { | ||
121 | char dummy[40]; | ||
122 | ap_snprintf(dummy, sizeof(dummy), "%d", i); | ||
123 | return pstrdup(p, dummy); | ||
124 | } | ||
125 | |||
126 | static char *pfmt(pool *p, int i) | ||
127 | { | ||
128 | if (i <= 0) { | ||
129 | return "-"; | ||
130 | } else { | ||
131 | return format_integer(p, i); | ||
132 | } | ||
133 | } | ||
134 | |||
135 | static const char *log_remote_host(request_rec * r, char *a) | ||
136 | { | ||
137 | return (char *) get_remote_host(r->connection, r->per_dir_config, REMOTE_NAME); | ||
138 | } | ||
139 | |||
140 | static const char *log_remote_logname(request_rec * r, char *a) | ||
141 | { | ||
142 | return (char *) get_remote_logname(r); | ||
143 | } | ||
144 | |||
145 | static const char *log_remote_user(request_rec * r, char *a) | ||
146 | { | ||
147 | char *rvalue = r->connection->user; | ||
148 | |||
149 | if (rvalue == NULL) { | ||
150 | rvalue = "-"; | ||
151 | } else if (strlen(rvalue) == 0) { | ||
152 | rvalue = "\"\""; | ||
153 | } | ||
154 | return rvalue; | ||
155 | } | ||
156 | |||
157 | static const char *log_request_line(request_rec * r, char *a) | ||
158 | { | ||
159 | return r->the_request; | ||
160 | } | ||
161 | |||
162 | static const char *log_request_file(request_rec * r, char *a) | ||
163 | { | ||
164 | return r->filename; | ||
165 | } | ||
166 | |||
167 | static const char *log_request_uri(request_rec * r, char *a) | ||
168 | { | ||
169 | return r->uri; | ||
170 | } | ||
171 | |||
172 | static const char *log_status(request_rec * r, char *a) | ||
173 | { | ||
174 | return pfmt(r->pool, r->status); | ||
175 | } | ||
176 | |||
177 | static const char *log_bytes_sent(request_rec * r, char *a) | ||
178 | { | ||
179 | if (!r->sent_bodyct) { | ||
180 | return "-"; | ||
181 | } else { | ||
182 | long int bs; | ||
183 | char dummy[40]; | ||
184 | bgetopt(r->connection->client, BO_BYTECT, &bs); | ||
185 | ap_snprintf(dummy, sizeof(dummy), "%ld", bs); | ||
186 | return pstrdup(r->pool, dummy); | ||
187 | } | ||
188 | } | ||
189 | |||
190 | static const char *log_header_in(request_rec * r, char *a) | ||
191 | { | ||
192 | return table_get(r->headers_in, a); | ||
193 | } | ||
194 | |||
195 | static const char *log_header_out(request_rec * r, char *a) | ||
196 | { | ||
197 | const char *cp = table_get(r->headers_out, a); | ||
198 | if (!strcasecmp(a, "Content-type") && r->content_type) { | ||
199 | cp = r->content_type; | ||
200 | } | ||
201 | if (cp) { | ||
202 | return cp; | ||
203 | } | ||
204 | return table_get(r->err_headers_out, a); | ||
205 | } | ||
206 | |||
207 | static const char *log_request_time(request_rec * r, char *a) | ||
208 | { | ||
209 | int timz; | ||
210 | struct tm *t; | ||
211 | char tstr[MAX_STRING_LEN]; | ||
212 | |||
213 | t = get_gmtoff(&timz); | ||
214 | |||
215 | if (a && *a) { /* Custom format */ | ||
216 | strftime(tstr, MAX_STRING_LEN, a, t); | ||
217 | } else { /* CLF format */ | ||
218 | char sign = (timz < 0 ? '-' : '+'); | ||
219 | |||
220 | if (timz < 0) { | ||
221 | timz = -timz; | ||
222 | } | ||
223 | strftime(tstr, MAX_STRING_LEN, "[%d/%b/%Y:%H:%M:%S ", t); | ||
224 | ap_snprintf(tstr + strlen(tstr), sizeof(tstr) - strlen(tstr), "%c%.2d%.2d]", sign, timz / 60, timz % 60); | ||
225 | } | ||
226 | |||
227 | return pstrdup(r->pool, tstr); | ||
228 | } | ||
229 | |||
230 | static const char *log_request_duration(request_rec * r, char *a) | ||
231 | { | ||
232 | char duration[22]; /* Long enough for 2^64 */ | ||
233 | |||
234 | ap_snprintf(duration, sizeof(duration), "%ld", time(NULL) - r->request_time); | ||
235 | return pstrdup(r->pool, duration); | ||
236 | } | ||
237 | |||
238 | static const char *log_virtual_host(request_rec * r, char *a) | ||
239 | { | ||
240 | return pstrdup(r->pool, r->server->server_hostname); | ||
241 | } | ||
242 | |||
243 | static const char *log_server_port(request_rec * r, char *a) | ||
244 | { | ||
245 | char portnum[22]; | ||
246 | |||
247 | ap_snprintf(portnum, sizeof(portnum), "%u", r->server->port); | ||
248 | return pstrdup(r->pool, portnum); | ||
249 | } | ||
250 | |||
251 | static const char *log_child_pid(request_rec * r, char *a) | ||
252 | { | ||
253 | char pidnum[22]; | ||
254 | ap_snprintf(pidnum, sizeof(pidnum), "%ld", (long) getpid()); | ||
255 | return pstrdup(r->pool, pidnum); | ||
256 | } | ||
257 | |||
258 | static const char *log_referer(request_rec * r, char *a) | ||
259 | { | ||
260 | const char *tempref; | ||
261 | |||
262 | tempref = table_get(r->headers_in, "Referer"); | ||
263 | if (!tempref) | ||
264 | { | ||
265 | return "-"; | ||
266 | } else { | ||
267 | return tempref; | ||
268 | } | ||
269 | |||
270 | } | ||
271 | |||
272 | static const char *log_agent(request_rec * r, char *a) | ||
273 | { | ||
274 | const char *tempag; | ||
275 | |||
276 | tempag = table_get(r->headers_in, "User-Agent"); | ||
277 | if (!tempag) | ||
278 | { | ||
279 | return "-"; | ||
280 | } else { | ||
281 | return tempag; | ||
282 | } | ||
283 | } | ||
284 | |||
285 | const char *log_request_timestamp(request_rec * r, char *a) | ||
286 | { | ||
287 | char tstr[32]; | ||
288 | |||
289 | snprintf(tstr, 32, "%ld", time(NULL)); | ||
290 | return pstrdup(r->pool, tstr); | ||
291 | } | ||
292 | |||
293 | static const char *log_note(request_rec * r, char *a) | ||
294 | { | ||
295 | return table_get(r->notes, a); | ||
296 | } | ||
297 | |||
298 | static const char *log_env_var(request_rec * r, char *a) | ||
299 | { | ||
300 | return table_get(r->subprocess_env, a); | ||
301 | } | ||
302 | #endif | ||
303 | |||
304 | struct log_mysql_item_list { | ||
305 | char ch; | ||
306 | item_key_func func; | ||
307 | const char *sql_field_name; | ||
308 | int want_orig_default; | ||
309 | int string_contents; | ||
310 | } log_mysql_item_keys[] = { | ||
311 | |||
312 | { 'h', log_remote_host, "remote_host", 0, 1 }, | ||
313 | { 'l', log_remote_logname, "remote_logname", 0, 1 }, | ||
314 | { 'u', log_remote_user, "remote_user", 0, 1 }, | ||
315 | { 't', log_request_time, "request_time", 0, 1 }, | ||
316 | { 'S', log_request_timestamp, "time_stamp", 0, 0 }, | ||
317 | { 'T', log_request_duration, "request_duration", 1, 0 }, | ||
318 | { 'r', log_request_line, "request_line", 1, 1 }, | ||
319 | { 'f', log_request_file, "request_file", 0, 1 }, | ||
320 | { 'U', log_request_uri, "request_uri", 1, 1 }, | ||
321 | { 's', log_status, "status", 1, 0 }, | ||
322 | { 'b', log_bytes_sent, "bytes_sent", 0, 0 }, | ||
323 | { 'i', log_header_in, "header_in", 0, 1 }, | ||
324 | { 'o', log_header_out, "header_out", 0, 1 }, | ||
325 | { 'n', log_note, "note", 0, 1 }, | ||
326 | { 'e', log_env_var, "env_var", 0, 1 }, | ||
327 | { 'v', log_virtual_host, "virtual_host", 0, 1 }, | ||
328 | { 'p', log_server_port, "server_port", 0, 0 }, | ||
329 | { 'P', log_child_pid, "child_pid", 0, 0 }, | ||
330 | { 'R', log_referer, "referer", 0, 1 }, | ||
331 | { 'A', log_agent, "agent", 0, 1 }, | ||
332 | { '\0'} | ||
333 | }; | ||
334 | |||
335 | |||
336 | /* Routine to escape 'dangerous' characters that would otherwise | ||
337 | * corrupt the INSERT string. | ||
338 | */ | ||
339 | const char *mysql_escape_log(const char *str, pool *p) | ||
340 | { | ||
341 | register int i = 0, j = 0; | ||
342 | int need_to_escape = 0; | ||
343 | |||
344 | if (!str) { | ||
345 | return NULL; | ||
346 | } | ||
347 | /* first find out if we need to escape */ | ||
348 | i = 0; | ||
349 | while (str[i]) { | ||
350 | if (str[i] != '\'' || str[i] != '\\' || str[i] != '\"') { | ||
351 | need_to_escape = 1; | ||
352 | break; | ||
353 | } | ||
354 | i++; | ||
355 | } | ||
356 | |||
357 | if (need_to_escape) { | ||
358 | char *tmp_str; | ||
359 | int length = strlen(str); | ||
360 | |||
361 | tmp_str = (char *) palloc(p, length * 2 + 1); /* worst case situation, which wouldn't be a pretty sight :) */ | ||
362 | if (!tmp_str) { | ||
363 | return str; | ||
364 | } | ||
365 | for (i = 0, j = 0; i < length; i++, j++) { | ||
366 | switch (str[i]) { | ||
367 | case '\'': | ||
368 | case '\"': | ||
369 | case '\\': | ||
370 | tmp_str[j] = '\\'; | ||
371 | j++; | ||
372 | default: | ||
373 | tmp_str[j] = str[i]; | ||
374 | } | ||
375 | } | ||
376 | tmp_str[j] = 0; | ||
377 | return tmp_str; | ||
378 | } else { | ||
379 | return str; | ||
380 | } | ||
381 | } | ||
382 | |||
383 | |||
384 | void open_log_dblink() | ||
385 | { | ||
386 | if (mysql_log != NULL) { /* virtual database link shared with main server */ | ||
387 | return; | ||
388 | } | ||
389 | if (log_db_name) { /* open an SQL link */ | ||
390 | /* link to the MySQL database and register its cleanup!@$ */ | ||
391 | mysql_log = mysql_connect(&log_sql_server, db_host, db_user, db_pwd); | ||
392 | if (mysql_log) { /* link opened */ | ||
393 | if (mysql_select_db(mysql_log, log_db_name) != 0) { /* unable to select database */ | ||
394 | mysql_close(mysql_log); | ||
395 | mysql_log = NULL; | ||
396 | } | ||
397 | } | ||
398 | } | ||
399 | } | ||
400 | |||
401 | |||
402 | void *make_log_mysql_state(pool *p, server_rec * s) | ||
403 | { | ||
404 | log_mysql_state *cls = (log_mysql_state *) palloc(p, sizeof(log_mysql_state)); | ||
405 | |||
406 | cls->referer_table_name = cls->agent_table_name = cls->transfer_table_name = ""; | ||
407 | cls->referer_ignore_list = make_array(p, 1, sizeof(char *)); | ||
408 | cls->transfer_ignore_list = make_array(p, 1, sizeof(char *)); | ||
409 | cls->remhost_ignore_list = make_array(p, 1, sizeof(char *)); | ||
410 | cls->transfer_log_format = ""; | ||
411 | return (void *) cls; | ||
412 | } | ||
413 | |||
414 | const char *set_referer_log_mysql_table(cmd_parms * parms, void *dummy, char *arg) | ||
415 | { | ||
416 | log_mysql_state *cls = get_module_config(parms->server->module_config, | ||
417 | &mysql_log_module); | ||
418 | |||
419 | cls->referer_table_name = arg; | ||
420 | return NULL; | ||
421 | } | ||
422 | |||
423 | |||
424 | const char *set_agent_log_mysql_table(cmd_parms * parms, void *dummy, char *arg) | ||
425 | { | ||
426 | log_mysql_state *cls = get_module_config(parms->server->module_config, | ||
427 | &mysql_log_module); | ||
428 | |||
429 | cls->agent_table_name = arg; | ||
430 | return NULL; | ||
431 | } | ||
432 | |||
433 | |||
434 | const char *set_transfer_log_mysql_table(cmd_parms * parms, void *dummy, char *arg) | ||
435 | { | ||
436 | log_mysql_state *cls = get_module_config(parms->server->module_config, | ||
437 | &mysql_log_module); | ||
438 | |||
439 | cls->transfer_table_name = arg; | ||
440 | return NULL; | ||
441 | } | ||
442 | |||
443 | |||
444 | const char *set_transfer_log_format(cmd_parms * parms, void *dummy, char *arg) | ||
445 | { | ||
446 | log_mysql_state *cls = get_module_config(parms->server->module_config, | ||
447 | &mysql_log_module); | ||
448 | |||
449 | cls->transfer_log_format = arg; | ||
450 | return NULL; | ||
451 | } | ||
452 | |||
453 | |||
454 | const char *set_log_mysql_db(cmd_parms * parms, void *dummy, char *arg) | ||
455 | { | ||
456 | log_db_name = arg; | ||
457 | return NULL; | ||
458 | } | ||
459 | |||
460 | const char *set_log_mysql_info(cmd_parms * parms, void *dummy, char *host, char *user, char *pwd) | ||
461 | { | ||
462 | if (*host != '.') { | ||
463 | db_host = host; | ||
464 | } | ||
465 | if (*user != '.') { | ||
466 | db_user = user; | ||
467 | } | ||
468 | if (*pwd != '.') { | ||
469 | db_pwd = pwd; | ||
470 | } | ||
471 | return NULL; | ||
472 | } | ||
473 | |||
474 | |||
475 | const char *add_referer_mysql_ignore(cmd_parms * parms, void *dummy, char *arg) | ||
476 | { | ||
477 | char **addme; | ||
478 | log_mysql_state *cls = get_module_config(parms->server->module_config, | ||
479 | &mysql_log_module); | ||
480 | |||
481 | addme = push_array(cls->referer_ignore_list); | ||
482 | *addme = pstrdup(cls->referer_ignore_list->pool, arg); | ||
483 | return NULL; | ||
484 | } | ||
485 | |||
486 | const char *add_transfer_mysql_ignore(cmd_parms * parms, void *dummy, char *arg) | ||
487 | { | ||
488 | char **addme; | ||
489 | log_mysql_state *cls = get_module_config(parms->server->module_config, | ||
490 | &mysql_log_module); | ||
491 | |||
492 | addme = push_array(cls->transfer_ignore_list); | ||
493 | *addme = pstrdup(cls->transfer_ignore_list->pool, arg); | ||
494 | return NULL; | ||
495 | } | ||
496 | |||
497 | const char *add_remhost_mysql_ignore(cmd_parms * parms, void *dummy, char *arg) | ||
498 | { | ||
499 | char **addme; | ||
500 | log_mysql_state *cls = get_module_config(parms->server->module_config, | ||
501 | &mysql_log_module); | ||
502 | |||
503 | addme = push_array(cls->remhost_ignore_list); | ||
504 | *addme = pstrdup(cls->remhost_ignore_list->pool, arg); | ||
505 | return NULL; | ||
506 | } | ||
507 | |||
508 | command_rec log_mysql_cmds[] = { | ||
509 | {"RefererLogMySQLTable", set_referer_log_mysql_table, NULL, RSRC_CONF, TAKE1, | ||
510 | "the table of the referer log"} | ||
511 | , | ||
512 | {"AgentLogMySQLTable", set_agent_log_mysql_table, NULL, RSRC_CONF, TAKE1, | ||
513 | "the table of the agent log"} | ||
514 | , | ||
515 | {"TransferLogMySQLTable", set_transfer_log_mysql_table, NULL, RSRC_CONF, TAKE1, | ||
516 | "the table of the transfer log"} | ||
517 | , | ||
518 | {"TransferLogMySQLFormat", set_transfer_log_format, NULL, RSRC_CONF, TAKE1, | ||
519 | "specific format for the MySQL transfer log"} | ||
520 | , | ||
521 | {"RefererIgnore", add_referer_mysql_ignore, NULL, RSRC_CONF, ITERATE, | ||
522 | "referer hostnames to ignore"} | ||
523 | , | ||
524 | {"RequestIgnore", add_transfer_mysql_ignore, NULL, RSRC_CONF, ITERATE, | ||
525 | "transfer log URIs to ignore"} | ||
526 | , | ||
527 | {"RemhostIgnore", add_remhost_mysql_ignore, NULL, RSRC_CONF, ITERATE, | ||
528 | "transfer log remote hosts to ignore"} | ||
529 | , | ||
530 | {"LogMySQLDB", set_log_mysql_db, NULL, RSRC_CONF, TAKE1, | ||
531 | "the database of the referer log"} | ||
532 | , | ||
533 | {"LogMySQLInfo", set_log_mysql_info, NULL, RSRC_CONF, TAKE3, | ||
534 | "host, user and password for MySQL link"} | ||
535 | , | ||
536 | {NULL} | ||
537 | }; | ||
538 | |||
539 | |||
540 | void log_mysql_child(void *cmd) | ||
541 | { | ||
542 | /* Child process code for 'RefererLog "|..."'; | ||
543 | * may want a common framework for this, since I expect it will | ||
544 | * be common for other foo-loggers to want this sort of thing... | ||
545 | */ | ||
546 | |||
547 | cleanup_for_exec(); | ||
548 | signal(SIGHUP, SIG_IGN); | ||
549 | #ifdef __EMX__ | ||
550 | /* For OS/2 we need to use a '/' */ | ||
551 | execl(SHELL_PATH, SHELL_PATH, "/c", (char *) cmd, NULL); | ||
552 | #else | ||
553 | execl(SHELL_PATH, SHELL_PATH, "-c", (char *) cmd, NULL); | ||
554 | #endif | ||
555 | perror("execl"); | ||
556 | fprintf(stderr, "Exec of shell for logging failed!!!\n"); | ||
557 | exit(1); | ||
558 | } | ||
559 | |||
560 | |||
561 | int safe_mysql_query(request_rec * r, const char *query) | ||
562 | { | ||
563 | int error = 1; | ||
564 | struct timespec delay, remainder; | ||
565 | int ret; | ||
566 | char *str; | ||
567 | void (*handler) (int); | ||
568 | |||
569 | handler = signal(SIGPIPE, SIG_IGN); /* a failed mysql_query() may send a SIGPIPE */ | ||
570 | |||
571 | if ( !mysql_log || | ||
572 | ( | ||
573 | (error = mysql_query(mysql_log, query)) && | ||
574 | !strcasecmp(mysql_error(mysql_log), "MySQL server has gone away") | ||
575 | ) | ||
576 | ) | ||
577 | |||
578 | { /* We need to restart the server link */ | ||
579 | |||
580 | mysql_log = NULL; | ||
581 | log_error("MySQL: connection lost, attempting reconnect", r->server); | ||
582 | |||
583 | open_log_dblink(); | ||
584 | |||
585 | if (mysql_log == NULL) { /* unable to link */ | ||
586 | signal(SIGPIPE, handler); | ||
587 | log_error("MySQL: reconnect failed.", r->server); | ||
588 | return error; | ||
589 | } | ||
590 | |||
591 | log_error("MySQL: reconnect successful.", r->server); | ||
592 | error = mysql_query(mysql_log, query); | ||
593 | } | ||
594 | |||
595 | signal(SIGPIPE, handler); | ||
596 | |||
597 | if (error) { | ||
598 | /* Attempt a single re-try... First sleep for a tiny amount of time. */ | ||
599 | |||
600 | delay.tv_sec = 0; | ||
601 | delay.tv_nsec = 500000000; /* max is 999999999 (nine nines) */ | ||
602 | ret = nanosleep(&delay, &remainder); | ||
603 | if (ret && errno != EINTR) | ||
604 | perror("nanosleep"); | ||
605 | |||
606 | /* Now re-attempt */ | ||
607 | error = mysql_query(mysql_log,query); | ||
608 | |||
609 | if (error) { | ||
610 | str = pstrcat(r->pool, "MySQL query failed: ", query, NULL); | ||
611 | log_error(str, r->server); | ||
612 | str = pstrcat(r->pool, "MySQL failure reason: ", MYSQL_ERROR(mysql_log), NULL); | ||
613 | log_error(str, r->server); | ||
614 | } else { | ||
615 | log_error("MySQL: INSERT successful after a delayed retry.", r->server); | ||
616 | } | ||
617 | } | ||
618 | return error; | ||
619 | } | ||
620 | |||
621 | |||
622 | /* Routine to perform the actual construction and execution of the relevant | ||
623 | * INSERT statements. | ||
624 | */ | ||
625 | int log_mysql_transaction(request_rec * orig) | ||
626 | { | ||
627 | char **ptrptr, **ptrptr2; | ||
628 | log_mysql_state *cls = get_module_config(orig->server->module_config, | ||
629 | &mysql_log_module); | ||
630 | char *str; | ||
631 | const char *referer; | ||
632 | request_rec *r; | ||
633 | int retvalue = DECLINED; | ||
634 | int referer_needed, agent_needed, transfer_needed; | ||
635 | |||
636 | /* Are there configuration directives for these logs? For each found | ||
637 | * config directive that is found, mark that type as 'needed'. | ||
638 | */ | ||
639 | referer_needed = ((cls->referer_table_name[0] != '\0') ? 1 : 0); | ||
640 | agent_needed = ((cls->agent_table_name[0] != '\0') ? 1 : 0); | ||
641 | transfer_needed = ((cls->transfer_table_name[0] != '\0') ? 1 : 0); | ||
642 | |||
643 | if (!referer_needed && !agent_needed && !transfer_needed) { | ||
644 | return OK; | ||
645 | } | ||
646 | if (mysql_log == NULL) { /* mysql link not up, hopefully we can do something about it */ | ||
647 | open_log_dblink(); | ||
648 | if (mysql_log == NULL) { | ||
649 | return OK; | ||
650 | } | ||
651 | } | ||
652 | for (r = orig; r->next; r = r->next) { | ||
653 | continue; | ||
654 | } | ||
655 | |||
656 | /* Log the 'referer' to its own log if configured to do so. */ | ||
657 | if (referer_needed) { | ||
658 | retvalue = OK; | ||
659 | referer = table_get(orig->headers_in, "Referer"); | ||
660 | if (referer != NULL) { | ||
661 | |||
662 | /* The following is an upsetting mess of pointers, I'm sorry | ||
663 | Anyone with the motiviation and/or the time should feel free | ||
664 | to make this cleaner... */ | ||
665 | ptrptr2 = (char **) (cls->referer_ignore_list->elts + (cls->referer_ignore_list->nelts * cls->referer_ignore_list->elt_size)); | ||
666 | |||
667 | /* Go through each element of the ignore list and compare it to the | ||
668 | referer_host. If we get a match, return without logging */ | ||
669 | for (ptrptr = (char **) cls->referer_ignore_list->elts; ptrptr < ptrptr2; ptrptr = (char **) ((char *) ptrptr + cls->referer_ignore_list->elt_size)) { | ||
670 | if (strstr(referer, *ptrptr)) { | ||
671 | return OK; | ||
672 | } | ||
673 | } | ||
674 | str = pstrcat(orig->pool, "insert into ", cls->referer_table_name, " (referer,url,time_stamp) values ('", mysql_escape_log(referer, orig->pool), "','", mysql_escape_log(r->uri, orig->pool), "',unix_timestamp(now()) )", NULL); | ||
675 | safe_mysql_query(orig, str); | ||
676 | } | ||
677 | } | ||
678 | |||
679 | /* Log the 'user agent' to its own log if configured to do so. */ | ||
680 | if (agent_needed) { | ||
681 | const char *agent, *str; | ||
682 | retvalue = OK; | ||
683 | agent = table_get(orig->headers_in, "User-Agent"); | ||
684 | if (agent != NULL) { | ||
685 | str = pstrcat(orig->pool, "insert into ", cls->agent_table_name, "(agent,time_stamp) values ('", mysql_escape_log(agent, orig->pool), "',unix_timestamp(now()) )", NULL); | ||
686 | safe_mysql_query(orig, str); | ||
687 | } | ||
688 | } | ||
689 | |||
690 | /* Log the transfer to its own log if configured to do so. */ | ||
691 | if (transfer_needed) { | ||
692 | const char *thehost; | ||
693 | |||
694 | char *fields = "", *values = "", *query; | ||
695 | const char *formatted_item; | ||
696 | int i, j, length; | ||
697 | |||
698 | retvalue = OK; | ||
699 | |||
700 | |||
701 | /* The following is a stolen upsetting mess of pointers, I'm sorry | ||
702 | Anyone with the motiviation and/or the time should feel free | ||
703 | to make this cleaner, and while at it, clean the same mess at the RefererLog part :) */ | ||
704 | ptrptr2 = (char **) (cls->transfer_ignore_list->elts + (cls->transfer_ignore_list->nelts * cls->transfer_ignore_list->elt_size)); | ||
705 | |||
706 | /* Go through each element of the ignore list and compare it to the | ||
707 | request_uri. If we get a match, return without logging */ | ||
708 | if (r->uri) { | ||
709 | for (ptrptr = (char **) cls->transfer_ignore_list->elts; ptrptr < ptrptr2; ptrptr = (char **) ((char *) ptrptr + cls->transfer_ignore_list->elt_size)) { | ||
710 | if (strstr(r->uri, *ptrptr)) { | ||
711 | return retvalue; | ||
712 | } | ||
713 | } | ||
714 | } | ||
715 | |||
716 | |||
717 | /* Go through each element of the ignore list and compare it to the | ||
718 | remote host. If we get a match, return without logging */ | ||
719 | ptrptr2 = (char **) (cls->remhost_ignore_list->elts + (cls->remhost_ignore_list->nelts * cls->remhost_ignore_list->elt_size)); | ||
720 | thehost = get_remote_host(r->connection, r->per_dir_config, REMOTE_NAME); | ||
721 | if (thehost) { | ||
722 | for (ptrptr = (char **) cls->remhost_ignore_list->elts; ptrptr < ptrptr2; ptrptr = (char **) ((char *) ptrptr + cls->remhost_ignore_list->elt_size)) { | ||
723 | if (strstr(thehost, *ptrptr)) { | ||
724 | return retvalue; | ||
725 | } | ||
726 | } | ||
727 | } | ||
728 | |||
729 | |||
730 | if (cls->transfer_log_format[0] == '\0') { | ||
731 | /* If not specified by the user, use the default format */ | ||
732 | cls->transfer_log_format = "huSUsbTvRA"; | ||
733 | } | ||
734 | length = strlen(cls->transfer_log_format); | ||
735 | |||
736 | /* Iterate through the characters and set up the INSERT string according to | ||
737 | * what the user has configured. */ | ||
738 | for (i = 0; i < length; i++) { | ||
739 | j = 0; | ||
740 | while (log_mysql_item_keys[j].ch) { | ||
741 | if (log_mysql_item_keys[j].ch == cls->transfer_log_format[i]) { | ||
742 | /* Yes, this key is one of the configured keys */ | ||
743 | formatted_item = log_mysql_item_keys[j].func(log_mysql_item_keys[j].want_orig_default ? orig : r, ""); | ||
744 | if (!formatted_item) { | ||
745 | formatted_item = ""; | ||
746 | } else if (formatted_item[0] == '-' && formatted_item[1] == '\0' && !log_mysql_item_keys[j].string_contents) { | ||
747 | /* If apache tried to log a '-' character for a numeric field, convert that to a zero | ||
748 | * because the database expects an integer. */ | ||
749 | formatted_item = "0"; | ||
750 | } | ||
751 | fields = pstrcat(orig->pool, fields, (i > 0 ? "," : ""), log_mysql_item_keys[j].sql_field_name, NULL); | ||
752 | values = pstrcat(orig->pool, values, (i > 0 ? "," : ""), (log_mysql_item_keys[j].string_contents ? "'" : ""), mysql_escape_log(formatted_item, orig->pool), (log_mysql_item_keys[j].string_contents ? "'" : ""), NULL); | ||
753 | break; | ||
754 | } | ||
755 | j++; | ||
756 | } | ||
757 | } | ||
758 | |||
759 | /* Set up the actual INSERT statement and execute it. */ | ||
760 | query = pstrcat(orig->pool, "insert into ", cls->transfer_table_name, " (", fields, ") values (", values, ")", NULL); | ||
761 | safe_mysql_query(orig, query); | ||
762 | |||
763 | } | ||
764 | return retvalue; | ||
765 | } | ||
766 | |||
767 | |||
768 | |||
769 | module mysql_log_module = { | ||
770 | STANDARD_MODULE_STUFF, | ||
771 | NULL, /* initializer */ | ||
772 | NULL, /* create per-dir config */ | ||
773 | NULL, /* merge per-dir config */ | ||
774 | make_log_mysql_state, /* server config */ | ||
775 | NULL, /* merge server config */ | ||
776 | log_mysql_cmds, /* command table */ | ||
777 | NULL, /* handlers */ | ||
778 | NULL, /* filename translation */ | ||
779 | NULL, /* check_user_id */ | ||
780 | NULL, /* check auth */ | ||
781 | NULL, /* check access */ | ||
782 | NULL, /* type_checker */ | ||
783 | NULL, /* fixups */ | ||
784 | log_mysql_transaction, /* logger */ | ||
785 | NULL /* header parser */ | ||
786 | }; | ||