summaryrefslogtreecommitdiffstats
path: root/contrib
diff options
context:
space:
mode:
Diffstat (limited to 'contrib')
-rw-r--r--contrib/Makefile.in2
-rw-r--r--contrib/create_tables.sql51
-rw-r--r--contrib/make_combined_log.pl131
3 files changed, 183 insertions, 1 deletions
diff --git a/contrib/Makefile.in b/contrib/Makefile.in
index c40a711..7a020d9 100644
--- a/contrib/Makefile.in
+++ b/contrib/Makefile.in
@@ -3,7 +3,7 @@
3# Modify these top variables. 3# Modify these top variables.
4SUBDIRS = 4SUBDIRS =
5 5
6EXTRA_DIST = README mysql_import_combined_log.pl 6EXTRA_DIST = README create_tables.sql make_combined_log.pl
7 7
8#Don't modify anything below here 8#Don't modify anything below here
9 9
diff --git a/contrib/create_tables.sql b/contrib/create_tables.sql
new file mode 100644
index 0000000..0fe0f4b
--- /dev/null
+++ b/contrib/create_tables.sql
@@ -0,0 +1,51 @@
1create table access_log (
2 id char(19) ,
3 agent varchar(255) ,
4 bytes_sent int unsigned ,
5 child_pid smallint unsigned,
6 cookie varchar(255),
7 machine_id varchar(25),
8 request_file varchar(255),
9 referer varchar(255) ,
10 remote_host varchar(50) ,
11 remote_logname varchar(50) ,
12 remote_user varchar(50) ,
13 request_duration smallint unsigned ,
14 request_line varchar(255),
15 request_method varchar(10) ,
16 request_protocol varchar(10) ,
17 request_time char(28),
18 request_uri varchar(255),
19 request_args varchar(255),
20 server_port smallint unsigned,
21 ssl_cipher varchar(25),
22 ssl_keysize smallint unsigned,
23 ssl_maxkeysize smallint unsigned,
24 status smallint unsigned ,
25 time_stamp int unsigned ,
26 virtual_host varchar(255)
27);
28
29create table notes (
30 id char(19),
31 item varchar(80),
32 val varchar(80)
33);
34
35create table headers_in (
36 id char(19),
37 item varchar(80),
38 val varchar(80)
39);
40
41create table headers_out (
42 id char(19),
43 item varchar(80),
44 val varchar(80)
45);
46
47create table cookies (
48 id char(19),
49 item varchar(80),
50 val varchar(80)
51);
diff --git a/contrib/make_combined_log.pl b/contrib/make_combined_log.pl
new file mode 100644
index 0000000..00786de
--- /dev/null
+++ b/contrib/make_combined_log.pl
@@ -0,0 +1,131 @@
1#!/usr/bin/perl
2
3# $Id: make_combined_log.pl,v 1.1 2004/02/12 03:44:12 urkle Exp $
4#
5# make_combined_log.pl
6#
7# Usage: make_combined_log <days> <virtual host>
8#
9# This perl script extracts the httpd access data from a MySQL database
10# and formats it properly for parsing by 3rd-party log analysis tools.
11#
12# The script is intended to be run out by cron. Its commandline arguments tell
13# it how many days' worth of access records to extract, and which virtual_host
14# you are interested in (because many people log several virthosts to one MySQL
15# db.) This permits you to run it daily, weekly, every 9 days -- whatever you
16# decide.
17#
18# Note: By "days" I mean "chunks of 24 hours prior to the moment this script is
19# run." So if you run it at 4:34 p.m. on the 12th, it will go back through 4:34
20# p.m. on the 11th.
21#
22# Known issues:
23# * Because GET and POST are not discriminated in the MySQL log, we'll just
24# assume that all requests are GETs. This should have negligible effect
25# on any analysis software. This could be remedied IF you stored the full
26# HTTP request in your database instead of just the URI, but that's going to
27# cost you a LOT of space really quickly...
28#
29# * Because this is somewhat of a quick hack it doesn't do the most robust
30# error checking in the world. Run it by hand to confirm your usage before
31# putting it in crontab.
32
33$| = 1;
34
35use DBI;
36
37# Remember, $#ARGV is parameters minus one...
38if ($#ARGV != 1) {
39 die "Incorrect usage, please read the perl source code for correct usage."
40}
41
42$days = $ARGV[0];
43$virthost = $ARGV[1];
44
45#
46# Set up the proper variables to permit database access
47#
48$serverName = "your.dbhost.com";
49$serverPort = "3306";
50$serverUser = "someuser";
51$serverPass = "somepass";
52$serverTbl = "acc_log_tbl";
53$serverDb = "apache";
54
55#
56# Other constants
57#
58$st_tz = "-0800";
59$dt_tz = "-0700";
60
61$now = time();
62$start = $now - (86400 * $days);
63
64#
65# Connect and fetch the records
66#
67$dbh = DBI->connect("DBI:mysql:database=$serverDb;host=$serverName;port=$serverPort",$serverUser,$serverPass);
68if (not $dbh) {
69 die "Unable to connect to the database. Please check your connection variables. (Bad password? Incorrect perms?)";
70}
71
72$records = $dbh->prepare("select remote_host,remote_user,request_uri,time_stamp,status,bytes_sent,referer,agent,request_method,request_protocol from `$serverTbl` where virtual_host='$virthost' and time_stamp >= $start order by time_stamp");
73$records->execute;
74if (not $records) {
75 die "No such table or the select returned no records."
76}
77
78#Right
79#ariston.netcraft.com - - [14/Nov/2001:05:13:39 -0800] "GET / HTTP/1.0" 200 502 "-" "Mozilla/4.08 [en] (Win98; I)"
80#ariston.netcraft.com - - [14/Nov/2001:05:13:39 -0800] "GET / HTTP/1.0" 200 502 "-" "Mozilla/4.08 [en] (Win98; I)"
81
82#Bad
83#ariston.netcraft.com - - [2001-11-14 05:13:39 -0800] "GET / HTTP/1.1" 200 502 "-" "Mozilla/4.08 [en] (Win98; I)"
84#ariston.netcraft.com - - [2001-11-14 05:13:39 -0800] "GET / HTTP/1.1" 200 502 "-" "Mozilla/4.08 [en] (Win98; I)"
85
86
87#
88# Pull out the data row by row and format it
89#
90while (@data = $records->fetchrow_array) {
91 ($sec,$min,$hour,$mday,$mon,$year,$wday,$yday,$isdst) = localtime($data[3]);
92 $year=$year+1900;
93
94 # Create format for leading-zero formatting
95 if ($mday < 10) { $mday = "0$mday"; }
96 if ($mon < 10) { $mon = "0$mon"; }
97 if ($hour < 10) { $hour = "0$hour"; }
98 if ($min < 10) { $min = "0$min"; }
99 if ($sec < 10) { $sec = "0$sec"; }
100
101 # Convert numeric month to string month
102 for ($mon) {
103 if (/00/) { $mon = "Jan";}
104 elsif (/01/) { $mon = "Feb";}
105 elsif (/02/) { $mon = "Mar";}
106 elsif (/03/) { $mon = "Apr";}
107 elsif (/04/) { $mon = "May";}
108 elsif (/05/) { $mon = "Jun";}
109 elsif (/06/) { $mon = "Jul";}
110 elsif (/07/) { $mon = "Aug";}
111 elsif (/08/) { $mon = "Sep";}
112 elsif (/09/) { $mon = "Oct";}
113 elsif (/10/) { $mon = "Nov";}
114 elsif (/11/) { $mon = "Dec";}
115 }
116
117 # Create the output
118 print "$data[0] $data[1] - [$mday/$mon/$year:$hour:$min:$sec ";
119 if ($isdst) {
120 print "$dt_tz\] ";
121 } else {
122 print "$st_tz\] ";
123 }
124 print "\"$data[8] $data[2] $data[9]\" $data[4] $data[5] \"$data[6]\" \"$data[7]\"\n";
125}
126
127#
128# Done
129#
130$records->finish;
131