summaryrefslogtreecommitdiffstats
path: root/contrib/make_combined_log.pl
diff options
context:
space:
mode:
authorGravatar Edward Rudd 2004-02-12 03:44:12 +0000
committerGravatar Edward Rudd 2004-02-12 03:44:12 +0000
commitbdb5c0e5ea921d274b797f3b9b311b713eb0b792 (patch)
treebb70807189c0e14d5f5c87b57d345bca6ab98115 /contrib/make_combined_log.pl
parent6caf2429c3ff47657c8ce2579be1224063505f80 (diff)
moved create_tables.sql and make_combined_log.pl into contrib directory.
Added contrib directory to build system. Updated new maintainer information in AUTHORS,LICENSE, and manual.xml
Diffstat (limited to 'contrib/make_combined_log.pl')
-rw-r--r--contrib/make_combined_log.pl131
1 files changed, 131 insertions, 0 deletions
diff --git a/contrib/make_combined_log.pl b/contrib/make_combined_log.pl
new file mode 100644
index 0000000..00786de
--- /dev/null
+++ b/contrib/make_combined_log.pl
@@ -0,0 +1,131 @@
1#!/usr/bin/perl
2
3# $Id: make_combined_log.pl,v 1.1 2004/02/12 03:44:12 urkle Exp $
4#
5# make_combined_log.pl
6#
7# Usage: make_combined_log <days> <virtual host>
8#
9# This perl script extracts the httpd access data from a MySQL database
10# and formats it properly for parsing by 3rd-party log analysis tools.
11#
12# The script is intended to be run out by cron. Its commandline arguments tell
13# it how many days' worth of access records to extract, and which virtual_host
14# you are interested in (because many people log several virthosts to one MySQL
15# db.) This permits you to run it daily, weekly, every 9 days -- whatever you
16# decide.
17#
18# Note: By "days" I mean "chunks of 24 hours prior to the moment this script is
19# run." So if you run it at 4:34 p.m. on the 12th, it will go back through 4:34
20# p.m. on the 11th.
21#
22# Known issues:
23# * Because GET and POST are not discriminated in the MySQL log, we'll just
24# assume that all requests are GETs. This should have negligible effect
25# on any analysis software. This could be remedied IF you stored the full
26# HTTP request in your database instead of just the URI, but that's going to
27# cost you a LOT of space really quickly...
28#
29# * Because this is somewhat of a quick hack it doesn't do the most robust
30# error checking in the world. Run it by hand to confirm your usage before
31# putting it in crontab.
32
33$| = 1;
34
35use DBI;
36
37# Remember, $#ARGV is parameters minus one...
38if ($#ARGV != 1) {
39 die "Incorrect usage, please read the perl source code for correct usage."
40}
41
42$days = $ARGV[0];
43$virthost = $ARGV[1];
44
45#
46# Set up the proper variables to permit database access
47#
48$serverName = "your.dbhost.com";
49$serverPort = "3306";
50$serverUser = "someuser";
51$serverPass = "somepass";
52$serverTbl = "acc_log_tbl";
53$serverDb = "apache";
54
55#
56# Other constants
57#
58$st_tz = "-0800";
59$dt_tz = "-0700";
60
61$now = time();
62$start = $now - (86400 * $days);
63
64#
65# Connect and fetch the records
66#
67$dbh = DBI->connect("DBI:mysql:database=$serverDb;host=$serverName;port=$serverPort",$serverUser,$serverPass);
68if (not $dbh) {
69 die "Unable to connect to the database. Please check your connection variables. (Bad password? Incorrect perms?)";
70}
71
72$records = $dbh->prepare("select remote_host,remote_user,request_uri,time_stamp,status,bytes_sent,referer,agent,request_method,request_protocol from `$serverTbl` where virtual_host='$virthost' and time_stamp >= $start order by time_stamp");
73$records->execute;
74if (not $records) {
75 die "No such table or the select returned no records."
76}
77
78#Right
79#ariston.netcraft.com - - [14/Nov/2001:05:13:39 -0800] "GET / HTTP/1.0" 200 502 "-" "Mozilla/4.08 [en] (Win98; I)"
80#ariston.netcraft.com - - [14/Nov/2001:05:13:39 -0800] "GET / HTTP/1.0" 200 502 "-" "Mozilla/4.08 [en] (Win98; I)"
81
82#Bad
83#ariston.netcraft.com - - [2001-11-14 05:13:39 -0800] "GET / HTTP/1.1" 200 502 "-" "Mozilla/4.08 [en] (Win98; I)"
84#ariston.netcraft.com - - [2001-11-14 05:13:39 -0800] "GET / HTTP/1.1" 200 502 "-" "Mozilla/4.08 [en] (Win98; I)"
85
86
87#
88# Pull out the data row by row and format it
89#
90while (@data = $records->fetchrow_array) {
91 ($sec,$min,$hour,$mday,$mon,$year,$wday,$yday,$isdst) = localtime($data[3]);
92 $year=$year+1900;
93
94 # Create format for leading-zero formatting
95 if ($mday < 10) { $mday = "0$mday"; }
96 if ($mon < 10) { $mon = "0$mon"; }
97 if ($hour < 10) { $hour = "0$hour"; }
98 if ($min < 10) { $min = "0$min"; }
99 if ($sec < 10) { $sec = "0$sec"; }
100
101 # Convert numeric month to string month
102 for ($mon) {
103 if (/00/) { $mon = "Jan";}
104 elsif (/01/) { $mon = "Feb";}
105 elsif (/02/) { $mon = "Mar";}
106 elsif (/03/) { $mon = "Apr";}
107 elsif (/04/) { $mon = "May";}
108 elsif (/05/) { $mon = "Jun";}
109 elsif (/06/) { $mon = "Jul";}
110 elsif (/07/) { $mon = "Aug";}
111 elsif (/08/) { $mon = "Sep";}
112 elsif (/09/) { $mon = "Oct";}
113 elsif (/10/) { $mon = "Nov";}
114 elsif (/11/) { $mon = "Dec";}
115 }
116
117 # Create the output
118 print "$data[0] $data[1] - [$mday/$mon/$year:$hour:$min:$sec ";
119 if ($isdst) {
120 print "$dt_tz\] ";
121 } else {
122 print "$st_tz\] ";
123 }
124 print "\"$data[8] $data[2] $data[9]\" $data[4] $data[5] \"$data[6]\" \"$data[7]\"\n";
125}
126
127#
128# Done
129#
130$records->finish;
131