summaryrefslogtreecommitdiffstatsabout
path: root/make_combined_log.pl
diff options
context:
space:
mode:
authorChristopher Powell <chris@grubbybaby.com>2001-11-28 05:26:53 (GMT)
committer Christopher Powell <chris@grubbybaby.com>2001-11-28 05:26:53 (GMT)
commit92d85f793b1a41bbbde1811004ae2708a47a44aa (patch)
tree69ecadaf47ae83197b8c92ff3f8b7c2002b15b19 /make_combined_log.pl
Initial revision1.09
Diffstat (limited to 'make_combined_log.pl')
-rwxr-xr-xmake_combined_log.pl133
1 files changed, 133 insertions, 0 deletions
diff --git a/make_combined_log.pl b/make_combined_log.pl
new file mode 100755
index 0000000..d375f3a
--- /dev/null
+++ b/make_combined_log.pl
@@ -0,0 +1,133 @@
1#!/usr/bin/perl
2
3# $Id: make_combined_log.pl,v 1.1 2001/11/28 05:26:54 helios Exp $
4#
5# make_combined_log.pl
6#
7# Usage: make_combined_log <days> <virtual host>
8#
9# This perl script extracts the httpd access data from a MySQL database
10# and formats it properly for parsing by 3rd-party log analysis tools.
11#
12# The script is intended to be run by cron. Its command-line arguments tell
13# it how many days' worth of access records to extract, and which virtual_host
14# you are interested in (because many people log several virthosts to one MySQL
15# db.) This permits you to run it daily, weekly, every 9 days -- whatever you
16# decide.
17#
18# Note: By "days" I mean "chunks of 24 hours prior to the moment this script is
19# run." So if you run it at 4:34 p.m. on the 12th, it will go back through 4:34
20# p.m. on the 11th.
21#
22# Known issues:
23# * Because GET and POST are not discriminated in the MySQL log, we'll just
24# assume that all requests are GETs. This should have negligible effect
25# on any analysis software. This could be remedied IF you stored the full
26# HTTP request in your database instead of just the URI, but that's going to
27# cost you a LOT of space really quickly...
28#
29# * Because this is somewhat of a quick hack it doesn't do the most robust
30# error checking in the world. Run it by hand to confirm your usage before
31# putting it in crontab.
32
use strict;
use warnings;

use DBI;

# Unbuffer STDOUT so each log line is written as soon as it is printed.
$| = 1;

#
# Command line: exactly two arguments, <days> and <virtual host>.
#
if (@ARGV != 2) {
    die "Incorrect usage, please read the perl source code for correct usage.";
}

my ($days, $virthost) = @ARGV;

# <days> feeds the cutoff arithmetic below and ends up (via $start) in the
# SQL text, so insist on a plain non-negative decimal number.
if ($days !~ /\A(?:\d+(?:\.\d*)?|\.\d+)\z/) {
    die "Incorrect usage, <days> must be a non-negative number (got '$days').";
}

#
# Set up the proper variables to permit database access
#
my $serverName = "your.dbmachine.com";
my $serverPort = "3306";
my $serverUser = "someuser";
my $serverPass = "somepass";
my $serverTbl  = "acc_log_tbl";
my $serverDb   = "apache";

#
# Other constants
#
my $st_tz = "-0800";       # UTC offset printed when localtime() reports no DST
my $dt_tz = "-0700";       # UTC offset printed when localtime() reports DST
my $type  = "GET";         # request method is not stored, so GET is assumed
my $http  = "HTTP/1.1";    # protocol string printed for every request

# Cutoff: <days> chunks of 24 hours before the moment the script runs.
my $now   = time();
my $start = $now - (86400 * $days);

#
# Connect and fetch the records
#
my $dbh = DBI->connect(
    "DBI:mysql:database=$serverDb;host=$serverName;port=$serverPort",
    $serverUser,
    $serverPass
);
if (not $dbh) {
    die "Unable to connect to the database. Please check your connection variables. (Bad password? Incorrect perms?)";
}

# The virtual host comes straight from the command line, so it is passed as
# a bound placeholder instead of being pasted into the SQL text.  $start is
# a number computed above from validated input, and the table name cannot be
# a placeholder, so those two are still interpolated.
my $records = $dbh->prepare(
    "select remote_host,remote_user,request_uri,request_duration,time_stamp,"
  . "status,bytes_sent,referer,agent from $serverTbl "
  . "where virtual_host=? and time_stamp >= $start"
);
if (not $records) {
    die "No such table or the select returned no records.";
}

# Check the result of execute itself: a statement handle can be created
# successfully and still fail when it is run.  An empty result set is not an
# error; the loop below then simply prints nothing.
if (not $records->execute($virthost)) {
    die "No such table or the select returned no records.";
}

# Target format for the timestamp (what log analysis tools expect):
#   ariston.netcraft.com - - [14/Nov/2001:05:13:39 -0800] "GET / HTTP/1.0" 200 502 "-" "Mozilla/4.08 [en] (Win98; I)"
# Not acceptable (raw database-style date):
#   ariston.netcraft.com - - [2001-11-14 05:13:39 -0800] "GET / HTTP/1.1" 200 502 "-" "Mozilla/4.08 [en] (Win98; I)"

#
# Pull out the data row by row and format it
#
while (my @row = $records->fetchrow_array) {
    # Column order matches the select list; request_duration is fetched
    # but not printed.
    my ($host, $user, $uri, undef, $epoch, $status, $bytes, $referer, $agent) = @row;

    # A NULL column arrives as undef; print "-" for it, as the example
    # lines above do for missing values.
    for my $field ($host, $user, $uri, $status, $bytes, $referer, $agent) {
        $field = "-" unless defined $field;
    }

    my $stamp = clf_timestamp($epoch, $st_tz, $dt_tz);

    # NOTE(review): the field order "host user -" is kept exactly as the
    # original script printed it.  If remote_user holds the authenticated
    # user name, the conventional order would be "host - user" -- confirm
    # against the table schema before changing it.
    printf "%s %s - [%s] \"%s %s %s\" %s %s \"%s\" \"%s\"\n",
        $host, $user, $stamp, $type, $uri, $http,
        $status, $bytes, $referer, $agent;
}

#
# Done
#
$records->finish;
$dbh->disconnect;

# Format an epoch time as "DD/Mon/YYYY:HH:MM:SS <zone>" in local time.
#
# Arguments: the epoch seconds, the zone string to print when localtime()
# reports standard time, and the zone string to print when it reports DST.
# Returns the formatted string (without the surrounding brackets).
sub clf_timestamp {
    my ($epoch, $std_zone, $dst_zone) = @_;

    my @month_name = qw(Jan Feb Mar Apr May Jun Jul Aug Sep Oct Nov Dec);

    # Elements 0..5 are sec, min, hour, mday, mon (0-based), year-1900;
    # element 8 is the DST flag.
    my ($sec, $min, $hour, $mday, $mon, $year, $isdst) =
        (localtime($epoch))[0 .. 5, 8];

    # sprintf does the zero-padding for day, hour, minute and second, and
    # the month is looked up by its 0-based index.
    return sprintf "%02d/%s/%d:%02d:%02d:%02d %s",
        $mday, $month_name[$mon], $year + 1900,
        $hour, $min, $sec,
        ($isdst ? $dst_zone : $std_zone);
}
133