diff options
Diffstat (limited to 'make_combined_log.pl')
-rwxr-xr-x | make_combined_log.pl | 133 |
1 files changed, 133 insertions, 0 deletions
diff --git a/make_combined_log.pl b/make_combined_log.pl new file mode 100755 index 0000000..d375f3a --- /dev/null +++ b/make_combined_log.pl | |||
@@ -0,0 +1,133 @@ | |||
1 | #!/usr/bin/perl | ||
2 | |||
3 | # $Id: make_combined_log.pl,v 1.1 2001/11/28 05:26:54 helios Exp $ | ||
4 | # | ||
5 | # make_combined_log.pl | ||
6 | # | ||
7 | # Usage: make_combined_log <days> <virtual host> | ||
8 | # | ||
9 | # This perl script extracts the httpd access data from a MySQL database | ||
10 | # and formats it properly for parsing by 3rd-party log analysis tools. | ||
11 | # | ||
12 | # The script is intended to be run from cron. Its commandline arguments tell | ||
13 | # it how many days' worth of access records to extract, and which virtual_host | ||
14 | # you are interested in (because many people log several virthosts to one MySQL | ||
15 | # db.) This permits you to run it daily, weekly, every 9 days -- whatever you | ||
16 | # decide. | ||
17 | # | ||
18 | # Note: By "days" I mean "chunks of 24 hours prior to the moment this script is | ||
19 | # run." So if you run it at 4:34 p.m. on the 12th, it will go back through 4:34 | ||
20 | # p.m. on the 11th. | ||
21 | # | ||
22 | # Known issues: | ||
23 | # * Because GET and POST are not discriminated in the MySQL log, we'll just | ||
24 | # assume that all requests are GETs. This should have negligible effect | ||
25 | # on any analysis software. This could be remedied IF you stored the full | ||
26 | # HTTP request in your database instead of just the URI, but that's going to | ||
27 | # cost you a LOT of space really quickly... | ||
28 | # | ||
29 | # * Because this is somewhat of a quick hack it doesn't do the most robust | ||
30 | # error checking in the world. Run it by hand to confirm your usage before | ||
31 | # putting it in crontab. | ||
32 | |||
# Turn on autoflush for the currently selected output handle (STDOUT here),
# so each log line is written as soon as it is printed.
$| = 1;

use DBI;

# The script takes exactly two arguments: <days> <virtual host>.
if (@ARGV != 2) {
    die "Incorrect usage, please read the perl source code for correct usage.";
}

($days, $virthost) = @ARGV;

# <days> is multiplied by 86400 to compute the start of the extraction
# window.  A non-numeric value would silently evaluate to 0 and select
# nothing, so reject it up front.  Fractional days (e.g. 0.5) are allowed.
if ($days !~ /\A(?:\d+\.?\d*|\.\d+)\z/) {
    die "Incorrect usage: <days> must be a non-negative number, got '$days'.";
}
44 | |||
45 | # | ||
46 | # Set up the proper variables to permit database access | ||
47 | # | ||
# Where the access-log database lives and how to log in to it.
($serverName, $serverPort) = ('your.dbmachine.com', '3306');
($serverUser, $serverPass) = ('someuser', 'somepass');
($serverDb,   $serverTbl)  = ('apache', 'acc_log_tbl');

#
# Other constants
#
# UTC offsets written into each timestamp: $st_tz when localtime() reports
# standard time, $dt_tz when it reports daylight saving time.
($st_tz, $dt_tz) = ('-0800', '-0700');

# Request method and protocol version written into every output line.
($type, $http) = ('GET', 'HTTP/1.1');

# Extraction window: from $days * 24 hours before this moment up to now.
$now   = time();
$start = $now - $days * 86400;
65 | |||
66 | # | ||
67 | # Connect and fetch the records | ||
68 | # | ||
$dbh = DBI->connect(
    "DBI:mysql:database=$serverDb;host=$serverName;port=$serverPort",
    $serverUser,
    $serverPass,
);
if (not $dbh) {
    die "Unable to connect to the database. Please check your connection variables. (Bad password? Incorrect perms?) [$DBI::errstr]";
}

# The virtual host comes straight from the command line, so it is bound as a
# placeholder instead of being interpolated into the SQL text.  The table
# name is an identifier and cannot be bound; it comes from the configuration
# section of this script.
$records = $dbh->prepare(
    "select remote_host,remote_user,request_uri,request_duration,time_stamp,status,bytes_sent,referer,agent "
  . "from $serverTbl where virtual_host = ? and time_stamp >= ?"
);
if (not $records) {
    die "Unable to prepare the select statement. [" . $dbh->errstr . "]";
}

# execute() returns a false value on failure; the statement handle itself is
# always true once prepare() succeeded, so it is the return value of
# execute() that has to be checked.
if (not $records->execute($virthost, $start)) {
    die "No such table or the select failed. [" . $records->errstr . "]";
}
79 | |||
80 | #Right | ||
81 | #ariston.netcraft.com - - [14/Nov/2001:05:13:39 -0800] "GET / HTTP/1.0" 200 502 "-" "Mozilla/4.08 [en] (Win98; I)" | ||
82 | #ariston.netcraft.com - - [14/Nov/2001:05:13:39 -0800] "GET / HTTP/1.0" 200 502 "-" "Mozilla/4.08 [en] (Win98; I)" | ||
83 | |||
84 | #Bad | ||
85 | #ariston.netcraft.com - - [2001-11-14 05:13:39 -0800] "GET / HTTP/1.1" 200 502 "-" "Mozilla/4.08 [en] (Win98; I)" | ||
86 | #ariston.netcraft.com - - [2001-11-14 05:13:39 -0800] "GET / HTTP/1.1" 200 502 "-" "Mozilla/4.08 [en] (Win98; I)" | ||
87 | |||
88 | |||
89 | # | ||
90 | # Pull out the data row by row and format it | ||
91 | # | ||
# Return $value, or "-" when it is undefined or empty, so that a missing
# column never collapses a field in the space-separated output line.
sub _or_dash {
    my ($value) = @_;
    return (defined $value && length $value) ? $value : '-';
}

# Format an epoch time as "DD/Mon/YYYY:HH:MM:SS <offset>" in local time.
# The offset is $dt_tz when localtime() reports daylight saving time and
# $st_tz otherwise.
sub _clf_timestamp {
    my ($epoch) = @_;

    # localtime() returns the month as 0..11 and the year as years since 1900.
    my @month_names = qw(Jan Feb Mar Apr May Jun Jul Aug Sep Oct Nov Dec);
    my ($sec, $min, $hour, $mday, $mon, $year, $isdst)
        = (localtime($epoch))[0 .. 5, 8];

    return sprintf(
        '%02d/%s/%04d:%02d:%02d:%02d %s',
        $mday, $month_names[$mon], $year + 1900,
        $hour, $min, $sec,
        $isdst ? $dt_tz : $st_tz,
    );
}

# One output line per fetched row.  Columns arrive in the order of the
# select list: remote_host, remote_user, request_uri, request_duration,
# time_stamp, status, bytes_sent, referer, agent.  request_duration is
# fetched but has no place in the output format.
while (my @row = $records->fetchrow_array) {
    my ($host, $user, $uri, undef, $stamp, $status, $bytes, $referer, $agent) = @row;

    # Combined log format is: host ident authuser [time] "request" status
    # bytes "referer" "agent".  The database has no ident value, so that
    # field is always "-" and remote_user goes in the authuser position.
    print join(' ',
        _or_dash($host),
        '-',
        _or_dash($user),
        '[' . _clf_timestamp($stamp) . ']',
        "\"$type $uri $http\"",
        _or_dash($status),
        _or_dash($bytes),
        '"' . _or_dash($referer) . '"',
        '"' . _or_dash($agent) . '"',
    ), "\n";
}

#
# Done
#
$records->finish;
$dbh->disconnect;
133 | |||