diff options
Diffstat (limited to 'contrib')
-rw-r--r-- | contrib/Makefile.in | 2 | ||||
-rw-r--r-- | contrib/create_tables.sql | 51 | ||||
-rw-r--r-- | contrib/make_combined_log.pl | 131 |
3 files changed, 183 insertions, 1 deletions
diff --git a/contrib/Makefile.in b/contrib/Makefile.in index c40a711..7a020d9 100644 --- a/contrib/Makefile.in +++ b/contrib/Makefile.in | |||
@@ -3,7 +3,7 @@ | |||
3 | # Modify these top variables. | 3 | # Modify these top variables. |
4 | SUBDIRS = | 4 | SUBDIRS = |
5 | 5 | ||
6 | EXTRA_DIST = README mysql_import_combined_log.pl | 6 | EXTRA_DIST = README create_tables.sql make_combined_log.pl |
7 | 7 | ||
8 | #Don't modify anything below here | 8 | #Don't modify anything below here |
9 | 9 | ||
diff --git a/contrib/create_tables.sql b/contrib/create_tables.sql new file mode 100644 index 0000000..0fe0f4b --- /dev/null +++ b/contrib/create_tables.sql | |||
@@ -0,0 +1,51 @@ | |||
1 | create table access_log ( | ||
2 | id char(19) , | ||
3 | agent varchar(255) , | ||
4 | bytes_sent int unsigned , | ||
5 | child_pid smallint unsigned, | ||
6 | cookie varchar(255), | ||
7 | machine_id varchar(25), | ||
8 | request_file varchar(255), | ||
9 | referer varchar(255) , | ||
10 | remote_host varchar(50) , | ||
11 | remote_logname varchar(50) , | ||
12 | remote_user varchar(50) , | ||
13 | request_duration smallint unsigned , | ||
14 | request_line varchar(255), | ||
15 | request_method varchar(10) , | ||
16 | request_protocol varchar(10) , | ||
17 | request_time char(28), | ||
18 | request_uri varchar(255), | ||
19 | request_args varchar(255), | ||
20 | server_port smallint unsigned, | ||
21 | ssl_cipher varchar(25), | ||
22 | ssl_keysize smallint unsigned, | ||
23 | ssl_maxkeysize smallint unsigned, | ||
24 | status smallint unsigned , | ||
25 | time_stamp int unsigned , | ||
26 | virtual_host varchar(255) | ||
27 | ); | ||
28 | |||
29 | create table notes ( | ||
30 | id char(19), | ||
31 | item varchar(80), | ||
32 | val varchar(80) | ||
33 | ); | ||
34 | |||
35 | create table headers_in ( | ||
36 | id char(19), | ||
37 | item varchar(80), | ||
38 | val varchar(80) | ||
39 | ); | ||
40 | |||
41 | create table headers_out ( | ||
42 | id char(19), | ||
43 | item varchar(80), | ||
44 | val varchar(80) | ||
45 | ); | ||
46 | |||
47 | create table cookies ( | ||
48 | id char(19), | ||
49 | item varchar(80), | ||
50 | val varchar(80) | ||
51 | ); | ||
diff --git a/contrib/make_combined_log.pl b/contrib/make_combined_log.pl new file mode 100644 index 0000000..00786de --- /dev/null +++ b/contrib/make_combined_log.pl | |||
@@ -0,0 +1,131 @@ | |||
1 | #!/usr/bin/perl | ||
2 | |||
3 | # $Id: make_combined_log.pl,v 1.1 2004/02/12 03:44:12 urkle Exp $ | ||
4 | # | ||
5 | # make_combined_log.pl | ||
6 | # | ||
7 | # Usage: make_combined_log <days> <virtual host> | ||
8 | # | ||
9 | # This perl script extracts the httpd access data from a MySQL database | ||
10 | # and formats it properly for parsing by 3rd-party log analysis tools. | ||
11 | # | ||
12 | # The script is intended to be run out by cron. Its commandline arguments tell | ||
13 | # it how many days' worth of access records to extract, and which virtual_host | ||
14 | # you are interested in (because many people log several virthosts to one MySQL | ||
15 | # db.) This permits you to run it daily, weekly, every 9 days -- whatever you | ||
16 | # decide. | ||
17 | # | ||
18 | # Note: By "days" I mean "chunks of 24 hours prior to the moment this script is | ||
19 | # run." So if you run it at 4:34 p.m. on the 12th, it will go back through 4:34 | ||
20 | # p.m. on the 11th. | ||
21 | # | ||
22 | # Known issues: | ||
23 | # * Because GET and POST are not discriminated in the MySQL log, we'll just | ||
24 | # assume that all requests are GETs. This should have negligible effect | ||
25 | # on any analysis software. This could be remedied IF you stored the full | ||
26 | # HTTP request in your database instead of just the URI, but that's going to | ||
27 | # cost you a LOT of space really quickly... | ||
28 | # | ||
29 | # * Because this is somewhat of a quick hack it doesn't do the most robust | ||
30 | # error checking in the world. Run it by hand to confirm your usage before | ||
31 | # putting it in crontab. | ||
32 | |||
33 | $| = 1; | ||
34 | |||
35 | use DBI; | ||
36 | |||
37 | # Remember, $#ARGV is parameters minus one... | ||
38 | if ($#ARGV != 1) { | ||
39 | die "Incorrect usage, please read the perl source code for correct usage." | ||
40 | } | ||
41 | |||
42 | $days = $ARGV[0]; | ||
43 | $virthost = $ARGV[1]; | ||
44 | |||
45 | # | ||
46 | # Set up the proper variables to permit database access | ||
47 | # | ||
48 | $serverName = "your.dbhost.com"; | ||
49 | $serverPort = "3306"; | ||
50 | $serverUser = "someuser"; | ||
51 | $serverPass = "somepass"; | ||
52 | $serverTbl = "acc_log_tbl"; | ||
53 | $serverDb = "apache"; | ||
54 | |||
55 | # | ||
56 | # Other constants | ||
57 | # | ||
58 | $st_tz = "-0800"; | ||
59 | $dt_tz = "-0700"; | ||
60 | |||
61 | $now = time(); | ||
62 | $start = $now - (86400 * $days); | ||
63 | |||
64 | # | ||
65 | # Connect and fetch the records | ||
66 | # | ||
67 | $dbh = DBI->connect("DBI:mysql:database=$serverDb;host=$serverName;port=$serverPort",$serverUser,$serverPass); | ||
68 | if (not $dbh) { | ||
69 | die "Unable to connect to the database. Please check your connection variables. (Bad password? Incorrect perms?)"; | ||
70 | } | ||
71 | |||
72 | $records = $dbh->prepare("select remote_host,remote_user,request_uri,time_stamp,status,bytes_sent,referer,agent,request_method,request_protocol from `$serverTbl` where virtual_host='$virthost' and time_stamp >= $start order by time_stamp"); | ||
73 | $records->execute; | ||
74 | if (not $records) { | ||
75 | die "No such table or the select returned no records." | ||
76 | } | ||
77 | |||
78 | #Right | ||
79 | #ariston.netcraft.com - - [14/Nov/2001:05:13:39 -0800] "GET / HTTP/1.0" 200 502 "-" "Mozilla/4.08 [en] (Win98; I)" | ||
80 | #ariston.netcraft.com - - [14/Nov/2001:05:13:39 -0800] "GET / HTTP/1.0" 200 502 "-" "Mozilla/4.08 [en] (Win98; I)" | ||
81 | |||
82 | #Bad | ||
83 | #ariston.netcraft.com - - [2001-11-14 05:13:39 -0800] "GET / HTTP/1.1" 200 502 "-" "Mozilla/4.08 [en] (Win98; I)" | ||
84 | #ariston.netcraft.com - - [2001-11-14 05:13:39 -0800] "GET / HTTP/1.1" 200 502 "-" "Mozilla/4.08 [en] (Win98; I)" | ||
85 | |||
86 | |||
87 | # | ||
88 | # Pull out the data row by row and format it | ||
89 | # | ||
90 | while (@data = $records->fetchrow_array) { | ||
91 | ($sec,$min,$hour,$mday,$mon,$year,$wday,$yday,$isdst) = localtime($data[3]); | ||
92 | $year=$year+1900; | ||
93 | |||
94 | # Create format for leading-zero formatting | ||
95 | if ($mday < 10) { $mday = "0$mday"; } | ||
96 | if ($mon < 10) { $mon = "0$mon"; } | ||
97 | if ($hour < 10) { $hour = "0$hour"; } | ||
98 | if ($min < 10) { $min = "0$min"; } | ||
99 | if ($sec < 10) { $sec = "0$sec"; } | ||
100 | |||
101 | # Convert numeric month to string month | ||
102 | for ($mon) { | ||
103 | if (/00/) { $mon = "Jan";} | ||
104 | elsif (/01/) { $mon = "Feb";} | ||
105 | elsif (/02/) { $mon = "Mar";} | ||
106 | elsif (/03/) { $mon = "Apr";} | ||
107 | elsif (/04/) { $mon = "May";} | ||
108 | elsif (/05/) { $mon = "Jun";} | ||
109 | elsif (/06/) { $mon = "Jul";} | ||
110 | elsif (/07/) { $mon = "Aug";} | ||
111 | elsif (/08/) { $mon = "Sep";} | ||
112 | elsif (/09/) { $mon = "Oct";} | ||
113 | elsif (/10/) { $mon = "Nov";} | ||
114 | elsif (/11/) { $mon = "Dec";} | ||
115 | } | ||
116 | |||
117 | # Create the output | ||
118 | print "$data[0] $data[1] - [$mday/$mon/$year:$hour:$min:$sec "; | ||
119 | if ($isdst) { | ||
120 | print "$dt_tz\] "; | ||
121 | } else { | ||
122 | print "$st_tz\] "; | ||
123 | } | ||
124 | print "\"$data[8] $data[2] $data[9]\" $data[4] $data[5] \"$data[6]\" \"$data[7]\"\n"; | ||
125 | } | ||
126 | |||
127 | # | ||
128 | # Done | ||
129 | # | ||
130 | $records->finish; | ||
131 | |||