diff options
| author | 2004-02-12 03:44:12 +0000 | |
|---|---|---|
| committer | 2004-02-12 03:44:12 +0000 | |
| commit | bdb5c0e5ea921d274b797f3b9b311b713eb0b792 (patch) | |
| tree | bb70807189c0e14d5f5c87b57d345bca6ab98115 /contrib | |
| parent | 6caf2429c3ff47657c8ce2579be1224063505f80 (diff) | |
moved create_tables.sql and make_combined_log.pl into contrib directory.
Added contrib directory to build system.
Updated new maintainer information in AUTHORS,LICENSE, and manual.xml
Diffstat (limited to 'contrib')
| -rw-r--r-- | contrib/Makefile.in | 2 | ||||
| -rw-r--r-- | contrib/create_tables.sql | 51 | ||||
| -rw-r--r-- | contrib/make_combined_log.pl | 131 |
3 files changed, 183 insertions, 1 deletions
diff --git a/contrib/Makefile.in b/contrib/Makefile.in index c40a711..7a020d9 100644 --- a/contrib/Makefile.in +++ b/contrib/Makefile.in | |||
| @@ -3,7 +3,7 @@ | |||
| 3 | # Modify these top variables. | 3 | # Modify these top variables. |
| 4 | SUBDIRS = | 4 | SUBDIRS = |
| 5 | 5 | ||
| 6 | EXTRA_DIST = README mysql_import_combined_log.pl | 6 | EXTRA_DIST = README create_tables.sql make_combined_log.pl |
| 7 | 7 | ||
| 8 | #Don't modify anything below here | 8 | #Don't modify anything below here |
| 9 | 9 | ||
diff --git a/contrib/create_tables.sql b/contrib/create_tables.sql new file mode 100644 index 0000000..0fe0f4b --- /dev/null +++ b/contrib/create_tables.sql | |||
| @@ -0,0 +1,51 @@ | |||
| 1 | create table access_log ( | ||
| 2 | id char(19) , | ||
| 3 | agent varchar(255) , | ||
| 4 | bytes_sent int unsigned , | ||
| 5 | child_pid smallint unsigned, | ||
| 6 | cookie varchar(255), | ||
| 7 | machine_id varchar(25), | ||
| 8 | request_file varchar(255), | ||
| 9 | referer varchar(255) , | ||
| 10 | remote_host varchar(50) , | ||
| 11 | remote_logname varchar(50) , | ||
| 12 | remote_user varchar(50) , | ||
| 13 | request_duration smallint unsigned , | ||
| 14 | request_line varchar(255), | ||
| 15 | request_method varchar(10) , | ||
| 16 | request_protocol varchar(10) , | ||
| 17 | request_time char(28), | ||
| 18 | request_uri varchar(255), | ||
| 19 | request_args varchar(255), | ||
| 20 | server_port smallint unsigned, | ||
| 21 | ssl_cipher varchar(25), | ||
| 22 | ssl_keysize smallint unsigned, | ||
| 23 | ssl_maxkeysize smallint unsigned, | ||
| 24 | status smallint unsigned , | ||
| 25 | time_stamp int unsigned , | ||
| 26 | virtual_host varchar(255) | ||
| 27 | ); | ||
| 28 | |||
| 29 | create table notes ( | ||
| 30 | id char(19), | ||
| 31 | item varchar(80), | ||
| 32 | val varchar(80) | ||
| 33 | ); | ||
| 34 | |||
| 35 | create table headers_in ( | ||
| 36 | id char(19), | ||
| 37 | item varchar(80), | ||
| 38 | val varchar(80) | ||
| 39 | ); | ||
| 40 | |||
| 41 | create table headers_out ( | ||
| 42 | id char(19), | ||
| 43 | item varchar(80), | ||
| 44 | val varchar(80) | ||
| 45 | ); | ||
| 46 | |||
| 47 | create table cookies ( | ||
| 48 | id char(19), | ||
| 49 | item varchar(80), | ||
| 50 | val varchar(80) | ||
| 51 | ); | ||
diff --git a/contrib/make_combined_log.pl b/contrib/make_combined_log.pl new file mode 100644 index 0000000..00786de --- /dev/null +++ b/contrib/make_combined_log.pl | |||
| @@ -0,0 +1,131 @@ | |||
| 1 | #!/usr/bin/perl | ||
| 2 | |||
| 3 | # $Id: make_combined_log.pl,v 1.1 2004/02/12 03:44:12 urkle Exp $ | ||
| 4 | # | ||
| 5 | # make_combined_log.pl | ||
| 6 | # | ||
| 7 | # Usage: make_combined_log <days> <virtual host> | ||
| 8 | # | ||
| 9 | # This perl script extracts the httpd access data from a MySQL database | ||
| 10 | # and formats it properly for parsing by 3rd-party log analysis tools. | ||
| 11 | # | ||
| 12 | # The script is intended to be run out by cron. Its commandline arguments tell | ||
| 13 | # it how many days' worth of access records to extract, and which virtual_host | ||
| 14 | # you are interested in (because many people log several virthosts to one MySQL | ||
| 15 | # db.) This permits you to run it daily, weekly, every 9 days -- whatever you | ||
| 16 | # decide. | ||
| 17 | # | ||
| 18 | # Note: By "days" I mean "chunks of 24 hours prior to the moment this script is | ||
| 19 | # run." So if you run it at 4:34 p.m. on the 12th, it will go back through 4:34 | ||
| 20 | # p.m. on the 11th. | ||
| 21 | # | ||
| 22 | # Known issues: | ||
| 23 | # * Because GET and POST are not discriminated in the MySQL log, we'll just | ||
| 24 | # assume that all requests are GETs. This should have negligible effect | ||
| 25 | # on any analysis software. This could be remedied IF you stored the full | ||
| 26 | # HTTP request in your database instead of just the URI, but that's going to | ||
| 27 | # cost you a LOT of space really quickly... | ||
| 28 | # | ||
| 29 | # * Because this is somewhat of a quick hack it doesn't do the most robust | ||
| 30 | # error checking in the world. Run it by hand to confirm your usage before | ||
| 31 | # putting it in crontab. | ||
| 32 | |||
| 33 | $| = 1; | ||
| 34 | |||
| 35 | use DBI; | ||
| 36 | |||
| 37 | # Remember, $#ARGV is parameters minus one... | ||
| 38 | if ($#ARGV != 1) { | ||
| 39 | die "Incorrect usage, please read the perl source code for correct usage." | ||
| 40 | } | ||
| 41 | |||
| 42 | $days = $ARGV[0]; | ||
| 43 | $virthost = $ARGV[1]; | ||
| 44 | |||
| 45 | # | ||
| 46 | # Set up the proper variables to permit database access | ||
| 47 | # | ||
| 48 | $serverName = "your.dbhost.com"; | ||
| 49 | $serverPort = "3306"; | ||
| 50 | $serverUser = "someuser"; | ||
| 51 | $serverPass = "somepass"; | ||
| 52 | $serverTbl = "acc_log_tbl"; | ||
| 53 | $serverDb = "apache"; | ||
| 54 | |||
| 55 | # | ||
| 56 | # Other constants | ||
| 57 | # | ||
| 58 | $st_tz = "-0800"; | ||
| 59 | $dt_tz = "-0700"; | ||
| 60 | |||
| 61 | $now = time(); | ||
| 62 | $start = $now - (86400 * $days); | ||
| 63 | |||
| 64 | # | ||
| 65 | # Connect and fetch the records | ||
| 66 | # | ||
| 67 | $dbh = DBI->connect("DBI:mysql:database=$serverDb;host=$serverName;port=$serverPort",$serverUser,$serverPass); | ||
| 68 | if (not $dbh) { | ||
| 69 | die "Unable to connect to the database. Please check your connection variables. (Bad password? Incorrect perms?)"; | ||
| 70 | } | ||
| 71 | |||
| 72 | $records = $dbh->prepare("select remote_host,remote_user,request_uri,time_stamp,status,bytes_sent,referer,agent,request_method,request_protocol from `$serverTbl` where virtual_host='$virthost' and time_stamp >= $start order by time_stamp"); | ||
| 73 | $records->execute; | ||
| 74 | if (not $records) { | ||
| 75 | die "No such table or the select returned no records." | ||
| 76 | } | ||
| 77 | |||
| 78 | #Right | ||
| 79 | #ariston.netcraft.com - - [14/Nov/2001:05:13:39 -0800] "GET / HTTP/1.0" 200 502 "-" "Mozilla/4.08 [en] (Win98; I)" | ||
| 80 | #ariston.netcraft.com - - [14/Nov/2001:05:13:39 -0800] "GET / HTTP/1.0" 200 502 "-" "Mozilla/4.08 [en] (Win98; I)" | ||
| 81 | |||
| 82 | #Bad | ||
| 83 | #ariston.netcraft.com - - [2001-11-14 05:13:39 -0800] "GET / HTTP/1.1" 200 502 "-" "Mozilla/4.08 [en] (Win98; I)" | ||
| 84 | #ariston.netcraft.com - - [2001-11-14 05:13:39 -0800] "GET / HTTP/1.1" 200 502 "-" "Mozilla/4.08 [en] (Win98; I)" | ||
| 85 | |||
| 86 | |||
| 87 | # | ||
| 88 | # Pull out the data row by row and format it | ||
| 89 | # | ||
| 90 | while (@data = $records->fetchrow_array) { | ||
| 91 | ($sec,$min,$hour,$mday,$mon,$year,$wday,$yday,$isdst) = localtime($data[3]); | ||
| 92 | $year=$year+1900; | ||
| 93 | |||
| 94 | # Create format for leading-zero formatting | ||
| 95 | if ($mday < 10) { $mday = "0$mday"; } | ||
| 96 | if ($mon < 10) { $mon = "0$mon"; } | ||
| 97 | if ($hour < 10) { $hour = "0$hour"; } | ||
| 98 | if ($min < 10) { $min = "0$min"; } | ||
| 99 | if ($sec < 10) { $sec = "0$sec"; } | ||
| 100 | |||
| 101 | # Convert numeric month to string month | ||
| 102 | for ($mon) { | ||
| 103 | if (/00/) { $mon = "Jan";} | ||
| 104 | elsif (/01/) { $mon = "Feb";} | ||
| 105 | elsif (/02/) { $mon = "Mar";} | ||
| 106 | elsif (/03/) { $mon = "Apr";} | ||
| 107 | elsif (/04/) { $mon = "May";} | ||
| 108 | elsif (/05/) { $mon = "Jun";} | ||
| 109 | elsif (/06/) { $mon = "Jul";} | ||
| 110 | elsif (/07/) { $mon = "Aug";} | ||
| 111 | elsif (/08/) { $mon = "Sep";} | ||
| 112 | elsif (/09/) { $mon = "Oct";} | ||
| 113 | elsif (/10/) { $mon = "Nov";} | ||
| 114 | elsif (/11/) { $mon = "Dec";} | ||
| 115 | } | ||
| 116 | |||
| 117 | # Create the output | ||
| 118 | print "$data[0] $data[1] - [$mday/$mon/$year:$hour:$min:$sec "; | ||
| 119 | if ($isdst) { | ||
| 120 | print "$dt_tz\] "; | ||
| 121 | } else { | ||
| 122 | print "$st_tz\] "; | ||
| 123 | } | ||
| 124 | print "\"$data[8] $data[2] $data[9]\" $data[4] $data[5] \"$data[6]\" \"$data[7]\"\n"; | ||
| 125 | } | ||
| 126 | |||
| 127 | # | ||
| 128 | # Done | ||
| 129 | # | ||
| 130 | $records->finish; | ||
| 131 | |||
