1 | 1 | simandl | # |
2 | | | # $Id: HotSaNIClogparse.pm,v 1.1 2004/02/08 20:42:05 bernisys Exp $ |
3 | | | # |
4 | | | |
package HotSaNIClogparse;

# Module version: the numeric part of the CVS "Revision" keyword string.
($VERSION = '$Revision: 1.1 $') =~ s/.*(\d+\.\d+).*/$1/;

# Maps the three-letter English month abbreviation to a zero-based month
# index (Jan => 0 ... Dec => 11).
my %datehash;
@datehash{qw(Jan Feb Mar Apr May Jun Jul Aug Sep Oct Nov Dec)} = (0 .. 11);
10 | | | |
11 | | | ###################################################################### |
12 | | | # |
13 | | | # finds logfiles that are not already parsed. |
14 | | | # |
15 | | | # USAGE: ($position,@files)=findlogs($dir,$names,$infofile); |
16 | | | # |
17 | | | # $dir the directory that contains the desired logfiles |
18 | | | # |
19 | | | # $names a regex that matches all desired logs |
20 | | | # |
21 | | | # $infofile is the path to a file where some information about |
22 | | | # the last parsing process has been stored. |
23 | | | # |
24 | | | # |
25 | | | # $position will be the position where the last parsing process stopped |
26 | | | # |
27 | | | # @files is an array containing all "newer" (i.e. unparsed) logs |
28 | | | # in hopefully the right order. This list can be parsed |
29 | | | # within a "foreach (@files) {...}" construct. |
30 | | | # |
sub findlogs {
    # Collects the logfiles that still have to be parsed.
    #
    # Arguments:
    #   $logdir   - directory that is searched (recursively, via File::Find)
    #   $logfile  - regex source; a file is selected when its basename
    #               matches /^$logfile\z/s
    #   $lastinfo - path of the state file; its first line is compared with
    #               the first line of each log, its second line is returned
    #               as the position
    #
    # Returns ($position, @files).  The candidates are visited in plain
    # string-sort order until one is found whose first line equals the first
    # line stored in $lastinfo; every visited file is prepended to @files,
    # so the list is returned in reverse visiting order.  When no file
    # matches (or there is no usable state file) all candidates are
    # returned and $position is 0.
    my ($logdir, $logfile, $lastinfo) = @_;

    use File::Find;

    my @candidates;
    File::Find::find(
        { wanted => sub { push @candidates, $File::Find::name if /^$logfile\z/s } },
        $logdir
    );
    @candidates = sort @candidates;

    # Defaults that can never equal a real first line (no trailing newline).
    my $firstline = "*** no lastfile found ***";
    my $position  = 0;
    if (-e $lastinfo) {
        # Three-argument open: the path is never interpreted as a mode or
        # a command, whatever characters it contains.
        if (open my $info_fh, '<', $lastinfo) {
            $firstline = <$info_fh> || "*** empty lastfile ***";
            $position  = <$info_fh> || 0;
            close $info_fh;
        }
        else {
            # An unreadable state file is treated like an empty one.
            $firstline = "*** empty lastfile ***";
        }
    }

    my @files;
    my $found = 0;
    while (!$found && @candidates) {
        my $file = shift @candidates;

        # Read only the first line; an unreadable or empty file simply
        # never matches but is still reported to the caller.
        my $line;
        if (open my $log_fh, '<', $file) {
            $line = <$log_fh>;
            close $log_fh;
        }

        unshift @files, $file;
        $found = 1 if defined $line && $line eq $firstline;
    }

    # Without a matching file the stored position is meaningless.
    $position = 0 unless $found;

    return ($position, @files);
}
69 | | | |
70 | | | |
71 | | | |
72 | | | ###################################################################### |
73 | | | # |
74 | | | # parse a line of an apache logfile and return its fields split into a hash |
75 | | | # |
sub parseline_apache {
    # Splits one access-log line (client IPv4 address, two user fields,
    # bracketed timestamp with zone, quoted request, status, size, quoted
    # referer and quoted agent) into a hash.
    #
    # Argument: the log line (an empty/false value is treated as "").
    #
    # Returns a hash with the keys IP, REQuser, REQauthuser, TS, TSzone,
    # REQtype, REQwhat, REQformat, RETcode, RETsize, REQreferer, REQagent,
    # plus - when the timestamp can be decoded - TSday, TSmname, TSyear,
    # TShour, TSminute, TSsecond, TSmonth (zero-based), TSzdiff (signed
    # hour offset) and TStime (seconds as computed by time_to_seconds()).
    # A line that does not match the expected layout yields an empty hash.
    my $line = shift || "";
    my %info;

    # Capture into a list and check the match: reading $1..$12 after a
    # failed match would silently reuse the captures of an earlier match.
    # The size field may be "-" (logged when no body bytes were sent).
    my @fields = $line =~ /^([0-9]+\.[0-9]+\.[0-9]+\.[0-9]+) (.*?) (.*?) \[(.*?) (.*?)\] \"(\w+) (.*?) (.*?)\" ([0-9]+) ([0-9]+|-) \"(.*?)\" \"(.*?)\"$/
        or return %info;

    @info{qw(IP REQuser REQauthuser TS TSzone REQtype REQwhat REQformat
             RETcode RETsize REQreferer REQagent)} = @fields;
    $info{RETsize} = 0 if $info{RETsize} eq '-';

    # Timestamp layout: day/Mon/year:hour:minute:second
    if ($info{TS} =~ /([0-9]+)\/(\w+)\/([0-9]+)\:([0-9]+)\:([0-9]+)\:([0-9]+)/) {
        $info{TSday}    = $1 * 1;
        $info{TSmname}  = $2;
        $info{TSyear}   = $3;
        $info{TShour}   = $4 * 1;
        $info{TSminute} = $5 * 1;
        $info{TSsecond} = $6 * 1;
        $info{TSmonth}  = $datehash{$info{TSmname}};
    }

    # Zone offset: keep the sign and the (numeric) hour part only.
    if ($info{TSzone} =~ /^(.)([0-9][0-9])/) {
        $info{TSzdiff} = $1 . $2 * 1;
    }

    # %datehash is zero-based, whereas time_to_seconds() compares against
    # localtime's month plus one, so the month has to be shifted to 1..12
    # here; otherwise the result is one month too early.
    if (defined $info{TSmonth}) {
        $info{TStime} = time_to_seconds(
            $info{TSyear}, $info{TSmonth} + 1, $info{TSday},
            $info{TShour}, $info{TSminute},    $info{TSsecond}
        );
    }

    return %info;
}
91 | | | |
92 | | | |
93 | | | ###################################################################### |
94 | | | # |
95 | | | # converts year, month, mday, hour, minute and second |
96 | | | # into a timestamp such as time() produces. |
97 | | | # |
98 | | | # How? heh, heh. Binary search. :) |
99 | | | # Try setting/unsetting bits starting from the high end until |
100 | | | # localtime($result) matches the input time. |
101 | | | # |
sub time_to_seconds {
    # Arguments: year, month, mday, hour, minute, second.  The month is
    # compared against localtime's month plus one, i.e. January is 1.
    #
    # Returns the largest value in 0 .. 2**31-1 that can be assembled bit
    # by bit (highest bit first) such that localtime() of it, rendered as
    # YYYYMMDDhhmmss, does not sort after the requested time.

    # Fixed-width digit strings: string order equals chronological order.
    my $target  = sprintf "%04d%02d%02d%02d%02d%02d", @_;
    my $seconds = 0;

    foreach my $shift (reverse 0 .. 30) {
        # Tentatively switch this bit on ...
        my $candidate = $seconds + (1 << $shift);

        my @parts = localtime($candidate);
        my $stamp = sprintf "%04d%02d%02d%02d%02d%02d",
            $parts[5] + 1900, $parts[4] + 1, @parts[3, 2, 1, 0];

        # ... and keep it unless that overshoots the requested time.
        $seconds = $candidate unless $target lt $stamp;
    }

    return($seconds);
}
130 | | | |
131 | | | 1; |
132 | | | |