1 | 1 | simandl | # |
2 | | | # $Id: HotSaNIClogparse.pm,v 1.3 2004/03/07 23:29:40 bernisys Exp $ |
3 | | | # |
4 | | | |
5 | | | package HotSaNIClogparse; |
6 | | | |
7 | | | use Time::Local; |
8 | | | |
# Module version, extracted from the CVS "Revision" keyword below.
# The leading ".*?" is deliberately non-greedy: a greedy ".*" would swallow
# all but the last digit of a multi-digit major number ("12.3" -> "2.3").
$VERSION = '$Revision: 1.3 $';
$VERSION =~ s/.*?(\d+\.\d+).*/$1/;

# Maps the three-letter English month abbreviations used in log timestamps
# to ZERO-based month numbers (Jan => 0 ... Dec => 11).
my %datehash=(Jan=>0,Feb=>1,Mar=>2,Apr=>3,May=>4,Jun=>5,Jul=>6,Aug=>7,Sep=>8,Oct=>9,Nov=>10,Dec=>11);
12 | | | |
######################################################################
#
# Finds the logfiles that have not been (completely) parsed yet.
#
# USAGE: ($position,@files)=findlogs($dir,$names,$infofile);
#
# $dir       the directory that is searched (recursively, via File::Find)
#            for logfiles
#
# $names     a regex that has to match the complete basename of every
#            desired log (it is anchored at both ends)
#
# $infofile  the path to a file where information about the last parsing
#            process has been stored: its first line is the first line
#            of the logfile that was parsed last, its second line is the
#            position where parsing stopped.
#
#
# $position  the position read from $infofile, or 0 when no logfile
#            starts with the remembered first line (including the cases
#            of a missing, empty or unreadable $infofile).
#
# @files     the "newer" (i.e. unparsed) logs. Candidates are visited in
#            string-sorted order and each one is put in FRONT of the
#            list, so the list starts with the log whose first line
#            matched (if any) and ends with the name that sorts first.
#            This list can be parsed within a "foreach (@files) {...}"
#            construct.
#
sub findlogs {
    my ($logdir, $logfile, $lastinfo) = @_;

    use File::Find;

    # Collect the full path of everything below $logdir whose basename
    # matches the caller's pattern ($_ is the basename inside "wanted").
    my @candidates;
    File::Find::find(
        { wanted => sub { /^$logfile\z/s && push @candidates, $File::Find::name; } },
        $logdir
    );

    my $firstline = "*** no lastfile found ***";
    my $position  = 0;
    if (-e $lastinfo) {
        # Three-argument open with a lexical handle: the path is never
        # interpreted as a mode or a pipe, and no global handle is touched.
        if (open my $info, '<', $lastinfo) {
            $firstline = <$info> || "*** empty lastfile ***";
            $position  = <$info> || 0;
            close $info;
        }
        else {
            # Same result as an empty info file, but no longer silent.
            warn "findlogs: cannot read $lastinfo: $!\n";
            $firstline = "*** empty lastfile ***";
        }
    }

    my @files;
    my $found = 0;
    for my $file (sort @candidates) {
        my $line;
        if (open my $log, '<', $file) {
            $line = <$log>;
            close $log;
        }
        else {
            warn "findlogs: cannot read $file: $!\n";
        }

        # Every visited log is reported, readable or not; the walk stops
        # at the log that starts with the remembered first line.
        unshift @files, $file;
        if (defined $line && $line eq $firstline) {
            $found = 1;
            last;
        }
    }

    # Without a matching log the stored position refers to nothing we
    # found, so parsing has to start from the beginning.
    $position = 0 unless $found;

    return ($position, @files);
}
71 | | | |
72 | | | |
73 | | | |
######################################################################
#
# Parses one line of an apache logfile (combined format) and returns
# it split into a hash.
#
# USAGE: %info=parseline_apache($line);
#
# Keys of the returned hash:
#   IP_DN REQuser REQauthuser   the first three blank-separated fields
#   TS TSzone                   the two parts of the [...] timestamp
#   REQwhat                     the quoted request
#   RETcode RETsize             status code and size ("-" is kept)
#   REQreferer REQagent         the two trailing quoted fields
#   TSday TSmname TSmonth TSyear TShour TSminute TSsecond
#                               the decoded timestamp; TSmonth is 1..12
#   TSzdiff                     the signed hour part of TSzone as a number
#   TStime                      Time::Local::timelocal() of the decoded
#                               fields (TSzone is not applied)
#
# Fields that cannot be parsed get defaults: "" for strings, day 1,
# month 1, year 1900 and 0 for hour, minute, second and TSzdiff.
# timelocal() croaks when the resulting date is out of range.
#
sub parseline_apache {
    my $line=shift || "";
    my %info;

    # Take the captures from the match's return list instead of $1..$10:
    # after a failed match those variables still hold the values of an
    # earlier successful match and would leak into the result.
    my @names = qw(IP_DN REQuser REQauthuser TS TSzone REQwhat
                   RETcode RETsize REQreferer REQagent);
    my @values = $line =~ /^(.*?) (.*?) (.*?) \[(.*?) (.*?)\] \"(.*?)\" ([0-9]+) ([0-9-]+) \"(.*?)\" \"(.*?)\"$/;
    for my $i (0 .. $#names) {
        $info{$names[$i]} = $values[$i] || "";
    }

    my ($day, $mname, $year, $hour, $minute, $second) =
        $info{TS} =~ /([0-9]+)\/(\w+)\/([0-9]+)\:([0-9]+)\:([0-9]+)\:([0-9]+)/;
    $info{TSday}    = $day    || 1;
    $info{TSmname}  = $mname  || "";
    $info{TSyear}   = $year   || 1900;
    $info{TShour}   = $hour   || 0;
    $info{TSminute} = $minute || 0;
    $info{TSsecond} = $second || 0;

    # %datehash is zero-based (Jan => 0) while TSmonth is one-based, so
    # the looked-up value has to be shifted by one. Unknown month names
    # fall back to January.
    my $month_index = $datehash{$info{TSmname}};
    $info{TSmonth} = defined $month_index ? $month_index + 1 : 1;

    my ($zdiff) = $info{TSzone} =~ /^([-+][0-9][0-9])/;
    $info{TSzdiff} = $zdiff || 0;

    # Force numeric values ("07" -> 7, "+01" -> 1).
    $info{$_} *= 1 for qw(TSzdiff TSyear TSday TShour TSminute TSsecond);

    # timelocal() expects a zero-based month.
    $info{TStime} = Time::Local::timelocal(
        $info{TSsecond}, $info{TSminute}, $info{TShour},
        $info{TSday}, $info{TSmonth}-1, $info{TSyear}
    );

    return %info;
}
117 | | | |
118 | | | |
######################################################################
#
# Parses one line of an amavisd-new logfile and returns it split
# into a hash.
#
# USAGE: %info=parseline_amavis($line);
#
# Example line:
#   Mar 5 11:11:03 some.host.org amavisd-new[30127]: (30127-09) Passed, <sender@send.host.org> -> <recipient@receiving.host.org>, Message-ID: <MSG_ID>, Hits: -
#
# Keys of the returned hash:
#   TSmname TSday TShour TSminute TSsecond   the syslog timestamp
#   TSmonth                                  month number, 1..12
#   TSyear                                   the current year, or the
#                                            previous one when TSmonth is
#                                            later than the current month
#                                            (the log carries no year)
#   TStime                                   Time::Local::timelocal() of
#                                            the fields above
#   host process ID                          host name, "name[pid]" and
#                                            the "(nnn-nn)" id
#   result                                   the keyword after the id,
#                                            one of:
#                                              BANNED
#                                              INFECTED
#                                              mail_via_smtp
#                                              Not-Delivered
#                                              NOTICE
#                                              Passed
#   info                                     the rest of the line
#
# Lines that do not match yield "" for the string fields, day 1,
# month 1 and 0 for hour, minute and second.
#
sub parseline_amavis {
    my $line=shift || "";
    my %info;

    # Captures are taken from the match's return list: $1..$10 would keep
    # the values of an earlier match when this one fails. The result
    # keyword allows "-" so that "Not-Delivered" is recognised.
    my ($mname, $day, $hour, $minute, $second, $host, $process, $id, $result, $rest) =
        $line =~ /^(\w+)\s+(\d+) (\d+):(\d+):(\d+) (.*?) (.*\[\d+\]): \((\d+-\d+)\) ([\w-]+)[,:]* (.*)$/;

    $info{TSmname}  = $mname   || "";
    $info{TSday}    = $day     || 1;
    $info{TShour}   = $hour    || 0;
    $info{TSminute} = $minute  || 0;
    $info{TSsecond} = $second  || 0;
    $info{host}     = $host    || "";
    $info{process}  = $process || "";
    $info{ID}       = $id      || "";
    $info{result}   = $result  || "";
    $info{info}     = $rest    || "";

    # Only month and year of "now" are needed; keep them lexical instead
    # of spilling the whole localtime() list into package globals.
    my ($now_mon, $now_year) = (localtime(time))[4,5];

    # %datehash is zero-based (Jan => 0) while TSmonth is one-based, so
    # the looked-up value has to be shifted by one. Unknown month names
    # fall back to January.
    my $month_index = $datehash{$info{TSmname}};
    $info{TSmonth} = defined $month_index ? $month_index + 1 : 1;

    # A month that lies in the future must belong to last year.
    $info{TSyear} = $now_year+1900;
    $info{TSyear}-- if ($now_mon+1 < $info{TSmonth});

    # Force numeric values ("07" -> 7).
    $info{$_} *= 1 for qw(TSday TShour TSminute TSsecond);

    # timelocal() expects a zero-based month.
    $info{TStime} = Time::Local::timelocal(
        $info{TSsecond}, $info{TSminute}, $info{TShour},
        $info{TSday}, $info{TSmonth}-1, $info{TSyear}
    );

    return %info;
}
166 | | | |
167 | | | |
######################################################################
#
# Converts year, month, mday, hour, minute and second (in this order,
# month counted from 1, year with century) into a timestamp such as
# time() produces.
#
# USAGE: $seconds=time_to_seconds($year,$month,$mday,$hour,$minute,$second);
#
# The timestamp is built bit by bit, from bit 30 down to bit 0: a bit
# stays set as long as localtime() of the candidate value is not later
# than the requested time. The result therefore lies between 0 and
# 2**31-1; a requested time before localtime(1) yields 0.
#
sub time_to_seconds {
    # "YYYYMMDDhhmmss" strings compare like the points in time they name.
    my $wanted = sprintf "%04d%02d%02d%02d%02d%02d",@_;
    my $seconds = 0;

    for my $shift (reverse 0 .. 30) {
        # Tentatively switch this bit on ...
        my $candidate = $seconds + (1 << $shift);

        my ($sec, $min, $hour, $mday, $mon, $year) = localtime($candidate);
        my $stamp = sprintf "%04d%02d%02d%02d%02d%02d",
            $year + 1900, $mon + 1, $mday, $hour, $min, $sec;

        # ... and keep it unless that overshoots the requested time.
        $seconds = $candidate unless $wanted lt $stamp;
    }

    return($seconds);
}
205 | | | |
206 | | | 1; |
207 | | | |