#!/usr/bin/perl # # analyses a set of mboxes and figures out frequencies and average precedences # of each host/IPaddr range in the Received headers. This allows us to track # frequent relayers, or forged hostnames. "precedence" BTW is the position of # the Received header in the list, normalized between 0..100; higher means # nearer to the recipient. foreach my $file (@ARGV) { open (IN, "<$file") or warn "cannot read $file"; domail(); close IN; } report(); exit; sub domail { my $rcvd = ''; while () { nextline: /^$/ and last; /^Received: / or next; $rcvd .= $_; nextrcvdline: $_ = ; if (/^\s+/) { $rcvd .= $_; goto nextrcvdline; } else { goto nextline; } } $rcvd =~ s/\s+/ /gs; study $rcvd; my $host_precedence = 0; my @hits = (); while ($rcvd =~ /\b (?: (?:from|by)\s([-_+a-zA-Z0-9]\S+)\s\( | ([-_+a-zA-Z0-9]+\.[-_+a-zA-Z0-9]+\.[-_+a-zA-Z0-9\.]+) ) \b/gx) { $_ = $1; $_ ||= $2; next if /^\d\.\d/; # version numbers next if /\d\.\d\./; # version numbers push (@hits, $_); } my $hitcalibration = 99.0 / ($#hits+1); foreach $_ (@hits) { $host_precedence += int ($hitcalibration); if (/^[\d\.]+$/) { # IPs s/\.\d+$//gs; # turn into a network $hits{$_}++; $desc{$_} = "iprange: $_"; $prec{$_} ||= ''; $prec{$_} .= $host_precedence." "; } else { $_ = lc $_; $hits{$_}++; $desc{$_} = "host: $_"; $prec{$_} ||= ''; $prec{$_} .= $host_precedence." "; s/^.*(\.[^\.]+\.[^\.]+)$/$1/gs; if (/\S/) { $hits{$_}++; $desc{$_} = "netname: $_"; $prec{$_} ||= ''; $prec{$_} .= $host_precedence." "; } } } } sub report { foreach my $host (sort {$hits{$b} <=> $hits{$a}} keys %hits) { my $avgprec = average_precedences ($prec{$host}); printf "%6d %3d: %s\n", $hits{$host}, $avgprec, $desc{$host}; } } sub average_precedences { my $precs = shift; my $avgprec = 0; my $numprec = 0; foreach my $prec (split (' ', $precs)) { $avgprec += $prec; $numprec++; } $avgprec /= $numprec; $avgprec /= 10; }