#!/usr/bin/perl -w # options # # -c=file set $cffile # -f=file read list of name folders from # --fork use fork-based model to save RAM (used for mailbox files) # --all don't skip big messages # --mh use mh-style folder (directory of messages named with numbers) # --mn find_missed('nonspam.log') # --ms find_missed('spam.log') # --single folder is a single mail message # --loghits log the text hit for patterns (useful for debugging) # --showdots print a dot for each scanned message # --nokillfile don't use the "QUICK HACK" # # options for maildir and mh-style folders # --head=N only check first N messages # --sort sort contents of folders # --tail=N only check last N messages # # all following arguments are used as the name of mail folders ########################################################################### # QUICK HACK: some of our spamtraps are getting nonspam newsletters. ignore # those mails here... my $killfile_message_ids = qr{(?: <\d\d\d\d\S+\@dev\.lifetimetv\.com> |<\d+\.\S+\@fmail..\.real-net\.net> |<3D53A59C000A4029\@mta3n\.bluewin\.ch> )}x; my $killfile_receiveds = qr{(?: from\s\S+\s\S+\sby\s(?:anclsmtp\d\d|myfamlsmtp\d\d)\.myfamily\.com |for :include:\S+/MapQuest/ )}x; ########################################################################### use vars qw($opt_c $opt_f $opt_fork $opt_all $opt_head $opt_mh $opt_ms $opt_mn $opt_single $opt_sort $opt_tail $opt_showdots $opt_nokillfile); use FindBin; use lib "$FindBin::Bin/lib"; use lib "$FindBin::Bin/../lib"; use Mail::ArchiveIterator; use Mail::SpamAssassin; use Mail::SpamAssassin::NoMailAudit; use Getopt::Long; # flush buffer immediately so if mass-check fails or stops we don't get a corrupt line $|++; GetOptions("c=s", "f=s", "fork", "all", "head=i", "mh", "ms", "mn", "single", "sort", "tail=i", "loghits", "showdots", "nokillfile"); #use Devel::Peek; if ($opt_c) { $cffile = $opt_c; } else { $cffile = "$FindBin::Bin/../rules"; } if ($opt_ms) { find_missed('spam.log'); } elsif ($opt_mn) { find_missed('nonspam.log'); } $spamtest = new Mail::SpamAssassin ({ 'rules_filename' => $cffile, 'userprefs_filename' => "$FindBin::Bin/spamassassin.prefs", 'save_pattern_hits' => $opt_loghits, 'dont_copy_prefs' => 1, 'local_tests_only' => 1 }); $spamtest->compile_now(1); $spamtest->read_scoreonly_config("$FindBin::Bin/mass-check.cf"); my $iter = new Mail::ArchiveIterator ({ 'opt_fork' => $opt_fork, 'opt_mh' => $opt_mh, 'opt_single' => $opt_single, 'opt_sort' => $opt_sort, 'opt_head' => $opt_head, 'opt_tail' => $opt_tail, 'opt_all' => $opt_all, }); my @targets = @ARGV; if ($opt_f) { open (F, $opt_f) || die $!; push (@targets, map { chomp; $_ } ); close (F); } my $who = `id -un 2>/dev/null`; chop $who; my $where = `uname -n 2>/dev/null`; chop $where; my $when = `date`; chop $when; print "# mass-check results from $who\@$where, on $when\n"; print "# M:SA version ".$spamtest->Version()."\n"; print '# CVS tag: $Name: $',"\n"; $iter->set_function (\&wanted); $iter->run (@targets); print STDERR "\n" if ($opt_showdots); exit; ########################################################################### sub wanted { my ($id, $dataref) = @_; # my $ma = Mail::Audit->new('data' => \@msg); my $ma = Mail::SpamAssassin::NoMailAudit->new ('data' => $dataref); unless ($opt_nokillfile) { # killfile for mass-checks $_ = $ma->get_header("Message-Id"); if (defined($_) && /${killfile_message_ids}/os) { logkilled($ma,$id, "unwanted Message-Id"); return; } $_ = join ('', $ma->get_header("Received")); if (defined($_) && /${killfile_receiveds}/os) { logkilled($ma,$id, "unwanted Received chain"); return; } } $ma->{noexit} = 1; # this API doesn't work this way ;) anyway, slowing down mass-check to # do this is not good. instead, remove the markup in advance. #$spamtest->remove_spamassassin_markup($ma); my $status = $spamtest->check ($ma); my $body = $ma->get_body(); if ( defined $body && defined $body->[0] && $body->[0] =~ /^SPAM:/ ) { warn "# contains SpamAssassin markup, results may be incorrect: $id\n"; } my $yorn = $status->is_spam(); my $hits = $status->get_hits(); my $tests = $status->get_names_of_tests_hit(); $tests = join(',', sort(split(/,/, $tests))); $id =~ s/\s/_/g; printf "%s %2d %s %s\n", ($yorn ? 'Y' : '.'), $hits, $id, $tests; if ($tests =~ /MICROSOFT_EXECUTABLE|MIME_SUSPECT_NAME/) { logkilled($ma,$id, "possible virus"); } if ($opt_loghits) { my $out = ''; foreach my $t (sort keys %{$status->{pattern_hits}}) { $_ = $status->{pattern_hits}->{$t}; $_ ||= ''; s/\r/\\r/gs; # fix unprintables s/\n/\\n/gs; $out .= "$t=\"$_\" "; } if ($out ne '') { chop $out; print "# ".$out."\n"; } } $status->finish(); undef $ma; # clean 'em up undef $status; print STDERR '.' if ($opt_showdots); #system ("ps alxww | grep mass-check | grep perl | grep -v grep");#JMD } sub logkilled { my ($ma, $id, $reason) = @_; my $from = $ma->get_header ("From"); my $to = $ma->get_header ("To"); my $subj = $ma->get_header ("Subject"); my $msgid = $ma->get_header ("Message-Id"); $msgid ||= $ma->get_header ("Message-ID"); $msgid ||= '(??)'; chop ($from); chop ($to); chop ($subj); chop ($msgid); print STDERR "\n" if ($opt_showdots); warn "# skipped killfiled message ($reason): from=$from to=$to subj=$subj msgid=$msgid id=$id\n"; } ########################################################################### sub find_missed { my $file = shift; my $threshold = 5; my $shouldbespam = 1; if ($file =~ /nonspam/) { $shouldbespam = 0; } my $scores = readscores(); open (IN, "<$file"); while () { next if /^#/; /^.\s+\d+\s+(\S+)\s*/ or next; my $id = $1; my $score = 0.0; $_ = $'; s/,,+/,/g; s/^\s+//; s/\s+$//; foreach my $tst (split (/,/, $_)) { next if ($tst eq ''); if (!defined $scores->{$tst}) { warn "unknown test in $file, ignored: $tst\n"; next; } $score += $scores->{$tst}; } if ($shouldbespam && $score < $threshold) { found_missed ($score, $id, $_); } elsif (!$shouldbespam && $score > $threshold) { found_missed ($score, $id, $_); } } close IN; } sub readscores { print "Reading scores from \"$cffile\"...\n"; system ("./parse-rules-for-masses -d \"$cffile\"") and die; my %scores; require "./tmp/rules.pl"; return \%scores; } sub found_missed { my $score = shift; my $id = shift; my $tests = shift; print "$score $id $tests\n"; }