#!/usr/bin/perl -w # options # # -c=file set configuration/rules directory # -f=file read list of targets from # -j=jobs specify the number of check jobs to run simultaneously # --mh find_missed('ham.log') # --ms find_missed('spam.log') # --net turn on network checks! # --mid report Message-ID from each message # --bayes report score from Bayesian classifier # --debug report debugging information # --showdots print a dot for each scanned message # # log options # -o write all logs to stdout # --loghits log the text hit for patterns (useful for debugging) # --hamlog=log use as ham log ('ham.log' is default) # --spamlog=log use as spam log ('spam.log' is default) # # message selection options # -n no date sorting or spam/ham interleaving # --all don't skip big messages # --head=N only check first N ham and N spam (N messages if -n used) # --tail=N only check last N ham and N spam (N messages if -n used) # # simple target options (implies -o and no ham/spam classification) # --dir subsequent targets are directories # --file subsequent targets are files in RFC 822 format # --mbox subsequent targets are mbox files # # non-option arguments are used as target names (mail files and folders), # the target format is: :: # is "spam" or "ham" # is "dir", "file", or "mbox" # is a file or directory name ########################################################################### use vars qw($opt_c $opt_f $opt_j $opt_n $opt_o $opt_all $opt_bayes $opt_debug $opt_format $opt_hamlog $opt_head $opt_loghits $opt_mid $opt_mh $opt_ms $opt_net $opt_nosort $opt_showdots $opt_spamlog $opt_tail); use FindBin; use lib "$FindBin::Bin/../lib"; eval "use bytes"; use Mail::SpamAssassin::ArchiveIterator; use Mail::SpamAssassin; use Mail::SpamAssassin::NoMailAudit; use Getopt::Long; # default settings $opt_c = "$FindBin::Bin/../rules"; $opt_j = 1; $opt_net = 0; $opt_hamlog = "ham.log"; $opt_spamlog = "spam.log"; GetOptions("c=s", "f=s", "j=i", "n", "o", "all", "bayes", "debug", "hamlog=s", "head=i", "loghits", "mh", "mid", "ms", "net", "showdots", "spamlog=s", "tail=i", "dir" => sub { $opt_format = "dir"; }, "file" => sub { $opt_format = "file"; }, "mbox" => sub { $opt_format = "mbox"; }, '<>' => \&target); if ($opt_f) { open (F, $opt_f) || die $!; push (@targets, map { chomp; $_ } ); close (F); } if ($opt_ms) { find_missed($opt_spamlog); } elsif ($opt_mh) { find_missed($opt_hamlog); } $spamtest = new Mail::SpamAssassin ({ 'debug' => $opt_debug, 'rules_filename' => $opt_c, 'userprefs_filename' => "$FindBin::Bin/spamassassin/user_prefs", 'userstate_dir' => "$FindBin::Bin/spamassassin", 'save_pattern_hits' => $opt_loghits, 'dont_copy_prefs' => 1, 'local_tests_only' => $opt_net ? 0 : 1, PREFIX => '', DEF_RULES_DIR => $opt_c, LOCAL_RULES_DIR => '', }); $spamtest->compile_now(1); $spamtest->read_scoreonly_config("$FindBin::Bin/mass-check.cf"); my $who = `id -un 2>/dev/null`; chop $who; my $where = `uname -n 2>/dev/null`; chop $where; my $when = `date -u`; chomp $when; my $log_header = "# mass-check results from $who\@$where, on $when\n" . "# M:SA version ".$spamtest->Version()."\n" . '# CVS tag: $Name: $' . "\n"; my $host = $ENV{'HOSTNAME'} || $ENV{'HOST'} || `hostname` || 'localhost'; chomp $host; if ($opt_o) { autoflush STDOUT 1; print STDOUT $log_header; } else { open(HAM, "> $opt_hamlog"); open(SPAM, "> $opt_spamlog"); autoflush HAM 1; autoflush SPAM 1; print HAM $log_header; print SPAM $log_header; } my $iter = new Mail::SpamAssassin::ArchiveIterator({ 'opt_j' => $opt_j, 'opt_n' => $opt_n, 'opt_all' => $opt_all, 'opt_head' => $opt_head, 'opt_tail' => $opt_tail, }); $iter->set_functions(\&wanted, \&result); $iter->run(@targets); print STDERR "\n" if ($opt_showdots); exit; ########################################################################### sub target { my ($target) = @_; if (!defined($opt_format)) { push (@targets, $target); } else { $opt_o = 1; push (@targets, "spam:$opt_format:$target"); } } ########################################################################### sub result { my ($class, $result) = @_; if ($opt_o) { print STDOUT $result; } elsif ($class eq "s") { print SPAM $result; } elsif ($class eq "h") { print HAM $result; } } sub wanted { my ($id, $time, $dataref) = @_; my $out; my $ma = Mail::SpamAssassin::NoMailAudit->new('data' => $dataref); $ma->{noexit} = 1; # remove SpamAssassin markup, if present and the mail was spam $_ = $ma->get ("X-Spam-Status"); if (defined($_) && /^Yes, hits=/) { my $newtext = $spamtest->remove_spamassassin_markup($ma); my @newtext = split (/^/m, $newtext); $dataref = \@newtext; $ma = Mail::SpamAssassin::NoMailAudit->new ('data' => $dataref); } my $status = $spamtest->check($ma); my @extra; if (defined($time)) { push(@extra, "time=$time"); } if ($opt_bayes && defined $status->{bayes_score}) { push(@extra, "bayes=" . $status->{bayes_score}); } if ($opt_mid) { my $mid = $ma->get_header("Message-Id") || "<$id\@$host.masses.spamassasin.org>"; $mid =~ s/^\s*//s; $mid =~ s/\s*$//s; $mid =~ s/[\s,]/./gs; push(@extra, "mid=$mid"); } my $yorn = $status->is_spam() ? 'Y' : '.'; my $hits = $status->get_hits(); my $tests = join(",", sort(grep(length,$status->get_names_of_tests_hit(),$status->get_names_of_subtests_hit()))); my $extra = join(",", @extra); # print STDERR $status->get_full_message_as_text(); $id =~ s/\s/_/g; $out .= sprintf("%s %2d %s %s %s\n", $yorn, $hits, $id, $tests, $extra); if ($tests =~ /MICROSOFT_EXECUTABLE|MIME_SUSPECT_NAME/) { $out .= logkilled($ma, $id, "possible virus"); } if ($opt_loghits) { my $log = ''; foreach my $t (sort keys %{$status->{pattern_hits}}) { $_ = $status->{pattern_hits}->{$t}; $_ ||= ''; s/\r/\\r/gs; # fix unprintables s/\n/\\n/gs; $log .= "$t=\"$_\" "; } if ($log) { chomp $log; $out .= "# $log\n"; } } $status->finish(); undef $ma; # clean 'em up undef $status; print STDERR '.' if ($opt_showdots); return $out; } sub logkilled { my ($ma, $id, $reason) = @_; my $from = $ma->get_header("From") || 'undef'; my $to = $ma->get_header("To") || 'undef'; my $subj = $ma->get_header("Subject") || 'undef'; my $mid = $ma->get_header("Message-Id") || 'undef'; chomp ($from); chomp ($to); chomp ($subj); chomp ($mid); return "# skipped killfiled message ($reason): from=$from to=$to subj=$subj mid=$mid id=$id\n"; } ########################################################################### sub find_missed { my $file = shift; my $threshold = 5; my $shouldbespam = 1; if ($file =~ /ham/) { $shouldbespam = 0; } my $scores = readscores(); open (IN, "<$file"); while () { next if /^#/; /^.\s+\d+\s+(\S+)\s*/ or next; my $id = $1; my $score = 0.0; $_ = $'; s/bayes=\S+//; s/,,+/,/g; s/^\s+//; s/\s+$//; foreach my $tst (split (/,/, $_)) { next if ($tst eq ''); if (!defined $scores->{$tst}) { warn "unknown test in $file, ignored: $tst\n"; next; } $score += $scores->{$tst}; } if ($shouldbespam && $score < $threshold) { found_missed($score, $id, $_); } elsif (!$shouldbespam && $score > $threshold) { found_missed($score, $id, $_); } } close IN; } sub readscores { warn "Reading scores from \"$opt_c\"...\n"; system("./parse-rules-for-masses -d \"$opt_c\"") and die; use vars qw(%scores); require "./tmp/rules.pl"; return \%scores; } sub found_missed { my $score = shift; my $id = shift; my $tests = shift; print "$score $id $tests\n"; }