#!/usr/bin/perl
#
# parse-rules-for-masses: read SpamAssassin rule files (NN*.cf) from one or
# more rules directories, collect each rule's type, language, description,
# tflags and score, and dump the result as a Perl file defining %rules and
# %scores.

use strict;
use warnings;

use Getopt::Long;
use Data::Dumper;
use File::Basename qw(dirname);
use File::Path qw(mkpath);

use vars qw(@rulesdirs $outputfile $scoreset);

# Print the usage text and abort.  Uses die(), so the message goes to
# STDERR and the exit status is non-zero.
sub usage {
  die "
parse-rules-for-masses: parse the SpamAssassin rules files for mass-checks,
                        evolving, and frequency analysis

usage: ./parse-rules-for-masses [-d rulesdir] [-o outputfile] [-s scoreset]

rulesdir defaults to ../rules
outputfile defaults to ./tmp/rules.pl
scoreset default to 0

";
}

GetOptions (
  "d=s"      => \@rulesdirs,
  "o=s"      => \$outputfile,
  "s=i"      => \$scoreset,
  "help|h|?" => sub { usage(); }
);

# Apply defaults for anything not given on the command line.
if (!@rulesdirs) {
  @rulesdirs = ("../rules");
}

if (!defined $outputfile) {
  $outputfile = "./tmp/rules.pl";
  mkdir ("tmp", 0755);
}

$scoreset = 0 if ( !defined $scoreset );

# $rules: rule name -> { type, lang, issubrule, tflags, describe, score }
my $rules = { };
readrules(@rulesdirs);

# $scores: rule name -> score, derived from $rules after defaults are applied.
my $scores = { };
foreach my $key (keys %{$rules}) {
  $scores->{$key} = $rules->{$key}->{score};
}

writerules($outputfile);
exit;

# readrules(@dirs)
#
# Parse every file matching "[0-9]*.cf" in each given directory (in sorted
# order per directory) and fill the file-level $rules hash.  Afterwards,
# entries that never received a rule definition are dropped, and rules
# without an explicit score get a default one.
sub readrules {
  foreach my $indir (@_) {
    my @files = glob("$indir/[0-9]*.cf");

    foreach my $file (sort @files) {
      my $in;
      unless (open ($in, '<', $file)) {
        # Best effort: report the unreadable file and carry on with the rest.
        warn "cannot read $file: $!\n";
        next;
      }

      while (<$in>) {
        # Strip comments and surrounding whitespace; skip empty lines.
        s/#.*$//g;
        s/^\s+//;
        s/\s+$//;
        next if /^$/;

        # An optional "lang xx" prefix is removed and remembered.
        my $lang = '';
        if (s/^lang\s+(\S+)\s+//) {
          $lang = $1;
        }

        if (/^(header|rawbody|body|full|uri|meta)\s+(\S+)\s+/) {
          # Copy the captures immediately: the next regex match clobbers them.
          my ($type, $name) = ($1, $2);

          # Names starting with "__" are flagged as sub-rules.
          my $issubrule = ($name =~ /^__/) ? '1' : '0';

          $rules->{$name} ||= { };
          $rules->{$name}->{type} = $type;
          $rules->{$name}->{lang} = $lang;
          $rules->{$name}->{issubrule} = $issubrule;
          $rules->{$name}->{tflags} = '';

        } elsif (/^describe\s+(\S+)\s+(.+)$/) {
          my ($name, $text) = ($1, $2);
          $rules->{$name} ||= { };
          $rules->{$name}->{describe} = $text;

        } elsif (/^tflags\s+(\S+)\s+(.+)$/) {
          my ($name, $flags) = ($1, $2);
          $rules->{$name} ||= { };
          $rules->{$name}->{tflags} = $flags;

        } elsif (/^score\s+(\S+)\s+(.+)$/) {
          my ($name, $score) = ($1, $2);
          $rules->{$name} ||= { };
          if ( $score =~ /\s/ ) { # there are multiple scores
            # Pick the column selected by -s; undef if there is no such
            # column, in which case the default score below applies.
            ($score) = (split(/\s+/,$score))[$scoreset];
          }
          $rules->{$name}->{score} = $score;
        }
      }
      close $in;
    }
  }

  foreach my $rule (keys %{$rules}) {
    my $info = $rules->{$rule};

    if (!defined $info->{type}) {
      delete $rules->{$rule};   # no rule definition -> no rule
      next;
    }

    if (!defined $info->{score}) {
      # Default score: 0.01 for names starting with "T_", otherwise 1.0;
      # negated when the rule's tflags contain "nice".
      my $def = ($rule =~ /^T_/) ? 0.01 : 1.0;
      $info->{score} = ($info->{tflags} =~ /nice/) ? -$def : $def;
    }
  }
}

# writerules($outfile)
#
# Dump $rules and $scores to $outfile as Perl source (%rules and %scores),
# preceded by a timestamp comment and followed by "1;".  Dies if the file
# cannot be opened or closed.
sub writerules {
  my $outfile = shift;

  # Make sure the directory that will hold the output file exists.  Errors
  # are ignored here; the open() below reports any real problem.
  my $outdir = dirname($outfile);
  eval { mkpath($outdir) } unless -d $outdir;

  open (my $out, '>', $outfile) or die "cannot write to $outfile: $!";
  print $out "# dumped at ".scalar(localtime)."\n\n";

  local $Data::Dumper::Purity = 1;
  print $out Data::Dumper->Dump ([$rules, $scores], ['*rules', '*scores']);

  print $out "1;";
  close $out or die "cannot close $outfile: $!";
}