#!/usr/bin/perl -w
#
# <@LICENSE>
# Licensed to the Apache Software Foundation (ASF) under one or more
# contributor license agreements.  See the NOTICE file distributed with
# this work for additional information regarding copyright ownership.
# The ASF licenses this file to you under the Apache License, Version 2.0
# (the "License"); you may not use this file except in compliance with
# the License.  You may obtain a copy of the License at:
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# </@LICENSE>

use strict;

use Getopt::Long;
use Data::Dumper;
use File::Basename qw(dirname);
use File::Path qw(mkpath);

use vars qw(@rulesdirs $outputfile $scoreset $skip_test_rules);

# Print the usage message on STDERR and abort the script.
sub usage {
  die "
parse-rules-for-masses: parse the SpamAssassin rules files for mass-checks,
  evolving, and frequency analysis

usage: ./parse-rules-for-masses [-d rulesdir] [-o outputfile] [-s scoreset] [-x]

rulesdir defaults to ../rules
outputfile defaults to ./tmp/rules.pl
scoreset default to 0
-x   do not include test rules files (ie 70_*)

";
}

GetOptions (
  "d=s"      => \@rulesdirs,
  "o=s"      => \$outputfile,
  "s=i"      => \$scoreset,
  "x"        => \$skip_test_rules,
  "help|h|?" => sub { usage(); }
);

if ($#rulesdirs < 0) {
  @rulesdirs = ("../rules");
}

if (!defined $outputfile) {
  $outputfile = "./tmp/rules.pl";
  mkdir ("tmp", 0755);
}

$scoreset = 0 if ( !defined $scoreset );

# $rules: rule name => attribute hash (type, lang, tflags, score, ...),
#         plus the special key '_scoreset'.
# $reuse: rule name => { reuse => new name } and/or { skip => 1 }.
my $rules = { };
my $reuse = { };
$rules->{_scoreset} = $scoreset;

readrules(@rulesdirs);

# $scores: rule name => score, extracted from $rules for convenience.
my $scores = { };
foreach my $key (keys %{$rules}) {
  next if $key eq '_scoreset';
  $scores->{$key} = $rules->{$key}->{score};
}

writerules($outputfile);
exit;

# readrules(@dirs)
#
# Parse every *.cf file found in each of the given directories (in sorted
# order within a directory) and fill in the file-level $rules and $reuse
# hashes.  Afterwards, drop entries that never got a rule definition and
# apply default scores / mutability to the remaining rules.
sub readrules {
  foreach my $indir (@_) {
    my @files = <$indir/*.cf>;

    foreach my $file (sort @files) {
      # always start off mutable in each file
      my $scores_mutable = 1;

      next if ($skip_test_rules && $file =~ /70_/);

      # An unreadable file is reported and skipped rather than aborting
      # the whole run.
      my $in;
      if (!open ($in, '<', $file)) {
        warn "cannot read $file: $!\n";
        next;
      }

      while (<$in>) {
        # these appear in comments, so deal with them before comment
        # stripping takes place
        if (/<\/gen:mutable>/i) {
          $scores_mutable = 0;
        }
        elsif (/<gen:mutable>/i) {
          $scores_mutable = 1;
        }

        # oh, this is a dirty dirty hack, but we don't need this at runtime
        s/^#reuse/reuse/;

        s/#.*$//g;
        s/^\s+//;
        s/\s+$//;
        next if /^$/;

        # an optional "lang xx" prefix is stripped and remembered
        my $lang = '';
        if (s/^lang\s+(\S+)\s+//) {
          $lang = $1;
        }

        if (/^(header|rawbody|body|full|uri|meta|mimeheader)\s+(\S+)\s+(.*)$/) {
          my $type = $1;
          my $name = $2;
          my $val  = $3;

          $rules->{$name} ||= { };
          $rules->{$name}->{type} = $type;
          $rules->{$name}->{lang} = $lang;
          $rules->{$name}->{issubrule} = ($name =~ /^__/) ? '1' : '0';

          # Only default the tflags; a "tflags" line seen before the rule
          # definition must not be thrown away.
          $rules->{$name}->{tflags} = ''
              unless defined $rules->{$name}->{tflags};

          $rules->{$name}->{eval} = ($val =~ /\beval:(\w+)/) ? $1 : '0';

          if ($type eq "meta") {
            # every word-like token that is not a pure number is recorded
            # as a dependency of the meta rule
            my @depends = grep { !/^\d+$/ } ($val =~ m/(\w+)/g);
            push(@{ $rules->{$name}->{depends} }, @depends);
          }

          $rules->{$name}->{code} = $val;
        }
        elsif (/^describe\s+(\S+)\s+(.+)$/) {
          my ($name, $text) = ($1, $2);
          $rules->{$name} ||= { };
          if ($lang) {
            # a language-specific description never replaces an existing one
            $rules->{$name}->{describe} ||= $text;
          }
          else {
            $rules->{$name}->{describe} = $text;
          }
        }
        elsif (/^tflags\s+(\S+)\s+(.+)$/) {
          my ($name, $flags) = ($1, $2);
          $rules->{$name} ||= { };
          $rules->{$name}->{tflags} = $flags;
        }
        elsif (/^score\s+(\S+)\s+(.+)$/) {
          my ($name, $score) = ($1, $2);
          $rules->{$name} ||= { };
          if ( $score =~ /\s/ ) {
            # there are multiple scores; pick the one for our scoreset
            ($score) = (split(/\s+/, $score))[$scoreset];
          }
          $rules->{$name}->{score} = $score;
          $rules->{$name}->{mutable} = $scores_mutable;
        }
        elsif (/^reuse\s+(.*)$/) {
          # "reuse NEW [OLD ...]": every listed name (including NEW itself)
          # maps to NEW, and NEW is flagged with skip
          my ($new, @old) = split(' ', $1);
          push @old, $new;
          for my $old (@old) {
            $reuse->{$old} ||= { };
            $reuse->{$old}->{reuse} = $new;
          }
          $reuse->{$new} ||= { };
          $reuse->{$new}->{skip} = 1;
        }
      }
      close $in;
    }
  }

  foreach my $rule (keys %{$rules}) {
    next if ($rule eq '_scoreset');

    if (!defined $rules->{$rule}->{type}) {
      delete $rules->{$rule};   # no rule definition -> no rule
      next;
    }

    my $tflags = $rules->{$rule}->{tflags};

    # default score: 1.0 (0.01 for T_ rules), negated for "nice" rules
    if (!defined $rules->{$rule}->{score}) {
      my $def = 1.0;
      if ($rule =~ /^T_/) {
        $def = 0.01;
      }

      if ($tflags =~ /\bnice\b/) {
        $rules->{$rule}->{score} = -$def;
      }
      else {
        $rules->{$rule}->{score} = $def;
      }
    }

    # ignore net rules in set 0 or set 2
    if ($tflags =~ /\bnet\b/ && ($scoreset & 1) == 0) {
      $rules->{$rule}->{mutable} = 0;
      $rules->{$rule}->{score} = 0;
    }

    # ignore bayes rules in set 0 or set 2
    if ($tflags =~ /\blearn\b/ && ($scoreset & 2) == 0) {
      $rules->{$rule}->{mutable} = 0;
      $rules->{$rule}->{score} = 0;
    }

    # if a rule didn't have a score specified, assume it's
    # mutable
    if (!defined $rules->{$rule}->{mutable}) {
      $rules->{$rule}->{mutable} = 1;
    }

    # although T_ test rules are clamped to 0.01.  this works well
    # for release mass-checks, at least
    if ($rule =~ /^T_/) {
      $rules->{$rule}->{mutable} = 0;
    }
    elsif ($rule eq 'AWL') {
      # ignore entirely
      $rules->{$rule}->{mutable} = 0;
      $rules->{$rule}->{score} = 0;
    }
  }
}

# writerules($outfile)
#
# Dump $rules, $scores and $reuse with Data::Dumper into $outfile as
# %rules, %scores and %reuse, creating the parent directory first if it
# does not exist.  Dies if the file cannot be written.
sub writerules {
  my $outfile = shift;

  # Create the directory that will hold the output file.  This is done
  # in-process so the path is never interpolated into a shell command.
  # Failure here is only a warning: the open() below gives the final word.
  my $outdir = dirname($outfile);
  if (!-d $outdir) {
    eval { mkpath($outdir); 1 }
        or warn "cannot create directory $outdir: $@";
  }

  open (my $out, '>', $outfile) or die "cannot write to $outfile: $!";
  print $out "# dumped at ".scalar(localtime)."\n\n";

  # Purity makes Data::Dumper emit extra statements so that the dump can
  # be eval'ed back even when it contains nested/shared references.
  $Data::Dumper::Purity = 1;
  print $out Data::Dumper->Dump ([$rules, $scores, $reuse],
                                 ['*rules', '*scores', '*reuse']);

  print $out "1;";
  close $out or die "cannot write to $outfile: $!";
}