#!/usr/bin/perl -w
#
# <@LICENSE>
# Licensed to the Apache Software Foundation (ASF) under one or more
# contributor license agreements.  See the NOTICE file distributed with
# this work for additional information regarding copyright ownership.
# The ASF licenses this file to you under the Apache License, Version 2.0
# (the "License"); you may not use this file except in compliance with
# the License.  You may obtain a copy of the License at:
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
#

# Reads a hit-frequency report (from the files named after the first two
# arguments, or from STDIN) and writes one line per rule to tmp/ranges.data:
#
#     <low score> <high score> <mutable flag> <rule name>
#
# Arguments:
#   1. rules directory handed to parse-rules-for-masses (default "../rules")
#   2. score set number (default 0)
#   remaining arguments: input files, read with <>
#
# Immutable rules are pinned to their current score; mutable rules get a
# range of 0 .. 4.5 * ranking (or -4.5 * ranking .. 0 for "nice" rules).

use strict;
use warnings;

# Filled in by the rules file that parse-rules-for-masses generates and that
# is loaded with require() below.
our %rules;

my $argcffile = shift @ARGV;
my $scoreset  = shift @ARGV;
$argcffile = "../rules" if !defined $argcffile;
$scoreset  = 0          if !defined $scoreset;

my %freq         = ();
my %freq_spam    = ();
my %freq_nonspam = ();

# Totals taken from the "(all messages)" line; recorded but not otherwise
# used by this script.
my $num_spam;
my $num_nonspam;
my $num_total;

my %mutable_tests = ();
my %ranking       = ();
my %soratio       = ();
my %is_nice       = ();

# The work directory must exist before the helper writes into it.  An
# already-existing directory is not an error.
if (!mkdir("tmp", 0755)) {
  my $mkdir_err = $!;
  die "Couldn't create tmp directory!: $mkdir_err\n" unless -d "tmp";
}

# The leading "./" makes require() load the file by path instead of
# searching @INC, which no longer contains "." on Perl 5.26 and later.
my $tmpf = "./tmp/rules$$.pl";

# List-form system() bypasses the shell, so a rules path containing quotes
# or other shell metacharacters is passed through untouched.
my @parse_cmd = ("../build/parse-rules-for-masses",
                 "-d", $argcffile, "-s", $scoreset, "-o", $tmpf);
system(@parse_cmd) == 0
  or die "parse-rules-for-masses failed (wait status $?)\n";

# Load the generated rules, and remove the temporary file whether or not
# loading succeeded.
my $loaded   = eval { require $tmpf; 1 };
my $load_err = $@;
unlink $tmpf;
die "cannot load $tmpf: $load_err" unless $loaded;

# One report line: five numeric columns, one column that is skipped, then
# the rule name.  The name is matched non-greedily so that trailing
# whitespace (including a CR) does not become part of it.
my $freq_line_re = qr{
  ^ \s*
  ([\d.]+) \s+      # overall
  ([\d.]+) \s+      # spam
  ([\d.]+) \s+      # nonspam
  ([\d.]+) \s+      # S/O ratio
  ([\d.]+) \s+      # ranking
  \S+      \s+      # column not used here
  (.+?)             # rule name
  \s* $
}x;

while (my $line = <>) {
  # Lines that do not look like a report row are skipped.
  my ($overall, $spam, $nonspam, $soratio, $ranking, $test) =
      $line =~ $freq_line_re
    or next;
  $_ += 0 for ($overall, $spam, $nonspam, $soratio, $ranking);

  if ($test eq '(all messages)') {
    $num_spam    = $spam;
    $num_nonspam = $nonspam;
    $num_total   = $spam + $nonspam;
    next;
  }
  next if $test eq '(all messages as %)';

  if (!defined $rules{$test}) {
    warn "$test: rule no longer exists; ignoring\n";
    next;
  }

  $freq{$test}         = $overall;
  $freq_spam{$test}    = $spam;
  $freq_nonspam{$test} = $nonspam;

  my $tflags = $rules{$test}->{tflags} || '';
  $mutable_tests{$test} = 1;

  # "userconf" rules, or "net" rules in set 0/2, or "learn" rules
  # in set 0/1, are nonmutable.
  if ($tflags =~ /\buserconf\b/) {
    print "$test: immutable due to 'userconf'\n";
    $mutable_tests{$test} = 0;
  }
  elsif (($scoreset & 1) == 0 && $tflags =~ /\bnet\b/) {
    print "$test: immutable due to 'net'\n";
    $mutable_tests{$test} = 0;
  }
  elsif (($scoreset & 2) == 0 && $tflags =~ /\blearn\b/) {
    print "$test: immutable due to 'learn'\n";
    $mutable_tests{$test} = 0;
  }
  elsif (!$rules{$test}->{mutable}) {
    # rules read from the non-mutable section
    print "$test: immutable according to parse-rules\n";
    $mutable_tests{$test} = 0;
  }
  # Rules whose current score is 0 are deliberately NOT made immutable here:
  # that causes trouble, since rewrite-with-new-scores has a tendency to
  # "simplify" scores down to 0.  Real zero-scored rules that were scored
  # zero when the mass-check ran will also have no hits, and the
  # 'less than 0.01%' case below takes care of that.

  $is_nice{$test} = ($tflags =~ m/\bnice\b/i) ? 1 : 0;

  # less than 0.01% of messages were hit: force these rules to 0.0
  if ($overall < 0.01) {
    print "$test: zeroing rule and marking immutable, due to low hitrate\n";
    $mutable_tests{$test}  = 0;
    $soratio{$test}        = 0.5;
    $ranking{$test}        = 0.0;
    $rules{$test}->{score} = 0;    # tvd - disable these rules automagically
  }
  else {
    $soratio{$test} = $soratio;
    $ranking{$test} = $ranking;
  }
}

my $ranges_file = "tmp/ranges.data";
open(my $out, '>', $ranges_file)
  or die "cannot open $ranges_file for writing: $!\n";

# Highest-ranked rules first; ties are broken by rule name so the output
# order does not depend on hash ordering.
foreach my $test (sort { $ranking{$b} <=> $ranking{$a} or $a cmp $b }
                  keys %freq)
{
  if (!defined $rules{$test}) {
    warn "$test: rule not found! forcing score to 0";
    print {$out} "0 0 0 $test\n";
    next;
  }

  # non-mutable -- lock down to current score.
  if (!$mutable_tests{$test}) {
    my $score = $rules{$test}->{score};
    printf {$out} "%3.3f %3.3f 0 %s\n", $score, $score, $test;
    next;
  }

  # Rule names and flags are passed as printf arguments rather than being
  # interpolated into the format string.
  my ($lo, $hi) = $is_nice{$test}
                ? ($ranking{$test} * -4.5, 0)
                : (0, $ranking{$test} * 4.5);
  printf {$out} "%3.1f %3.1f %s %s\n", $lo, $hi, $mutable_tests{$test}, $test;
}

# Buffered write errors only surface at close time.
close($out) or die "error writing $ranges_file: $!\n";
exit;