#!/usr/bin/perl

use strict;
use warnings;

use File::Temp ();
use File::Spec;
use LWP::Simple;
use URI::Escape;
use Data::Dumper;

my $FROM_CACHE;
if (@ARGV && $ARGV[0] eq '--fromcache') {
  $FROM_CACHE = 1;
}

my $MAKE_CACHE;
$MAKE_CACHE = 1;        # turn this on by default, no harm

# we allow promotion of rules that are "ifplugin" one of these
my @def_plugins = map {
      s,^lib/Mail/SpamAssassin/Plugin/(\S+)\.pm$,Mail::SpamAssassin::Plugin::$1,gs;
      $_;
    } <lib/Mail/SpamAssassin/Plugin/*.pm>;

my $PROMOTABLE_PLUGINS_RE = "^" . join("|", @def_plugins) . "\$";

# number of days to look back; if a rule isn't listed as promotable on
# all of these days, it won't be listed.  (we grant an exception for
# new rules that didn't exist on previous days, however, so new rules
# can be published quickly to handle sudden outbreaks without requiring
# manual update work)
my @DAYS_REQUIRED = (1, 2, 3);

###########################################################################

my $cgi_url = "http://ruleqa.spamassassin.org/";

my @doc = ();
my $cache = 'ruleqa.cache.';
my $submitters = '';
my $url;                # tracks the last day used
my $dayoffset = 0;

foreach my $day (@DAYS_REQUIRED) {
  if (!$FROM_CACHE || !-f $cache.$day || (-M $cache.$day) > 0.5) {

with_new_offset:
    $url = $cgi_url.($day+$dayoffset)."-days-ago?xml=1";
    warn "HTTP get: $url\n";
    $doc[$day] = get($url);
    if (!$doc[$day]) {
      die "HTTP get failed: $url\n";
    }

    if ($MAKE_CACHE) {
      open(O, ">$cache$day") or die "cannot write $cache$day: $!";
      print O $doc[$day];
      close O;
    }
  }
  else {
    open(I, "<$cache$day") or die "cannot read $cache$day: $!";
    $doc[$day] = join('', <I>);
    close I;
  }

  ###########################################################################
  # the HTML looks like (sketched; the exact surrounding markup may differ,
  # only the 'mcviewing' and 'mcsubmitters' element names are relied on):
  #
  #   ...
  #   <mcviewing ...>
  #   ...
  #   <mcsubmitters> (space-separated list of submitter ids) </mcsubmitters>
  #   ...
  #
  # in other words, the machine-parseable metadata is embedded in the HTML
  # as a microformat.

  if ($doc[$day] =~ m{<mcsubmitters>\s*(.*?)\s*</mcsubmitters>}sx) {
    my $daysubs = $1;

    # ignore days when the mass-check sets contain a --net log, since
    # it's the weekly --net run.  That generally contains a much
    # smaller set of logs (since it takes longer to run mass-check --net),
    # so the results are untrustworthy.
    if ($daysubs =~ /(?:^|\s)net-/) {
      warn "day $day contains a --net mass-check! offsetting by an extra day\n";
      $dayoffset++;
      goto with_new_offset;
    }

    ($submitters ne '') and $submitters .= "; ";
    $submitters .= "day $day: $daysubs";
  }
  else {
    loghtml_die("no 'mcviewing', 'mcsubmitters' microformats on day $day");
  }
}

###########################################################################
# each rule appears in the xml=1 output as a small XML fragment; the element
# names here are inferred from the hash keys used below (the <rule> wrapper
# name is an assumption), e.g.:
#
#   <rule>
#     <test>__HIGHBITS</test><promo>0</promo>
#     <spc>8.7654</spc><hpc>0.2056</hpc><so>0.977</so>
#     <detailhref esc='1'>ruleqa%3Fdaterev%3Dlast-night%26rule%3D__HIGHBITS%26s_detail%3D1</detailhref>
#   </rule>

my $plist;
foreach my $day (@DAYS_REQUIRED) {
  while ($doc[$day] =~ m!<rule>(.*?)</rule>!xg) {
    my $xml = $1;
    my $obj = { };

    while ($xml =~ s!<([A-Za-z0-9_]+)>(.*?)</\1>!!) {
      $obj->{$1} = $2;
    }
    while ($xml =~ s!<([A-Za-z0-9_]+)\s+esc=["']1["']>(.*?)</\1>!!) {
      $obj->{$1} = uri_unescape($2);
    }

    my $name = $obj->{test};
    $obj->{detailhref} = $cgi_url.$obj->{detailhref};

    $plist->[$day]->{$name} = $obj;
  }

  if (!scalar keys %{$plist->[$day]}) {
    loghtml_die("no rules found? on day $day");
  }
}

###########################################################################

## my $dump = Data::Dumper->Dump([$plist], ['promolist']); print $dump;

# use SpamAssassin classes directly, so we can lint rules
# as we go
use lib 'lib';
use Mail::SpamAssassin;

my $mailsa = Mail::SpamAssassin->new({
    rules_filename => "rules",
    site_rules_filename => join("\000", qw(
            rulesrc/core
            rulesrc/sandbox
          )),
    local_tests_only => 1,
    dont_copy_prefs => 1,
    config_tree_recurse => 1,
    keep_config_parsing_metadata => 1,
    # debug => 1,
});
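
# the promotion loop below reads per-rule metadata straight off the Conf
# object (source_file, if_stack, testrules), which is why
# keep_config_parsing_metadata is enabled above; site_rules_filename is
# given the two rulesrc directories as a single NUL-joined string so the
# core and sandbox rule sources are parsed alongside "rules".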
# hack hack hack!!  we don't want to load plugin files twice,
# and since the mkrules compiler copies from rulesrc/sandbox/*/*.pm
# to rules/*.pm, they would otherwise appear twice.
foreach my $fname (<rulesrc/sandbox/*/*.pm>) {
  my $path = File::Spec->rel2abs($fname);
  $INC{$path} = 1;
  # warn "JMD $path";
}

my %rules_with_errors = ();
$mailsa->{lint_callback} = sub {
  my %opts = @_;

  # ignore non-rule-issue lint failures
  return if ($opts{msg} =~ /(?:
          score\sset\sfor\snon-existent|
          description\sexists
        )/x);

  warn "demoting $opts{rule}: $opts{msg}";
  if ($opts{iserror}) {
    $rules_with_errors{$opts{rule}}++;
  }
};

$mailsa->lint_rules();

print "# active ruleset list, automatically generated from $cgi_url\n";
print "# with results from: $submitters\n";

my @spcs = ($submitters =~ /\s+/g);
if (scalar @spcs < 2) {
  die "not generating results; less than 3 submitter results available!\n";
}

# base most of our decisions on day 1 (last night's mass-checks)
foreach my $plistkey (sort keys %{$plist->[1]}) {
  my $name = $plistkey;
  my $plistobj = $plist->[1]->{$plistkey};
  my $notes = '';

  # rules in sandboxes without a T_ prefix will be renamed during the
  # ruleqa process... in other words, the output freqs line will talk
  # about rule "T_FOO".  if there's a rule "FOO" defined, assume that's
  # the one being talked about.
  my $no_t = $name;
  if ($no_t =~ s/^T_//) {
    if (defined $mailsa->{conf}->{scores}->{$no_t}) {
      $name = $no_t;
    }
  }

  # now that it's ok to have sandbox rules without a T_ prefix, a
  # "T_" prefix implies "tflags nopublish"
  next if ($name =~ /^T_/);

  # ignore rules that don't exist (if they have a desc or score,
  # they exist according to the Conf parser)
  next unless ($mailsa->{conf}->{descriptions}->{$name}
            || $mailsa->{conf}->{scores}->{$name});

  # "nopublish" tflags
  my $tfs = $mailsa->{conf}->{tflags}->{$name};
  if ($tfs) {
    next if ($tfs =~ /\bnopublish\b/);
    if ($tfs =~ /\b(publish)\b/) {
      $notes = "tflags ".$1;
      goto publish;
    }
  }

  # rule was from a file marked with "#testrules" (bug 5545)
  # note: this is after "tflags publish" support, so you can override
  # it on a rule-by-rule basis anyway
  next if ($mailsa->{conf}->{testrules}->{$name});

  # bug 6560: unless specifically declared #testrules, all of these
  # tflags force publication.  include "net", since otherwise this
  # script has to be aware what day of the week it is for weekly
  # net/non-net mass-checks!  very messy.  TODO?
  if ($tfs && $tfs =~ /\b(userconf|learn|net)\b/) {
    $notes = "tflags ".$1;
    goto publish;
  }

  # ignore rules that are not marked as promotable on every one of the
  # days; ie. a rule must be promotable in all 3 to be listed.  (Also
  # allow rules that weren't in existence in the earlier mass-checks.)
  next unless ($plist->[1]->{$plistkey}->{promo});
  next unless ($plist->[2]->{$plistkey}->{promo}
          or !defined($plist->[2]->{$plistkey}->{promo}));
  next unless ($plist->[3]->{$plistkey}->{promo}
          or !defined($plist->[3]->{$plistkey}->{promo}));

  # only rules from "rulesrc" dirs
  my $src = $mailsa->{conf}->{source_file}->{$name};
  next if (!$src || $src !~ /rulesrc/);

  # skip rules that fail lint
  next if $rules_with_errors{$name};

  # skip rules that require a plugin we won't have
  my $skip = 0;
  my $ifs = $mailsa->{conf}->{if_stack}->{$name};
  while ($ifs && $ifs =~ /plugin\s*\((.+?)\)/g) {
    my $pkg = $1;

    # grep out the ones we *do* have, and do use in "ifplugin"
    # lines in "rulesrc", here...
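    # (as currently written, the unconditional "next" that follows skips
    # this check for every plugin: $skip is never incremented, so
    # $PROMOTABLE_PLUGINS_RE is effectively unused and ifplugin rules are
    # not filtered out here)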
    next; #JMD:
    next if ($pkg =~ /${PROMOTABLE_PLUGINS_RE}/o);

    print "\n# not publishing $name: needs $ifs\n";
    $skip++;
  }
  next if $skip;

  # don't output the ever-changing bits of data
  # $notes = "spam=$plistobj->{spc} ham=$plistobj->{hpc} so=$plistobj->{so}";
  $notes = "good enough";

publish:
  print "\n# $notes\n$name\n";
}
exit;

sub loghtml_die {
  die "$_[0]\nURL: $url\n";
}
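
###########################################################################
# illustrative output sketch (rule names are placeholders, not real rules):
# after the two header comments, each promoted rule is printed as a short
# note followed by the rule name, e.g.
#
#   # active ruleset list, automatically generated from http://ruleqa.spamassassin.org/
#   # with results from: day 1: ...; day 2: ...; day 3: ...
#
#   # tflags net
#   EXAMPLE_NET_RULE
#
#   # good enough
#   EXAMPLE_SANDBOX_RULE
#
# typical invocation (script path and output filename are illustrative,
# not something this script itself decides); with --fromcache, any
# ruleqa.cache.<N> file newer than half a day is reused instead of
# re-fetching from the ruleqa server:
#
#   perl <this-script> --fromcache > active.list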