#!/usr/bin/perl -w # # build/mkrules -- compile the SpamAssassin rules into installable form # # <@LICENSE> # Licensed to the Apache Software Foundation (ASF) under one or more # contributor license agreements. See the NOTICE file distributed with # this work for additional information regarding copyright ownership. # The ASF licenses this file to you under the Apache License, Version 2.0 # (the "License"); you may not use this file except in compliance with # the License. You may obtain a copy of the License at: # # http://www.apache.org/licenses/LICENSE-2.0 # # Unless required by applicable law or agreed to in writing, software # distributed under the License is distributed on an "AS IS" BASIS, # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. # # This is an implementation of # http://wiki.apache.org/spamassassin/RulesProjPromotion sub usage { die "build/mkrules [--src srcdir] [--exit_on_no_src] [--out outputdir]\n"; } my $RULE_DEFINE_KEYWORDS_RE = qr{ header|rawbody|body|full|uri |meta|mimeheader|urirhssub|uridnsbl }x; my $RULE_KEYWORDS_RE = qr{ ${RULE_DEFINE_KEYWORDS_RE}| describe|tflags|reuse|score }x; use strict; use File::Find; use File::Copy; use File::Basename; use Getopt::Long; # use SpamAssassin classes directly, so we can lint rules # as we go use lib 'lib'; use Mail::SpamAssassin; use vars qw( @opt_srcs $opt_out $opt_sandboxout $opt_manifest $opt_manifestskip $opt_listpromotable $opt_active $opt_activeout $default_file_header $opt_rulemetadata $opt_exit_on_no_src ); GetOptions("src=s" => \@opt_srcs, "out=s", "sandboxout=s", "activeout=s", "active=s", "manifest=s", "manifestskip=s", "rulemetadata=s", "exit_on_no_src", ); if (!@opt_srcs) { foreach ( 'rulescode', 'rulesrc' ) { if (-d $_) { # print "using default src $_\n"; push(@opt_srcs, $_); } } } if (!$opt_manifest && -f "MANIFEST") { $opt_manifest = "MANIFEST"; } if (!$opt_manifestskip && -f "MANIFEST.SKIP") { $opt_manifestskip = "MANIFEST.SKIP"; } if (!$opt_active && -f "rules/active.list") { $opt_active = "rules/active.list"; } if ($opt_exit_on_no_src) { my $foundone = 0; foreach my $src (@opt_srcs) { if (-d $src) { $foundone++; last; } } if (!$foundone) { print "no source directory found: exiting\n"; exit 0; } } # else die "no src" unless (@opt_srcs >= 1); my $promolist; die "no out" unless ($opt_out); die "unreadable out" unless (-d $opt_out); die "unreadable active" unless (-f $opt_active); $opt_sandboxout ||= "70_sandbox.cf"; $opt_activeout ||= "72_active.cf"; # source files that need compilation, and their targets my $needs_compile = { }; my $found_output = { }; my $current_src; my $newest_src_mtime = 0; my $newest_out_mtime = 0; my $default_file_header = join('', ); compile_utf8ify_function(); foreach my $src (@opt_srcs) { if (!-d $src) { warn "WARNING: unreadable src '$src'\n"; next; } $current_src = $src; File::Find::find ({ wanted => \&src_wanted, no_chdir => 1 }, $src); } # get mtimes of output files; we can be sure that all # output is under the "opt_out" dir, so recurse there File::Find::find ({ wanted => \&out_wanted, no_chdir => 1 }, $opt_out); # we must rebuild if a compiled .pm is missing, too my $found_all_pm_files = 1; foreach my $f (keys %{$needs_compile}) { next unless ($f =~ /\.pm$/i); if (!exists $found_output->{basename $f}) { $found_all_pm_files = 0; } } # check mtime on the active.list file, too { my @st = stat $opt_active; if ($st[9] && $st[9] > $newest_src_mtime) { $newest_src_mtime = $st[9]; } } # check mtimes, and also require that the two required output files # really do exist if ($newest_src_mtime && $newest_out_mtime && $newest_src_mtime < $newest_out_mtime && -f $opt_out.'/'.$opt_sandboxout && -f $opt_out.'/'.$opt_activeout && $found_all_pm_files) { print "mkrules: no rules updated\n"; exit 0; } my $rules = { }; my $file_manifest = { }; my $file_manifest_skip = [ ]; if ($opt_manifest) { read_manifest($opt_manifest); } if ($opt_manifestskip) { read_manifest_skip($opt_manifestskip); } my $active_rules = { }; read_active($opt_active); # context for the rules compiler my $seen_rules = { }; my $renamed_rules = { }; my $output_files = { }; my $output_file_text = { }; my $files_to_lint = { }; # $COMMENTS is a "catch-all" "name", for lines that appear after the last line # that refers to a rule by name. Those lines are not published by themselves; # they'll be published to all pubfiles found in the file. # # It's assumed they are comments, because they generally are, but could be all # sorts of unparseable lines. my $COMMENTS = '!comments!'; # another "fake name" for lines that should always be published. They'll # be published to the non-sandbox file. my $ALWAYS_PUBLISH = '!always_publish!'; read_all_rules($needs_compile); read_rules_from_output_dir(); compile_output_files(); lint_output_files(); write_output_files(); exit; # --------------------------------------------------------------------------- sub lint_output_files { foreach my $file (keys %{$files_to_lint}) { my $text = join("\n", "file start $file", $output_file_text->{$file}, "file end $file"); if (lint_rule_text($text) != 0) { warn "\nERROR: LINT FAILED, suppressing output: $file\n\n"; # don't suppress entirely, otherwise 'make distcheck'/'disttest' # will fail since the MANIFEST-listed output files will be # empty. # delete $output_file_text->{$file}; $output_file_text->{$file} = ''; } } } sub lint_rule_text { my ($text) = @_; # ensure we turn off slow/optional stuff for linting! my $pretext = q{ loadplugin Mail::SpamAssassin::Plugin::Check use_bayes 0 }; my $mailsa = Mail::SpamAssassin->new({ rules_filename => "./rules", # debug => 1, local_tests_only => 1, dont_copy_prefs => 1, config_text => $pretext.$text }); my $errors = 0; $mailsa->{lint_callback} = sub { my %opts = @_; return if ($opts{msg} =~ / (?:score\sset\sfor\snon-existent|description\sexists) /x); warn "lint: $opts{msg}"; if ($opts{iserror}) { $errors++; } }; $mailsa->lint_rules(); $mailsa->finish(); return $errors; # 0 means good } sub src_wanted { my $path = $File::Find::name; # record stat times of directories, too, to catch file additions/removals # in the source tree my @st = stat $path; if ($st[9] && $st[9] > $newest_src_mtime) { $newest_src_mtime = $st[9]; } # only files from now on, though return if (!-f $path); # limit what will be copied from sandboxes return if ($path =~ /\bsandbox\b/ && !/(?:\d.*\.cf|\.pm)$/i); # don't use generated scores; they can be out of sync with what is currently # in the sandboxes or the most current active.list file at any given time return if ($path =~ /\bscores\b/); # a bit of sanity please - no svn metadata ;) return if ($path =~ /\.svn/); my $dir = $path; $dir =~ s/^${current_src}[\/\\\:]//s; $dir =~ s/([^\/\\\:]+)$//; my $filename = $1; my $f = "$current_src/$dir$filename"; my $t; $t = "$opt_out/$filename"; $needs_compile->{$f} = { f => $f, t => $t, dir => $dir, filename => $filename }; } sub out_wanted { my $path = $File::Find::name; return unless (-f $path); return if ($path =~ /\.svn/); return unless ($path =~ /\.(?:cf|pm)$/i); my @st = stat $path; if ($st[9] && $st[9] > $newest_out_mtime) { $newest_out_mtime = $st[9]; } my $dir = $path; $dir =~ s/^${current_src}[\/\\\:]//s; $dir =~ s/([^\/\\\:]+)$//; my $filename = $1; if ($path =~ /\.pm$/i) { $found_output->{$filename} = 1; } } # compile all the source files found by the src_wanted() sub, in sorted # order so that the order of precedence makes sense. sub read_all_rules { my ($sources) = @_; # deal with the perl modules first, so that later linting w/ loadplugin will # work appropriately. foreach my $f (sort { my ($ae) = $a =~ /\.(cf|pm)$/; my ($be) = $b =~ /\.(cf|pm)$/; return $be cmp $ae || $a cmp $b; } keys %$sources) { my $entry = $needs_compile->{$f}; my $t = $entry->{t}; # TODO: dependency checking optimization? ## my $needs_rebuild = 0; ## if (!-f $t || -M $t > -M $f) { ## # the source file is newer, or dest is not there ## $needs_rebuild = 1; ## } my $needs_rebuild = 1; if ($entry->{filename} =~ /\.pm$/) { plugin_file_compile($entry); } elsif ($entry->{dir} =~ /sandbox/) { rule_file_compile($f, $t, $entry->{filename}, { issandbox => 1 }); } elsif ($entry->{dir} =~ /scores/) { rule_file_compile($f, $t, $entry->{filename}, { issandbox => 1, isscores => 1 }); } elsif ($entry->{dir} =~ /extra/) { # 'extra' rulesets; not built by default (TODO) next; } else { # rules in "core" and "lang" are always copied if ($needs_rebuild) { rule_file_compile($f, $t, $entry->{filename}, { }); } } } } ########################################################################### # Rules are compiled from source dir to output dir. # # Rules in "rules/active.list" are promoted to "72_active.cf"; rules not # listed there are relegated to "70_sandbox.cf". There is code to allow # other filenames to be selected from the rulesrc .cf file, but I'm not # sure if it works anymore ;) # # Rules will be autorenamed, if there's a collision between a new rule name and # one that's already been output by the compiler in another source file. The # autorenaming is very simple -- portions of the current source path are # appended to the rule name, sanitised. sub rule_file_compile { my ($f, $t, $filename, $flags) = @_; my $issandbox = $flags->{issandbox}; my $isscores = $flags->{isscores}; open (IN, "<$f") or die "cannot read $f"; # a fast parser for the config file format; don't need the # full deal here, and it must be fast, since it's run on every # "make" invocation my $rule_order = [ ]; my $lastrule = $COMMENTS; if (!defined $rules->{$ALWAYS_PUBLISH}) { $rules->{$ALWAYS_PUBLISH} = rule_entry_create(); } # an "ifplugin" or "if" scope my $current_conditional; my $current_comments = ''; while () { my $orig = $_; s/^#reuse/reuse/; # TODO - dirty hack. we need to fix this to just be # a keyword which the engine ignores, this is absurd! s/#.*$//g; s/^\s+//; s/\s+$//; # drop comments/blank lines from output next if (/^$/); # save "lang" declarations my $lang = ''; if (s/^lang\s+(\S+)\s+//) { $lang = $1; } if (/^(${RULE_KEYWORDS_RE})\s+(\S+)\s+(.*)$/) { # rule definitions my $type = $1; my $name = $2; my $val = $3; my $origname = $name; if ($issandbox) { $name = sandbox_rule_name_avoid_collisions($name, $f); } # non-sandbox rules always use the same name # comment "score" lines for sandbox rules (bug 5558) # use generated scores, though, if the rule is active if ($type eq 'score' && $issandbox && !($isscores && $active_rules->{$name})) { $orig =~ s/^/#/g; } if (!$rules->{$name}) { $rules->{$name} = rule_entry_create(); } $rules->{$name}->{issandbox} = $issandbox; $rules->{$name}->{isscores} = $isscores; $rules->{$name}->{origname} = $origname; $rules->{$name}->{cond} ||= $current_conditional; $rules->{$name}->{text} .= $current_comments . $orig; # note if the conditional is a plugin reference, as we need to # ensure that "loadplugin" lines stay in the same place if ($current_conditional && $current_conditional =~ /ifplugin\s+(\S+)/) { $rules->{$name}->{ifplugin} = $1; } # note if we found the rule defn or not. if we did not, # that means the rule was a code-tied rule, which should always # have its descriptions/scores/etc. published in "active". if ($type =~ /^${RULE_DEFINE_KEYWORDS_RE}$/x) { $rules->{$name}->{found_definition} = 1; $rules->{$name}->{srcfile} = $f; $rules->{$name}->{code} = $orig; } elsif ($type eq 'tflags') { # userconf rules are always published in "active" if ($val =~ /\buserconf\b/) { $rules->{$name}->{forceactive} = 1; } # record for rulemetadata code $val =~ s/\s+/ /gs; if ($rules->{$name}->{tflags}) { $rules->{$name}->{tflags} .= ' '.$val; } else { $rules->{$name}->{tflags} = $val; } } $current_comments = ''; $lastrule = $name; push (@$rule_order, $name); } elsif (/^ (pubfile|publish) \s+(\S+)\s*(.*?)$ /x) { # preprocessor directives my $command = $1; my $name = $2; my $val = $3; my $origname = $name; # note: if we call sandbox_rule_name_avoid_collisions(), it'll # rename to 'T_RULENAME' -- which is exactly what we're trying # to avoid in 'publish RULENAME' lines! so don't call it here. # if ($issandbox) { # $name = sandbox_rule_name_avoid_collisions($name, $f); # } if (!$rules->{$name}) { $rules->{$name} = rule_entry_create(); } $rules->{$name}->{origname} = $origname; if ($command eq 'publish') { # the 'publish' command defaults to "1", unless it explicitly # is set to "0". iow: publish RULE_NAME [(0 | 1)] [default: 1] if (!defined $val || $val eq '') { $val = '1'; } } elsif ($command eq 'pubfile') { if (!filename_in_manifest($opt_out.'/'.$val)) { warn "$val: WARNING: not listed in manifest file, using default\n"; next; # don't set 'pubfile' below } } $rules->{$name}->{$command} = $val; # if we see "publish NAMEOFRULE", that means the rule is # considered active if ($rules->{$name}->{publish}) { $rules->{$name}->{forceactive} = 1; } } elsif (/^ (if|ifplugin) \s+(.*?)$ /x) { if ($current_conditional) { $current_conditional .= "#|#".$orig; } else { $current_conditional = $orig; } } elsif (/^endif\b/x) { undef $current_conditional; } elsif (/^require_version\s*(\S+)\b/) { # silently ignored. TODO? (meh) } elsif (/^loadplugin\s*(\S+)\b/) { my $name = 'loadplugin_'.$1; unless ($rules->{$name}) { $rules->{$name} = rule_entry_create(); $rules->{$name}->{issandbox} = $issandbox; $rules->{$name}->{iscommand} = 1; } if (/^loadplugin\s*\S+\s+(\S+)/) { my $fname = $1; my $fpath = dirname($f)."/".$fname; if (!-f $fpath) { warn "$f: WARNING: plugin code file '$fpath' not found, line ignored: $orig"; next; } if ($fpath =~ /sandbox/i) { # Since this is a sandbox plugin, force its output to the sandbox area. $rules->{$name}->{sandbox_plugin} = 1; } # If a 'loadplugin' line is found, and the plugin .pm is not listed in # the MANIFEST file, this will mean that the .pm will not be copied # during "make dist". This causes failures during "make disttest", # since the file does not exist. # # However, we do want to preserve these lines in the 'rules' dir, for # use during development -- without requiring that the .pm's be put # into MANIFEST -- ie. before the plugin is considered release-ready, # ie. sandbox plugins. # # fix: make it a "tryplugin" line instead; these are ignored if the # target file is nonexistent. if (!filename_in_manifest($opt_out.'/'.$fname)) { warn "$f: WARNING: '$opt_out/$fname' not listed in manifest file, making 'tryplugin': $orig"; $orig =~ s/^\s*loadplugin\b/tryplugin/; } } $rules->{$name}->{text} .= $orig; unshift (@$rule_order, $name); } else { # an unhandled configuration line; "redirector_pattern", # "report", something like that. This should be sent to # the active.cf output (or sandbox if it appeared in a sandbox # input file). # use the line itself as a key my $name = $_; /^\s*(\S+)/ and $name = $1; $name =~ s/\s+/ /gs; my $forceactive = 1; # always send 'test' lines to the sandbox files if (/^test\s*/) { $forceactive = 0; $name = $_; # ensure we don't drag rules with us though! $name =~ s/\s+/ /gs; } my $cond; if ($current_conditional) { $name = $current_conditional; $name =~ s/\#\|\#/ /gs; $name =~ s/\s+/ /gs; $cond = $current_conditional; } if ($issandbox) { $name .= "_sandbox"; } unless ($rules->{$name}) { $rules->{$name} = rule_entry_create(); } $rules->{$name}->{cond} ||= $cond; $rules->{$name}->{issandbox} = $issandbox; $rules->{$name}->{forceactive} = $forceactive; # $rules->{$name}->{forceactive} = 1; $rules->{$name}->{iscommand} = 1; # warn "unknown line in rules file '$f', saving to default: $orig"; $rules->{$name}->{text} .= $orig; unshift (@$rule_order, $name); } } close IN; if ($current_comments) { $rules->{$COMMENTS}->{text} .= $current_comments; } # now append all the found text to the output file buffers copy_to_output_buffers($rule_order, $issandbox, $f, $filename); # ok; file complete. now mark all those rules as "seen"; future # refs to those rule names will trigger an autorename. foreach my $name (@$rule_order) { $seen_rules->{$name} = 1; } } # this is only run if we're generating rulemetadata! sub read_rules_from_output_dir { return unless ($opt_rulemetadata); foreach my $file (<$opt_out/*.cf>) { next unless ($file =~ /\d\d_\S+\.cf$/); next if (pubfile_is_activeout($file)); next if (pubfile_is_sandboxout($file)); read_output_file($file); } } sub read_output_file { my ($file) = @_; open (IN, "<$file") or warn "cannot read $file"; while () { my $orig = $_; s/^#reuse/reuse/; # TODO - dirty hack. we need to fix this to just be # a keyword which the engine ignores, this is absurd! s/#.*$//g; s/^\s+//; s/\s+$//; # drop comments/blank lines from output next if (/^$/); # save "lang" declarations my $lang = ''; if (s/^lang\s+(\S+)\s+//) { $lang = $1; } if (/^(${RULE_KEYWORDS_RE})\s+(\S+)\s+(.*)$/) { # rule definitions my $type = $1; my $name = $2; my $val = $3; # note: we only want to do this if --rulemetadata is in use! if (!$rules->{$name}) { $rules->{$name} = rule_entry_create(); } if ($type eq 'tflags') { $val =~ s/\s+/ /gs; if ($rules->{$name}->{tflags}) { $rules->{$name}->{tflags} .= ' '.$val; } else { $rules->{$name}->{tflags} = $val; } } if ($type =~ /^${RULE_DEFINE_KEYWORDS_RE}$/x) { $rules->{$name}->{srcfile} = $file; $rules->{$name}->{code} = $orig; } } } close IN; } sub copy_to_output_buffers { my ($rule_order, $issandbox, $f, $filename) = @_; # always output these two files, even if they're empty! foreach my $pubfile ($opt_out.'/'.$opt_sandboxout, $opt_out.'/'.$opt_activeout) { $output_files->{$pubfile} = { header => $default_file_header }; } my %already_done = (); my $copied_active = 0; my $copied_other = 0; foreach my $name (@$rule_order) { # only do each rule once, please ;) next if exists $already_done{$name}; $already_done{$name} = undef; my $text = $rules->{$name}->{text}; if (!$text) { next; # nothing to write! } my $srcfile = $rules->{$name}->{srcfile}; my $pubfile = pubfile_for_rule($rules, $name); my $is_active = 0; if (pubfile_is_activeout($pubfile)) { $is_active++; } my $cond = $rules->{$name}->{cond}; my $pluginclass = $rules->{$name}->{ifplugin}; if ($cond) { my $ifplugin_text_name = "loadplugin_".($pluginclass || ""); if ($pluginclass && $rules->{$ifplugin_text_name}) { # if the plugin is a sandbox plugin, ensure it's not # sent to the active file if ($rules->{$ifplugin_text_name}->{sandbox_plugin}) { $pubfile = $opt_out.'/'.$opt_sandboxout; $is_active = 0; } # either way, ensure the "loadplugin" line, if there is one, # goes to the same file $rules->{$ifplugin_text_name}->{output_file} = $pubfile; } # ensure we produce enough "endif"s to match however many # nested conditions there are my $endifs = "endif\n"; while ($cond =~ s/\#\|\#//) { $endifs .= "endif\n"; } $rules->{$name}->{output_text} = "\n".$cond.$text.$endifs; } else { $rules->{$name}->{output_text} = $text; } # note the target file $rules->{$name}->{output_file} = $pubfile; $output_files->{$pubfile} = { header => $default_file_header }; if ($is_active) { $copied_active++; } else { $copied_other++; } } print "$f: $copied_active active rules, ". "$copied_other other\n"; } sub pubfile_for_rule { my ($rules, $name) = @_; my $pubfile; if ($rules->{$name}->{publish}) { # "publish NAMEOFRULE" => send it to active $pubfile = $opt_out.'/'.$opt_activeout; } # default: "70_sandbox.cf" or "72_active.cf" if (!$pubfile) { if ($active_rules->{$name} # is active || $rules->{$name}->{forceactive} # or is forced to be || (!$rules->{$name}->{found_definition} && !$rules->{$name}->{iscommand} && !$rules->{$name}->{isscores})) # or is a rule-related setting in reference to an unknown rule # but isn't a generated score { $pubfile = $opt_out.'/'.$opt_activeout; } elsif ($rules->{$name}->{issandbox}) { $pubfile = $opt_out.'/'.$opt_sandboxout; } else { warn "oops? inactive rule, non-sandbox, shouldn't be possible anymore"; $pubfile = $opt_out.'/'.$opt_sandboxout; } } return $pubfile; } sub plugin_file_compile { my ($entry) = @_; return if $opt_listpromotable; # just copy the raw perl module over to the new area # we can't really rename to avoid conflicts since the loadplugin lines # are going to be all screwed up in that case. # jm: we always want to update the output file in case the input # has been changed! if (0 && -e $entry->{t}) { warn "The perl module ".$entry->{t}." already exists, can't copy from ".$entry->{f}."\n"; } else { copy($entry->{f}, $entry->{t}) || warn "Couldn't copy ".$entry->{f}.": $!"; } } ########################################################################### sub compile_output_files { my $always = $rules->{$ALWAYS_PUBLISH}->{output_text}; # create all known output files foreach my $file (keys %$output_files) { $output_file_text->{$file} = $output_files->{$file}->{header}; if ($always && pubfile_is_activeout($file)) { $output_file_text->{$file} .= $always; } } # this is a horrible kluge. # at this point in the game, we've lost the ordered list of rules, so the # loadplugin lines have no guarantee that they'll be loaded before the rules # that require them. so we kluge the sort to always have loadplugin lines # appear at the very top of the array so we know they'll be listed before # anything else. my @rulenames = sort { if ($a =~ /^loadplugin_/) { return -1; } elsif ($b =~ /^loadplugin_/) { return 1; } return $a cmp $b; } keys %$rules; my %seen = (); # go through the rules looking for meta subrules we # may have forgotten; this happens if a non-subrule is # listed in active.list, the subrules will not be! fix them # to appear in the same output file as the master rule. foreach my $rule (@rulenames) { fix_up_rule_dependencies($rule); } # now repeat, just for rules in the active set; their dependencies should # always be likewise promoted into the active set, overriding the prev step. foreach my $rule (@rulenames) { my $pubfile = $rules->{$rule}->{output_file}; next unless ($pubfile && pubfile_is_activeout($pubfile)); fix_up_rule_dependencies($rule); } my $rulemd = ''; # output the known rules that are not meta subrules. foreach my $rule (@rulenames) { $rulemd .= get_rulemetadata_string($rule); # all metadata strings next if ($rule =~ /^__/); my $pubfile = $rules->{$rule}->{output_file}; my $text = $rules->{$rule}->{output_text}; next unless defined ($text); $output_file_text->{$pubfile} .= "##{ $rule\n". $text. "##} ".$rule."\n\n"; } # now output all subrules (in a slightly more compact form) foreach my $rule (@rulenames) { next unless ($rule =~ /^__/); my $pubfile = $rules->{$rule}->{output_file}; my $text = $rules->{$rule}->{output_text}; next unless defined ($text); $output_file_text->{$pubfile} .= $text; } # finally, finish off all output files foreach my $file (keys %$output_files) { # and get them lint-checked! $files_to_lint->{$file} = 1; } if ($opt_rulemetadata) { open (RULEMD, ">".$opt_rulemetadata) or die "cannot write rulemd to $opt_rulemetadata"; print RULEMD "\n", "", $rulemd, "\n"; close RULEMD or die "cannot close rulemd to $opt_rulemetadata"; } } # conditionally build a method to UTF-8-encode a string. this is only required # for the rulemetadata XML output, so don't make it mandatory! sub compile_utf8ify_function { if (!eval ' sub utf8ify { use Encode; return Encode::encode("UTF-8", $_[0]); } 1; ') { eval ' sub utf8ify { die "unimplemented -- Encode module required!" } 1; ' } } sub get_rulemetadata_string { my ($rule) = @_; return '' unless ($opt_rulemetadata); my $mod = 0; my $srcfile = ''; my $code = ''; my $name = $rule; # if we found a rule definition with a T_ prefix, use that data if (!$rules->{$name}->{srcfile} && $rules->{"T_".$name}->{srcfile}) { $name = "T_".$name; } if ($rules->{$name}->{srcfile}) { $srcfile = $rules->{$name}->{srcfile}; if ($srcfile) { my @s = stat $srcfile; if (@s) { $mod = $s[9]; } } } if ($rules->{$name}->{code}) { $code = $rules->{$name}->{code}; $code =~ s/\]\]>/\](defanged by mkrules)\]>/gs; # ensure it's CDATA-safe $code = utf8ify($code); } my $tf = $rules->{$name}->{tflags} || ''; return "". "$rule". "$srcfile". "$mod". # don't include blocks; they bloat up the XML badly (to 800KB) # and make it very slow to parse later # "". "$tf". "\n"; } sub fix_up_rule_dependencies { my $rule = shift; my $pubfile = $rules->{$rule}->{output_file}; my $text = $rules->{$rule}->{output_text}; return unless $text; while ($text =~ /^\s*meta\s+(.*)$/mg) { my $line = $1; while ($line =~ /\b([_A-Za-z0-9]+)\b/g) { # force that subrule (if it exists) to output in the # same pubfile my $rule2 = $1; # deal with rules that changed name from "FOO" to "T_FOO" sed_renamed_rule_names(\$rule2); next unless ($rules->{$rule2} && $rules->{$rule2}->{output_file}); # don't do this if the subrule would be moved *out* of the # active file! my $pubfile2 = $rules->{$rule2}->{output_file}; next if (pubfile_is_activeout($pubfile2)); $rules->{$rule2}->{output_file} = $pubfile; } } } sub pubfile_is_activeout { return 1 if ($_[0] && $_[0] =~ /\b\Q$opt_activeout\E$/); return 0; } sub pubfile_is_sandboxout { return 1 if ($_[0] && $_[0] =~ /\b\Q$opt_sandboxout\E$/); return 0; } sub write_output_files { foreach my $pubfile (sort keys %$output_files) { if (-f $pubfile) { unlink $pubfile or die "cannot remove output file '$pubfile'"; } if (!filename_in_manifest($pubfile)) { warn "$pubfile: WARNING: not listed in manifest file\n"; } my $text = $output_file_text->{$pubfile}; if ($text) { open (OUT, ">".$pubfile) or die "cannot write to output file '$pubfile'"; sed_renamed_rule_names(\$text); print OUT $text; close OUT or die "cannot close output file '$pubfile'"; # print "$pubfile: written\n"; # too noisy } else { print "$pubfile: no rules promoted\n"; # create an empty file anyway to satisfy MANIFEST open (OUT, ">".$pubfile) or die "cannot write to output file '$pubfile'"; close OUT or die "cannot close output file '$pubfile'"; } } } ########################################################################### sub rule_entry_create { return { text => '', publish => 0 }; } ########################################################################### sub sandbox_rule_name_avoid_collisions { my ($rule, $path) = @_; my $new; my $newreason; my $dowarn = 0; return $rule if $opt_listpromotable; return $rule if $active_rules->{$rule}; return $rule if $rules->{$rule}->{forceactive}; if ($rule !~ /^(?:T_|__)/) { $new = "T_".$rule; $newreason = "missing T_ prefix"; } elsif (!exists $seen_rules->{$rule}) { return $rule; } else { $new = $path; $new =~ s/[^A-Za-z0-9]+/_/gs; $new =~ s/_+/_/gs; $new =~ s/^_//; $new =~ s/_$//; $new = $rule.'_'.$new; $newreason = "collision with existing rule"; $dowarn = 1; } if (!$renamed_rules->{$new}) { $renamed_rules->{$new} = $rule; if ($dowarn) { warn "WARNING: $rule: renamed as $new due to $newreason\n"; } } return $new; } sub sed_renamed_rule_names { my ($textref) = @_; foreach my $new (keys %{$renamed_rules}) { my $rule = $renamed_rules->{$new}; $$textref =~ s/\b${rule}\b/${new}/gs; } } ########################################################################### sub read_manifest { my ($mfest) = @_; open (IN, "<$mfest") or warn "cannot read $mfest"; while () { next if /^#/; /^\s*(.*?)\s*$/ and $file_manifest->{$1} = 1; } close IN; } sub read_manifest_skip { my ($mfest) = @_; open (IN, "<$mfest") or warn "cannot read $mfest"; while () { next if /^#/; /^\s*(.*?)\s*$/ and push (@{$file_manifest_skip}, qr/$1/); } close IN; } sub read_active { my ($fname) = @_; open (IN, "<$fname") or die "cannot read $fname"; while () { s/#.*$//; next if /^\s*$/; /^(\S+)/ and $active_rules->{$1} = 1; } close IN; } sub filename_in_manifest { my ($fname) = @_; return 1 if ($file_manifest->{$fname}); foreach my $skipre (@{$file_manifest_skip}) { return 1 if ($fname =~ $skipre); } return 0; } __DATA__ # SpamAssassin rules file # # Please don't modify this file as your changes will be overwritten with # the next update. Use @@LOCAL_RULES_DIR@@/local.cf instead. # See 'perldoc Mail::SpamAssassin::Conf' for details. # # <@LICENSE> # Licensed to the Apache Software Foundation (ASF) under one or more # contributor license agreements. See the NOTICE file distributed with # this work for additional information regarding copyright ownership. # The ASF licenses this file to you under the Apache License, Version 2.0 # (the "License"); you may not use this file except in compliance with # the License. You may obtain a copy of the License at: # # http://www.apache.org/licenses/LICENSE-2.0 # # Unless required by applicable law or agreed to in writing, software # distributed under the License is distributed on an "AS IS" BASIS, # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. # # ########################################################################### require_version @@VERSION@@