#!/usr/bin/perl -w # # <@LICENSE> # Licensed to the Apache Software Foundation (ASF) under one or more # contributor license agreements. See the NOTICE file distributed with # this work for additional information regarding copyright ownership. # The ASF licenses this file to you under the Apache License, Version 2.0 # (the "License"); you may not use this file except in compliance with # the License. You may obtain a copy of the License at: # # http://www.apache.org/licenses/LICENSE-2.0 # # Unless required by applicable law or agreed to in writing, software # distributed under the License is distributed on an "AS IS" BASIS, # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. # use strict; my $NUM_SCORESETS = 4; my $ZERO_MINISCULE_SCORES = 1; my $MINISCULE_THRESHOLD = 0.1; # points my $UNZERO_META_PREDICATES = 1; # scores are broken into three regions: # 1. "pre" (stuff before generated mutable scores) # 2. "gen" (first generated mutable scores section) # 3. "end" (stuff after generated mutable scores) # 4. "gen2" (any later generated mutable scores sections) # options my ($scoreset, $oldscores, $newscores) = @ARGV; $scoreset = int($scoreset) if defined $scoreset; if (!defined $newscores || $scoreset < 0 || $scoreset >= $NUM_SCORESETS ) { die "usage: rewrite-cf-with-new-scores scoreset oldscores.cf newscores.cf\n"; } # variables filled-out in read_rules() our %rules; # rules data # variables filled-out in read_gascores() my %gascores = (); # generated scores # variables filled-out in read_oldscores() my $pre = ''; # stuff before first "gen" section my $end = ''; # stuff after first "gen" section my %oldscores; # old scores my %comment; # "gen" rule comments my %fixed; # scores that are fixed (non-gen) my %gen2; # scores that are gen in the $end string # compiled output my @gen_order = (); my %gen_lines = (); # read stuff in read_rules(); read_gascores(); read_oldscores(); build_new_scores(); if ($ZERO_MINISCULE_SCORES) { fixup_miniscule_scores(); } if ($UNZERO_META_PREDICATES) { fixup_meta_predicates(); } $end = sub_gen2($end); # write stuff out print $pre; print_gen(); print $end; exit; sub read_rules { system ("./parse-rules-for-masses -s $scoreset") and die; if (-e "tmp/rules.pl") { # note: the spaces need to stay in front of the require to work around # a RPM 4.1 problem require "./tmp/rules.pl"; } else { die "parse-rules-for-masses had no error but no tmp/rules.pl"; } } sub read_gascores { open (STDIN, "<$newscores") or die "cannot open $newscores"; while () { next unless /^score\s+(\S+)\s+(-?\d+(?:\.\d+)?)/; my $name = $1; my $score = $2; # various things we should be concerned about if (!exists $rules{$name}) { warn "$name is not defined in tmp/rules.pl\n"; next; } if ($rules{$name}->{issubrule}) { warn "$name is an indirect sub-rule in tmp/rules.pl\n"; next; } if ($rules{$name} =~ /^__/) { warn "$name has an indirect sub-rule \"__\" prefix\n"; next; } if ($name eq '(null)') { warn "$name is (null)\n"; next; } $gascores{$name} = $score; } } sub read_oldscores { open (IN, "<$oldscores") or die "cannot open $oldscores"; # state of things my $where = "pre"; # region of original scores file that we're in my $seen_gen = 0; # have we seen the first tag? # read everything in while (my $line = ) { if ($line =~ /<\/gen:mutable>/) { $where = "end"; } if ($where eq "pre") { readline_fix($line); $pre .= $line; } elsif ($where eq "gen") { readline_gen($line); } elsif ($where eq "gen2") { readline_gen2($line); $end .= $line; } elsif ($where eq "end") { readline_fix($line); $end .= $line; } if ($line =~ //) { if ($seen_gen) { $where = "gen2"; } else { $where = "gen"; $seen_gen = 1; } } } } # used for both "pre" and "end" sub readline_fix { my ($line) = @_; my $comment; if ($line =~ s/\s*#\s*(.*)//) { $comment = $1; } if ($line =~ /^\s*score\s+(\S+)\s/) { my (undef, $name, @scores) = split(' ', $line); $fixed{$name}++; $comment{$name} = $comment if $comment; } } sub readline_gen { my ($line) = @_; my $comment; if ($line =~ s/\s*#\s*(.*)//) { $comment = $1; $comment =~ s/ n=$scoreset//; } if ($line =~ /^\s*score\s+(\S+)\s/) { my (undef, $name, @scores) = split(' ', $line); for (my $i = 1; $i < $NUM_SCORESETS; $i++) { $scores[$i] = $scores[0] unless defined $scores[$i]; } @{$oldscores{$name}} = @scores; $comment{$name} = $comment if $comment; } } sub readline_gen2 { my ($line) = @_; my $comment; if ($line =~ s/\s*#\s*(.*)//) { $comment = $1; $comment =~ s/ n=$scoreset//; } if ($line =~ /^\s*score\s+(\S+)\s/) { my (undef, $name, @scores) = split(' ', $line); for (my $i = 1; $i < $NUM_SCORESETS; $i++) { $scores[$i] = $scores[0] unless defined $scores[$i]; } @{$oldscores{$name}} = @scores; $comment{$name} = $comment if $comment; $gen2{$name}++; } } sub build_new_scores { # we just consider scores for this set that are in the input or were in the # "gen" region from the old scores, tmp/rules.pl is not considered here my %gen; # rules to be printed in "gen" region $gen{$_} = 1 for keys %gascores; # scores for this set from GA $gen{$_} = 1 for keys %oldscores; # original scores in "gen" region # remove fixed scores for (keys %fixed) { delete $gen{$_}; } # sort all generated rules by name for my $name (sort keys %gen) { next if ($rules{$name}->{lang}); # "lang es" rules etc. next if ($rules{$name}->{issubrule}); # indirect sub-rules next if ($name eq 'AWL'); # dynamic score my @scores = (); my $comment = ''; $comment = $comment{$name} if defined $comment{$name}; # use the old scores if they existed @scores = @{$oldscores{$name}} if exists $oldscores{$name}; # set appropriate scoreset value if (defined $gascores{$name}) { $scores[$scoreset] = $gascores{$name}; delete $oldscores{$name}; } else { # zero for current scoreset if there was no new score; # when the perceptron does this for mutable rules, it means # that score had a new score of 0 $scores[$scoreset] = 0; if (defined $oldscores{$name}) { $comment .= " n=$scoreset"; #warn "$name has no GA score, but had a score before\n"; } } # sort and unique comment tags my %unique; $unique{$_} = 1 for split(' ', $comment); $comment = join(' ', sort keys %unique); push (@gen_order, $name); $gen_lines{$name} = { scores => \@scores, comment => $comment }; } } sub new_score_line { my ($name) = @_; # create new score line my @scores = @{$gen_lines{$name}{scores}}; my $comment = $gen_lines{$name}{comment}; return sprintf("score %s %s%s", $name, join(" ", generate_scores($name, @scores)), ($comment) ? ' # ' . $comment : ''); } sub print_gen { print "\n"; foreach my $name (@gen_order) { next if ($gen2{$name}); # will do that separately print new_score_line($name), "\n"; } print "\n"; } sub sub_gen2 { my $end = shift; foreach my $name (keys %gen2) { if ($end !~ s/^\s*score\s+${name}\s.+?$/ new_score_line($name); /em) { # we failed to sub it; output score in main gen:mutable block instead delete $gen2{$name}; } } $end; } sub generate_scores { my ($name, @scores) = @_; my $isnet = 0; my $islearn = 0; if (defined $rules{$name}->{tflags}) { $isnet = ($rules{$name}->{tflags} =~ /\bnet\b/); $islearn = ($rules{$name}->{tflags} =~ /\blearn\b/); } # set defaults if not already set if (!defined $scores[0]) { warn "$name does not have a default score\n"; $scores[0] ||= 0; } my $flag = 1; for (my $i = 1; $i < $NUM_SCORESETS; $i++) { $scores[$i] = $scores[0] unless defined $scores[$i]; $flag = 0 if ($scores[$i] != $scores[$i-1]); }; # enforce rule/scoreset rules. # net rules never have a non-zero score in sets 0 and 2 for (my $i = 0; $i < $NUM_SCORESETS; $i++) { if ($isnet && ($i & 1) == 0) { $scores[$i] = 0; $flag = 0 if ($i > 0 && $scores[$i] != $scores[$i-1]); } if ($islearn && ($i & 2) == 0) { $scores[$i] = 0; $flag = 0 if ($i > 0 && $scores[$i] != $scores[$i-1]); } } if ($flag) { splice @scores, 1; } return @scores; } sub fixup_miniscule_scores { my $num_fixed = 0; foreach my $name (@gen_order) { my @scores = @{$gen_lines{$name}{scores}}; if (abs($scores[$scoreset]) < $MINISCULE_THRESHOLD) { $scores[$scoreset] = 0; $num_fixed++; } @{$gen_lines{$name}{scores}} = @scores; } warn "zeroed $num_fixed scores for being 'miniscule'.\n"; } sub fixup_meta_predicates { # this is the opposite of t/meta.t while (my ($name, $info) = each %rules) { my $type = $info->{type} || "unknown"; # look at meta rules that are not disabled next unless ($type eq "meta" && ($name =~ /^__/ || $info->{score} != 0)); next unless ($info->{depends}); # test rules should not impose requirements on release rules; ignore # any dependency requirements caused by T_ rules next if $name =~ /^T_/; for my $depend (@{ $info->{depends} }) { if (!exists $rules{$depend}) { warn "$name depends on $depend which is nonexistent\n"; next; } # if dependency is a predicate, it'll run next if $depend =~ /^__/; # not a generated rule? not our problem, then; t/meta.t will catch it next unless (exists $gen_lines{$depend}); # ignore "tflags net" and "tflags learn" rules -- it is OK # for those to have zero scores in some scoresets, for obvious # reasons. next if (defined $rules{$depend}->{tflags} && $rules{$depend}->{tflags} =~ /\b(?:net|learn)\b/); # if dependency has a non-zero score, it'll run my $depscore = $gen_lines{$depend}{scores}[$scoreset]; next if (defined $depscore && $depscore != 0); warn "dep failure: $name depends on $depend with 0 score; fixing at non-0\n"; $gen_lines{$depend}{scores}[$scoreset] = 0.001; } } }