#!/usr/bin/perl

use warnings;
use strict;

# Compare hit rule differences for two logfiles from similar mass-check
# runs.  Might be used after tweaking some code and checking if anything
# changed.  Assumed that the exact same corpus is used for both runs.
#
# $ ./logrulediff ham-hege.log.old ham-hege.log
# ham/0d055650bfee6a5a0a1b43944f73eb7bb7fa7d39 +NEW_RULE -DISAPPEARED_RULE
#
# Output (stdout): one line per message whose rule set differs, with
# "+RULE" for rules hit only in the second log and "-RULE" for rules hit
# only in the first log.  Messages present in only one of the logs are
# reported on stderr.

# Check the argument count first so that -f is never applied to an
# undefined value (which would emit "uninitialized" warnings).
die "Usage: $0 <logfile1> <logfile2>\n"
  unless @ARGV >= 2 && -f $ARGV[0] && -f $ARGV[1];

my %rules1 = %{ read_rules($ARGV[0]) };
my %rules2 = %{ read_rules($ARGV[1]) };

# Report per-message rule differences, in sorted message order.
foreach my $f (sort keys %rules1) {
  if (!exists $rules2{$f}) {
    print STDERR "!!! $f not found in second logfile\n";
    next;
  }

  # Rules are sorted so that the output is reproducible between runs.
  my @subs = grep { !exists $rules2{$f}{$_} } sort keys %{ $rules1{$f} };
  my @adds = grep { !exists $rules1{$f}{$_} } sort keys %{ $rules2{$f} };

  if (@adds + @subs > 0) {
    print "$f ";
    print "+$_ " foreach @adds;
    print "-$_ " foreach @subs;
    print "\n";
  }
}

# Messages that only exist in the second log cannot be diffed; flag them.
foreach my $f (sort keys %rules2) {
  next if exists $rules1{$f};
  print STDERR "!!! $f not found in first logfile\n";
}

# read_rules($path)
#
# Parse one logfile and return a hash reference of the form
#   { message_id => { RULE_NAME => hit_count, ... }, ... }
#
# Lines starting with '#' are skipped, as are lines that do not match
# "<. or Y> <integer> <message id> <comma-separated rules>".
# Dies with the file name and OS error if the file cannot be opened.
sub read_rules {
  my ($path) = @_;
  my %rules;

  open(my $in, '<', $path) or die "Cannot open $path: $!\n";
  while (my $line = <$in>) {
    next if $line =~ /^#/;
    next unless $line =~ /^[.Y]\s+-?\d+\s+(\S+)\s+(\S+)/;
    my ($id, $hits) = ($1, $2);

    # Register the message even if the rule field splits to nothing, so
    # it is not later misreported as missing from this logfile.
    my $seen = $rules{$id} ||= {};
    $seen->{$_}++ foreach split(',', $hits);
  }
  close($in);

  return \%rules;
}