#!/usr/bin/perl -w

# Tuning constants for the two ranking -> score-range mappings implemented by
# sliding_window_ratio_to_range() and shrinking_window_ratio_to_range().
# In the diagrams the left column is the ratio passed to the mapping and the
# brackets show (roughly) the [lo, hi] score range it is mapped to.

# Sliding window (used when $use_sliding_window is true):
# (rough) graphic demo of this algorithm:
# 0.0  = -limit [......] 0 ........ limit
# 0.25 = -limit ..[..... 0 .]...... limit
# 0.5  = -limit ....[... 0 ...].... limit
# 0.75 = -limit ......[. 0 .....].. limit
# 1.0  = -limit ........ 0 [......] limit
# lo = -limits + size*ratio and hi = +limits - size*(1-ratio), so the window
# keeps a constant width of (2*limits - size) and slides by "size" in total.
my $sliding_window_limits = 4.8; # outer score limits are [-limits, +limits]
my $sliding_window_size =   5.5; # total distance the window slides as ratio goes 0 -> 1

# Shrinking window (used when $use_sliding_window is false):
# 0.0  = -limit [......] 0 ........ limit
# 0.25 = -limit ....[... 0 ]....... limit
# 0.5  = -limit ......[. 0 .]...... limit (note: tighter)
# 0.75 = -limit .......[ 0 ...].... limit
# 1.0  = -limit ........ 0 [......] limit
# The ratio is first rescaled to abs(2*ratio - 1), i.e. its distance from 0.5;
# "*ratio" below refers to that rescaled value.  Ratios below 0.5 get the
# resulting range mirrored onto the negative side.
my $shrinking_window_lower_base =   0.00; # bound nearest zero when the rescaled ratio is 0
my $shrinking_window_lower_range =  1.00; # *ratio, added to above
my $shrinking_window_size_base =    1.00; # window width when the rescaled ratio is 0
my $shrinking_window_size_range =   1.00; # *ratio, added to above

# 0 selects the shrinking window, any true value selects the sliding window.
my $use_sliding_window = 0;

# Command line: first argument is the rules location (or the literal '-test'),
# second is the score set number (defaults to 0).  Anything left in @ARGV is
# read later through <>.
my $argcffile = shift @ARGV;
my $scoreset = shift @ARGV;
$scoreset = 0 if ( !defined $scoreset );

if (defined ($argcffile) && $argcffile eq '-test') {
  # use this to debug the ranking -> score-range mapping:
  # prints the [lo hi] range for a few sample ratios on stderr, then exits
  # without reading any input or writing any file.
  for my $rat (0.0, 0.25, 0.5, 0.75, 1.0) {   # lexical, not a package global
    my ($lo, $hi) = $use_sliding_window
                  ? sliding_window_ratio_to_range($rat)
                  : shrinking_window_ratio_to_range($rat);
    warn "test: $rat => [ $lo $hi ]\n";
  }
  exit;
}

# Per-rule statistics, keyed by rule name and filled in while reading the
# frequency data.
my %freq_spam = ();
my %freq_nonspam = ();

# Totals taken from the "(all messages)" summary line.
my $num_spam;
my $num_nonspam;
my $num_total;

my %mutable_tests = ();   # rule name => 1 if its score may be changed, else 0
my %ranking = ();         # rule name => ranking column from the input
my %soratio = ();         # rule name => S/O ratio column from the input
my %is_nice = ();         # rule name => 1 if tflags contains "nice", else 0

if (!defined $argcffile) { $argcffile = "../rules"; }

# Run the rule parser.  The list form of system() bypasses the shell, so a
# rules path containing quotes, '$' or other shell metacharacters is passed
# through verbatim instead of being re-interpreted.
system ("./parse-rules-for-masses", "-d", $argcffile, "-s", $scoreset)
  and die "parse-rules-for-masses failed (status $?): $!\n";

# The parser is expected to leave its result in tmp/rules.pl; the rest of this
# script reads %rules, which is not declared here -- presumably that file
# defines it (TODO confirm).
if (-e "tmp/rules.pl") {
  # Note, the spaces need to stay in front of the require to work around a RPM 4.1 problem
  require "./tmp/rules.pl";
}
else {
  die "parse-rules-for-masses had no error but no tmp/rules.pl!?!";
}

# Read the frequency table from the remaining command-line files (or stdin).
# Each useful line holds five numeric columns (overall, spam, nonspam, S/O
# ratio, ranking), one further column that is skipped, and the rule name.
# Lines that do not have that shape are ignored.
while (defined (my $line = <>)) {
  my @col = $line =~ /^\s*([\d\.]+)\s+([\d\.]+)\s+([\d\.]+)\s+([\d\.]+)\s+([\d\.]+)\s+\S+\s+(.+)\s*$/
    or next;

  my ($overall, $spam, $nonspam, $soratio, $ranking) = map { $_ + 0 } @col[0 .. 4];
  my $test = $col[5];

  # The summary line carries the totals; the percentage summary is skipped.
  if ($test eq '(all messages)') {
    ($num_spam, $num_nonspam) = ($spam, $nonspam);
    $num_total = $spam+$nonspam;
    next;
  }
  next if ($test eq '(all messages as %)');

  unless (defined $rules{$test}) {
    warn "rule $test no longer exists; ignoring\n";
    next;
  }

  $freq{$test} = $overall;
  $freq_spam{$test} = $spam;
  $freq_nonspam{$test} = $nonspam;

  my $tflags = $rules{$test}->{tflags} || '';

  # userconf rules are never mutable; net rules are not mutable when the
  # score set number is even.
  my $locked = $tflags =~ /\buserconf\b/
            || ( ($scoreset % 2) == 0 && $tflags =~ /\bnet\b/ );
  $mutable_tests{$test} = $locked ? 0 : 1;
  $is_nice{$test} = ($tflags =~ m/\bnice\b/i) ? 1 : 0;

  if ($overall >= 0.01) {
    $soratio{$test} = $soratio;
    $ranking{$test} = $ranking;
    next;
  }

  # less than 0.01% of messages were hit: pin the rule to neutral values
  # and zero its score.
  $mutable_tests{$test} = 0;
  $soratio{$test} = 0.5;
  $ranking{$test} = 0.0;
  $rules{$test}->{score} = 0; # tvd - disable these rules automagically
}

# tmp/ normally exists already at this point (tmp/rules.pl was loaded from it
# above), so only try to create it -- and only complain -- when it is missing.
unless ( (-d "tmp") || mkdir ("tmp", 0755) ) {
  warn "Couldn't create tmp directory!: $!\n";
}

# The output file is the whole point of the script: stop if it cannot be
# opened instead of silently printing to a closed handle.
open (OUT, '>', "tmp/ranges.data")
  or die "Couldn't open tmp/ranges.data for writing: $!\n";
# Write one line per rule to OUT in the form "<lo> <hi> <mutable> <rule name>",
# highest ranking first.  Rule names and flags are passed to printf as
# arguments (%s) rather than interpolated into the format string, so a '%' in
# a name cannot be misread as a conversion.
foreach my $test (sort { $ranking{$b} <=> $ranking{$a} } keys %freq) {
  if (!defined ($rules{$test})) {
    warn "no rule $test";
    print OUT ("0 0 0 $test\n");
    next;
  }

  my $spam = $freq_spam{$test};
  my $nonspam = $freq_nonspam{$test};
  my $soratio = $soratio{$test};
  my $ranking = $ranking{$test};
  my $mutable = $mutable_tests{$test};

  # Immutable or zero-scored rules keep their current score: lo == hi == score.
  if (!$mutable || $rules{$test}->{score} == 0) { # didn't look for score 0 - tvd
    printf OUT ("%3.3f %3.3f 0 %s\n",
                         $rules{$test}->{score},
                         $rules{$test}->{score},
                         $test);
    next;
  }

  # Fold the ranking into a single [0,1] scale:
  # 0.0 = best nice, 1.0 = best nonnice
  if ($is_nice{$test}) {
    $ranking = .5 - ($ranking / 2);
  } else {
    $ranking = .5 + ($ranking / 2);
  }

  my ($lo, $hi);
  if ($use_sliding_window) {
    ($lo, $hi) = sliding_window_ratio_to_range($ranking);
  } else {
    ($lo, $hi) = shrinking_window_ratio_to_range($ranking);
  }

  # tvd
  # Heuristic adjustments of the mapped range.  $spam / $nonspam are the
  # spam and nonspam frequency columns from the input (presumably
  # percentages, like the overall column -- TODO confirm).
  my $tflags = $rules{$test}->{tflags}; $tflags ||= '';
  if ( $is_nice{$test} && ( $ranking < .5 ) ) { # proper nice rule
    # Widen the negative end of the range.
    if ( $tflags =~ /\blearn\b/ ) { # learn rules should get a higher score # -5.4
      $lo *= 1.8;
    }
    elsif ($soratio <= 0.05 && $nonspam > 0.5) { # let good rules be larger if they want to, -4.5
      $lo *= 1.5;
    }

    # The closer the S/O ratio is to 0, the closer $hi is pulled towards $lo;
    # otherwise the range extends all the way up to 0.
    $hi = ($soratio == 0)                        ? $lo :
          ($soratio <= 0.005 )                   ? $lo/1.1 :
          ($soratio <= 0.010 && $nonspam > 0.2)  ? $lo/2.0 :
          ($soratio <= 0.025 && $nonspam > 1.5)  ? $lo/10.0 :
          0;

    if ( $soratio >= 0.35 ) { # auto-disable bad rules
      ($lo,$hi) = (0,0);
    }
  }
  elsif ( !$is_nice{$test} && ( $ranking >= .5 ) ) { # proper spam rule
    # Widen the positive end of the range.
    if ( $tflags =~ /\blearn\b/ ) { # learn rules should get a higher score
      $hi *= 1.8;
    }
    elsif ( $soratio >= 0.99 && $spam > 1.0 ) {
      $hi *= 1.5; # let good rules be larger if they want to
    }

    # The closer the S/O ratio is to 1, the higher the lower bound is raised;
    # otherwise the range extends all the way down to 0.
    $lo = ($soratio == 1)                        ? $hi :
          ($soratio >= 0.995 )                   ? $hi/4.0 :
          ($soratio >= 0.990 && $spam > 1.0)     ? $hi/8.0 :
          ($soratio >= 0.900 && $spam > 10.0)    ? $hi/24.0 :
          0;

    if ( $soratio <= 0.65 ) { # auto-disable bad rules
      ($lo,$hi) = (0,0);
    }
  }
  else { # rule that has bad nice setting
    ($lo,$hi) = (0,0);
  }

  # A zero-width range leaves nothing to vary, so flag the rule as immutable.
  $mutable = 0 if ( $hi == $lo );

  printf OUT ("%3.1f %3.1f %s %s\n", $lo, $hi, $mutable, $test);
}

# Buffered write errors only surface at close time, so check it.
close (OUT) or die "Couldn't finish writing tmp/ranges.data: $!\n";
exit;

# Map a ratio to a (lo, hi) score range using the sliding window: both ends
# move linearly with the ratio, driven by $sliding_window_limits and
# $sliding_window_size.  Returns the list (lo, hi) with lo <= hi.
sub sliding_window_ratio_to_range {
  my ($ratio) = @_;

  my $low  = -$sliding_window_limits + ($sliding_window_size * $ratio);
  my $high = +$sliding_window_limits - ($sliding_window_size * (1-$ratio));

  # Keep the pair ordered even if the constants make the ends cross over.
  return ($high, $low) if $low > $high;
  return ($low, $high);
}

# Map a ratio to a (lo, hi) score range using the shrinking window.  The
# ratio's distance from 0.5 (rescaled so that ratio 0 or 1 gives 1) sets both
# how far the range starts from zero and how wide it is; ratios below 0.5
# get the range mirrored onto the negative side.
# Returns the list (lo, hi) with lo <= hi.
sub shrinking_window_ratio_to_range {
  my ($ratio) = @_;

  my $signed   = ($ratio -.5) * 2;      # adj [0,1] to [-1,1]
  my $mirrored = ($signed < 0) ? 1 : 0;
  my $strength = $mirrored ? -$signed : $signed;

  # disabled tweak (tvd): $strength /= 1.5 if ( $ratio < 0.95 && $ratio > 0.15 );

  my $near  = $shrinking_window_lower_base
                        + ($shrinking_window_lower_range * $strength);
  my $width = $shrinking_window_size_base
                        + ($shrinking_window_size_range * $strength);
  my $far   = $near + $width;

  my ($lo, $hi) = $mirrored ? (-$far, -$near) : ($near, $far);

  # Keep the pair ordered even if the constants make the ends cross over.
  ($lo, $hi) = ($hi, $lo) if $lo > $hi;

  return ($lo, $hi);
}