#!/usr/bin/perl -w
#
# Random-search driver: repeatedly picks random values for the Robinson
# constants, writes them to constants.pl, runs
# ./bayes-analyse-from-raw-counts over the spam/ham logs, and prints one
# summary line per run with the constants used and the figures parsed from
# that run's log.
#
# NOTE(review): constants.pl is presumably loaded by
# bayes-analyse-from-raw-counts -- confirm against that script.

use strict;
use warnings;

my $USE_CHI_COMBINING = 0;

# use 1 bucket's set for speed, just to get going:
#my $SPAMLOG = "/raid/bayestest/results/bucket1/spam.log";
#my $HAMLOG = "/raid/bayestest/results/bucket1/nonspam.log";

# or use the lot, slow but more accurate:
my $SPAMLOG = "/raid/bayestest/results/spam_all.log";
my $HAMLOG = "/raid/bayestest/results/nonspam_all.log";

# Constant name => [ low, high ] bounds of the range to sample from.
my $searchspaces = {
    ROBINSON_S_CONSTANT => [ 0.0, 0.75 ],
    ROBINSON_X          => [ 0.0, 0.75 ],
};

$| = 1;       # unbuffered output, so each result line shows up immediately
srand(23);    # fixed seed, so the sequence of sampled constants is repeatable

for my $count (0 .. 999) {
    my $rands = rand_constants();
    write_constants($rands);
    run_test($count);
    parse_log($rands, $count);
}
exit;

# rand_constants()
#
# Returns a hashref mapping each name in $searchspaces to a random value
# drawn from that name's [low, high) range.
sub rand_constants {
    my $rands = { };

    # Iterate in sorted order: hash key order differs from process to
    # process, which would otherwise pair the seeded rand() sequence with
    # different constants on every invocation.
    foreach my $name (sort keys %{$searchspaces}) {
        my ($lo, $hi) = @{ $searchspaces->{$name} };
        my $span = $hi - $lo;

        # rand(0) is treated as rand(1), so an empty range must not be
        # passed to rand() or the value would escape the range.
        $rands->{$name} = $span ? rand($span) + $lo : $lo;
    }
    return $rands;
}

# write_constants($rands)
#
# Writes constants.pl in the current directory, using the
# ROBINSON_S_CONSTANT and ROBINSON_X values from the $rands hashref and
# fixed values for the remaining constants.  Dies if the file cannot be
# written, since a test run would then use stale constants.
sub write_constants {
    my $rands = shift;

    open(my $out, '>', 'constants.pl')
        or die "cannot write constants.pl: $!\n";

    print {$out} <<"END_CONSTANTS";

use constant ROBINSON_S_CONSTANT => $rands->{ROBINSON_S_CONSTANT};
use constant ROBINSON_X => $rands->{ROBINSON_X};
use constant ROBINSON_MIN_PROB_STRENGTH => 0.27;
use constant PROB_BOUND_LOWER => 0.001;
use constant PROB_BOUND_UPPER => 0.999;
use constant N_SIGNIFICANT_TOKENS => 150;
use constant USE_CHI_COMBINING => $USE_CHI_COMBINING;
1;

END_CONSTANTS

    # Buffered write errors only surface at close time.
    close($out)
        or die "cannot finish writing constants.pl: $!\n";
    return;
}

# run_test($count)
#
# Runs the analyser over $SPAMLOG and $HAMLOG, sending its stdout and stderr
# to logs/log.$count.  The logs directory is created if it is missing.  A
# failing analyser only produces a warning; parse_log() then reports
# whatever the log contains.
sub run_test {
    my $count = shift;

    unless (-d "logs") {
        mkdir("logs") or die "cannot create logs directory: $!\n";
    }

    # Single-string form on purpose: the shell performs the redirection.
    my $status = system("./bayes-analyse-from-raw-counts $SPAMLOG $HAMLOG ".
                        "> logs/log.$count 2>&1");
    if ($status != 0) {
        warn "bayes-analyse-from-raw-counts run $count failed (status $status)\n";
    }
    return;
}

# _next_squeezed_line($fh)
#
# Reads the next line from $fh and returns it with every run of whitespace
# (including the trailing newline) collapsed to a single space, or undef at
# end of file.
sub _next_squeezed_line {
    my $fh = shift;

    my $line = <$fh>;
    return unless defined $line;

    $line =~ s/\s+/ /gs;
    return $line;
}

# parse_log($rands, $count)
#
# Scans logs/log.$count for the first "optimization for hamcutoff=..." line
# and extracts the cutoffs and cost from it, the FP/FN figures from the
# second line after it, and the unsure figures from the line after that.
# Prints a single line to stdout: the run number, then "name value" pairs
# for the constants in $rands and for every parsed field, each group sorted
# by name.  Fields that could not be parsed are simply omitted.  Sleeps one
# second before returning.
sub parse_log {
    my $rands = shift;
    my $count = shift;

    my $r = { };
    my $logfile = "logs/log.$count";

    if (open(my $in, '<', $logfile)) {
        while (my $header = <$in>) {
            $header =~ /optimization for hamcutoff=(\S+), spamcutoff=(\S+): cost=\$\s*(\S+)/
                or next;
            @{$r}{qw(hamcutoff spamcutoff cost)} = ($1, $2, $3);

            # The line directly after the header is not used.
            _next_squeezed_line($in);

            # Only record captures from a successful match; otherwise $1/$2
            # would still hold the cutoffs captured above.
            my $errors = _next_squeezed_line($in);
            if (defined $errors && $errors =~ /FP: (\S+) \S+ FN: (\S+) /) {
                @{$r}{qw(fp fn)} = ($1, $2);
            }

            my $unsure = _next_squeezed_line($in);
            if (defined $unsure
                && $unsure =~ /Unsure: (\S+) \S+ .ham: (\S+) \S+ spam: (\S+) /)
            {
                @{$r}{qw(unsure unham unspam)} = ($1, $2, $3);
            }
            last;
        }
        close($in);
    }
    else {
        warn "cannot read $logfile: $!\n";
    }

    my $line = join ' ', $count,
        (map { ($_, $rands->{$_}) } sort keys %{$rands}),
        (map { ($_, $r->{$_}) } sort keys %{$r});
    print $line, "\n";

    sleep 1;
    return;
}