#!/usr/bin/perl

# This script is used to print some statistics about classification accuracy
# with a k-fold cross validation.
#
# Input: a text file with one line per fold, holding at least four
# whitespace-separated numeric columns.  The formulas below divide column 2
# by (column 0 + column 2) for the false-positive rate and column 3 by
# (column 1 + column 3) for the false-negative rate; presumably the columns
# are correct-ham, correct-spam, false-positive and false-negative counts --
# TODO confirm against whatever produces the file.

use strict;
use warnings;

my $lambda = 50;    # desired lambda for TCR calculation

# Run the driver only when this file is executed directly, so that the
# subroutines can also be loaded (e.g. via require) without side effects.
main(@ARGV) unless caller;

# main(@args)
#
# Reads the per-fold counts from the file named by $args[0], derives the
# false-positive rate, false-negative rate and TCR of every fold, and prints
# the mean and standard deviation of each of the three series.
#
# Exits with status 1 and a usage message when no file name is given; dies
# when the file cannot be opened or a line cannot be evaluated.
sub main {
    my @args = @_;

    if ( scalar(@args) < 1 ) {
        print STDERR "Usage: model-statistics <file> [validate]\n";
        exit 1;
    }

    my $path = $args[0];
    my (@fp1, @fn1, @tcr1);

    open(my $fh, '<', $path)
        or die "model-statistics: cannot open $path: $!\n";

    while ( my $line = <$fh> ) {
        # split ' ' (unlike split /\s+/) ignores leading whitespace, so an
        # indented line does not produce an empty first field.
        my @x = split ' ', $line;
        next unless @x;    # tolerate blank lines

        die "model-statistics: $path line $.: expected 4 columns, got "
            . scalar(@x) . "\n"
            if @x < 4;

        my $fp_den  = $x[0] + $x[2];
        my $fn_den  = $x[1] + $x[3];
        my $tcr_den = $x[3] + $lambda * $x[2];

        # Fail with a message that names the offending line instead of
        # Perl's bare "Illegal division by zero".
        die "model-statistics: $path line $.: zero denominator, "
            . "cannot compute rates\n"
            if $fp_den == 0 || $fn_den == 0 || $tcr_den == 0;

        push(@fp1, $x[2] / $fp_den);
        push(@fn1, $x[3] / $fn_den);
        # NOTE(review): the numerator is column 1 alone, not column 1 +
        # column 3 -- confirm this matches the intended TCR definition.
        push(@tcr1, $x[1] / $tcr_den);
    }
    close($fh);

    stat_analysis("False positives",      "pct", \@fp1);
    stat_analysis("False negatives",      "pct", \@fn1);
    stat_analysis("TCR (lambda=$lambda)", "lin", \@tcr1);

    return;
}

# stat_analysis($title, $pct, $s1)
#
# Prints one line with the mean and the sample standard deviation of the
# numbers in the array referenced by $s1.
#
#   $title - label printed in front of the figures
#   $pct   - "pct" prints both figures multiplied by 100 (the mean followed
#            by a percent sign); any other value prints them unscaled
#   $s1    - reference to an array of numbers
#
# An empty sample set prints nothing and emits a warning; a single sample is
# reported with a standard deviation of 0, because the sample variance is
# undefined for fewer than two values.
sub stat_analysis {
    my ($title, $pct, $s1) = @_;

    my $n = scalar(@{$s1});
    if ( $n == 0 ) {
        warn "$title: no samples\n";
        return;
    }

    # mean = 1/n * sum(s[i])
    my $mean_s1 = 0;
    $mean_s1 += $_ for @{$s1};
    $mean_s1 /= $n;

    # var = 1/(n-1) * sum((mean - s[i])^2)
    my $var_s1 = 0;
    if ( $n > 1 ) {
        $var_s1 += ( $mean_s1 - $_ )**2 for @{$s1};
        $var_s1 /= $n - 1;
    }

    # std = sqrt(var)
    my $std_s1 = sqrt($var_s1);

    # SA developers like percentage points instead of probabilities.
    if ( $pct eq "pct" ) {
        printf "%s: mean=%0.4f%% std=%0.4f\n",$title,100*$mean_s1,100*$std_s1;
    }
    else {
        printf "%s: mean=%0.4f std=%0.4f\n",$title,$mean_s1,$std_s1;
    }

    return;
}

1;