#!/usr/bin/perl
#
# <@LICENSE>
# Copyright 2004 Apache Software Foundation
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
#

# fn-fp-to-tcr: turn raw classification counts (correctly caught spam,
# false negatives, false positives, correctly passed ham) into the Total
# Cost Ratio (TCR), spam recall and spam precision, and print them.

use strict;
use warnings;

use Getopt::Long;
use vars qw($opt_lambda $opt_fn $opt_fp $opt_spam $opt_ham);

# Compute (TCR, spam recall %, spam precision %) from the four counts.
#
# Arguments, in order:
#   $lambda       cost of one false positive relative to one false negative
#   $nspamspam    spam messages classified as spam
#   $nspamlegit   spam messages classified as legitimate (false negatives)
#   $nlegitspam   legitimate messages classified as spam (false positives)
#   $nlegitlegit  legitimate messages classified as legitimate
#
# Returns a three-element list: ($tcr, $recall_percent, $precision_percent).
# Any ratio whose denominator is zero (no spam at all, or nothing flagged
# as spam) is reported as 0 rather than aborting with a division error.
sub compute_tcr_metrics {
    my ($lambda, $nspamspam, $nspamlegit, $nlegitspam, $nlegitlegit) = @_;

    my $nlegit = $nlegitlegit + $nlegitspam;
    my $nspam  = $nspamspam + $nspamlegit;

    # Shared denominator of both weighted error rates.
    my $weighted_total = $lambda * $nlegit + $nspam;

    my $werr = $weighted_total
        ? ($lambda * $nlegitspam + $nspamlegit) / $weighted_total
        : 0;
    my $werr_base = $weighted_total ? $nspam / $weighted_total : 0;

    $werr ||= 0.000001;    # avoid / by 0 for a perfect classifier
    my $tcr = $werr_base / $werr;

    # Recall: share of all spam that was caught.
    my $sr = $nspam ? ($nspamspam / $nspam) * 100.0 : 0;

    # Precision: share of everything flagged as spam that really was spam.
    my $nflagged = $nspamspam + $nlegitspam;
    my $sp = $nflagged ? ($nspamspam / $nflagged) * 100.0 : 0;

    return ($tcr, $sr, $sp);
}

# Parse the command line, compute the metrics and print the report.
# Dies with the usage message when the options cannot be parsed or when
# neither -spam nor -ham carries a non-zero count.
sub main {
    my $usage =
        "usage: fn-fp-to-tcr [-lambda l] -fn n -fp n -spam n -ham n\n\n";

    GetOptions("lambda=f", "fn=f", "fp=f", "spam=f", "ham=f")
        or die $usage;

    # lambda value for TCR equation, representing the cost of an FP vs. the
    # cost of a FN.  Some example values are: 1 = tagged only, 9 = mailed back
    # to sender asking for token, 999 = blocking or deleting a message.
    #
    # We roughly aim for a value representing "moved to infrequently-read
    # folder".
    my $lambda = 50;
    if ($opt_lambda) {
        $lambda = $opt_lambda;
    }

    if (!$opt_spam && !$opt_ham) {
        die $usage;
    }

    # Counts that were not supplied on the command line are treated as 0.
    # convert to the TCR metrics used in the published lit
    my $nspamspam   = defined $opt_spam ? $opt_spam : 0;
    my $nspamlegit  = defined $opt_fn   ? $opt_fn   : 0;
    my $nlegitspam  = defined $opt_fp   ? $opt_fp   : 0;
    my $nlegitlegit = defined $opt_ham  ? $opt_ham  : 0;

    my ($tcr, $sr, $sp) = compute_tcr_metrics(
        $lambda, $nspamspam, $nspamlegit, $nlegitspam, $nlegitlegit
    );

    printf "TCR(l=%d): %3.6f\nSpamRecall: %3.6f%%\nSpamPrecision: %3.6f%%\n",
        $lambda, $tcr, $sr, $sp;

    return;
}

# Run only when executed as a script, so the file can also be loaded
# (require/do) to reach compute_tcr_metrics() without side effects.
main() unless caller;