#!/bin/sh
#
# Given a large set of MH folders, in multiple MH trees, generate a list of
# tests hit by each message.  Output the test lists into "spam.local" and
# "nonspam.local".
#
# This then gives us a statistical base to test weight changes against.
#
# Running this script only writes "commands.sh" into the current directory.
# Running commands.sh is what invokes ./mass-phrase-freq on each folder and
# appends its output to "spam.local" or "nonspam.local".

# the directories to search (whitespace-separated list).
MAILDIRS="$HOME/Mail $HOME/Mail.old"

# the names of folders that usually contain only spam messages.  All
# other folders are assumed to contain non-spam.  Refile messages
# to ensure this is the case, where possible, to keep your output
# consistent!
ARE_SPAM="Sapm Spam MissedSpam spam"

# skipped folders.
SKIPPED="drafts outbox SA"

# folder_in_list FOLDER NAME...
#
# Succeed (status 0) when the last path component of FOLDER is exactly one
# of the NAMEs, fail (status 1) otherwise.  Names are compared literally.
# Variables are prefixed because POSIX sh has no "local".
folder_in_list() {
  fil_base=${1##*/}
  shift
  for fil_name in "$@"; do
    if [ "$fil_base" = "$fil_name" ]; then
      return 0
    fi
  done
  return 1
}

# shell_quote STRING
#
# Print STRING wrapped in single quotes, with embedded single quotes
# rewritten as '\'' so the result is one literal word when re-read by sh.
shell_quote() {
  printf '%s\n' "$1" | sed "s/'/'\\\\''/g; 1s/^/'/; \$s/\$/'/"
}

# generate_commands
#
# Globals:  MAILDIRS, ARE_SPAM, SKIPPED (all read)
# Outputs:  (re)creates "commands.sh" in the current directory
# Returns:  0 on success, 1 if commands.sh could not be written
#
# For every directory directly below each existing mail tree that contains
# a file named "1" and is not listed in SKIPPED, emit one progress line and
# one ./mass-phrase-freq invocation appending to spam.local (folder name
# listed in ARE_SPAM) or nonspam.local (anything else).
generate_commands() {
  # The three lists are whitespace-separated words, so they are expanded
  # unquoted on purpose.
  # shellcheck disable=SC2086
  for gc_maildir in $MAILDIRS; do
    [ -d "$gc_maildir" ] || continue

    for gc_folder in "$gc_maildir"/*; do
      [ -d "$gc_folder" ] || continue
      [ -f "$gc_folder/1" ] || continue

      if folder_in_list "$gc_folder" $SKIPPED; then
        continue
      fi

      if folder_in_list "$gc_folder" $ARE_SPAM; then
        gc_out=spam.local
      else
        gc_out=nonspam.local
      fi

      # Quote the path so folders containing spaces or shell
      # metacharacters survive being re-parsed when commands.sh runs.
      gc_quoted=$(shell_quote "$gc_folder")
      printf 'printf %s %s\n' "'Checking %s:\\n'" "$gc_quoted"
      printf './mass-phrase-freq %s >> %s\n' "$gc_quoted" "$gc_out"
    done
  done > commands.sh || return 1

  return 0
}

if ! generate_commands; then
  echo "Cannot write commands.sh in the current directory." >&2
  exit 1
fi

echo "Now run commands.sh to check all folders."