#!/bin/sh
# run-masses
#
# <@LICENSE>
# Licensed to the Apache Software Foundation (ASF) under one or more
# contributor license agreements.  See the NOTICE file distributed with
# this work for additional information regarding copyright ownership.
# The ASF licenses this file to you under the Apache License, Version 2.0
# (the "License"); you may not use this file except in compliance with
# the License.  You may obtain a copy of the License at:
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
#
#
# This script will run a mass-check against all mbox files in a given
# set of directories.  The expected directory structure is:
#
#   spamassassin        SA version to use
#   corpus              location of all corpus files
#       run-masses      this script
#       spam            directory of spam
#       ham             directory of ham
#
# By default, the script looks for "spamassassin-head" which on my
# machine is the latest version from CVS.  You can easily change this
# by passing parameters to this script as such:
#
#   run-masses ../spamassassin-garun --progress
#
# The first parameter is the directory to use for mass-check, and all
# other parameters are passed to mass-check itself.
#
# At the end, you will have a ham.log, spam.log, and results.log in the
# corpus directory.
#
# BTW: cpucount is a small script to figure out the number
# of CPUs on the current machine.  It is available from
# http://www.kluge.net/~felicity/random/
#

# Setup the path as necessary.  The trailing "." keeps the current
# directory searchable for the helper commands invoked below.
PATH=/bin:/usr/bin:/usr/local/bin:${HOME}/bin:.
if [ -d /sw/bin ]; then
  PATH=${PATH}:/sw/bin
fi
export PATH

# Use the specified directory for tests.  When a directory is given it is
# shifted off so that "$@" holds only the options meant for mass-check.
if [ -z "$1" ]; then
  DIR=../spamassassin-head
else
  DIR="$1"
  shift
fi

# How many processes should we run at once?
# Whitespace is stripped so a padded number is still accepted.  If cpucount
# is missing, fails, or prints something that is not a positive integer,
# fall back to a single process: an empty value would otherwise make "-j"
# swallow the next mass-check argument.
PROCS=$(cpucount | tr -d '[:space:]')
case "$PROCS" in
  '' | *[!0-9]* | 0)
    echo "run-masses: cpucount gave no usable CPU count; using 1 process" >&2
    PROCS=1
    ;;
esac

# Where are our files located?
MASS=$DIR/masses
RULES=$DIR/rules

# do the mass-check
# class:format:path
# class  = ham | spam
# format = file (file w/ single message) | mbox (file w/ multiple messages) | dir (of 'file's)
#
# The output of mbox-to-check is left unquoted on purpose: each
# whitespace-separated word it prints must reach mass-check as its own
# argument (presumably one class:format:path target per word -- confirm
# against that helper).
# shellcheck disable=SC2046
"$MASS/mass-check" --all -c "$RULES" -j "$PROCS" "$@" $(mbox-to-check)

echo "Generating hit frequency results"
"$MASS/hit-frequencies" -c "$RULES" -x -p -a > results.log

# remove the parse-rules-for-masses tmp directory
echo "Removing temporary files"
rm -rf ./tmp