#!/bin/sh
# <@LICENSE>
# Licensed to the Apache Software Foundation (ASF) under one or more
# contributor license agreements.  See the NOTICE file distributed with
# this work for additional information regarding copyright ownership.
# The ASF licenses this file to you under the Apache License, Version 2.0
# (the "License"); you may not use this file except in compliance with
# the License.  You may obtain a copy of the License at:
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# </@LICENSE>
#
# This is a small script used to interact with run-masses to do a full
# corpus mass-check run, including the rsync to the SA server.
# Change the appropriate variables below.
#
# By default, it'll do a set0 run, but you can change that by adding
# --net or --bayes to the command line.
#
# --net by itself (i.e. without --bayes) will automatically try running
# 4 mass-checks in parallel.
#
# Exit status:
#   0  run completed, or the checkout is already at/after the target rev
#   1  a required directory is missing, or ham.log/spam.log is missing/empty
#   2  the revision file or the SVN update could not be obtained

CORPUS=/home/felicity/SA/corpus
SA_VER=/home/felicity/SA/spamassassin-corpora
SVN=/usr/local/bin/svn
SVNVERS=/usr/local/bin/svnversion
NET=0
BAYES=0
OPTS="--progress --after=-2592000"
RSYNC_USER=your_rsync_username
RSYNC_PASSWORD="your_rsync_password"; export RSYNC_PASSWORD
VERS=nightly
FILENAME=$RSYNC_USER

# retry_or_abort MESSAGE COMMAND [ARG...]
# Runs COMMAND until it succeeds, sleeping 60 seconds after every failure.
# After the sixth failed attempt, prints MESSAGE to stderr and exits the
# script with status 2.
retry_or_abort() {
  retry_msg=$1
  shift
  retry_count=0
  while ! "$@"; do
    sleep 60
    retry_count=$((retry_count + 1))
    if [ "$retry_count" -gt 5 ]; then
      echo "$retry_msg" >&2
      exit 2
    fi
  done
}

# Loop on the argument count rather than on "$1" being non-empty, so an empty
# argument does not silently end option parsing.  Unknown arguments are
# ignored.
while [ $# -gt 0 ]; do
  case "$1" in
    --net) NET=1 ;;
    --bayes) BAYES=1 ;;
  esac
  shift
done

if [ "$NET" -eq 1 ]; then
  FILENAME="net-$FILENAME"
  OPTS="$OPTS --net"
  VERS=weekly

  # We want to do this with more parallelization, but not if Bayes is
  # also running ...
  if [ "$BAYES" -eq 0 ]; then
    OPTS="$OPTS -j 4 --restart 1000"
  fi
fi

if [ "$BAYES" -eq 1 ]; then
  FILENAME="bayes-$FILENAME"
  OPTS="$OPTS --bayes"
fi

# Update SA version before our run
echo "[Updating $SA_VER]"
# Everything below (svn update, rm of the Bayes DB) is relative to this
# checkout, so do not carry on from the wrong directory.
cd "$SA_VER" || {
  echo "Couldn't cd to $SA_VER, aborting!" >&2
  exit 1
}

retry_or_abort "Couldn't get the $VERS revision version, aborting!" \
  wget -q -nd -m "http://rsync.spamassassin.org/$VERS-versions.txt"

CREV=$("$SVNVERS" .)
# The target revision is the second field of the last line of the file.
NREV=$(tail -n 1 "$VERS-versions.txt" | awk '{print $2}')

# Fail fast on an empty or non-numeric target revision instead of feeding it
# to the numeric test and to "svn update -r".
case "$NREV" in
  '' | *[!0-9]*)
    echo "Couldn't parse the $VERS revision from $VERS-versions.txt, aborting!" >&2
    exit 2
    ;;
esac

# svnversion may print something other than a plain integer (for example a
# revision range or trailing status letters).  Only compare numerically when
# the value is a plain integer; otherwise fall through and update.
case "$CREV" in
  '' | *[!0-9]*) ;;
  *)
    if [ "$CREV" -ge "$NREV" ]; then
      echo "Current rev ($CREV) newer or equal to $VERS rev ($NREV)"
      exit 0
    fi
    ;;
esac

retry_or_abort "Couldn't do a SVN update, aborting!" \
  "$SVN" update -r "$NREV"

# update the corpus with the latest/greatest mail files
echo "[Updating Corpus]"
# The log files checked and moved below are looked up relative to this
# directory.
cd "$CORPUS" || {
  echo "Couldn't cd to $CORPUS, aborting!" >&2
  exit 1
}
"$CORPUS/update" -q

# remove current bayes db set
echo "[Removing old Bayes DB]"
rm -f -- "${SA_VER:?}"/masses/spamassassin/bayes*

# do the run
echo "[Running mass-check '$OPTS' in $CORPUS]"
# $OPTS is deliberately unquoted: it holds several options that must be
# word-split into separate arguments.
# shellcheck disable=SC2086
"$CORPUS/run-masses" "$SA_VER" $OPTS > /dev/null

if [ ! -s ham.log ] || [ ! -s spam.log ]; then
  echo "There seems to be a problem with either ham.log or spam.log, aborting!" >&2
  exit 1
fi

mv -f ham.log "results/ham-$FILENAME.log"
mv -f spam.log "results/spam-$FILENAME.log"
mv -f results.log "results/hf/results-$FILENAME.log"

# The rsync glob below must be expanded inside results/.
cd results || {
  echo "Couldn't cd to $CORPUS/results, aborting!" >&2
  exit 1
}

# now we have our ham.log and spam.log files...
echo "[Uploading daily corpus logs]"
rsync -qCPcvuzb *-"$FILENAME".log "$RSYNC_USER@rsync.spamassassin.org::corpus/"

echo "[Our results]"
cat "hf/results-$FILENAME.log"