#!/bin/sh

# <@LICENSE>
# Licensed to the Apache Software Foundation (ASF) under one or more
# contributor license agreements.  See the NOTICE file distributed with
# this work for additional information regarding copyright ownership.
# The ASF licenses this file to you under the Apache License, Version 2.0
# (the "License"); you may not use this file except in compliance with
# the License.  You may obtain a copy of the License at:
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
#
# Written by Theo Van Dinter
# Please feel free to mail with any questions. :)

# This is a small script used to interact with run-masses to do a full
# corpus mass-check run, including the rsync to the SA server.
#
# NOTE: you MUST set RSYNC_USER and RSYNC_PASSWORD outside of this script so
# that your results can be sent up for review.
#
# By default, it'll do a nightly (set0) run.  If you want to do a weekly
# (set1) run, add "--net" to the commandline.
#
# Defaults for mass-check parameters:
#   no commandline parameters
#     use DEF_AFTER for --after option
#     adds "-n"
#     use nightly-versions.txt for updates
#   --net
#     switches to NET_AFTER for --after option
#     adds "-j 4", "--net", "--reuse"
#     use weekly-versions.txt for updates
#
# i.e.: "run-corpora" equates to: "mass-check -n"
#       "run-corpora --net" equates to: "mass-check -n -j 4 --net --reuse"

# Set the path appropriately.  PATH is replaced (not extended) with a fixed
# list of system directories; /sw/bin is appended only when that directory
# exists on this machine.
PATH=/bin:/usr/bin:/usr/local/bin
if [ -d /sw/bin ]; then
  PATH="${PATH}:/sw/bin"
fi
export PATH

# Where do things live?
# CORPUS is the directory that houses your mail corpus and these scripts.
CORPUS=$HOME/SA/corpus

# SA_VER is the directory that you want updated and will use for the
# nightly/weekly run
SA_VER=$HOME/SA/spamassassin-corpora

# OUTDIR is the directory which will be used for the temporary log files
# during the run, etc.  If you don't know, leave it blank and it'll use
# "SA_VER/masses"
OUTDIR=

# FINALDIR, if set, is where you want the files moved to after processing is
# completed.
FINALDIR=

# These are paths to various programs.  If PATH is set appropriately, you can
# just let the shell find them.
# (SVN and SVNVERS are not referenced further down in this script; they are
# kept so that existing local settings remain valid.)
SVN=svn
SVNVERS=svnversion
WGET=wget
RSYNC=rsync

# DEF_AFTER is used for the set0 run --after parameter
# NET_AFTER is used for the set1 run --after parameter
# this needs to be specially handled since there are problems with shell
# parameter parsing that make these not work without it.
#
DEF_AFTER="-120 days"
NET_AFTER="-60 days"

# die MESSAGE [STATUS]
# Print MESSAGE on stderr and terminate the script with STATUS (default 2).
die() {
  echo "$1" >&2
  exit "${2:-2}"
}

# is_revision VALUE
# Succeed only when VALUE is a non-empty string made of decimal digits, so it
# is safe to hand to the numeric operators of "test".
is_revision() {
  case "$1" in
    '' | *[!0-9]*) return 1 ;;
  esac
  return 0
}

# current_revision
# Print the second field of the "Revision:" line found in
# $SA_VER/masses/svninfo.tmp (prints nothing when there is no such line).
current_revision() {
  awk '/^Revision:/ {print $2}' "$SA_VER/masses/svninfo.tmp"
}

if [ -z "$RSYNC_USER" ] || [ -z "$RSYNC_PASSWORD" ]; then
  die "You need to specify RSYNC_USER and RSYNC_PASSWORD via the environment!"
fi

if [ -z "$OUTDIR" ]; then
  OUTDIR="$SA_VER/masses"
fi

# Everything below works on relative file names (ham.log, spam.log, ...), so
# continuing after a failed cd would operate on the wrong directory.
cd "$OUTDIR" || die "Couldn't change directory to $OUTDIR, aborting!"

if [ -f ham.log ] || [ -f spam.log ]; then
  die "A previous run still has log files, exiting."
fi

NET=0
OPTS="-n"
VERS=nightly
FILENAME=$RSYNC_USER
umask 002

# Only "--net" is recognised; any other argument is ignored.
for ARG in "$@"; do
  if [ "$ARG" = "--net" ]; then
    NET=1
  fi
done

if [ "$NET" -eq 1 ]; then
  FILENAME="net-$FILENAME"
  OPTS="$OPTS --net --reuse"
  AFTER="$NET_AFTER"
  VERS=weekly

  # We want to do this with more parallelization...
  OPTS="$OPTS -j 4"
else
  AFTER="$DEF_AFTER"
fi

# Verify appropriate version to run with
echo "[Updating $SA_VER]"

# Fetch the versions file, sleeping 60 seconds after each failure and giving
# up once more than 5 failures have been counted.
# $WGET and $RSYNC are expanded unquoted on purpose: this keeps the original
# word-splitting of those settings.
COUNT=0
while ! $WGET -q -nd -m "http://rsync.spamassassin.org/$VERS-versions.txt"; do
  sleep 60
  COUNT=$((COUNT + 1))
  if [ "$COUNT" -gt 5 ]; then
    die "Couldn't get the $VERS revision version, aborting!"
  fi
done

# The expected revision is the second field of the last line of the file.
NREV=$(tail -n 1 "$VERS-versions.txt" | awk '{print $2}')
if ! is_revision "$NREV"; then
  die "Couldn't find a revision number in $VERS-versions.txt, aborting!"
fi

# If the local checkout is already at the advertised revision, the versions
# file has not been updated since the previous run.
if [ -f "$SA_VER/masses/svninfo.tmp" ]; then
  CREV=$(current_revision)
  if is_revision "$CREV" && [ "$CREV" -eq "$NREV" ]; then
    die "Looks like a problem with the $VERS-versions.txt update, same rev ($CREV)"
  fi
fi

if ! $RSYNC -uaqrC --exclude masses/spamassassin/ --delete \
    "rsync.spamassassin.org::tagged_builds/${VERS}_mass_check/" "$SA_VER"; then
  die "Couldn't rsync update the $VERS spamassassin corpora code"
fi

# After the update the checkout must report exactly the expected revision.
# A missing or non-numeric value counts as a mismatch instead of slipping
# past a failing "test" expression.
CREV=$(current_revision)
if ! is_revision "$CREV" || [ "$CREV" -ne "$NREV" ]; then
  die "Looks like a problem with the rsync area, found rev $CREV but expected rev $NREV"
fi

# do the run, treat $AFTER differently due to commandline parsing and "-"
# $OPTS is expanded unquoted on purpose: it holds several separate options.
echo "[Running mass-check '$OPTS' in $CORPUS]"
if [ -z "$AFTER" ]; then
  "$CORPUS/run-masses" "$SA_VER" $OPTS > /dev/null
else
  echo "[Using '$AFTER' for --after setting]"
  "$CORPUS/run-masses" "$SA_VER" $OPTS --after "$AFTER" > /dev/null
fi

if [ ! -s ham.log ] || [ ! -s spam.log ]; then
  die "There seems to be a problem with either ham.log or spam.log, aborting!" 1
fi

# now we have our ham.log and spam.log files...
echo "[Uploading daily corpus logs]"
mv ham.log "ham-$FILENAME.log"
mv spam.log "spam-$FILENAME.log"
mv results.log "results-$FILENAME.log"

# An upload failure is reported but does not stop the script; the local
# files are still moved and the results are still printed below.
if ! $RSYNC -qPcvzb "ham-$FILENAME.log" "spam-$FILENAME.log" \
    "$RSYNC_USER@rsync.spamassassin.org::corpus/"; then
  echo "There was an error during rsync!" >&2
fi

RESULTDIR=.
if [ -n "$FINALDIR" ]; then
  mv -f "ham-$FILENAME.log" "spam-$FILENAME.log" "$FINALDIR"
  # If either log is still here, the move did not complete.
  if [ -f "ham-$FILENAME.log" ] || [ -f "spam-$FILENAME.log" ]; then
    die "There was an error moving files around, aborting!" 1
  fi

  # The results file goes to FINALDIR/hf when that directory exists,
  # otherwise to FINALDIR itself.
  RESULTDIR=$FINALDIR
  if [ -d "$FINALDIR/hf" ]; then
    RESULTDIR="$FINALDIR/hf"
  fi
  mv -f "results-$FILENAME.log" "$RESULTDIR"
fi

echo "[Our results]"
cat "$RESULTDIR/results-$FILENAME.log"