#!/usr/bin/perl -w
############################################################
# The code in this file is copyright 2001 by Craig Hughes  #
# It is licensed for use with the SpamAssassin distribution#
# under the terms of the Perl Artistic License, the text of#
# which is included as the file named "License"            #
############################################################

my $PREFIX = '@@PREFIX@@';  # substituted at 'make' time
my $DEF_RULES_DIR = '@@DEF_RULES_DIR@@';  # substituted at 'make' time
my $LOCAL_RULES_DIR = '@@LOCAL_RULES_DIR@@';  # substituted at 'make' time
use lib '@@INSTALLSITELIB@@'; # substituted at 'make' time

use lib '../lib';	# added by jm for use inside the distro
use strict;
use Socket;
use Config;
use Mail::SpamAssassin;
use Mail::SpamAssassin::NoMailAudit;
use Getopt::Long;
use Pod::Usage;
use Sys::Syslog qw(:DEFAULT setlogsock);
use POSIX qw(:sys_wait_h);
use POSIX qw(setsid);
use Errno;
# Load Time::HiRes if it's available
BEGIN {
  eval { require Time::HiRes };
  Time::HiRes->import( qw(time) ) unless $@;
}


sub spawn;  # forward declaration
sub logmsg; # forward declaration
sub cleanupchildren; # forward declaration

my %resphash = (
		EX_OK          => 0,  # no problems
		EX_USAGE       => 64, # command line usage error
		EX_DATAERR     => 65, # data format error
		EX_NOINPUT     => 66, # cannot open input
		EX_NOUSER      => 67, # addressee unknown
		EX_NOHOST      => 68, # host name unknown
		EX_UNAVAILABLE => 69, # service unavailable
		EX_SOFTWARE    => 70, # internal software error
		EX_OSERR       => 71, # system error (e.g., can't fork)
		EX_OSFILE      => 72, # critical OS file missing
		EX_CANTCREAT   => 73, # can't create (user) output file
		EX_IOERR       => 74, # input/output error
		EX_TEMPFAIL    => 75, # temp failure; user is invited to retry
		EX_PROTOCOL    => 76, # remote error in protocol
		EX_NOPERM      => 77, # permission denied
		EX_CONFIG      => 78, # configuration error
		);

# defaults
my %opt = ('user-config' => 1);

my @OLD_ARGV = @ARGV;    # Getopt::Long tends to clear @ARGV
Getopt::Long::Configure ("bundling");
GetOptions(
	'auto-whitelist|whitelist|a' => \$opt{'auto-whitelist'},
	'create-prefs!', => \$opt{'create-prefs'}, 'c' => \$opt{'create-prefs'},
	'daemonize!' => \$opt{'daemonize'}, 'd' => \$opt{'daemonize'},
	'help|h' => \$opt{'help'},
	'listen-ip|ip-address|i=s' => \$opt{'listen-ip'},
	'max-children|m=i' => \$opt{'max-children'},
	'port|p=i' => \$opt{'port'},
	'sql-config!' => \$opt{'sql-config'}, 'q' => \$opt{'sql-config'},
        'setuid-with-sql' => \$opt{'setuid-with-sql'}, 'Q' => \$opt{'setuid-with-sql'},
	'virtual-config|V=s' => \$opt{'virtual-config'},
	'pidfile|r=s' => \$opt{'pidfile'},
	'syslog|s=s' => \$opt{'syslog'},
	'syslog-socket=s' => \$opt{'syslog-socket'},
	'username|u=s' => \$opt{'username'},
	'vpopmail!' => \$opt{'vpopmail'}, 'v' => \$opt{'vpopmail'},
	'configpath|C=s' => \$opt{'configpath'},
	'user-config!' => \$opt{'user-config'}, 'x' => sub{$opt{'user-config'}=0},
	'allowed-ips|A=s' => \@{$opt{'allowed-ip'}},
	'debug!', => \$opt{'debug'}, 'D' => \$opt{'debug'},
	'local!' => \$opt{'local'}, 'L' => \$opt{'local'},
	'paranoid!' => \$opt{'paranoid'}, 'P' => \$opt{'paranoid'},
	'stop-at-threshold!' => \$opt{'stop-at-threshold'}, 'S' => \$opt{'stop-at-threshold'},
	'helper-home-dir|H:s' => \$opt{'home_dir_for_helpers'},

        # will be stripped in future release
	'add-from!' => sub { warn "The --add-from option has been removed\n" },
        'F=i' => sub { warn "The -F option has been removed\n" }

) or pod2usage(-exitval => $resphash{'EX_USAGE'}, -verbose => 0);
@ARGV = @OLD_ARGV;

$opt{'help'} and pod2usage(-exitval => $resphash{'EX_USAGE'}, -verbose => 0, -message => 'For more details, use "man spamd"');

# These can be changed on command line with -A flag
if(@{$opt{'allowed-ip'}})
{
    set_allowed_ip(split /,/,join(',',@{$opt{'allowed-ip'}}));
}
else
{
    set_allowed_ip('127.0.0.1');
}

# This can be changed on the command line with the -s flag
my $log_facility = $opt{'syslog'} || 'mail';

my $dontcopy = 1;
if ($opt{'create-prefs'}) { $dontcopy = 0; }

my $extrapid = 5000;
$extrapid = $opt{'max-children'} if defined($opt{'max-children'}) && $opt{'max-children'} > 0;

my $orighome;
if (defined $ENV{'HOME'}) {
    $orighome = $ENV{'HOME'};   # keep a copy for use by Razor, Pyzor etc.
    delete $ENV{'HOME'}; # we do not want to use this when running spamd
}

my $spamtest = Mail::SpamAssassin->new({
    dont_copy_prefs => $dontcopy,
    rules_filename => ($opt{'configpath'} || 0),
    local_tests_only => ($opt{'local'} || 0),
    stop_at_threshold => ($opt{'stop-at-threshold'} || 0),
    debug => ($opt{'debug'} || 0),
    paranoid => ($opt{'paranoid'} || 0),
    home_dir_for_helpers => (defined $opt{'home_dir_for_helpers'} ? $opt{'home_dir_for_helpers'} : $orighome),
    PREFIX => $PREFIX,
    DEF_RULES_DIR => $DEF_RULES_DIR,
    LOCAL_RULES_DIR => $LOCAL_RULES_DIR
});

# Do whitelist later in tmp dir. Side effect: this will be done as -u user.

if ($log_facility ne 'stderr') {
  eval {
    setlogsock('unix');
    syslog('debug', 'spamd starting');  # required to actually open the socket
  };

  # Solaris sometimes doesn't support UNIX-domain syslog sockets apparently;
  # same is true for perl 5.6.0 build on an early version of Red Hat 7!
  if ($@) {
    eval {
      setlogsock('inet');
      syslog('debug', 'spamd starting');
    };
  }

  # fall back to stderr if all else fails
  if ($@) {
    warn "failed to setlogsock() on this platform; reporting logs to stderr\n";
    $log_facility = 'stderr';
  }
}

my $port = $opt{'port'} || 783;
my $addr = gethostbyname($opt{'listen-ip'} || '127.0.0.1');
my $proto = getprotobyname('tcp');

($port) = $port =~ /^(\d+)$/ or die "invalid port";

# Be a well-behaved daemon
socket(Server, PF_INET, SOCK_STREAM, $proto)            || die "socket: $!";
setsockopt(Server,SOL_SOCKET,SO_REUSEADDR,pack("l",1))  || die "setsockopt: $!";
bind(Server, sockaddr_in($port, $addr))                 || die "bind: $!";
listen(Server,SOMAXCONN)                                || die "listen: $!";

$opt{'daemonize'} and daemonize();

# support non-root use (after we bind to the port)
my $setuid_to_user = 0;
if ($opt{'username'}) {
    my $uuid = getpwnam($opt{'username'});
    if (!defined $uuid || $uuid == 0) {
	die "fatal: cannot run as nonexistent user or root with -u option\n";
    }

    # make sure we can unlink it later
    if (defined $opt{'pidfile'}) {
      chown $uuid, -1, $opt{'pidfile'}
                or die "fatal: could not chown '$opt{'pidfile'}' to uid $uuid\n";
    }

    $> = $uuid;		# effective uid
    $< = $uuid;		# real uid. we now cannot setuid anymore
    if ($> != $uuid and $> != ($uuid-2**32)) {
	die "fatal: setuid to uid $uuid failed\n";
    }

} elsif ($> == 0) {
    $setuid_to_user = 1;
}


# We should set $ENV{HOME} in /tmp.
$ENV{'HOME'} = (-d '/tmp' ? '/tmp' : $ENV{TMP} || $ENV{TEMP})."/spamassassin-$$"; 
mkdir("$ENV{'HOME'}",0700)
    or die "fatal: Can't create $ENV{'HOME'}";
mkdir("$ENV{'HOME'}/.spamassassin", 0700)
    or die "fatal: Can't create $ENV{'HOME'}/.spamassassin";

# This might be slightly paranoid. Good.

$opt{'auto-whitelist'} and eval
{
    require Mail::SpamAssassin::DBBasedAddrList;

    # create a factory for the persistent address list
    my $addrlistfactory = Mail::SpamAssassin::DBBasedAddrList->new();
    $spamtest->set_persistent_address_list_factory ($addrlistfactory);
};

$spamtest->compile_now(0);	# ensure all modules etc. are loaded
$/ = "\n";			# argh, Razor resets this!  Bad Razor!

unlink <$ENV{HOME}/.spamassassin/*>,<$ENV{HOME}/*>;
rmdir "$ENV{HOME}/.spamassassin";
rmdir $ENV{HOME};
system "/bin/rm -Rf /tmp/spamassassin-$$";
delete $ENV{'HOME'};

my $current_user;
my $paddr;
my $error_before_reaper;

sub REAPER {
    $error_before_reaper = $!;  # take a copy before cleanupchildren()
    # do this silently; dbg() might do a syscall more tricky than
    # print().  syscalls in sig handlers == not good by POSIX
    while ( waitpid(-1, &WNOHANG) > 0 ) {
        $extrapid++;
    }
    $SIG{CHLD} = \&REAPER;
}

# only use REAPER if -m is used.
if ($opt{'max-children'}) {$SIG{CHLD} = \&REAPER;}
else {$SIG{CHLD} = 'IGNORE';}

$SIG{INT} = \&kill_handler;
$SIG{TERM} = \&kill_handler;

# now allow waiting processes to connect, if they're watching the log.
# The test suite does this!
if ($opt{'debug'}) {
    warn "server started on port $port (running version ".Mail::SpamAssassin::Version().")\n";
    warn "server pid: $$\n";
}

logmsg ("server started on port $port (running version ".Mail::SpamAssassin::Version().")");

for ( ; 1; close Client)
{
    $error_before_reaper = 0;
    $paddr = accept(Client,Server);

    if (!$paddr) {
      # this can happen when interrupted by SIGCHLD on Solaris,
      # perl 5.8.0, and some other platforms with -m.
      if ($! == &Errno::EINTR || $error_before_reaper == &Errno::EINTR) {
        cleanupchildren;
        next;
      } else {
        die "accept failed: $! $error_before_reaper";
      }
    }

    my $start = time;

    my($port,$iaddr) = sockaddr_in($paddr);
    my $name = gethostbyaddr($iaddr,AF_INET);

    if (ip_is_allowed(inet_ntoa($iaddr))) {
	logmsg "connection from $name [",
	inet_ntoa($iaddr),"] at port $port";
    } else {
	logmsg "unauthorized connection from $name [",
	inet_ntoa($iaddr),"] at port $port";
	next;
    }

    spawn sub {
	$|=1; # always immediately flush output

	# First request line off stream
        local $_ = <STDIN>;

	if (!defined $_) {
	    protocol_error ("(closed before headers)");
	    return 1;
	}

	chomp;

        # It may be s SKIP message, meaning that the client (spamc)
        # thinks it is too big to check.  So we don't do any real work
        # in that case.

        if (/SKIP SPAMC\/(.*)/)
	{
	    logmsg "skipped large message in ".
		sprintf("%3d", time - $start) ." seconds.";
	    return 0;

	}

	# It might be a CHECK message, meaning that we should just check
	# if it's spam or not, then return the appropriate response.

	elsif (/(CHECK|SYMBOLS|REPORT|REPORT_IFSPAM) SPAMC\/(.*)/)
	{
	    my $method = $1;
	    my $version = $2;
	    my $expected_length;

            # Protocol version 1.0 and greater may have "User:" and
            # "Content-length:" headers.  But they're not required.

	    if($version > 1.0)
	    {
		while(1)
                {
                    $_ = <STDIN>;
                    if(!defined $_)
                    {
                        protocol_error ("(EOF during headers)");
                        return 1;
                    }

                    if (/^\r\n/s) { last; }

                    # We'll run handle user unless we've been told not
                    # to process per-user config files.  Otherwise
                    # we'll check and see if we need to try SQL
                    # lookups.  If $opt{'user-config'} is true, we need to try
                    # their config file and then do the SQL lookup.
                    # If $opt{'user-config'} IS NOT true, we skip the conf file and
                    # only need to do the SQL lookup if $opt{'sql-config'} IS
                    # true.  (I got that wrong the first time.)

                    if (/^User: (.*)\r\n/)
                    {
                        $current_user = $1;
                        if (!$opt{'user-config'})
                        {
                          if ($opt{'sql-config'}) {
                            handle_user_sql($current_user);
                          } elsif ($opt{'virtual-config'}) {
                            handle_virtual_user($current_user);
                          } elsif ($opt{'setuid-with-sql'}) {
                            handle_user_setuid_with_sql($current_user);
                            $setuid_to_user = 1; #to benefit from any paranoia.
                          }
                        } else {
                          handle_user($current_user);
                        }
                    }
		    if (/^Content-length: ([0-9]*)\r\n/i) {
			$expected_length = $1;
		    }
                }
	    }

           if ( $setuid_to_user && $> == 0 )
           {
               if ($spamtest->{'paranoid'}) {
                   logmsg "PARANOID: still running as root, closing connection.";
                   die;
               }
                logmsg "Still running as root: user not specified with -u, ".
		    "not found, or set to root.  Fall back to nobody.";
		my $uid = getpwnam('nobody');
               $> = $uid;
               if ( !defined($uid) || ($> != $uid and $> != ($uid-2**32))) {
                   logmsg "fatal: setuid to nobody failed";
                   die;
               }
            }

	    my $resp = "EX_OK";

	    # Now read in message
            my @msglines;
            my $actual_length;
            my $in_header = 1;
            my $msgid;
            for (<STDIN>) {
                if ($in_header) {
                    if (/^$/) {
                        $in_header = 0;
                        $msgid = '(unknown)' unless($msgid);
                    }
                    elsif (/^Message-Id:\s+(.*)$/i) {
                        $msgid = $1;
                        while($msgid =~ s/\([^\(\)]*\)//) {};    # remove comments and
                        $msgid =~ s/^\s+|\s+$//g;                # leading and trailing spaces
                        $msgid =~ s/\s.*$/(...)/;                # keep only the first token
                    }
                }
                push(@msglines, $_);
                $actual_length += length;
            }

            logmsg "checking message $msgid for $current_user:$>" .
              ($expected_length ? ", expecting $expected_length bytes" : "") . ".";

	    my $mail = Mail::SpamAssassin::NoMailAudit->new (
                                data => \@msglines
                         );

	    # Check length if we're supposed to
	    if($expected_length)
	    {
		if($actual_length != $expected_length) { protocol_error ("(Content-length mismatch: $expected_length vs. $actual_length)"); return 1; }
	    }

	    # Now use copy-on-writed (hopefully) SA object
	    my $status = $spamtest->check($mail);
	    my $msg_score = sprintf("%.1f",$status->get_hits);
	    my $msg_threshold = sprintf("%.1f",$status->get_required_hits);
	    my $was_it_spam;
            my $response_header = "SPAMD/1.1 $resphash{$resp} $resp\r\n"; 
            my $response_spam_status = "";
	    if ($status->is_spam) {
                $response_spam_status = $method eq "REPORT_IFSPAM" ? "Yes" : "True";
		$was_it_spam = 'identified spam';
	    }
	    else
	    {
                $response_spam_status = $method eq "REPORT_IFSPAM" ? "No" : "False";
		$was_it_spam = 'clean message';
	    }
            if ($method eq "REPORT_IFSPAM") {
                $response_header .= "X-Spam-Status: $response_spam_status, hits=$msg_score required=$msg_threshold tests="
                    . $status->get_names_of_tests_hit
                    . " version=" . Mail::SpamAssassin::Version();
            }
            else
            {
                $response_header .= "Spam: $response_spam_status ; $msg_score / $msg_threshold";
	    }
            print $response_header, "\r\n\r\n";
	    print $status->get_names_of_tests_hit,"\r\n" if ($method eq "SYMBOLS");
	    print $status->get_report,"\r\n" if ($method eq "REPORT" or $method eq "REPORT_IFSPAM" and $status->is_spam);
	    $current_user ||= '(unknown)';
	    logmsg "$was_it_spam ($msg_score/$msg_threshold) for $current_user:$> in ".
		sprintf("%.1f", time - $start) ." seconds, $actual_length bytes.";

	    $status->finish();	# added by jm to allow GC'ing
	}

        # If we get the PROCESS command, the client is going to send a
        # message that we need to filter.  This is were all the real
        # work is one.

        elsif (/PROCESS SPAMC\/(.*)/)
	{
	    my $version = $1;
	    my $expected_length;

            # Protocol version 1.0 and greater may have "User:" and
            # "Content-length:" headers.  But they're not required.

	    if($version > 1.0)
	    {
		while(1)
                {
                    $_ = <STDIN>;
                    if(!defined $_)
                    {
                        protocol_error ("(EOF during headers)");
                        return 1;
                    }

                    if (/^\r\n/s) { last; }

                    # We'll run handle user unless we've been told not
                    # to process per-user config files.  Otherwise
                    # we'll check and see if we need to try SQL
                    # lookups.  If $opt{'user-config'} is true, we need to try
                    # their config file and then do the SQL lookup.
                    # If $opt{'user-config'} IS NOT true, we skip the conf file and
                    # only need to do the SQL lookup if $opt{'sql-config'} IS
                    # true.  (I got that wrong the first time.)

                    if (/^User: (.*)\r\n/)
                    {
                        $current_user = $1;
                        if (!$opt{'user-config'})
                        {
                          if ($opt{'sql-config'}) {
                            handle_user_sql($current_user);
                          } elsif ($opt{'virtual-config'}) {
                            handle_virtual_user($current_user);
                          } elsif ($opt{'setuid-with-sql'}) {
                            handle_user_setuid_with_sql($current_user);
                            $setuid_to_user = 1; #to benefit from any paranoia.
                          }
                        } else {
                          handle_user($current_user);
                        }
                    }
		    if (/^Content-length: ([0-9]*)\r\n/i) {
			$expected_length = $1;
		    }
                }
	    }

            if ( $setuid_to_user && $> == 0 )
            {
               if ($spamtest->{paranoid}) {
                   logmsg "PARANOID: still running as root, closing connection.";
                   die;
               }
                logmsg "Still running as root: user not specified, ".
		    "not found, or set to root.  Fall back to nobody.";
		my $uid = getpwnam('nobody');
               $> = $uid;
               if ( !defined($uid) || ($> != $uid and $> != ($uid-2**32))) {
                   logmsg "fatal: setuid to nobody failed";
                   die;
               }
            }

	    my $resp = "EX_OK";

	    # Now read in message
            my @msglines;
            my $actual_length;
            my $in_header = 1;
            my $msgid;
            for (<STDIN>) {
                if ($in_header) {
                    if (/^$/) {
                        $in_header = 0;
                        $msgid = '(unknown)' unless($msgid);
                    }
                    elsif (/^Message-Id:\s+(.*)$/i) {
                        $msgid = $1;
                        while($msgid =~ s/\([^\(\)]*\)//) {};    # remove comments and
                        $msgid =~ s/^\s+|\s+$//g;                # leading and trailing spaces
                        $msgid =~ s/\s.*$/(...)/;                # keep only the first token
                    }
                }
                push(@msglines, $_);
                $actual_length += length;
            }

            logmsg "processing message $msgid for $current_user:$>" .
              ($expected_length ? ", expecting $expected_length bytes" : "") . ".";

	    my $mail = Mail::SpamAssassin::NoMailAudit->new (
                                data => \@msglines
                         );

	    # Check length if we're supposed to
	    if($expected_length)
	    {
		if($actual_length != $expected_length) { protocol_error ("(Content-length mismatch: $expected_length vs. $actual_length)"); return 1; }
	    }

	    # Now use copy-on-writed (hopefully) SA object
	    my $status = $spamtest->check($mail);
	    $status->rewrite_mail; #if $status->is_spam;

	    # Build the message to send back and measure it
	    my $msg_resp = join '',$mail->header,"\n",@{$mail->body};
	    my $msg_resp_length = length($msg_resp);
	    if($version >= 1.2) # Spamc protocol 1.2 means it accepts content-length
	    {
		print "SPAMD/1.1 $resphash{$resp} $resp\r\n",
		"Content-length: $msg_resp_length\r\n\r\n",
		$msg_resp;
	    }
	    else # Earlier than 1.2 didn't accept content-length
	    {
		print "SPAMD/1.0 $resphash{$resp} $resp\r\n",
		$msg_resp;
	    }
	    my $was_it_spam;
	    if($status->is_spam) { $was_it_spam = 'identified spam'; } else { $was_it_spam = 'clean message'; }
            my $msg_score = sprintf("%.1f",$status->get_hits);
            my $msg_threshold = sprintf("%.1f",$status->get_required_hits);
	    $current_user ||= '(unknown)';
	    logmsg "$was_it_spam ($msg_score/$msg_threshold) for $current_user:$> in ".
	        sprintf("%.1f", time - $start) ." seconds, $actual_length bytes.";

	    $status->finish();	# added by jm to allow GC'ing
	}

        # If it was none of the above, then we don't know what it was.

	else
	{
	    protocol_error ($_);
	}
    };

    # Clean up any defunct processes.  (Not sure if we still need this
    # with our SIGCHLD handler, but best to keep it around anyway.)
    cleanupchildren;
}

sub protocol_error {
    local $_ = shift;

    my $resp = "EX_PROTOCOL";
    print "SPAMD/1.0 $resphash{$resp} Bad header line: $_\r\n";
    logmsg "bad protocol: header error: $_";
}

sub spawn {
    my $coderef = shift;

    unless (@_ == 0 && $coderef && ref($coderef) eq 'CODE') {
	warn "usage: spawn CODEREF";
    }

    my $pid;

    cleanupchildren;
    my $tries = 0;
    while ($opt{'max-children'} && $extrapid <= 0) {
      if ($tries++ > 0) { sleep (1); }
      logmsg "hit max-children limit (".$opt{'max-children'}.
                        "): waiting for some to exit";
      cleanupchildren;
    }

    $extrapid--;

    if (!defined($pid = fork)) {
       logmsg "cannot fork: $!";
       $extrapid++;
       return;
    } elsif ($pid) {
       return; # I'm the parent
    }
    # else I'm the child -- go spawn

    close Server;
    open(STDIN,  "<&Client")   || die "can't dup client to stdin";
    open(STDOUT, ">&Client")   || die "can't dup client to stdout";
    exit &$coderef();
}

sub handle_user
{
    my $username = shift;

    #
    # If vpopmail config enabled then look up userinfo for vpopmail uid
    # as defined by $opt{'username'}
    #
    my $userid = '';
    if ($opt{'vpopmail'} && $opt{'username'}) {
	$userid = $opt{'username'};
    } else {
	$userid = $username;
    }
    my ($name,$pwd,$uid,$gid,$quota,$comment,$gcos,$dir,$etc) =
	getpwnam($userid);

    if ( !$spamtest->{'paranoid'} && !defined($uid) ) {
	#if we are given a username, but can't look it up,
	#Maybe NIS is down? lets break out here to allow
	#them to get 'defaults' when we are not running paranoid.
	logmsg "handle_user() -> unable to find user [$userid]!";
	return 0;
    }

    if ($setuid_to_user) {
	$> = $uid;
       if ( !defined($uid) || ($> != $uid and $> != ($uid-2**32))) {
           logmsg "fatal: setuid to $username failed";
	    die;		# make it fatal to avoid security breaches
	}
	else
	{
	   logmsg "info: setuid to $username succeeded";
    }
    }

    #
    # If vpopmail config enabled then set $dir to virtual homedir
    #
    if ($opt{'vpopmail'} && $opt{'username'}) {
	$dir = `$dir/bin/vuserinfo -d $username`;
	$dir =~ s/\n//g;
    }
    my $cf_file = $dir."/.spamassassin/user_prefs";

    #
    # If vpopmail config enabled then pass virtual homedir onto create_default_cf_needed
    #
    if ($opt{'vpopmail'} && $opt{'username'}) {
	create_default_cf_if_needed ($cf_file, $username, $dir);
    } else {
	create_default_cf_if_needed ($cf_file, $username);
    }
    $spamtest->read_scoreonly_config ($cf_file);
    return 1;
}

# Handle user configs without the necessity of having individual users or a
# SQL database.
sub handle_virtual_user
{
	my $username = shift;

	# the virtual-config contains the path to a directory which will
	# contain per-user preferences.
	my $dir=$opt{'virtual-config'};
	my $file="$dir/$username.prefs";

	# If the user file is not there, look for a default.prefs
	if(! -f $file) {
		$file="$dir/default.prefs";
		# And if that isn't there, log that it's misconfigured.
		if(! -f $file) {
			logmsg("Couldn't find a virtual directory or defaults "
				. "for $username in $dir");
			return(0);
		} else {
			# Log that the default configuration is being used for a user.
			logmsg("Using default config for $username");
		}
	}

	# Found a config, load it.
	$spamtest->read_scoreonly_config($file);
	return(1);
}

sub handle_user_sql
{
    my $username = shift;
    $spamtest->load_scoreonly_sql ($username);
    return 1;
}

sub handle_user_setuid_with_sql
{
    my $username = shift;
    my ($name,$pwd,$uid,$gid,$quota,$comment,$gcos,$dir,$etc) = getpwnam($username);

    if ( !$spamtest->{'paranoid'} && !defined($uid) ) {
       #if we are given a username, but can't look it up,
       #Maybe NIS is down? lets break out here to allow
       #them to get 'defaults' when we are not running paranoid.
       logmsg "handle_user() -> unable to find user [$username]!\n";
       return 0;
    }

    if ($setuid_to_user) {
       $> = $uid;
       if ( !defined($uid) || ($> != $uid and $> != ($uid-2**32))) {
           logmsg "fatal: setuid to $username failed";
          die;         # make it fatal to avoid security breaches
        }
       else
       {
          logmsg "info: setuid to $username succeeded, reading scores from SQL.";
        }
    }
    $spamtest->load_scoreonly_sql ($username);
    return 1;
}

sub create_default_cf_if_needed {
    my ($cf_file, $username, $userdir) = @_;

    # Parse user scores, creating default .cf if needed:
    if( ! -r $cf_file && ! $spamtest->{'dont_copy_prefs'})
    {
	logmsg "Creating default_prefs [$cf_file]";

	#
	# If vpopmail config enabled then pass virtual homedir onto create_default_prefs
	# via $userdir
	#
	$spamtest->create_default_prefs ($cf_file,$username,$userdir);

	if ( ! -r $cf_file )
	{
	    logmsg "Couldn't create readable default_prefs for [$cf_file]";
	}
    }
}

sub logmsg
{
    my $old = $SIG{'PIPE'};
    $SIG{'PIPE'} = sub { $main::SIGPIPE_RECEIVED++; };

    # bug 605: http://bugzilla.spamassassin.org/show_bug.cgi?id=605
    # more efficient for daemontools if --syslog=stderr is used
    if($log_facility eq 'stderr') {
      print STDERR join("",@_) . "\n";
      return;
    }

    openlog('spamd','cons,pid',$log_facility);
    my $str = join ('', @_);
    if ($opt{'debug'}) { warn "logmsg: $str\n"; }

    eval {
      syslog('info', '%s', $str);
    };
    if ($@) {
      warn "syslog() failed, try using --syslog-socket switch ($@)\n";
    }

    if ($main::SIGPIPE_RECEIVED) {
       # SIGPIPE recieved when writing to syslog - this has been
       # found to occur with syslog-ng after syslog-ng restarts.
       # Close and reopen the log handle, then try again.

       closelog();
       openlog('spamd','cons,pid',$log_facility);
       syslog('info', '%s', $str);

       # now report what happend
       my $msg = "SIGPIPE received - reopening log socket";
       if ($opt{'debug'}) { warn "logmsg: $msg\n"; }
       syslog('warning', '%s', $msg);

       # if we've received multiple sigpipes, logging is probably
       # still broken.
       if ($main::SIGPIPE_RECEIVED > 1) {
           warn "logging failure: multiple SIGPIPEs received\n";
       }

       $main::SIGPIPE_RECEIVED = 0;
    }

    $SIG{'PIPE'} = $old if defined($old);
}

sub kill_handler
{
    my ($sig) = @_;
    logmsg "server killed by SIG$sig, shutting down";
    close Server;
    defined($opt{'pidfile'}) and unlink($opt{'pidfile'});
    exit 0;
}

# Remember to change this in REAPER too if you change anything!
sub cleanupchildren {
    while ( my $kid = waitpid(-1, &WNOHANG) > 0 ) {
        $extrapid++;
        Mail::SpamAssassin::dbg("cleaned up kid $kid, pool=$extrapid");
    }
}

use POSIX 'setsid';
sub daemonize
{
    $0 = join(' ', $0, @ARGV) unless($opt{'debug'});; # pretty command line in ps
    chdir '/' or die "Can't chdir to '/': $!";
    $SIG{__WARN__} = sub { logmsg($_[0]); };
    open STDIN,'/dev/null' or die "Can't read '/dev/null': $!";
    open STDOUT,'>/dev/null' or die "Can't write '/dev/null': $!";
    defined(my $pid=fork) or die "Can't fork: $!";
    exit if $pid;
    setsid or die "Can't start new session: $!";
    open STDERR,'>&STDOUT' or die "Can't duplicate stdout: $!";
    Mail::SpamAssassin::dbg('daemonized.');

    if(defined($opt{'pidfile'})) {
       open PIDF,">$opt{'pidfile'}" or warn "Can't open PID file: $!";
       print PIDF $$."\n";
       close PIDF;
    }
}

my @allowed_nets;
sub set_allowed_ip {
    foreach (@_) {
        my ($ip, $bits) = m#^\s*(\d+\.\d+\.\d+\.\d+)(?:/(\d+))?\s*$#
                or die "illegal network address given: '$_'.  Aborting.\n";
        defined $bits or $bits = 32;
        my $mask = 0xFFffFFff  ^ ((2 ** (32-$bits)) - 1);
        push @allowed_nets, {
            mask => $mask,
            ip   => my_inet_aton($ip) & $mask,
        };
    }
}
sub ip_is_allowed {
    my $ip = my_inet_aton($_[0]);
    foreach my $net (@allowed_nets){
        return 1 if ($ip & $net->{mask}) == $net->{ip};
    }
    0;
}
sub my_inet_aton { unpack("N", pack("C4", split(/\./, $_[0]))) }


#-S, --stop-at-threshold            Stop tests after the threshold is reached
#=item B<-S>, B<--stop-at-threshold>
#Stop spam checking as soon as the spam threshold is reached, to increase
#performance. This option also turns off Razor reporting.

=head1 NAME

spamd - daemonized version of spamassassin

=head1 SYNOPSIS

spamd [options]

Options:

 -a, --auto-whitelist, --whitelist  Use auto-whitelists
 -c, --create-prefs                 Create user preferences files
 -C path, --configpath=path         Path for default config files
 -d, --daemonize                    Daemonize
 -h, --help                         Print usage message.
 -i ipaddr, --listen-ip=ipaddr,...  Listen on the IP ipaddr (default: 127.0.0.1)
 -m num, --max-children num         Allow maximum num children
 -p port, --port                    Listen on specified port (default: 783)
 -q, --sql-config                   Enable SQL config (only useful with -x)
 -Q, --setuid-with-sql              Enable SQL config (only useful with -x,
                                    enables use of -a and -H)
 -V, --virtual-config=dir           Enable Virtual configs (needs -x)
 -r pidfile, --pidfile              Write the process id to pidfile
 -s facility, --syslog=facility     Specify the syslog facility (default: mail)
 --syslog-socket=type               How to connect to syslogd (default: unix)
 -u username, --username=username   Run as username
 -v, --vpopmail                     Enable vpopmail config
 -x, --nouser-config                Disable user config files
 -A host,..., --allowed-ips=..,..   Limit ip addresses which can connect
 -D, --debug                        Print debugging messages
 -L, --local                        Use local tests only (no DNS)
 -P, --paranoid                     Die upon user errors
 -H dir				    Specify a different HOME directory, path optional


=head1 DESCRIPTION

The purpose of this program is to provide a daemonized version of the
spamassassin executable.  The goal is improving throughput performance for
automated mail checking.

This is intended to be used alongside C<spamc>, a fast, low-overhead C client
program.

See the README file in the C<spamd> directory of the SpamAssassin distribution
for more details.

Note: Although spamd will check per-user config files for every message, any
changes to the system-wide config files will require restarting spamd for
the changes to take effect.

=head1 OPTIONS

Options of the long form can be shortened as long as they remain
unambiguous.  (i.e. B<--dae> can be used instead of B<--daemonize>)
Also, boolean options (like B<--auto-whitelist>) can be negated by
adding I<--no> (B<--noauto-whitelist>), however, this is usually unnecessary.

=over

=item B<-a>, B<--auto-whitelist>, B<--whitelist>

Use auto-whitelists.  Auto-whitelists track the long-term average score for
each sender and then shift the score of new messages toward that long-term
average.  This can increase or decrease the score for messages, depending on
the long-term behavior of the particular correspondent.  See the README file
for more details.

=item B<-c>, B<--create-prefs>

Create user preferences files if they don't exist (default: don't).

=item B<-C> I<path>, B<--configpath>=I<path>

Use the specified path for locating configuration files.  Ignore the default
directories.

=item B<-d>, B<--daemonize>

Detach from starting process and run in background (daemonize).

=item B<-h>, B<--help>

Print a brief help message, then exit without further action.

=item B<-i> I<ipaddress>, B<--listen-ip>=I<ipaddress>, B<--ip-address>=I<ipaddress>

Tells spamd to listen on the specified IP address [defaults to 127.0.0.1].  Use
0.0.0.0 to listen on all interfaces.

=item B<-p> I<port>, B<--port>=I<port>

Optionally specifies the port number for the server to listen on.

=item B<-q>, B<--sql-config>

Turn on SQL lookups even when per-user config files have been disabled
with B<-x>. this is useful for spamd hosts which don't have user's
home directories but do want to load user preferences from an SQL
database.

=item B<-Q>, B<--setuid-with-sql>

Turn on SQL lookups even when per-user config files have been disabled
with B<-x> and also setuid to the user.  This is useful for spamd hosts
which want to load user preferences from an SQL database but also wish to
support the use of B<-a> (AWL) and B<-H> (Helper home directories.)

=item B<-V>, B<--virtual-config>=I<directory>

This option specifies a directory which will contain per-user preference
files.  The files are in the format of B<I<username>.prefs>.  A
B<default.prefs> file will be used if an individual user config is not
found.

Note that this B<requires> that B<-x> is used, and cannot be combined with
SQL-based configuration.

=item B<-r> I<pidfile>, B<--pidfile>=I<pidfile>

Write the process ID of the spamd parent to the file specified by I<pidfile>.
The file will be unlinked when the parent exits.  Note that when running
with the B<-u> option, the file must be writable by that user.

=item B<-v>, B<--vpopmail>

Enable vpopmail config  (only useful with B<-u> set to vpopmail
user). This option is useful for vpopmail virtual users who
do not have an entry in the system /etc/passwd file.  This
allows spamd to lookup/create user_prefs in the vpopmail users
own maildir.

=item B<-s> I<facility>, B<--syslog>=I<facility>

Specify the syslog facility to use (default: mail).  If C<stderr> is specified,
output will be written to stderr.  This is useful if you're running C<spamd>
under the C<daemontools> package.

=item B<--syslog-socket>=I<type>

Specify how spamd should send messages to syslogd.  The options are C<unix>,
C<inet> or C<none>.   The default is to try C<unix> first, falling back to
C<inet> if perl detects errors in its C<unix> support.

Some platforms, or versions of perl, are shipped with dysfunctional versions of
the B<Sys::Syslog> package which do not support some socket types, so you may
need to set this.  If you get error messages regarding B<__PATH_LOG> or similar
from spamd, try changing this setting.

=item B<-u> I<username>, B<--username>=I<username>

Run as the named user.  The alternative, default behaviour is to setuid() to
the user running C<spamc>, if C<spamd> is running as root.

=item B<-x>, B<--nouser-config>, B<--user-config>

Turn off(on) per-user config files.  All users will just get the default
configuration.

=item B<-A> I<host,...>, B<--allowed-ips>=I<host,...>

Specify a list of authorized hosts or networks which can connect to this spamd
instance. Single IP addresses can be given, or ranges of ip addresses in
address/masklength format.  This option can be specified multiple times or can
take a list of addresses separated by commas.  Examples:

B<-A 10.11.12.13,10.11.12.14> -- only allow connections from 10.11.12.13 and
10.11.12.14

-B<A 10.200.300.0/24> -- allow connections from any machine in the range
10.200.300.*

By default, connections are only accepted from localhost [127.0.0.1].

=item B<-D>, B<--debug>

Print debugging messages

=item B<-L>, B<--local>

Perform only local tests on all mail.  In other words, skip DNS and other
network tests.  Works the same as the C<-L> flag to C<spamassassin(1)>.

=item B<-P>, B<--paranoid>

Die on user errors (for the user passed from spamc) instead of falling back to
user I<nobody> and using the default configuration.

=item B<-m> I<number>, B<--max-children>=I<number>

This option is not recommended -- see below.

Specify a maximum number of children to spawn. Spamd will wait until another
child finishes before forking again. Meanwhile, incoming connections will be
queued.

Use of this option is not recommended in most circumstances, as we have
received reports of the perl interpreter dumping core.  It seems that some
versions of Perl on some OSes have issues with tracking child processes and
signal handling, which the perl interpreter to crash.

Please note that there is a OS specific maximum of connections that can be
queued (Try C<perl -MSocket -e'print SOMAXCONN'> to find this maximum).

=item B<-H> I<directory>, B<--helper-home-dir>=I<directory>

Specify that external programs such as Razor, DCC, and Pyzor should have
a HOME environment variable set to a specific directory.  The default
is to use the HOME environment variable setting from the shell running
spamd.  By specifying no argument, spamd will use the spamc caller's
home directory instead.

=back

=head1 BUGS

Perl 5.005_03 seems to have a bug, which spamd triggers, causing messages to
pass through unscanned.  Upgrading to Perl 5.6 seems to fix the problem, so
that's the current workaround.  More information can be found at
http://www.hughes-family.org/bugzilla/show_bug.cgi?id=497

The C<-m> switch seems to trigger signal-handling bugs in many versions
of Perl.

=head1 SEE ALSO

spamc(1)
spamassassin(1)
Mail::SpamAssassin(3)
Mail::SpamAssassin::Conf(3)

=head1 AUTHOR

Craig R Hughes E<lt>craig@hughes-family.orgE<gt>

=head1 PREREQUISITES

C<Mail::SpamAssassin>

=cut