#!/usr/bin/perl -w
############################################################
# The code in this file is copyright 2001 by Craig Hughes  #
# It is licensed for use with the SpamAssassin distribution#
# under the terms of the Perl Artistic License, the text of#
# which is included as the file named "License"            #
############################################################

use lib '../lib';	# added by jm for use inside the distro
use strict;
use Socket;
use Carp;
use Mail::SpamAssassin;
use Mail::SpamAssassin::NoMailAudit;
use Sys::Syslog qw(:DEFAULT setlogsock);
use POSIX qw(setsid);
use Getopt::Std;
use POSIX ":sys_wait_h";

my %resphash = (
		EX_OK          => 0,  # no problems
		EX_USAGE       => 64, # command line usage error
		EX_DATAERR     => 65, # data format error
		EX_NOINPUT     => 66, # cannot open input
		EX_NOUSER      => 67, # addressee unknown
		EX_NOHOST      => 68, # host name unknown
		EX_UNAVAILABLE => 69, # service unavailable
		EX_SOFTWARE    => 70, # internal software error
		EX_OSERR       => 71, # system error (e.g., can't fork)
		EX_OSFILE      => 72, # critical OS file missing
		EX_CANTCREAT   => 73, # can't create (user) output file
		EX_IOERR       => 74, # input/output error
		EX_TEMPFAIL    => 75, # temp failure; user is invited to retry
		EX_PROTOCOL    => 76, # remote error in protocol
		EX_NOPERM      => 77, # permission denied
		EX_CONFIG      => 78, # configuration error
		);

sub usage
{
    warn <<EOUSAGE;

Usage: spamd [options]

Options:

  -a              Use per-user auto-whitelists.
  -c              Create user preferences files if they do not already exist
  -d		  Daemonize, detach from parent process
  -h              Print this usage message and exit
  -i ipaddr       Listen on the specified IP address (default: 127.0.0.1,
                  use 0.0.0.0 to listen on all available addresses)
  -p port         Listen on the specific port (default: 783)
  -q              Enable SQL config (only useful with -x)
  -s facility     Specify the syslog facility (default: mail)
  -u username     Run as named user, instead of running setuid to spamc user
  -x              Disable user config files
  -A host,...     Set list of authorized ip addresses which can connect to this
                  server (default: 127.0.0.1)
  -D              Print debugging messages
  -L              Use local tests only (no DNS or other network lookups)
  -P              Die upon user errors (does not exist or user = root) instead of
                  running as 'nobody' with defaults.
  -F 0|1          remove/add 'From ' line at start of output (default: 1)


EOUSAGE
    exit $resphash{EX_USAGE};
}

use vars qw{
    $opt_d $opt_h $opt_L $opt_p $opt_A $opt_x $opt_s $opt_D $opt_u
    $opt_P $opt_c $opt_a $opt_i $opt_q $opt_F
};

getopts('acdhi:p:qs:u:xA:DLPF:') or usage();

$opt_h and usage();

# These can be changed on command line with -A flag
my %allowed = ();
if($opt_A)
{
    %allowed = map { $_ => 1 } split /,/, $opt_A;
}
else
{
    %allowed = ( '127.0.0.1' => 1 );
}

# This can be changed on the command line with the -s flag
my $log_facility;
if($opt_s)
{
    $log_facility = $opt_s;
}
else
{
    $log_facility = 'mail';
}

my $dontcopy = 1;
if ($opt_c) { $dontcopy = 0; }

if (defined $ENV{'HOME'}) {
    delete $ENV{'HOME'}; # we do not want to use this when running spamd
}

my $spamtest = Mail::SpamAssassin->new({
    dont_copy_prefs => $dontcopy,
    local_tests_only => $opt_L,
    debug => $opt_D,
    paranoid => ($opt_P || 0),
});

$opt_a and eval
{
    require Mail::SpamAssassin::DBBasedAddrList;

    # create a factory for the persistent address list
    my $addrlistfactory = Mail::SpamAssassin::DBBasedAddrList->new();
    $spamtest->set_persistent_address_list_factory ($addrlistfactory);
};


sub spawn;  # forward declaration
sub logmsg; # forward declaration

setlogsock('unix');

my $port = $opt_p || 783;
my $addr = gethostbyname($opt_i || '127.0.0.1');
my $proto = getprotobyname('tcp');

($port) = $port =~ /^(\d+)$/ or die "invalid port";

# Be a well-behaved daemon
socket(Server, PF_INET, SOCK_STREAM, $proto)            || die "socket: $!";
setsockopt(Server,SOL_SOCKET,SO_REUSEADDR,pack("l",1))  || die "setsockopt: $!";
bind(Server, sockaddr_in($port, $addr))                 || die "bind: $!";
listen(Server,SOMAXCONN)                                || die "listen: $!";

# support non-root use (after we bind to the port)
my $setuid_to_user = 0;
if ($opt_u) {
    my $uuid = getpwnam($opt_u);
    if (!defined $uuid || $uuid == 0) {
	die "fatal: cannot run as nonexistent user or root with -u option\n";
    }
    $> = $uuid;		# effective uid
    $< = $uuid;		# real uid. we now cannot setuid anymore
    if ($> != $uuid) {
	die "fatal: setuid to uid $uuid failed\n";
    }

} elsif ($> == 0) {
    $setuid_to_user = 1;
}

$spamtest->compile_now();	# ensure all modules etc. are loaded
$/ = "\n";			# argh, Razor resets this!  Bad Razor!

$opt_d and daemonize();

my $waitedpid = 0;
my $current_user;
my $paddr;

$SIG{CHLD} = 'IGNORE';		# important: avoids perl sighandling bug on BSD
$SIG{INT} = \&kill_handler;
$SIG{TERM} = \&kill_handler;

# now allow waiting processes to connect, if they're watching the log.
# The test suite does this!
if ($opt_D) {
    warn "server started on port $port\n";
    warn "server pid: $$\n";
}
logmsg "server started on port $port";

for ( $waitedpid = 0;
      ($paddr = accept(Client,Server)) || $waitedpid;
      $waitedpid = 0, close Client)
{
    next if $waitedpid and not $paddr;

    my $start = time;

    my($port,$iaddr) = sockaddr_in($paddr);
    my $name = gethostbyaddr($iaddr,AF_INET);

    if ($allowed{inet_ntoa($iaddr)} ) {
	logmsg "connection from $name [",
	inet_ntoa($iaddr),"] at port $port";
    } else {
	logmsg "unauthorized connection from $name [",
	inet_ntoa($iaddr),"] at port $port";
	next;
    }

    spawn sub {
	$|=1; # always immediately flush output

	# First request line off stream
        local $_ = <STDIN>;

	if (!defined $_) {
	    protocol_error ("(closed before headers)");
	    return 1;
	}

	chomp;

        # It may be s SKIP message, meaning that the client (spamc)
        # thinks it is too big to check.  So we don't do any real work
        # in that case.

        if (/SKIP SPAMC\/(.*)/)
	{
	    logmsg "skipped large message in ".
		sprintf("%3d", time - $start) ." seconds.\n";
	    return 0;

	}

	# It might be a CHECK message, meaning that we should just check
	# if it's spam or not, then return the appropriate response.

	elsif (/CHECK SPAMC\/(.*)/)
	{
	    my $version = $1;
	    my $expected_length;

            # Protocol version 1.0 and greater may have "User:" and
            # "Content-length:" headers.  But they're not required.

	    if($version > 1.0)
	    {
		while(1)
                {
                    $_ = <STDIN>;
                    if(!defined $_)
                    {
                        protocol_error ("(EOF during headers)");
                        return 1;
                    }

                    if (/^\r\n/s) { last; }

                    # We'll run handle user unless we've been told not
                    # to process per-user config files.  Otherwise
                    # we'll check and see if we need to try SQL
                    # lookups.  If $opt_x is NOT true, we need to try
                    # their config file and then do the SQL lookup.
                    # If $opt_x IS true, we skip the conf file and
                    # only need to do the SQL lookup if $opt_q IS
                    # true.  (I got that wrong the first time.)

                    if (/^User: (.*)\r\n/)
                    {
                        if ($opt_x)
                        {
			    if ($opt_q) {
				handle_user_sql($1);
			    }
                        }
			else
                        {
                            handle_user($1);
                        }
                    }
		    if (/^Content-length: ([0-9]*)\r\n/i) {
			$expected_length = $1;
		    }
                }
	    }

            if ($spamtest->{paranoid} && $setuid_to_user && $> == 0)
            {
                logmsg "PARANOID: Still running as root, close connection.";
	    }
            elsif ( $setuid_to_user && $> == 0 )
            {
                logmsg "Still running as root: user not specified, ".
		    "not found, or set to root.  Fall back to nobody.";
		my $uid = getpwnam('nobody');
		if (!defined $uid) { die "no UID for nobody"; }
                $> = $uid;
            }

	    my $resp = "EX_OK";

	    # Now read in message
	    my @msglines = (<STDIN>);
	    my $actual_length = length (join '', @msglines);
	    my $mail = Mail::SpamAssassin::NoMailAudit->new (
                                data => \@msglines,
                                add_From_line => $opt_F
                         );

	    # Check length if we're supposed to
	    if($expected_length)
	    {
		if($actual_length != $expected_length) { protocol_error ("(Content-length mismatch: $expected_length vs. $actual_length)"); return 1; }
	    }

	    # Now use copy-on-writed (hopefully) SA object
	    my $status = $spamtest->check($mail);
	    my $msg_score = int($status->get_hits);
	    my $msg_threshold = int($status->get_required_hits);
	    my $was_it_spam;
	    if ($status->is_spam)
	    {
		print "SPAMD/1.1 $resphash{$resp} $resp\r\nSpam: True ; $msg_score / $msg_threshold\r\n\r\n";
		$was_it_spam = 'identified spam';
	    }
	    else
	    {
		print "SPAMD/1.1 $resphash{$resp} $resp\r\nSpam: False ; $msg_score / $msg_threshold\r\n\r\n";
		$was_it_spam = 'clean message';
	    }
	    $current_user ||= '(unknown)';
	    logmsg "$was_it_spam for $current_user:$> in ".
		sprintf("%3d", time - $start) ." seconds.\n";

	    $status->finish();	# added by jm to allow GC'ing
	}

        # If we get the PROCESS command, the client is going to send a
        # message that we need to filter.  This is were all the real
        # work is one.

        elsif (/PROCESS SPAMC\/(.*)/)
	{
	    my $version = $1;
	    my $expected_length;

            # Protocol version 1.0 and greater may have "User:" and
            # "Content-length:" headers.  But they're not required.

	    if($version > 1.0)
	    {
		while(1)
                {
                    $_ = <STDIN>;
                    if(!defined $_)
                    {
                        protocol_error ("(EOF during headers)");
                        return 1;
                    }

                    if (/^\r\n/s) { last; }

                    # We'll run handle user unless we've been told not
                    # to process per-user config files.  Otherwise
                    # we'll check and see if we need to try SQL
                    # lookups.  If $opt_x is NOT true, we need to try
                    # their config file and then do the SQL lookup.
                    # If $opt_x IS true, we skip the conf file and
                    # only need to do the SQL lookup if $opt_q IS
                    # true.  (I got that wrong the first time.)

                    if (/^User: (.*)\r\n/)
                    {
                        if ($opt_x)
                        {
			    if ($opt_q) {
				handle_user_sql($1);
			    }
                        }
			else
                        {
                            handle_user($1);
                        }
                    }
		    if (/^Content-length: ([0-9]*)\r\n/i) {
			$expected_length = $1;
		    }
                }
	    }

            if ($spamtest->{paranoid} && $setuid_to_user && $> == 0)
            {
                logmsg "PARANOID: Still running as root, close connection.";
	    }
            elsif ( $setuid_to_user && $> == 0 )
            {
                logmsg "Still running as root: user not specified, ".
		    "not found, or set to root.  Fall back to nobody.";
		my $uid = getpwnam('nobody');
		if (!defined $uid) { die "no UID for nobody"; }
                $> = $uid;
            }

	    my $resp = "EX_OK";

	    # Now read in message
	    my @msglines = (<STDIN>);
	    my $actual_length = length (join '', @msglines);
	    my $mail = Mail::SpamAssassin::NoMailAudit->new (
                                data => \@msglines,
                                add_From_line => $opt_F
                         );

	    # Check length if we're supposed to
	    if($expected_length)
	    {
		if($actual_length != $expected_length) { protocol_error ("(Content-length mismatch: $expected_length vs. $actual_length)"); return 1; }
	    }

	    # Now use copy-on-writed (hopefully) SA object
	    my $status = $spamtest->check($mail);
	    $status->rewrite_mail; #if $status->is_spam;

	    # Build the message to send back and measure it
	    my $msg_resp = join '',$mail->header,"\n",@{$mail->body};
	    my $msg_resp_length = length($msg_resp);
	    if($version >= 1.2) # Spamc protocol 1.2 means it accepts content-length
	    {
		print "SPAMD/1.1 $resphash{$resp} $resp\r\n",
		"Content-length: $msg_resp_length\r\n\r\n",
		$msg_resp;
	    }
	    else # Earlier than 1.2 didn't accept content-length
	    {
		print "SPAMD/1.0 $resphash{$resp} $resp\r\n",
		$msg_resp;
	    }
	    my $was_it_spam;
	    if($status->is_spam) { $was_it_spam = 'identified spam'; } else { $was_it_spam = 'clean message'; }
	    $current_user ||= '(unknown)';
	    logmsg "$was_it_spam for $current_user:$> in ".
		sprintf("%3d", time - $start) ." seconds.\n";

	    $status->finish();	# added by jm to allow GC'ing
	}

        # If it was none of the above, then we don't know what it was.

	else
	{
	    protocol_error ($_);
	}

	# Clean up any defunct processes.  (Not perfect, as usually leaves at
	# least one around until next connection, but avoids handling SIGCHLD.)
	# Needed on SunOS, where SIGCHLD=IGNORE doesn't make it automatic.
	my $kid;
	do {
	    $kid = waitpid(-1,&WNOHANG);
	} until ($kid <= 0);
    };
}

sub protocol_error {
    local $_ = shift;

    my $resp = "EX_PROTOCOL";
    print "SPAMD/1.0 $resphash{$resp} Bad header line: $_\r\n";
    logmsg "bad protocol: header error: $_";
}

sub spawn {
    my $coderef = shift;

    unless (@_ == 0 && $coderef && ref($coderef) eq 'CODE') {
	confess "usage: spawn CODEREF";
    }

    my $pid;
    if (!defined($pid = fork)) {
	logmsg "cannot fork: $!";
	return;
    } elsif ($pid) {
	return; # I'm the parent
    }
    # else I'm the child -- go spawn

    open(STDIN,  "<&Client")   || die "can't dup client to stdin";
    open(STDOUT, ">&Client")   || die "can't dup client to stdout";
    exit &$coderef();
}

sub handle_user
{
    my $username = shift;

    $current_user = $username;
    my ($name,$pwd,$uid,$gid,$quota,$comment,$gcos,$dir,$etc) =
	getpwnam($username);

    if ( !$spamtest->{paranoid} && !defined($uid) ) {
	#if we are given a username, but can't look it up,
	#Maybe NIS is down? lets break out here to allow
	#them to get 'defaults' when we are not running paranoid.
	logmsg "handle_user() -> unable to find user [$username]!\n";
	return 0;
    }

    if ($setuid_to_user) {
	$> = $uid;
	if ($> != $uid) {
	    logmsg "setuid to $uid failed";
	    die;		# make it fatal to avoid security breaches
	}
    }

    my $cf_file = $dir."/.spamassassin/user_prefs";

    create_default_cf_if_needed ($cf_file, $username);
    $spamtest->read_scoreonly_config ($cf_file);
    return 1;
}

sub handle_user_sql
{
    my $username = shift;
    $spamtest->load_scoreonly_sql ($username);
    return 1;
}

sub create_default_cf_if_needed {
    my ($cf_file, $username) = @_;

    # Parse user scores, creating default .cf if needed:
    if( ! -r $cf_file && ! $spamtest->{dont_copy_prefs})
    {
	logmsg "Creating default_prefs [$cf_file]";

	$spamtest->create_default_prefs ($cf_file,$username);

	if ( ! -r $cf_file )
	{
	    logmsg "Couldn't create readable default_prefs for [$cf_file]";
	}
    }
}

sub logmsg
{
    openlog('spamd','cons,pid',$log_facility);
    syslog('info',"@_");
    if ($opt_D) { warn "logmsg: @_\n"; }
}

sub kill_handler
{
    my ($sig) = @_;
    logmsg "server killed by SIG$sig, shutting down";
    close Server;
    exit 0;
}

use POSIX 'setsid';
sub daemonize
{
    chdir '/' or die "Can't chdir to '/': $!";
    open STDIN,'/dev/null' or die "Can't read '/dev/null': $!";
    open STDOUT,'>/dev/null' or die "Can't write '/dev/null': $!";
    defined(my $pid=fork) or die "Can't fork: $!";
    exit if $pid;
    setsid or die "Can't start new session: $!";
    open STDERR,'>&STDOUT' or die "Can't duplicate stdout: $!";
}

=head1 NAME

spamd - daemonized version of spamassassin

=head1 SYNOPSIS

spamd [options]

=head1 OPTIONS

=over

=item B<-a>

Use per-user auto-whitelists.  These will automatically create a list of
senders whose messages are to be considered non-spam by monitoring the total
number of received messages which weren't tagged as spam from that sender.
Once a threshold is exceeded, further messages from that sender will be given a
non-spam bonus (in case you correspond with people who occasionally swear in
their emails).

=item B<-c>

Create user preferences files if they don't exist (default: don't).

=item B<-d>

Detach from starting process and run in background (daemonize).

=item B<-h>

Print a brief help message, then exit without further action.

=item B<-i> I<ipaddress>

Tells spamd to listen on the specified IP address [defaults to 127.0.0.1].  Use
0.0.0.0 to listen on all interfaces.

=item B<-p> I<port>

Optionally specifies the port number for the server to listen on.

=item B<-q>

Turn on SQL lookups even when per-user config files have been disabled
with B<-x>. this is useful for spamd hosts which don't have user's
home directories but do want to load user preferences from an SQL
database.

=item B<-s> I<facility>

Specify the syslog facility to use (default: mail).

=item B<-u> I<username>

Run as the named user.  The alternative, default behaviour is to setuid() to
the user running C<spamc>, if C<spamd> is running as root.

=item B<-x>

Turn off per-user config files.  All users will just get the default
configuration.

=item B<-A> I<host,...>

Specify a list of authorized hosts which can connect to this spamd instance.
The list is one of valid IP addresses, separated by commas.  By default,
connections are only accepted from localhost (127.0.0.1).

=item B<-D>

Print debugging messages

=item B<-L>

Perform only local tests on all mail.  In other words, skip DNS and other
network tests.  Works the same as the C<-L> flag to C<spamassassin(1)>.

=item B<-P>

Die on user errors (for the user passed from spamc) instead of falling back to
user I<nobody> and using the default configuration.

=item B<-F> I<0 | 1>

Ensure that the output email message either always starts with a 'From ' line
(I<1>) for UNIX mbox format, or ensure that this line is stripped from the
output (I<0>).  (default: 1)

=back

=head1 DESCRIPTION

The purpose of this program is to provide a daemonized version of the
spamassassin executable.  The goal is improving throughput performance for
automated mail checking.

This is intended to be used alongside C<spamc>, a fast, low-overhead C client
program.

See the README file in the C<spamd> directory of the SpamAssassin distribution
for more details.

=head1 SEE ALSO

spamc(1)
spamassassin(1)
Mail::SpamAssassin(3)

=head1 AUTHOR

Craig R Hughes E<lt>craig@hughes-family.orgE<gt>

=head1 PREREQUISITES

C<Mail::SpamAssassin>

=cut