#!/usr/bin/perl -w

############################################################
# The code in this file is copyright 2001 by Craig Hughes  #
# It is licensed for use with the SpamAssassin distribution#
# under the terms of the Perl Artistic License, the text of#
# which is included as the file named "License"            #
############################################################

use lib '../lib';    # added by jm for use inside the distro
use strict;
use Socket;
use Carp;
use Mail::SpamAssassin;
use Mail::SpamAssassin::NoMailAudit;
use Sys::Syslog qw(:DEFAULT setlogsock);
use POSIX qw(setsid);
use Getopt::Std;
use POSIX ":sys_wait_h";

# sysexits-style codes; the numeric value is sent back in the SPAMD
# response line next to its symbolic name.
my %resphash = (
    EX_OK          => 0,     # no problems
    EX_USAGE       => 64,    # command line usage error
    EX_DATAERR     => 65,    # data format error
    EX_NOINPUT     => 66,    # cannot open input
    EX_NOUSER      => 67,    # addressee unknown
    EX_NOHOST      => 68,    # host name unknown
    EX_UNAVAILABLE => 69,    # service unavailable
    EX_SOFTWARE    => 70,    # internal software error
    EX_OSERR       => 71,    # system error (e.g., can't fork)
    EX_OSFILE      => 72,    # critical OS file missing
    EX_CANTCREAT   => 73,    # can't create (user) output file
    EX_IOERR       => 74,    # input/output error
    EX_TEMPFAIL    => 75,    # temp failure; user is invited to retry
    EX_PROTOCOL    => 76,    # remote error in protocol
    EX_NOPERM      => 77,    # permission denied
    EX_CONFIG      => 78,    # configuration error
);

# NOTE(review): the original usage text and the option-parsing statements
# between it and the -A handling were lost from this copy of the file.
# What follows is reconstructed from the options documented in the POD at
# the end of this file and from the $opt_* variables the code reads;
# confirm against the upstream source.

# Print a brief summary of the command line options to STDERR.
sub usage {
    warn <<EOUSAGE;
Usage: spamd [options]

  -a            use per-user auto-whitelists
  -c            create user preferences files if they don't exist
  -d            detach from starting process and run in background
  -h            print this help message and exit
  -i ipaddress  listen on the specified IP address (default: 127.0.0.1)
  -p port       listen on the specified port (default: 783)
  -q            turn on SQL lookups even when -x is given
  -s facility   syslog facility to use (default: mail)
  -u username   run as the named user
  -x            turn off per-user config files
  -A host,...   hosts which may connect (default: 127.0.0.1)
  -D            print debugging messages
  -L            perform only local tests on all mail
  -P            die on user errors instead of falling back to nobody
  -F 0|1        add (1) or strip (0) the leading 'From ' line (default: 1)
EOUSAGE
}

# Getopt::Std::getopts stores each switch in the package variable $opt_<x>.
use vars qw(
    $opt_a $opt_c $opt_d $opt_h $opt_i $opt_p $opt_q $opt_s
    $opt_u $opt_x $opt_A $opt_D $opt_L $opt_P $opt_F
);

$opt_F = 1;    # documented default for -F

getopts('acdhi:p:qs:u:xA:DLPF:')
    or do { usage(); exit $resphash{EX_USAGE}; };

if ($opt_h) {
    usage();
    exit $resphash{EX_OK};
}

# Peers allowed to connect, keyed by dotted-quad address.
my %allowed;
if ($opt_A) {
    %allowed = map { $_ => 1 } split /,/, $opt_A;
}
else {
    %allowed = ( '127.0.0.1' => 1 );
}

# This can be changed on the command line with the -s flag
my $log_facility;
if ($opt_s) {
    $log_facility = $opt_s;
}
else {
    $log_facility = 'mail';
}

# -c turns on creation of missing per-user preference files.
my $dontcopy = 1;
if ($opt_c) { $dontcopy = 0; }

if (defined $ENV{'HOME'}) {
    delete $ENV{'HOME'};    # we do not want to use this when running spamd
}

my $spamtest = Mail::SpamAssassin->new({
    dont_copy_prefs  => $dontcopy,
    local_tests_only => $opt_L,
    debug            => $opt_D,
    paranoid         => ($opt_P || 0),
});

if ($opt_a) {
    eval {
        require Mail::SpamAssassin::DBBasedAddrList;

        # create a factory for the persistent address list
        my $addrlistfactory = Mail::SpamAssassin::DBBasedAddrList->new();
        $spamtest->set_persistent_address_list_factory ($addrlistfactory);
    };
    # Do not fail silently: without the factory, -a has no effect.
    warn "cannot enable auto-whitelists (-a): $@" if $@;
}

sub spawn;     # forward declaration
sub logmsg;    # forward declaration

setlogsock('unix');

my $port  = $opt_p || 783;
my $addr  = gethostbyname($opt_i || '127.0.0.1');
my $proto = getprotobyname('tcp');

defined $addr or die "invalid listen address";
($port) = $port =~ /^(\d+)$/ or die "invalid port";

# Be a well-behaved daemon
socket(Server, PF_INET, SOCK_STREAM, $proto) || die "socket: $!";
setsockopt(Server, SOL_SOCKET, SO_REUSEADDR, pack("l", 1))
    || die "setsockopt: $!";
bind(Server, sockaddr_in($port, $addr)) || die "bind: $!";
listen(Server, SOMAXCONN) || die "listen: $!";

# support non-root use (after we bind to the port)
my $setuid_to_user = 0;
if ($opt_u) {
    my $uuid = getpwnam($opt_u);
    if (!defined $uuid || $uuid == 0) {
        die "fatal: cannot run as nonexistent user or root with -u option\n";
    }
    $> = $uuid;    # effective uid
    $< = $uuid;    # real uid. we now cannot setuid anymore
    if ($> != $uuid || $< != $uuid) {
        die "fatal: setuid to uid $uuid failed\n";
    }
}
elsif ($> == 0) {
    # Running as root without -u: each child switches to the user named
    # in the request instead.
    $setuid_to_user = 1;
}

$spamtest->compile_now();    # ensure all modules etc. are loaded
$/ = "\n";                   # argh, Razor resets this!  Bad Razor!

$opt_d and daemonize();

my $waitedpid = 0;
my $current_user;
my $paddr;

$SIG{CHLD} = 'IGNORE';    # important: avoids perl sighandling bug on BSD
$SIG{INT}  = \&kill_handler;
$SIG{TERM} = \&kill_handler;

# now allow waiting processes to connect, if they're watching the log.
# The test suite does this!
if ($opt_D) {
    warn "server started on port $port\n";
    warn "server pid: $$\n";
}
logmsg "server started on port $port";

# Main accept loop: one forked child per authorized connection.  The
# parent closes its copy of Client in the loop's step expression.
for ( $waitedpid = 0;
      ($paddr = accept(Client, Server)) || $waitedpid;
      $waitedpid = 0, close Client )
{
    next if $waitedpid and not $paddr;

    my $start = time;
    my ($peer_port, $iaddr) = sockaddr_in($paddr);
    my $peer_ip = inet_ntoa($iaddr);

    # Reverse lookup can fail; log the address instead of an undef name.
    my $name = gethostbyaddr($iaddr, AF_INET);
    $name = $peer_ip unless defined $name;

    if ($allowed{$peer_ip}) {
        logmsg "connection from $name [", $peer_ip, "] at port $peer_port";
    }
    else {
        logmsg "unauthorized connection from $name [", $peer_ip,
               "] at port $peer_port";
        next;
    }

    # The closure runs in the child with the client socket on
    # STDIN/STDOUT; its return value becomes the child's exit status.
    spawn sub {
        $| = 1;    # always immediately flush output

        # First request line off stream
        my $request = <STDIN>;
        if (!defined $request) {
            protocol_error ("(closed before headers)");
            return 1;
        }
        chomp $request;

        # It may be a SKIP message, meaning that the client (spamc)
        # thinks it is too big to check.  So we don't do any real work
        # in that case.
        if ($request =~ /SKIP SPAMC\/(.*)/) {
            logmsg "skipped large message in ".
                   sprintf("%3d", time - $start) ." seconds.\n";
            return 0;
        }

        # CHECK: just report whether the message is spam.
        if ($request =~ /CHECK SPAMC\/(.*)/) {
            return _handle_message('CHECK', $1, $start);
        }

        # PROCESS: filter the message and send the rewritten mail back.
        if ($request =~ /PROCESS SPAMC\/(.*)/) {
            return _handle_message('PROCESS', $1, $start);
        }

        # If it was none of the above, then we don't know what it was.
        protocol_error ($request);
        return 1;
    };

    # Clean up any defunct processes.  (Not perfect, as usually leaves at
    # least one around until next connection, but avoids handling SIGCHLD.)
    # Needed on SunOS, where SIGCHLD=IGNORE doesn't make it automatic.
    # This has to run in the parent: the child has no children to reap.
    my $kid;
    do {
        $kid = waitpid(-1, WNOHANG);
    } until ($kid <= 0);
}

# Read the optional request headers that follow the command line.
# "User:" selects per-user configuration: the user's prefs file unless
# -x was given; with -x, an SQL lookup only if -q was also given.
# "Content-length:" records the announced message size.
# Returns (1, $expected_length) on success, or (0) after reporting a
# protocol error when the stream ends before the blank line.
sub _read_request_headers {
    my $expected_length;

    while (1) {
        my $line = <STDIN>;
        if (!defined $line) {
            protocol_error ("(EOF during headers)");
            return (0);
        }
        last if $line =~ /^\r\n/s;

        if ($line =~ /^User: (.*)\r\n/) {
            my $user = $1;
            if ($opt_x) {
                handle_user_sql($user) if $opt_q;
            }
            else {
                handle_user($user);
            }
        }
        if ($line =~ /^Content-length: ([0-9]*)\r\n/i) {
            $expected_length = $1;
        }
    }
    return (1, $expected_length);
}

# Make sure the child does not scan mail as root when spamd was started
# as root without -u.  Returns 1 when it is safe to continue, 0 when the
# connection must be closed (paranoid mode).  Dies if the switch to
# 'nobody' cannot be made.
sub _drop_root_privileges {
    return 1 unless $setuid_to_user && $> == 0;

    if ($spamtest->{paranoid}) {
        logmsg "PARANOID: Still running as root, close connection.";
        return 0;
    }

    logmsg "Still running as root: user not specified, ".
           "not found, or set to root.  Fall back to nobody.";
    my $uid = getpwnam('nobody');
    if (!defined $uid) {
        die "no UID for nobody";
    }
    $> = $uid;
    if ($> != $uid) {
        logmsg "setuid to $uid failed";
        die;    # make it fatal to avoid security breaches
    }
    return 1;
}

# Handle one CHECK or PROCESS request in the child.
#   $command - 'CHECK' or 'PROCESS'
#   $version - protocol version taken from the request line
#   $start   - time() at which the connection was accepted, for logging
# Returns the child's exit status: 0 on success, 1 on a protocol error
# or a refused connection.
sub _handle_message {
    my ($command, $version, $start) = @_;

    # Requests newer than protocol 1.0 may carry "User:" and
    # "Content-length:" headers.  But they're not required.
    my $expected_length;
    if ($version > 1.0) {
        my $ok;
        ($ok, $expected_length) = _read_request_headers();
        return 1 unless $ok;
    }

    return 1 unless _drop_root_privileges();

    my $resp = "EX_OK";

    # Now read in message
    my @msglines = <STDIN>;
    my $actual_length = length (join '', @msglines);
    my $mail = Mail::SpamAssassin::NoMailAudit->new (
        data          => \@msglines,
        add_From_line => $opt_F
    );

    # Check length if we're supposed to
    if ($expected_length && $actual_length != $expected_length) {
        protocol_error ("(Content-length mismatch: $expected_length vs. $actual_length)");
        return 1;
    }

    # Now use copy-on-writed (hopefully) SA object
    my $status = $spamtest->check($mail);

    if ($command eq 'CHECK') {
        my $msg_score     = int($status->get_hits);
        my $msg_threshold = int($status->get_required_hits);
        my $verdict       = $status->is_spam ? 'True' : 'False';
        print "SPAMD/1.1 $resphash{$resp} $resp\r\nSpam: $verdict ; $msg_score / $msg_threshold\r\n\r\n";
    }
    else {
        $status->rewrite_mail;    #if $status->is_spam;

        # Build the message to send back and measure it
        my $msg_resp = join '', $mail->header, "\n", @{$mail->body};
        my $msg_resp_length = length($msg_resp);
        if ($version >= 1.2) {
            # Spamc protocol 1.2 means it accepts content-length
            print "SPAMD/1.1 $resphash{$resp} $resp\r\n",
                  "Content-length: $msg_resp_length\r\n\r\n",
                  $msg_resp;
        }
        else {
            # Earlier than 1.2 didn't accept content-length
            print "SPAMD/1.0 $resphash{$resp} $resp\r\n",
                  $msg_resp;
        }
    }

    my $was_it_spam = $status->is_spam ? 'identified spam' : 'clean message';
    $current_user ||= '(unknown)';
    logmsg "$was_it_spam for $current_user:$> in ".
           sprintf("%3d", time - $start) ." seconds.\n";

    $status->finish();    # added by jm to allow GC'ing
    return 0;
}

# Report a malformed request to the client and to the log.
sub protocol_error {
    my $detail = shift;

    my $resp = "EX_PROTOCOL";
    print "SPAMD/1.0 $resphash{$resp} Bad header line: $detail\r\n";
    logmsg "bad protocol: header error: $detail";
}

# Fork a child to run CODEREF with the Client socket dup'ed onto STDIN
# and STDOUT.  The parent returns immediately; the child exits with the
# code reference's return value.
sub spawn {
    my $coderef = shift;

    unless (@_ == 0 && $coderef && ref($coderef) eq 'CODE') {
        confess "usage: spawn CODEREF";
    }

    my $pid;
    if (!defined($pid = fork)) {
        logmsg "cannot fork: $!";
        return;
    }
    elsif ($pid) {
        return;    # I'm the parent
    }
    # else I'm the child -- go spawn

    open(STDIN,  "<&Client") || die "can't dup client to stdin";
    open(STDOUT, ">&Client") || die "can't dup client to stdout";
    exit $coderef->();
}

# Switch to the named user (when spamd runs as root without -u) and load
# that user's score-only preferences from ~/.spamassassin/user_prefs.
# Returns 1 on success, 0 when the user is unknown and paranoid mode is
# off.  Dies when the user is unknown in paranoid mode or the setuid
# fails.
sub handle_user {
    my $username = shift;

    $current_user = $username;

    my ($name,$pwd,$uid,$gid,$quota,$comment,$gcos,$dir,$etc) =
        getpwnam($username);

    if (!defined $uid) {
        logmsg "handle_user() -> unable to find user [$username]!\n";

        # -P: user errors are fatal rather than falling back to defaults.
        # Carrying on would use an undefined uid and home directory.
        if ($spamtest->{paranoid}) {
            die "fatal: unknown user [$username] in paranoid mode\n";
        }

        # if we are given a username, but can't look it up,
        # Maybe NIS is down? lets break out here to allow
        # them to get 'defaults' when we are not running paranoid.
        return 0;
    }

    if ($setuid_to_user) {
        $> = $uid;
        if ($> != $uid) {
            logmsg "setuid to $uid failed";
            die;    # make it fatal to avoid security breaches
        }
    }

    my $cf_file = $dir."/.spamassassin/user_prefs";
    create_default_cf_if_needed ($cf_file, $username);
    $spamtest->read_scoreonly_config ($cf_file);
    return 1;
}

# Load the named user's score-only preferences via SQL.
sub handle_user_sql {
    my $username = shift;

    $current_user = $username;    # so the log line names the user
    $spamtest->load_scoreonly_sql ($username);
    return 1;
}

# Create a default preferences file for the user when it is not readable
# and copying of preferences is enabled (-c).
sub create_default_cf_if_needed {
    my ($cf_file, $username) = @_;

    # Parse user scores, creating default .cf if needed:
    if ( ! -r $cf_file && ! $spamtest->{dont_copy_prefs}) {
        logmsg "Creating default_prefs [$cf_file]";
        $spamtest->create_default_prefs ($cf_file,$username);
        if ( ! -r $cf_file ) {
            logmsg "Couldn't create readable default_prefs for [$cf_file]";
        }
    }
}

# Write a message to syslog and, with -D, to STDERR.  The arguments are
# joined with spaces.
sub logmsg {
    openlog('spamd','cons,pid',$log_facility);
    # syslog() takes a printf-style format.  The message can contain
    # client-supplied text, so it must be passed as data, not as format.
    syslog('info', '%s', "@_");
    if ($opt_D) { warn "logmsg: @_\n"; }
}

# SIGINT/SIGTERM handler: log, close the listening socket and exit.
sub kill_handler {
    my ($sig) = @_;
    logmsg "server killed by SIG$sig, shutting down";
    close Server;
    exit 0;
}

use POSIX 'setsid';

# Detach from the controlling terminal and run in the background.
sub daemonize {
    chdir '/' or die "Can't chdir to '/': $!";
    open STDIN,'/dev/null' or die "Can't read '/dev/null': $!";
    open STDOUT,'>/dev/null' or die "Can't write '/dev/null': $!";
    defined(my $pid=fork) or die "Can't fork: $!";
    exit if $pid;
    setsid or die "Can't start new session: $!";
    open STDERR,'>&STDOUT' or die "Can't duplicate stdout: $!";
}

=head1 NAME

spamd - daemonized version of spamassassin

=head1 SYNOPSIS

spamd [options]

=head1 OPTIONS

=over

=item B<-a>

Use per-user auto-whitelists.  These will automatically create a list of
senders whose messages are to be considered non-spam by monitoring the total
number of received messages which weren't tagged as spam from that sender.
Once a threshold is exceeded, further messages from that sender will be given a
non-spam bonus (in case you correspond with people who occasionally swear in
their emails).

=item B<-c>

Create user preferences files if they don't exist (default: don't).

=item B<-d>

Detach from starting process and run in background (daemonize).

=item B<-h>

Print a brief help message, then exit without further action.

=item B<-i> I<ipaddress>

Tells spamd to listen on the specified IP address [defaults to 127.0.0.1].  Use
0.0.0.0 to listen on all interfaces.

=item B<-p> I<port>

Optionally specifies the port number for the server to listen on.

=item B<-q>

Turn on SQL lookups even when per-user config files have been disabled
with B<-x>.  This is useful for spamd hosts which don't have user's
home directories but do want to load user preferences from an SQL
database.

=item B<-s> I<facility>

Specify the syslog facility to use (default: mail).

=item B<-u> I<username>

Run as the named user.
The alternative, default behaviour is to setuid() to the
user running C<spamc>, if C<spamd> is running as root.

=item B<-x>

Turn off per-user config files.  All users will just get the default
configuration.

=item B<-A> I<host,...>

Specify a list of authorized hosts which can connect to this spamd instance.
The list is one of valid IP addresses, separated by commas.  By default,
connections are only accepted from localhost (127.0.0.1).

=item B<-D>

Print debugging messages.

=item B<-L>

Perform only local tests on all mail.  In other words, skip DNS and other
network tests.  Works the same as the C<-L> flag to C<spamassassin>.

=item B<-P>

Die on user errors (for the user passed from spamc) instead of falling back to
user I<nobody> and using the default configuration.

=item B<-F> I<0 | 1>

Ensure that the output email message either always starts with a 'From ' line
(I<1>) for UNIX mbox format, or ensure that this line is stripped from the
output (I<0>).  (default: 1)

=back

=head1 DESCRIPTION

The purpose of this program is to provide a daemonized version of the
spamassassin executable.  The goal is improving throughput performance for
automated mail checking.

This is intended to be used alongside C<spamc>, a fast, low-overhead C client
program.

See the README file in the C<spamd> directory of the SpamAssassin distribution
for more details.

=head1 SEE ALSO

spamc(1)
spamassassin(1)
Mail::SpamAssassin(3)

=head1 AUTHOR

Craig R Hughes E<lt>craig@hughes-family.orgE<gt>

=head1 PREREQUISITES

C<Mail::SpamAssassin>

=cut