#!/usr/bin/perl -w

eval 'exec /usr/bin/perl  -S $0 ${1+"$@"}'
  if 0;    # not running under some shell

# Some configurable stuff here.  This may get offloaded to a file in the
# future.

# $smarthost="localhost:10026";

# Some stuff from MSDW's smtpprox for preforking stuff

my $children          = 4;
my $minperchild       = 5;
my $maxperchild       = 10;
my $maxsize           = 80 * 1024;    # default 80k
my $shared            = 0;
my $debug             = 0;
my $recipient_mapping = 0;            # name of the file which will be used
my %recipient_mapping;

# This file is based largely on example code bundled with MacGyver's
# Net::SMTP::Server kit, but with some additional stuff to use
# Mail::SpamAsssassin and a modified version of Net::SMTP::Server::Relay so
# then it becomes Net::SMTP::Server::SmartHost.  This way I can direct mail
# to a specific mailserver specified.  ::Relay does MX lookups which isn't
# what we want, but instead, reinject the message back into the system via
# an unfiltered version of SMTP server
#
# This was written with Postfix in mind, but nothing says you cannot use it
# for another MTA.  Be sure to read FILTER_README for a bit more background
# on how to integrate an SMTP-based filter (considered an "advanced" method).
#
# --Ian R. Justman <ianj@esper.net>, 11/21/2001

use Sys::Syslog qw(:DEFAULT setlogsock);

use Carp;
use Net::SMTP::Server;
use Net::SMTP::Server::Client;
use Mail::SpamAssassin::SMTP::SmartHost;
use Mail::SpamAssassin::NoMailAudit;
use Mail::SpamAssassin;
use Net::DNS;
use Getopt::Long;
use strict;

my $spamtest = Mail::SpamAssassin->new();
$spamtest->compile_now(0);    # ensure all modules etc. are preloaded
$/ = "\n";                    # argh, Razor resets this!  Bad Razor!

# This is the preforking and option-parsiong section taken from the MSDW
# smtpproxy code by Bennett Todd.  Any comments from that code are not my
# own comments (marked with "[MSDW]") unless otherwise noted.
#
# Depending on your platform, you may need his patch which uses
# IPC/semaphores to get information which may be required to allow two
# simultaneous instances to accept() a connection, which can be obtained at
# http://bent.latency.net/smtpprox/smtpprox-semaphore-patch.  It is best to
# apply the patch to the original script, then port it to this one.
#
# --irj

my $syntax =
  "syntax: $0 [--children=$children] [--minperchild=$minperchild] "
  . "[--maxperchild=$maxperchild] [--shared] [--debug] [--recipient_mapping=file] "
  . "[--maxmsgsize=$maxsize] "
  . "listen.addr:port talk.addr:port [spamaddr\@example.com]\n";

sub stop {
  my $message = $_;
  print $message;
  die $syntax;
}

GetOptions(
  "children=n"          => \$children,
  "shared"              => \$shared,
  "debug+"              => \$debug,
  "recipient_mapping=s" => \$recipient_mapping,
  "minperchild=n"       => \$minperchild,
  "maxperchild=n"       => \$maxperchild,
  "maxmsgsize=i"        => \$maxsize,
  )
  or &stop("can't get the options !\n");

&stop("Numbers of arguments must be at least two !\n") if @ARGV < 2;

my ( $srcaddr, $srcport ) = split /:/, $ARGV[0];
my ( $dstaddr, $dstport ) = split /:/, $ARGV[1];
my $spamaddr = $ARGV[2] || "recipient";

if ($recipient_mapping) {

  print "opening file $recipient_mapping ...\n" if $debug;
  open( RECIPIENT_MAPPING, $recipient_mapping )
    || die "can't open $recipient_mapping, $!";
  my @recipient_mapping = <RECIPIENT_MAPPING>;
  close RECIPIENT_MAPPING;
  foreach (@recipient_mapping) {
    next if /^\s*\#/;
    if (/([\w\-@\.\+<>]+)\s+([\w\-@\.\+<>]+)/) {
      my $destination = $1;
      my $rewrite     = $2;
      print "$destination -> $rewrite\n" if $debug;
      $recipient_mapping{$destination} = $rewrite;
    }
  }
}

my $trying_message =
  "Trying to start using source $srcaddr port $srcport, "
  . "destination $dstaddr port $dstport, "
  . "reporting e-mail address $spamaddr."
  . " shared : $shared"
  . " recipient_mapping file : $recipient_mapping"
  . " debug : $debug\n";

print $trying_message;

setlogsock 'unix';
openlog( 'spamassassin', 'nowait', 'local3' );
syslog( 'notice', $trying_message );
closelog();

&stop("srcport or dstport not defined !\n")
  unless defined($srcport)
  and defined($dstport)
  and defined($spamaddr);

my $smarthost = $dstaddr . ":" . $dstport;

# Set up the server using the IP address and port specified on the command
# line by the user.
#
# Since a vast majority of the SMTP code is based on MacGyver's sample code,
# I'll spare everyone those details here as that info is in his code. 
# Instead,  I'll be concentrating on the message-handling portion. --irj

my $server = new Net::SMTP::Server( $srcaddr, $srcport )
  || croak("Unable to create server: $!\n");

my $startup_message =
  "Server started on address $srcaddr port $srcport "
  . "with destination address $dstaddr port $dstport\n";

print $startup_message;

setlogsock 'unix';
openlog( 'spamassassin', 'nowait', 'local3' );
syslog( 'notice', $startup_message );
closelog();

# [MSDW]
# This should allow a kill on the parent to also blow away the
# children, I hope

my %children;
use vars qw($please_die);
$please_die = 0;
$SIG{TERM} = sub { $please_die = 1; };

# [MSDW]
# This sets up the parent process

PARENT: while (1) {
  while ( scalar( keys %children ) >= $children ) {
    my $child = wait;
    delete $children{$child} if exists $children{$child};
    if ($please_die) { kill 15, keys %children; exit 0; }
  }
  my $pid = fork;
  die "$0: fork failed: $!\n" unless defined $pid;
  last PARENT if $pid == 0;
  $children{$pid} = 1;
  select( undef, undef, undef, 0.1 );
  if ($please_die) { kill 15, keys %children; exit 0; }
}

# [MSDW]
# This block is a child service daemon. It inherited the bound
# socket created by SMTP::Server->new, it will service a random
# number of connection requests in [minperchild..maxperchild] then
# exit

my $lives = $minperchild + ( rand( $maxperchild - $minperchild ) );
my %opts;

while ( my $conn = $server->accept() ) {
  my $port = $conn->peerport();
  print "getting connection port $port\n" if $debug;
  my $client = new Net::SMTP::Server::Client($conn)
    || croak("Unable to handle client connection: $!\n");

  # [MSDW]
  # Process the client.  This command will block until
  # the connecting client completes the SMTP transaction.
  $client->process || next;

  # Mail::SpamAssassin::NoMailAudit wants an array of lines, while the
  # server returns a huge string.  Since I am unsure whether it needs to
  # have the CR/LF pair for each line for use with Razor, after splitting
  # it, using the CR/LF pairs as delimiters, I walk over the message again
  # to re-add them.  Once the array is populated and tweaked, it is then
  # handed to a new Mail::SpamAssassin::NoMailAudit object.
  # --irj

  # perldoc -f split
  #split   Splits a string into a list of strings and returns
  #           that list.  By default, empty leading fields are
  #           preserved, and empty trailing ones are deleted.
  #
  # so, it removes last empty lines !!! -> hence the last argument, -1

  my $message    = $client->{MSG};
  my $len        = length($message);
  my $tmpMessage = "Message length is : $len chars\n";
  setlogsock 'unix';
  openlog( 'spamassassin', 'nowait', 'local3' );
  syslog( 'notice', $tmpMessage );
  closelog();

  my $recips;
  my $msg;

  my @msg = split ( /^/, $message, '-1' );
  my $arraycont = @msg;
  my %args      = ( data => \@msg );
  my $mail      = Mail::SpamAssassin::NoMailAudit->new(%args);

  if ( $len <= $maxsize ) {

    # At some point, I may also put some other code so I can go grab
    # preferences, e.g. via MySQL, e.g. scoring parameters, or even whether to
    # filter at all (hey, with Perl + MySQL, your imagination is the
    # limit).
    #
    # This is where the testing actually happens.  In this example, which I
    # have in an actual production environment (save the address), I have it
    # rewriting the message then forwarding to a collection account for
    # examination.  The addresses have been changed to protect the innocent.
    #
    # If the message is OK, we skip doing anything with the object and
    # instead, pass the original message to the smarthost code below.
    # --irj

    my $status = $spamtest->check($mail);
    my @msg_debug;

    # add headers
    $status->rewrite_mail();
    my $header = join ( "", $mail->header() );
    my $body   = join ( "", @{ $mail->body() } );
    $message = join ( "\r\n", $header, $body );
    print $message, "\n" if $debug >= 2;

    if ( $status->is_spam() ) {
      $msg = sprintf( "    SPAM[%6.1f]: %s",
        $status->get_hits(), $status->get_names_of_tests_hit() );

      # check if the mail goes to one address
      if ( $spamaddr ne "recipient" ) {
        my @recipients = ("$spamaddr");
        $recips = \@recipients;
      }
      else {
        $recips = $client->{TO};

        if ($shared) {
          my @rewrite = map {
            my $init = $_;
            s/<(.*?)(\@.*)/<shared+user.$1.spam$2/g;
            push @msg_debug, "rewrite shared $init -> $_";
            $shared
          } @{ $client->{TO} };

          $recips = \@rewrite;
        }

        if ($recipient_mapping) {

          # if there is an entry in the recipient mapping, replace it by the value
          # otherwise let it alone.
          my @rewrite = map {
            if ( $recipient_mapping{$_} )
            {
              push @msg_debug, "rewrite $_ -> $recipient_mapping{$_}";
              $recipient_mapping{$_};
            }
            else { push @msg_debug, "not rewriting $_"; $_ }
          } @{ $client->{TO} };

          $recips = \@rewrite;
        }

      }
    }
    else {
      $msg = sprintf( "NOT_SPAM[%6.1f]: %s",
        $status->get_hits(), $status->get_names_of_tests_hit() );

      $recips = $client->{TO};
    }

    setlogsock 'unix';
    openlog( 'spamassassin', 'nowait', 'local3' );
    syslog( 'notice', $msg );
    if ($debug) {
      foreach (@msg_debug) {
        syslog( 'notice', $_ );
        print $_, "\n";
      }
    }
    closelog();

    $status->finish();

  }
  else {

    # Do these really need to be here?
    # If the message is too large, we didn't change anything...
    # So I'm commenting this out for now, TVD - 2002/09/29
    #my $header = join ( "", $mail->header() );
    #my $body   = join ( "", @{ $mail->body() } );
    #$message = join ( "\r\n", $header, $body );
    #print $message, "\n" if $debug >= 2;

    $recips = $client->{TO};
  }

  # Here is where we actually connect back into Postfix or wherever.  As
  # has been mentioned before, more detailed information on how to set
  # Postfix up to use an "advanced" filter setup, directly upon this
  # documentation this implementation is based.
  #
  # Here, we need to use a hacked version of Net::SMTP::Server::Relay to
  # make this work, which I will bundle in along with the script.  I made
  # no other modifications to the rest of the distribution (which is
  # required to make this work and is in CPAN).
  # --irj
  my $relay =
    new Mail::SpamAssassin::SMTP::SmartHost( $client->{FROM}, $recips, $message,
    $smarthost );

  # Zap this instance if this child's processing limit has been reached.
  # --irj
  print "mail     delivered port $port\n" if $debug;

  delete $server->{"s"};
  exit 0 if $lives-- <= 0;
}

=head1 NAME

spamproxyd - mail filter to identify spam using text analysis

=head1 SYNOPSIS

=over

=item spamproxyd

=back

=head1 OPTIONS

=over 4

--shared deliver the spam to shared+user.$user.spam
   (usefull for imap users (i'm using cyrus))

--debug print the recipient inside of spamproxyd (using twice --debug will increase the debug)

--recipient_mapping=file  reads a file which contains two emails per line
    (with <> around each email (depending on your mta)), space separated.
    the spam coming to the first email will be sent to the second email
    (example : <joe@mydomain.com> <joe-spam@mydomain.com>


spamproxyd used Mail::Spamassassin, which loads local.cf 
(in rules directory) as site-wide preferences. You may want to add/modify it.

=back

=head1 DESCRIPTION

IMPORTANT!  PLEASE read CHANGES.spamproxy before continuing!

This is a prototype for an SMTP filter based on Mail::SpamAssassin
(http://spamassassin.org, http://spamassassin.sourceforge.net).

This was originally written with Postfix's filering in mind, based on the
"advanced" example detailed in the FILTER_README file in the Postfix
distribution, but there's no reason why it couldn't be used with other
servers.

This script is just proof of concept right now; it may more than likely not
be usable in a larger-scale environment where there's high volumes of mail
being transferred.  However, it's currently good enough for a small-scale
environment, like the IRC network for which I serve as postmaster, along
with several other people I service on a small machine.

This script requires Mail::Assassin (see above) and Net::SMTP::Server
(http://www.macgyver.org/software/perl/, plus it is also in CPAN).  You also
need a modified version of one of the modules in order to connect to a
specific SMTP server, which I include in the package.

Right now, this script has a couple of shortcomings:

1.  Configurability, configurability!  This is especially true if this will
    filter for multiple people whose needs may be quite different, including
    per-user weighting of the "suspicious stuff", white-lists, etc, and of
    course, whether to tag spam then deliver (if wanted), even whether to
    filter at all.

2.  What do YOU want? Who knows?  With Perl, your imagination's the limit. 

So far, I've managed to zap quite a bit of spam that'd normally go right
through the server.  With Vipul's Razor, this can go up quite a bit.  If
anyone has any ideas about Vipul's Razor and how I populate my arrays,
please let me know.

=head1 SEE ALSO

Mail::SpamAssassin(3)
Net::SMTP::Server(3)

=head1 AUTHOR

Ian R. Justman E<lt>ianj@esper.netE<gt>

=head1 CREDITS

Justin Mason and Craig Hughes for B<Mail::SpamAssassin>

Habeeb J. "MacGyver" Dihu for his B<Net::SMTP::Server> code

Bennett Todd for the perforking code and option-parsing code from his
    pacakge, smtpproxy

Alexandre Dulaunoy added size check to bypass for a specified size of the message

Special thanks go out to the crew at my usual IRC hangout, notably Barry
Hughes, Matti Koskimies, plus a number of others whom I may have not given
appropriate credit, but you still deserve it.  You've been a big help.  :)

=head1 PREREQUISITES

C<Mail::SpamAssassin>
C<Net::SMTP::Server>

=head1 EXAMPLES

here is how i use it (postfix) :

I added in : 

postfix main.cf :

content_filter = smtp:localhost:10025 

postfix master.cf :

localhost:10026     inet  n      -      n      -      10      smtpd
      -o content_filter=
      -o local_recipient_maps=
      -o myhostname=localhost.hansonpublications.com

and i start spamproxyd via :

./spamproxyd.pl --debug 127.0.0.1:10025 127.0.0.1:10026

=head1 TODO

Daemonize it

Add signal catchs (for termination)

Create a pid file

=cut