#!/usr/bin/perl -w

=head1 NAME

bayes_dump_to_trusted_networks - generate configuration from Bayes database

=head1 SYNOPSIS

  sa-learn --dump | bayes_dump_to_trusted_networks [opts] > trust.cf

  bayes_dump_to_trusted_networks bayes.dump [opts] > trust.cf

  options:
    --minham n
    --rdns
    --help

=head1 DESCRIPTION

This tool uses a dump of your Bayes database to determine which IP addresses
are 'trustworthy', and therefore should be listed in 'trusted_networks' lines
in your configuration.  This will reduce unnecessary DNSBL lookups, will
whitelist mails from trustworthy sources, and allows several SpamAssassin
rules to operate more effectively.

A 'trustworthy' IP is one that is trusted not to B<forge> emails; in other
words, it's not a subverted machine running a proxy, or one under spammer
control.

As such, any IP that has relayed 3 or more ham mails is considered
trustworthy.  It doesn't matter if it has ever relayed spam mails to you,
since large ISP smarthost relays will have done so -- relaying both ham and
spam from their customer pool.  (The important thing is that it relayed the
mail without forging sender address information.)

Only IPv4 addresses (including IPv4-mapped-in-IPv6 ones, which are reduced to
their dotted-quad form) are written out.  Pure IPv6 addresses are skipped,
because the consolidation step groups addresses by IPv4 class C network.

=head1 OPTIONS

=over 4

=item --minham n

Require C<n> or more ham messages before considering an IP a candidate for
trust.  Default: 3.

=item --rdns

Annotate with reverse-DNS for that IP address.  Slows things down, but easier
to read.

=item --help

Print a usage message and exit.

=back

=head1 PREREQUISITES

C<Net::CIDR::Lite>

=cut

use strict;
use Getopt::Long;

# Loaded at run time so that a missing module produces a friendly message
# instead of a compile-time failure.
eval {
  require Net::CIDR::Lite;      # used to consolidate into CIDR ranges
};
die "Net::CIDR::Lite module is required to use this script. Aborting.\n".
    "(error was: $@)\n" if $@;

# an IP address, in IPv4, IPv4-mapped-in-IPv6, or IPv6 format.  NOTE: cannot
# just refer to $IPV4_ADDRESS, due to perl bug reported in nesting qr//s. :(
#
my $IP_ADDRESS = qr/\b (?:IPv6:)?
                    (?:
                      (?:0*:0*:ffff:(?:0*:)?)?   # IPv4-mapped-in-IPv6
                      (?:1\d\d|2[0-4]\d|25[0-5]|\d\d|\d)\.
                      (?:1\d\d|2[0-4]\d|25[0-5]|\d\d|\d)\.
                      (?:1\d\d|2[0-4]\d|25[0-5]|\d\d|\d)\.
                      (?:1\d\d|2[0-4]\d|25[0-5]|\d\d|\d)
                    |
                      # an IPv6 address, seems to always be at least 6 words
                      [a-f0-9]{0,4}
                      \:[a-f0-9]{0,4}
                      \:[a-f0-9]{0,4}
                      \:[a-f0-9]{0,4}
                      \:[a-f0-9]{0,4}
                      \:[a-f0-9]{0,4}
                      (?:\:[a-f0-9]{0,4})*
                    )\b/ix;

# Prefixes of IPv4 addresses that must never be listed as trusted relays.
my $IP_PRIVATE = qr{^(?:
                      192\.168|                        # 192.168/16: Private Use
                      10|                              # 10/8: Private Use
                      172\.(?:1[6-9]|2[0-9]|3[01])|    # 172.16-172.31/16: Private Use
                      169\.254|                        # 169.254/16: Private Use (APIPA)
                      127                              # 127/8: Private Use (local host)
                    )\.}x;

my ($opt_minham, $opt_rdns, $opt_help);
GetOptions(
  'minham:i' => \$opt_minham,
  'rdns'     => \$opt_rdns,
  'help'     => \$opt_help
) or usage();
$opt_help and usage();

$opt_rdns   ||= 0;
$opt_minham ||= 3;

# Maps "a.b.c" (class C network) => array ref of final octets seen in it.
my %class_cs = ();

while (my $line = <>) {
  my ($prob, $nspam, $nham, $atime, $tok) = split ' ', $line;

  # short or blank lines carry no token at all
  next unless defined $tok;

  # only select IP-address Received tokens
  next if ($tok !~ /^H\*r:ip\*(${IP_ADDRESS})/o);
  my $matched = $1;

  next unless ($nham >= $opt_minham);   # has relayed >= n ham mails

  # Reduce to a plain dotted quad *before* the private-range test, so that
  # prefixed forms of private addresses are filtered as well.
  my $ip = extract_ipv4($matched);
  next unless defined $ip;              # pure IPv6: cannot be grouped below
  next if ($ip =~ /$IP_PRIVATE/o);

  # $ip is known to be a dotted quad here, so this match cannot fail.
  my ($class_c, $octet) = ($ip =~ /^(\d+\.\d+\.\d+)\.(\d+)$/);
  push @{ $class_cs{$class_c} }, $octet;
}

foreach my $class_c (sort { classc2long($a) <=> classc2long($b) } keys %class_cs)
{
  # One consolidator per class C, so ranges never span class C boundaries.
  my $cidr = Net::CIDR::Lite->new;
  foreach my $octet (@{ $class_cs{$class_c} }) {
    $cidr->add_ip("$class_c.$octet");
  }
  $cidr->clean();

  foreach my $net ($cidr->list()) {
    my $rdns = '';
    if ($net =~ s/\/32$//) {
      # a single host: print it without the mask, optionally with its name
      $rdns = "\t# " . lookup_rdns($net) if $opt_rdns;
    }
    elsif ($opt_rdns) {
      $rdns = "\t# (range)";
    }
    print "trusted_networks $net$rdns\n";
  }
}

# Print the usage message on STDERR and exit with a failure status.
sub usage {
  die "usage: bayes_dump_to_trusted_networks [--minham n] [--rdns] [file]\n";
}

# Reduce an address matched by $IP_ADDRESS to a plain dotted-quad string.
# Strips an optional "IPv6:" label and an IPv4-mapped-in-IPv6 prefix.
# Returns the dotted quad, or nothing (undef in scalar context) when the
# address is a pure IPv6 one.
sub extract_ipv4 {
  my ($addr) = @_;
  $addr =~ s/^IPv6://i;
  $addr =~ s/^0*:0*:ffff:(?:0*:)?//i;
  return unless $addr =~ /^\d{1,3}\.\d{1,3}\.\d{1,3}\.\d{1,3}$/;
  return $addr;
}

# Run host(1) on a single IPv4 address and condense its output to one line:
# the PTR name(s), or "(no rdns)" when the lookup reports "not found".
# The argument only ever contains digits and dots (it comes from
# Net::CIDR::Lite output built from validated dotted quads), so
# interpolating it into the shell command is safe.
sub lookup_rdns {
  my ($ip) = @_;
  my $host = `host -W 1 $ip 2>&1`;
  $host =~ s/^.* domain name pointer (\S+)\s*$/$1/gm;
  $host =~ s/^.*not found: .*$/(no rdns)/gs;
  $host =~ s/\s+/ /gs;          # (newlines)
  $host =~ s/\.$//;
  return $host;
}

# Convert an "a.b.c" class C prefix to an integer for numeric sorting.
# Returns -1 if the argument is not three dot-separated decimal numbers.
sub classc2long {
  my $ip = shift;
  ($ip =~ /^(\d+)\.(\d+)\.(\d+)$/) or return -1;
  return ($1 << 16) | ($2 << 8) | ($3);
}