/[Apache-SVN]/spamassassin/trunk/lib/Mail/SpamAssassin/Plugin/URIDNSBL.pm
ViewVC logotype

Contents of /spamassassin/trunk/lib/Mail/SpamAssassin/Plugin/URIDNSBL.pm

Parent Directory Parent Directory | Revision Log Revision Log


Revision 882020 - (hide annotations)
Thu Nov 19 00:51:09 2009 UTC (8 days, 2 hours ago) by mmartinec
File size: 29673 byte(s)
Bug 6238: introducing the 'time_limit' configuration option,
with associated code changes in various places
1 quinlan 149177 # <@LICENSE>
2 felicity 431796 # Licensed to the Apache Software Foundation (ASF) under one or more
3     # contributor license agreements. See the NOTICE file distributed with
4     # this work for additional information regarding copyright ownership.
5     # The ASF licenses this file to you under the Apache License, Version 2.0
6     # (the "License"); you may not use this file except in compliance with
7     # the License. You may obtain a copy of the License at:
8 quinlan 149177 #
9     # http://www.apache.org/licenses/LICENSE-2.0
10     #
11     # Unless required by applicable law or agreed to in writing, software
12     # distributed under the License is distributed on an "AS IS" BASIS,
13     # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14     # See the License for the specific language governing permissions and
15     # limitations under the License.
16     # </@LICENSE>
17    
18 jm 6898 =head1 NAME
19    
20     URIDNSBL - look up URLs against DNS blocklists
21    
22 quinlan 149177 =head1 SYNOPSIS
23    
24     loadplugin Mail::SpamAssassin::Plugin::URIDNSBL
25     uridnsbl URIBL_SBLXBL sbl-xbl.spamhaus.org. TXT
26    
27     =head1 DESCRIPTION
28    
29 jm 6898 This works by analysing message text and HTML for URLs, extracting the
30     domain names from those, querying their NS records in DNS, resolving
31     the hostnames used therein, and querying various DNS blocklists for
32     those IP addresses. This is quite effective.
33    
34 dos 527740 =head1 USER SETTINGS
35 jm 6898
36     =over 4
37    
38 mmartinec 831320 =item skip_uribl_checks ( 0 | 1 ) (default: 0)
39    
40     Turning on the skip_uribl_checks setting will disable the URIDNSBL plugin.
41    
42     By default, SpamAssassin will run URI DNSBL checks. Individual URI blocklists
43     may be disabled selectively by setting a score of a corresponding rule to 0
44     or through the uridnsbl_skip_domain parameter.
45    
46     See also a related configuration parameter skip_rbl_checks,
47     which controls the DNSEval plugin (documented in the Conf man page).
48    
49     =back
50    
51     =over 4
52    
53 dos 527740 =item uridnsbl_skip_domain domain1 domain2 ...
54    
55     Specify a domain, or a number of domains, which should be skipped for the
56     URIBL checks. This is very useful to specify very common domains which are
57     not going to be listed in URIBLs.
58    
59     =back
60    
61     =head1 RULE DEFINITIONS AND PRIVILEGED SETTINGS
62    
63     =over 4
64    
65 jm 9881 =item uridnsbl NAME_OF_RULE dnsbl_zone lookuptype
66 jm 6898
67     Specify a lookup. C<NAME_OF_RULE> is the name of the rule to be
68     used, C<dnsbl_zone> is the zone to look up IPs in, and C<lookuptype>
69     is the type of lookup (B<TXT> or B<A>). Note that you must also
70 felicity 54021 define a body-eval rule calling C<check_uridnsbl()> to use this.
71 jm 6898
72     Example:
73    
74     uridnsbl URIBL_SBLXBL sbl-xbl.spamhaus.org. TXT
75 felicity 54021 body URIBL_SBLXBL eval:check_uridnsbl('URIBL_SBLXBL')
76 jm 6898 describe URIBL_SBLXBL Contains a URL listed in the SBL/XBL blocklist
77    
78 jm 646805 =item uridnssub NAME_OF_RULE dnsbl_zone lookuptype subtest
79    
80     Specify a DNSBL-style domain lookup with a sub-test. C<NAME_OF_RULE> is the
81     name of the rule to be used, C<dnsbl_zone> is the zone to look up IPs in,
82     and C<lookuptype> is the type of lookup (B<TXT> or B<A>).
83    
84     C<subtest> is the sub-test to run against the returned data. The sub-test may
85     either be an IPv4 dotted address for DNSBLs that return multiple A records or a
86     non-negative decimal number to specify a bitmask for DNSBLs that return a
87     single A record containing a bitmask of results.
88    
89     Note that, as with C<uridnsbl>, you must also define a body-eval rule calling
90     C<check_uridnsbl()> to use this.
91    
92     Example:
93    
94     uridnssub URIBL_DNSBL_4 dnsbl.example.org. A 127.0.0.4
95     uridnssub URIBL_DNSBL_8 dnsbl.example.org. A 8
96    
97 jm 9881 =item urirhsbl NAME_OF_RULE rhsbl_zone lookuptype
98    
99     Specify a RHSBL-style domain lookup. C<NAME_OF_RULE> is the name of the rule
100     to be used, C<rhsbl_zone> is the zone to look up domain names in, and
101     C<lookuptype> is the type of lookup (B<TXT> or B<A>). Note that you must also
102 felicity 54021 define a body-eval rule calling C<check_uridnsbl()> to use this.
103 jm 9881
104     An RHSBL zone is one where the domain name is looked up, as a string; e.g. a
105 quinlan 160276 URI using the domain C<foo.com> will cause a lookup of
106     C<foo.com.uriblzone.net>. Note that hostnames are stripped from the domain
107     used in the URIBL lookup, so the domain C<foo.bar.com> will look up
108     C<bar.com.uriblzone.net>, and C<foo.bar.co.uk> will look up
109     C<bar.co.uk.uriblzone.net>.
110 jm 9881
111 mmartinec 565567 If a URI consists of an IP address instead of a hostname, the IP address is
112     looked up (using the standard reversed quads method) in each C<rhsbl_zone>.
113 quinlan 160276
114 jm 21406 Example:
115    
116     urirhsbl URIBL_RHSBL rhsbl.example.org. TXT
117    
118     =item urirhssub NAME_OF_RULE rhsbl_zone lookuptype subtest
119    
120     Specify a RHSBL-style domain lookup with a sub-test. C<NAME_OF_RULE> is the
121     name of the rule to be used, C<rhsbl_zone> is the zone to look up domain names
122     in, and C<lookuptype> is the type of lookup (B<TXT> or B<A>).
123    
124     C<subtest> is the sub-test to run against the returned data. The sub-test may
125 sidney 433917 either be an IPv4 dotted address for RHSBLs that return multiple A records or a
126 jm 21406 non-negative decimal number to specify a bitmask for RHSBLs that return a
127 sidney 433917 single A record containing a bitmask of results.
128 jm 21406
129 felicity 54021 Note that, as with C<urirhsbl>, you must also define a body-eval rule calling
130 jm 21406 C<check_uridnsbl()> to use this.
131    
132     Example:
133    
134     urirhssub URIBL_RHSBL_4 rhsbl.example.org. A 127.0.0.4
135     urirhssub URIBL_RHSBL_8 rhsbl.example.org. A 8
136    
137 jm 720211 =item urinsrhsbl NAME_OF_RULE rhsbl_zone lookuptype
138    
139     Perform a RHSBL-style domain lookup against the contents of the NS records
140     for each URI. In other words, a URI using the domain C<foo.com> will cause
141     an NS lookup to take place; assuming that domain has an NS of C<ns0.bar.com>,
142     that will cause a lookup of C<bar.com.uriblzone.net>. Note that hostnames
143     are stripped from both the domain used in the URI, and the domain in the
144     lookup.
145    
146     C<NAME_OF_RULE> is the name of the rule to be used, C<rhsbl_zone> is the zone
147     to look up domain names in, and C<lookuptype> is the type of lookup (B<TXT> or
148     B<A>).
149    
150     Note that, as with C<urirhsbl>, you must also define a body-eval rule calling
151     C<check_uridnsbl()> to use this.
152    
153     =item urinsrhssub NAME_OF_RULE rhsbl_zone lookuptype subtest
154    
155     Specify a RHSBL-style domain-NS lookup, as above, with a sub-test.
156     C<NAME_OF_RULE> is the name of the rule to be used, C<rhsbl_zone> is the zone
157     to look up domain names in, and C<lookuptype> is the type of lookup (B<TXT> or
158     B<A>). C<subtest> is the sub-test to run against the returned data; see
159     C<urirhssub>.
160    
161     Note that, as with C<urirhsbl>, you must also define a body-eval rule calling
162     C<check_uridnsbl()> to use this.
163    
164 jm 728782 =item urifullnsrhsbl NAME_OF_RULE rhsbl_zone lookuptype
165    
166     Perform a RHSBL-style domain lookup against the contents of the NS records for
167     each URI. In other words, a URI using the domain C<foo.com> will cause an NS
168     lookup to take place; assuming that domain has an NS of C<ns0.bar.com>, that
169     will cause a lookup of C<ns0.bar.com.uriblzone.net>. Note that hostnames are
170     stripped from the domain used in the URI.
171    
172     C<NAME_OF_RULE> is the name of the rule to be used, C<rhsbl_zone> is the zone
173     to look up domain names in, and C<lookuptype> is the type of lookup (B<TXT> or
174     B<A>).
175    
176     Note that, as with C<urirhsbl>, you must also define a body-eval rule calling
177     C<check_uridnsbl()> to use this.
178    
179     =item urifullnsrhssub NAME_OF_RULE rhsbl_zone lookuptype subtest
180    
181     Specify a RHSBL-style domain-NS lookup, as above, with a sub-test.
182     C<NAME_OF_RULE> is the name of the rule to be used, C<rhsbl_zone> is the zone
183     to look up domain names in, and C<lookuptype> is the type of lookup (B<TXT> or
184     B<A>). C<subtest> is the sub-test to run against the returned data; see
185     C<urirhssub>.
186    
187     Note that, as with C<urirhsbl>, you must also define a body-eval rule calling
188     C<check_uridnsbl()> to use this.
189    
190 dos 527740 =back
191    
192     =head1 ADMINISTRATOR SETTINGS
193    
194     =over 4
195    
196 jm 6898 =item uridnsbl_max_domains N (default: 20)
197    
198     The maximum number of domains to look up.
199    
200     =back
201    
202 dos 527786 =head1 NOTES
203    
204     The C<uridnsbl_timeout> option has been obsoleted by the C<rbl_timeout>
205     option. See the C<Mail::SpamAssassin::Conf> POD for details on C<rbl_timeout>.
206    
207 jm 6898 =cut
208    
209     package Mail::SpamAssassin::Plugin::URIDNSBL;
210    
211     use Mail::SpamAssassin::Plugin;
212 quinlan 160273 use Mail::SpamAssassin::Constants qw(:ip);
213 quinlan 7001 use Mail::SpamAssassin::Util;
214 jm 720211 use Mail::SpamAssassin::Util::RegistrarBoundaries;
215 quinlan 162095 use Mail::SpamAssassin::Logger;
216 jm 6898 use strict;
217 quinlan 55260 use warnings;
218 jm 6898 use bytes;
219 mmartinec 574664 use re 'taint';
220 jm 6898
221     use vars qw(@ISA);
222     @ISA = qw(Mail::SpamAssassin::Plugin);
223    
224     use constant LOG_COMPLETION_TIMES => 0;
225    
226     # constructor
sub new {
  my ($invocant, $samain) = @_;

  # Accept either a class name or an existing object as the invocant,
  # then let the parent class build the object and re-bless it here.
  my $class = ref($invocant) || $invocant;
  my $self = $class->SUPER::new($samain);
  bless $self, $class;

  # Kept on the plugin object rather than per message; the original author
  # notes this can safely be effectively global within each process.
  $self->{finished} = { };

  # Expose the eval rule and register this plugin's configuration settings.
  $self->register_eval_rule("check_uridnsbl");
  $self->set_config($samain->{conf});

  return $self;
}
245    
246     # this is just a placeholder; in fact the results are dealt with later
sub check_uridnsbl {
  # Placeholder eval rule: it never reports a hit itself, the real results
  # are dealt with later.
  0;
}
250    
251     # ---------------------------------------------------------------------------
252    
253     # once the metadata is parsed, we can access the URI list. So start off
254     # the lookups here!
# Called once the message metadata has been parsed, so the URI list is
# available.  Resets the per-message URIDNSBL state on the scanner, sorts the
# active uridnsbl rules into their lookup classes, selects at most
# uridnsbl_max_domains domains from the message's URIs (in link-type priority
# order) and starts a query for each of them via query_domain().
#
# $opts->{permsgstatus} is the per-message scanner object.
# Returns 0 if skip_uribl_checks is set or DNS is not available, 1 otherwise.
sub parsed_metadata {
  my ($self, $opts) = @_;
  my $scanner = $opts->{permsgstatus};

  return 0 if $scanner->{main}->{conf}->{skip_uribl_checks};

  if (!$scanner->is_dns_available()) {
    $self->{dns_not_available} = 1;
    return 0;
  } else {
    # due to re-testing dns may become available after being unavailable
    # DOS: I don't think dns_not_available is even used anymore
    $self->{dns_not_available} = 0;
  }

  $scanner->{'uridnsbl_activerules'} = { };
  $scanner->{'uridnsbl_hits'} = { };
  $scanner->{'uridnsbl_seen_domain'} = { };

  # only hit DNSBLs for active rules (defined and score != 0)
  $scanner->{'uridnsbl_active_rules_rhsbl'} = { };
  $scanner->{'uridnsbl_active_rules_nsrhsbl'} = { };
  $scanner->{'uridnsbl_active_rules_fullnsrhsbl'} = { };
  $scanner->{'uridnsbl_active_rules_revipbl'} = { };

  foreach my $rulename (keys %{$scanner->{conf}->{uridnsbls}}) {
    next unless ($scanner->{conf}->is_rule_active('body_evals',$rulename));

    # file the rule under exactly one lookup class; anything that is not an
    # RHSBL-style rule is treated as a reversed-IP DNSBL rule
    my $rulecf = $scanner->{conf}->{uridnsbls}->{$rulename};
    if ($rulecf->{is_rhsbl}) {
      $scanner->{uridnsbl_active_rules_rhsbl}->{$rulename} = 1;
    } elsif ($rulecf->{is_fullnsrhsbl}) {
      $scanner->{uridnsbl_active_rules_fullnsrhsbl}->{$rulename} = 1;
    } elsif ($rulecf->{is_nsrhsbl}) {
      $scanner->{uridnsbl_active_rules_nsrhsbl}->{$rulename} = 1;
    } else {
      $scanner->{uridnsbl_active_rules_revipbl}->{$rulename} = 1;
    }
  }

  # get all domains in message

  # don't keep dereferencing this
  my $skip_domains = $scanner->{main}->{conf}->{uridnsbl_skip_domains};

  # list of hashes to use in order
  my @uri_ordered;

  # Generate the full list of html-parsed domains.
  my $uris = $scanner->get_uri_detail_list();

  # go from uri => info to uri_ordered
  # 0: a
  # 1: form
  # 2: img
  # 3: !a_empty
  # 4: parsed
  # 5: a_empty
  while (my($uri, $info) = each %{$uris}) {
    # we want to skip mailto: uris; URI schemes are case-insensitive, so
    # "MAILTO:" and "Mailto:" must be skipped as well
    next if ($uri =~ /^mailto:/i);

    # no domains were found via this uri, so skip
    next unless ($info->{domains});

    my $entry = 3;

    if ($info->{types}->{a}) {
      $entry = 5;

      # determine a vs a_empty
      foreach my $at (@{$info->{anchor_text}}) {
        if (length $at) {
          $entry = 0;
          last;
        }
      }
    }
    elsif ($info->{types}->{form}) {
      $entry = 1;
    }
    elsif ($info->{types}->{img}) {
      $entry = 2;
    }
    elsif ($info->{types}->{parsed} && (keys %{$info->{types}} == 1)) {
      $entry = 4;
    }

    # take the usable domains and add to the ordered list
    foreach ( keys %{ $info->{domains} } ) {
      if (exists $skip_domains->{$_}) {
        dbg("uridnsbl: domain $_ in skip list");
        next;
      }
      $uri_ordered[$entry]->{$_} = 1;
    }
  }

  # at this point, @uri_ordered is an ordered array of uri hashes

  my %domlist;
  my $umd = $scanner->{main}->{conf}->{uridnsbl_max_domains};
  while (keys %domlist < $umd && @uri_ordered) {
    my $array = shift @uri_ordered;
    next unless $array;

    # run through and find the new domains in this grouping
    my @domains = grep(!$domlist{$_}, keys %{$array});
    next unless @domains;

    # the new domains are all useful, just add them in
    if (keys(%domlist) + @domains <= $umd) {
      foreach (@domains) {
        $domlist{$_} = 1;
      }
    }
    else {
      # trim down to a limited number - pick randomly
      while (@domains && keys %domlist < $umd) {
        my $r = int rand (scalar @domains);
        $domlist{splice (@domains, $r, 1)} = 1;
      }
    }
  }

  # and query
  dbg("uridnsbl: domains to query: ".join(' ',keys %domlist));
  foreach my $dom (keys %domlist) {
    $self->query_domain ($scanner, $dom);
  }

  return 1;
}
389    
# Registers this plugin's configuration settings with the configuration
# parser found in $conf->{parser}.
#
# The eight rule-defining settings (uridnsbl, uridnssub, urirhsbl, urirhssub,
# urinsrhsbl, urinsrhssub, urifullnsrhsbl, urifullnsrhssub) differ only in
# the flags they store with the rule and in whether a fourth "subtest"
# field is expected, so their handlers are produced by two small factories
# instead of being written out eight times.
sub set_config {
  my($self, $conf) = @_;
  my @cmds;

  # Builds a handler for "NAME_OF_RULE zone lookuptype" settings; %flags is
  # stored alongside zone/type in {uridnsbls}->{NAME_OF_RULE}.
  my $simple_rule_handler = sub {
    my (%flags) = @_;
    return sub {
      my ($cf, $key, $value, $line) = @_;
      if ($value =~ /^(\S+)\s+(\S+)\s+(\S+)$/) {
        my ($rulename, $zone, $type) = ($1, $2, $3);
        $cf->{uridnsbls}->{$rulename} = {
          zone => $zone, type => $type,
          %flags
        };
      }
      elsif ($value =~ /^$/) {
        return $Mail::SpamAssassin::Conf::MISSING_REQUIRED_VALUE;
      }
      else {
        return $Mail::SpamAssassin::Conf::INVALID_VALUE;
      }
    };
  };

  # Builds a handler for "NAME_OF_RULE zone lookuptype subtest" settings.
  # The subtest must be a decimal number of up to 10 digits or a dotted
  # quad; the rule name is additionally recorded under
  # {uridnsbl_subs}->{zone}->{subtest}->{rulenames}.
  my $subtest_rule_handler = sub {
    my (%flags) = @_;
    return sub {
      my ($cf, $key, $value, $line) = @_;
      if ($value =~ /^(\S+)\s+(\S+)\s+(\S+)\s+(\d{1,10}|\d{1,3}\.\d{1,3}\.\d{1,3}\.\d{1,3})$/) {
        my ($rulename, $zone, $type, $subrule) = ($1, $2, $3, $4);
        $cf->{uridnsbls}->{$rulename} = {
          zone => $zone, type => $type,
          %flags, is_subrule => 1
        };
        $cf->{uridnsbl_subs}->{$zone} ||= { };
        push (@{$cf->{uridnsbl_subs}->{$zone}->{$subrule}->{rulenames}}, $rulename);
      }
      elsif ($value =~ /^$/) {
        return $Mail::SpamAssassin::Conf::MISSING_REQUIRED_VALUE;
      }
      else {
        return $Mail::SpamAssassin::Conf::INVALID_VALUE;
      }
    };
  };

  push(@cmds, {
    setting => 'skip_uribl_checks',
    default => 0,
    type => $Mail::SpamAssassin::Conf::CONF_TYPE_BOOL,
  });

  push(@cmds, {
    setting => 'uridnsbl_max_domains',
    is_admin => 1,
    default => 20,
    type => $Mail::SpamAssassin::Conf::CONF_TYPE_NUMERIC,
  });

  push (@cmds, {
    setting => 'uridnsbl',
    is_priv => 1,
    code => $simple_rule_handler->(is_rhsbl => 0),
  });

  push (@cmds, {
    setting => 'uridnssub',
    is_priv => 1,
    code => $subtest_rule_handler->(is_rhsbl => 0),
  });

  push (@cmds, {
    setting => 'urirhsbl',
    is_priv => 1,
    code => $simple_rule_handler->(is_rhsbl => 1),
  });

  push (@cmds, {
    setting => 'urirhssub',
    is_priv => 1,
    code => $subtest_rule_handler->(is_rhsbl => 1),
  });

  push (@cmds, {
    setting => 'urinsrhsbl',
    is_priv => 1,
    code => $simple_rule_handler->(is_nsrhsbl => 1),
  });

  push (@cmds, {
    setting => 'urinsrhssub',
    is_priv => 1,
    code => $subtest_rule_handler->(is_nsrhsbl => 1),
  });

  push (@cmds, {
    setting => 'urifullnsrhsbl',
    is_priv => 1,
    code => $simple_rule_handler->(is_fullnsrhsbl => 1),
  });

  push (@cmds, {
    setting => 'urifullnsrhssub',
    is_priv => 1,
    code => $subtest_rule_handler->(is_fullnsrhsbl => 1),
  });

  push (@cmds, {
    setting => 'uridnsbl_skip_domain',
    default => {},
    type => $Mail::SpamAssassin::Conf::CONF_TYPE_HASH_KEY_VALUE,
    code => sub {
      my ($self, $key, $value, $line) = @_;
      if ($value =~ /^$/) {
        return $Mail::SpamAssassin::Conf::MISSING_REQUIRED_VALUE;
      }
      # domains are stored lower-cased under {uridnsbl_skip_domains}
      foreach my $domain (split(/\s+/, $value)) {
        $self->{uridnsbl_skip_domains}->{lc $domain} = 1;
      }
    }
  });

  # obsolete
  push(@cmds, {
    setting => 'uridnsbl_timeout',
    code => sub {
      # not a lint_warn(), since it's pretty harmless and we don't want
      # to break stuff like sa-update
      warn("config: 'uridnsbl_timeout' is obsolete, use 'rbl_timeout' instead");
      return 0;
    }
  });

  $conf->{parser}->register_commands(\@cmds);
}
631    
632     # ---------------------------------------------------------------------------
633    
# Starts the lookups for one domain (or literal IPv4 address) taken from a
# URI.  Each lower-cased domain is processed at most once per message.
sub query_domain {
  my ($self, $scanner, $dom) = @_;

  $dom = lc $dom;

  # already handled for this message?
  return if $scanner->{uridnsbl_seen_domain}->{$dom};
  $scanner->{uridnsbl_seen_domain}->{$dom} = 1;
  $self->log_dns_result("querying domain $dom");

  # shared context object attached to every lookup started for this domain
  my $obj = { dom => $dom };

  # Host names always take the single-DNSBL path below.  Dotted quads take
  # it only when they are valid public addresses, and then in reversed form.
  my $single_dnsbl = 1;
  if ($dom =~ /^\d+\.\d+\.\d+\.\d+$/) {
    $single_dnsbl = 0;

    my $ipv4_re = IPV4_ADDRESS;
    my $private_re = IP_PRIVATE;

    # only look up the IP if it is public and valid
    if ($dom =~ /^$ipv4_re$/ && $dom !~ /^$private_re$/) {
      $self->lookup_dnsbl_for_ip($scanner, $obj, $dom);

      # and check the IP in RHSBLs too, using the reversed quads
      if ($dom =~ /^(\d+)\.(\d+)\.(\d+)\.(\d+)$/) {
        $dom = join('.', $4, $3, $2, $1);
        $single_dnsbl = 1;
      }
    }
  }

  my $rhsbl_rules = $scanner->{uridnsbl_active_rules_rhsbl};
  my $ns_rules = $scanner->{uridnsbl_active_rules_nsrhsbl};
  my $fullns_rules = $scanner->{uridnsbl_active_rules_fullnsrhsbl};
  my $revip_rules = $scanner->{uridnsbl_active_rules_revipbl};

  if ($single_dnsbl) {
    # look up the domain in the RHSBL subset
    for my $rulename (keys %{$rhsbl_rules}) {
      my $rulecf = $scanner->{conf}->{uridnsbls}->{$rulename};
      $self->lookup_single_dnsbl($scanner, $obj, $rulename,
                                 $dom, $rulecf->{zone}, $rulecf->{type});

      # see comment below
      $scanner->register_async_rule_start($rulename);
    }

    # NS (and from there A) lookups are only worthwhile for host names, and
    # only when some reverse-IP or NS-based rule is active
    if ($dom !~ /^\d+\.\d+\.\d+\.\d+$/ &&
        (scalar keys %{$revip_rules} ||
         scalar keys %{$ns_rules} ||
         scalar keys %{$fullns_rules}))
    {
      $self->lookup_domain_ns($scanner, $obj, $dom);
    }
  }

  # note that these rules are now underway. important: unless the
  # rule hits, in the current design, these will not be considered
  # "finished" until harvest_dnsbl_queries() completes
  for my $rulename (keys %{$revip_rules}) {
    $scanner->register_async_rule_start($rulename);
  }
}
699    
700     # ---------------------------------------------------------------------------
701    
# Starts an NS query for $dom unless a lookup with the same key is already
# registered with the scanner's async object; $obj is attached to the
# resulting lookup entry.
sub lookup_domain_ns {
  my ($self, $scanner, $obj, $dom) = @_;

  my $key = "NS:${dom}";
  return if $scanner->{async}->get_lookup($key);

  # equivalent of: dig $dom ns
  my $ent = $self->start_lookup(
    $scanner, $dom, 'NS',
    $self->res_bgsend($scanner, $dom, 'NS', $key),
    $key);
  $ent->{obj} = $obj;
}
714    
# Processes the answer to an NS query for $dom.  For every NS record found:
#  - a nameserver given as a literal IPv4 address is sent to the reverse-IP
#    DNSBL rules if it is a valid, public address;
#  - a nameserver host name gets an A lookup started for it;
#  - the nameserver is then looked up in every active nsrhsbl rule (trimmed
#    domain, or the literal IP) and every active fullnsrhsbl rule (full host
#    name without the trailing dot).
sub complete_ns_lookup {
  my ($self, $scanner, $ent, $dom) = @_;

  my $packet = $ent->{response_packet};
  my @answer = defined $packet ? $packet->answer : ();

  my $ipv4_re = IPV4_ADDRESS;
  my $private_re = IP_PRIVATE;
  my $ns_rules = $scanner->{uridnsbl_active_rules_nsrhsbl};
  my $fullns_rules = $scanner->{uridnsbl_active_rules_fullnsrhsbl};

  for my $rr (@answer) {
    my $str = $rr->string;
    next if !defined($str) || !defined($dom);
    $self->log_dns_result ("NSs for $dom: $str");

    next unless $str =~ /IN\s+NS\s+(\S+)/;
    my $nsmatch = $1;

    # full nameserver name, minus the trailing dot
    my $full_ns_name = $nsmatch;
    $full_ns_name =~ s/\.$//;

    # string used for the nsrhsbl lookups
    my $trimmed_ns_name;

    if ($nsmatch =~ /^\d+\.\d+\.\d+\.\d+\.?$/) {
      $nsmatch =~ s/\.$//;
      # only look up the IP if it is public and valid
      if ($nsmatch =~ /^$ipv4_re$/ && $nsmatch !~ /^$private_re$/) {
        $self->lookup_dnsbl_for_ip($scanner, $ent->{obj}, $nsmatch);
      }
      $trimmed_ns_name = $nsmatch;
    }
    else {
      $self->lookup_a_record($scanner, $ent->{obj}, $nsmatch);
      $trimmed_ns_name = Mail::SpamAssassin::Util::RegistrarBoundaries::trim_domain($nsmatch);
    }

    # nsrhsbl rules first, then fullnsrhsbl rules, each with its own
    # lookup string
    for my $batch ([ $ns_rules, $trimmed_ns_name ],
                   [ $fullns_rules, $full_ns_name ])
    {
      my ($rules, $lookupstr) = @{$batch};
      for my $rulename (keys %{$rules}) {
        my $rulecf = $scanner->{conf}->{uridnsbls}->{$rulename};
        $self->lookup_single_dnsbl($scanner, $ent->{obj}, $rulename,
                                   $lookupstr, $rulecf->{zone}, $rulecf->{type});

        $scanner->register_async_rule_start($rulename);
      }
    }
  }
}
768    
769     # ---------------------------------------------------------------------------
770    
# Starts an A query for the host name $hname unless a lookup with the same
# key is already registered with the scanner's async object; $obj is attached
# to the resulting lookup entry.
sub lookup_a_record {
  my ($self, $scanner, $obj, $hname) = @_;

  my $key = "A:${hname}";
  return if $scanner->{async}->get_lookup($key);

  # equivalent of: dig $hname a
  my $ent = $self->start_lookup(
    $scanner, $hname, 'A',
    $self->res_bgsend($scanner, $hname, 'A', $key),
    $key);
  $ent->{obj} = $obj;
}
783    
# Processes the answer to an A query for the nameserver host $hname: every
# address found in an "IN A" record is handed to lookup_dnsbl_for_ip().
sub complete_a_lookup {
  my ($self, $scanner, $ent, $hname) = @_;

  my $packet = $ent->{response_packet};
  my @answer = defined $packet ? $packet->answer : ();

  for my $rr (@answer) {
    my $str = $rr->string;
    $self->log_dns_result ("A for NS $hname: $str");

    next unless $str =~ /IN\s+A\s+(\S+)/;
    my $address = $1;
    $self->lookup_dnsbl_for_ip($scanner, $ent->{obj}, $address);
  }
}
798    
799     # ---------------------------------------------------------------------------
800    
# Looks up the IPv4 address $ip, in reversed-quad form, in the zone of every
# active reverse-IP DNSBL rule ($scanner->{uridnsbl_active_rules_revipbl}).
# $obj is passed through to lookup_single_dnsbl() for each query.
#
# Does nothing when $ip is undefined or not a dotted quad.  Without that
# check a failed match would leave $1..$4 holding whatever an earlier match
# captured (or undef), and a bogus name would be queried.
sub lookup_dnsbl_for_ip {
  my ($self, $scanner, $obj, $ip) = @_;

  return unless defined $ip && $ip =~ /^(\d+)\.(\d+)\.(\d+)\.(\d+)$/;
  my $revip = "$4.$3.$2.$1";

  my $cf = $scanner->{uridnsbl_active_rules_revipbl};
  foreach my $rulename (keys %{$cf}) {
    my $rulecf = $scanner->{conf}->{uridnsbls}->{$rulename};
    $self->lookup_single_dnsbl($scanner, $obj, $rulename,
                               $revip, $rulecf->{zone}, $rulecf->{type});
  }
}
814    
# Starts one blocklist query: "$lookupstr.$dnsbl" with query type $qtype,
# unless a lookup with the same zone/lookup-string key is already registered
# with the scanner's async object.  The rule name, zone and $obj are recorded
# on the resulting lookup entry.
sub lookup_single_dnsbl {
  my ($self, $scanner, $obj, $rulename, $lookupstr, $dnsbl, $qtype) = @_;

  my $key = "DNSBL:${dnsbl}:${lookupstr}";
  return if $scanner->{async}->get_lookup($key);

  # name actually queried, e.g. reversed-ip.zone or domain.zone
  my $item = "${lookupstr}.${dnsbl}";

  # equivalent of: dig $item $qtype
  my $ent = $self->start_lookup(
    $scanner, $item, 'DNSBL',
    $self->res_bgsend($scanner, $item, $qtype, $key),
    $key);

  $ent->{obj} = $obj;
  $ent->{rulename} = $rulename;
  $ent->{zone} = $dnsbl;
}
830    
# Processes the answer to a blocklist query started by lookup_single_dnsbl().
# Only A and TXT records are considered.
#  - For a simple rule, every record counts as a hit for the rule, except
#    A records outside 127/8, which are reported with warn() and skipped.
#  - For a rule with sub-tests, the record data is compared with every
#    sub-test registered for the zone: either an exact string match, or, for
#    all-digit sub-tests, a bitmask test against the returned IPv4 address.
#    Each rule name registered for a matching sub-test gets a hit.
sub complete_dnsbl_lookup {
  my ($self, $scanner, $ent, $dnsblip) = @_;

  my $conf = $scanner->{conf};
  my $rulename = $ent->{rulename};
  my $rulecf = $conf->{uridnsbls}->{$rulename};

  my $packet = $ent->{response_packet};
  my @answer = defined $packet ? $packet->answer : ();

  my $uridnsbl_subs = $conf->{uridnsbl_subs}->{$ent->{zone}};

  for my $rr (@answer) {
    my $rrtype = $rr->type;
    next unless $rrtype eq 'A' || $rrtype eq 'TXT';

    my $rdatastr = $rr->rdatastr;
    my $dom = $ent->{obj}->{dom};

    unless ($rulecf->{is_subrule}) {
      # this zone is a simple rule, not a set of subrules
      # skip any A record that isn't on 127/8
      if ($rrtype eq 'A' && $rdatastr !~ /^127\./) {
        warn("uridnsbl: bogus rr for domain=$dom, rule=$rulename, id=" .
             $packet->header->id." rr=".$rr->string);
        next;
      }
      $self->got_dnsbl_hit($scanner, $ent, $rdatastr, $dom, $rulename);
      next;
    }

    for my $subtest (keys %{$uridnsbl_subs}) {
      # exact match on the returned data, or a bitmask match against the
      # returned dotted-quad address
      my $matched =
        $subtest eq $rdatastr
        || ($subtest =~ /^\d+$/
            && $rdatastr =~ m/^\d{1,3}\.\d{1,3}\.\d{1,3}\.\d{1,3}$/
            && (Mail::SpamAssassin::Util::my_inet_aton($rdatastr) & $subtest));
      next unless $matched;

      for my $subrulename (@{$uridnsbl_subs->{$subtest}->{rulenames}}) {
        $self->got_dnsbl_hit($scanner, $ent, $rdatastr, $dom, $subrulename);
      }
    }
  }
}
882    
# Record that $dom is listed under $rulename and, if that rule is active
# for this message, register the hit with the scanner.
#
#   $scanner  - per-message scanner; hits are collected per rule in
#               {uridnsbl_hits}
#   $ent      - lookup entry the answer belongs to (not used here)
#   $str      - record data that caused the hit; only used for the debug
#               line, with whitespace runs collapsed to one space
#   $dom      - domain that was listed
#   $rulename - rule (or subrule) to credit
sub got_dnsbl_hit {
  my ($self, $scanner, $ent, $str, $dom, $rulename) = @_;

  $str =~ s/\s+/ /gs;   # long whitespace => short
  dbg("uridnsbl: domain \"$dom\" listed ($rulename): $str");

  # remember every domain seen for this rule
  unless (defined $scanner->{uridnsbl_hits}->{$rulename}) {
    $scanner->{uridnsbl_hits}->{$rulename} = { };
  }
  my $domains_hit = $scanner->{uridnsbl_hits}->{$rulename};
  $domains_hit->{$dom} = 1;

  # nothing more to do unless the rule is active in one of the rule sets
  return unless $scanner->{uridnsbl_active_rules_revipbl}->{$rulename}
             || $scanner->{uridnsbl_active_rules_nsrhsbl}->{$rulename}
             || $scanner->{uridnsbl_active_rules_fullnsrhsbl}->{$rulename}
             || $scanner->{uridnsbl_active_rules_rhsbl}->{$rulename};

  # TODO: this needs to handle multiple domain hits per rule
  $scanner->clear_test_state();
  my $uris = join (' ', keys %{$domains_hit});
  $scanner->test_log ("URIs: $uris");
  $scanner->got_hit ($rulename, "");

  # note that this rule has completed (since it got at least 1 hit)
  $scanner->register_async_rule_finish($rulename);
}
909    
910 jm 6898 # ---------------------------------------------------------------------------
911    
# Build a lookup entry and register it with the scanner's async lookup
# table.
#
#   $scanner - per-message scanner; {async} receives the entry together
#              with {master_deadline}
#   $zone    - stored on the entry as {zone}
#   $type    - short lookup type; stored as "URI-<type>"
#   $id      - identifier of the DNS query already sent
#   $key     - key identifying the lookup
#
# Returns the entry (a hash reference) so the caller can attach more
# fields.  Its completion callback forwards to completed_lookup_callback(),
# but only when a response packet was recorded on the entry.
sub start_lookup {
  my ($self, $scanner, $zone, $type, $id, $key) = @_;

  my $on_completion = sub {
    my ($finished) = @_;
    return unless defined $finished->{response_packet};   # aborted or empty
    $self->completed_lookup_callback ($scanner, $finished);
  };

  my %entry = (
    key  => $key,
    zone => $zone,            # serves to fetch other per-zone settings
    type => "URI-".$type,
    id   => $id,
    completed_callback => $on_completion,
  );
  my $ent = \%entry;

  $scanner->{async}->start_lookup($ent, $scanner->{master_deadline});
  return $ent;
}
930    
# Route a finished lookup to the handler for its type.
#
#   $scanner - per-message scanner, passed through to the handler
#   $ent     - lookup entry; {type} selects the handler and {key} supplies
#              the value passed as the handler's last argument
#
# The value is everything after the first colon of the key (keys look like
# "<kind>:<value>").  It is captured in list context so that a key without
# a colon yields undef rather than a stale $1 from an earlier match.
# Entries of an unknown type are ignored.
sub completed_lookup_callback {
  my ($self, $scanner, $ent) = @_;
  my $type = $ent->{type};
  my $key = $ent->{key};
  my ($val) = $key =~ /:(\S+?)$/;

  if ($type eq 'URI-NS') {
    $self->complete_ns_lookup ($scanner, $ent, $val);
  }
  elsif ($type eq 'URI-A') {
    $self->complete_a_lookup ($scanner, $ent, $val);
  }
  elsif ($type eq 'URI-DNSBL') {
    $self->complete_dnsbl_lookup ($scanner, $ent, $val);
  }
}
947    
948     # ---------------------------------------------------------------------------
949    
# Send a DNS query in the background through the main resolver.
#
#   $scanner - per-message scanner; {async} is told about the reply
#   $host    - name to query
#   $type    - DNS record type to ask for
#   $key     - lookup key handed back to {async} along with the reply
#
# Returns whatever the resolver's bgsend() returns.  When the resolver
# invokes the callback, the packet, id and timestamp it supplies are
# recorded via set_response_packet().
sub res_bgsend {
  my ($self, $scanner, $host, $type, $key) = @_;

  my $on_reply = sub {
    my ($pkt, $id, $timestamp) = @_;
    $scanner->{async}->set_response_packet($id, $pkt, $key, $timestamp);
  };

  my $resolver = $self->{main}->{resolver};
  return $resolver->bgsend($host, $type, undef, $on_reply);
}
958    
# Hook for logging raw DNS results; currently a deliberate no-op.
# Callers pass the text to log after $self.  To trace DNS answers while
# debugging, re-enable the two statements kept below.
sub log_dns_result {
  #my $self = shift;
  #Mail::SpamAssassin::dbg("uridnsbl: ".join (' ', @_));
  return;
}
963    
964     # ---------------------------------------------------------------------------
965    
966     1;

apache@apache.org
ViewVC Help
Powered by ViewVC 1.1.2