/[Apache-SVN]/spamassassin/trunk/lib/Mail/SpamAssassin/Plugin/URIDNSBL.pm
ViewVC logotype

Contents of /spamassassin/trunk/lib/Mail/SpamAssassin/Plugin/URIDNSBL.pm

Parent Directory Parent Directory | Revision Log Revision Log


Revision 728782 - (hide annotations)
Mon Dec 22 21:23:50 2008 UTC (11 months ago) by jm
File size: 28908 byte(s)
bug 6020: allow urifullnsrhsbl: variant of urinsrhsbl which looks up the full hostname from the NS record in the DNSBL, not just the domain
1 quinlan 149177 # <@LICENSE>
2 felicity 431796 # Licensed to the Apache Software Foundation (ASF) under one or more
3     # contributor license agreements. See the NOTICE file distributed with
4     # this work for additional information regarding copyright ownership.
5     # The ASF licenses this file to you under the Apache License, Version 2.0
6     # (the "License"); you may not use this file except in compliance with
7     # the License. You may obtain a copy of the License at:
8 quinlan 149177 #
9     # http://www.apache.org/licenses/LICENSE-2.0
10     #
11     # Unless required by applicable law or agreed to in writing, software
12     # distributed under the License is distributed on an "AS IS" BASIS,
13     # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14     # See the License for the specific language governing permissions and
15     # limitations under the License.
16     # </@LICENSE>
17    
18 jm 6898 =head1 NAME
19    
20     URIDNSBL - look up URLs against DNS blocklists
21    
22 quinlan 149177 =head1 SYNOPSIS
23    
24     loadplugin Mail::SpamAssassin::Plugin::URIDNSBL
25     uridnsbl URIBL_SBLXBL sbl-xbl.spamhaus.org. TXT
26    
27     =head1 DESCRIPTION
28    
29 jm 6898 This works by analysing message text and HTML for URLs, extracting the
30     domain names from those, querying their NS records in DNS, resolving
31     the hostnames used therein, and querying various DNS blocklists for
32     those IP addresses. This is quite effective.
33    
34 dos 527740 =head1 USER SETTINGS
35 jm 6898
36     =over 4
37    
38 dos 527740 =item uridnsbl_skip_domain domain1 domain2 ...
39    
40     Specify a domain, or a number of domains, which should be skipped for the
41     URIBL checks. This is very useful to specify very common domains which are
42     not going to be listed in URIBLs.
43    
44     =back
45    
46     =head1 RULE DEFINITIONS AND PRIVILEGED SETTINGS
47    
48     =over 4
49    
50 jm 9881 =item uridnsbl NAME_OF_RULE dnsbl_zone lookuptype
51 jm 6898
52     Specify a lookup. C<NAME_OF_RULE> is the name of the rule to be
53     used, C<dnsbl_zone> is the zone to look up IPs in, and C<lookuptype>
54     is the type of lookup (B<TXT> or B<A>). Note that you must also
55 felicity 54021 define a body-eval rule calling C<check_uridnsbl()> to use this.
56 jm 6898
57     Example:
58    
59     uridnsbl URIBL_SBLXBL sbl-xbl.spamhaus.org. TXT
60 felicity 54021 body URIBL_SBLXBL eval:check_uridnsbl('URIBL_SBLXBL')
61 jm 6898 describe URIBL_SBLXBL Contains a URL listed in the SBL/XBL blocklist
62    
63 jm 646805 =item uridnssub NAME_OF_RULE dnsbl_zone lookuptype subtest
64    
65     Specify a DNSBL-style domain lookup with a sub-test. C<NAME_OF_RULE> is the
66     name of the rule to be used, C<dnsbl_zone> is the zone to look up IPs in,
67     and C<lookuptype> is the type of lookup (B<TXT> or B<A>).
68    
69     C<subtest> is the sub-test to run against the returned data. The sub-test may
70     either be an IPv4 dotted address for DNSBLs that return multiple A records or a
71     non-negative decimal number to specify a bitmask for DNSBLs that return a
72     single A record containing a bitmask of results.
73    
74     Note that, as with C<uridnsbl>, you must also define a body-eval rule calling
75     C<check_uridnsbl()> to use this.
76    
77     Example:
78    
79     uridnssub URIBL_DNSBL_4 dnsbl.example.org. A 127.0.0.4
80     uridnssub URIBL_DNSBL_8 dnsbl.example.org. A 8
81    
82 jm 9881 =item urirhsbl NAME_OF_RULE rhsbl_zone lookuptype
83    
84     Specify a RHSBL-style domain lookup. C<NAME_OF_RULE> is the name of the rule
85     to be used, C<rhsbl_zone> is the zone to look up domain names in, and
86     C<lookuptype> is the type of lookup (B<TXT> or B<A>). Note that you must also
87 felicity 54021 define a body-eval rule calling C<check_uridnsbl()> to use this.
88 jm 9881
89     An RHSBL zone is one where the domain name is looked up, as a string; e.g. a
90 quinlan 160276 URI using the domain C<foo.com> will cause a lookup of
91     C<foo.com.uriblzone.net>. Note that hostnames are stripped from the domain
92     used in the URIBL lookup, so the domain C<foo.bar.com> will look up
93     C<bar.com.uriblzone.net>, and C<foo.bar.co.uk> will look up
94     C<bar.co.uk.uriblzone.net>.
95 jm 9881
96 mmartinec 565567 If an URI consists of an IP address instead of a hostname, the IP address is
97     looked up (using the standard reversed quads method) in each C<rhsbl_zone>.
98 quinlan 160276
99 jm 21406 Example:
100    
101     urirhsbl URIBL_RHSBL rhsbl.example.org. TXT
102    
103     =item urirhssub NAME_OF_RULE rhsbl_zone lookuptype subtest
104    
105     Specify a RHSBL-style domain lookup with a sub-test. C<NAME_OF_RULE> is the
106     name of the rule to be used, C<rhsbl_zone> is the zone to look up domain names
107     in, and C<lookuptype> is the type of lookup (B<TXT> or B<A>).
108    
109     C<subtest> is the sub-test to run against the returned data. The sub-test may
110 sidney 433917 either be an IPv4 dotted address for RHSBLs that return multiple A records or a
111 jm 21406 non-negative decimal number to specify a bitmask for RHSBLs that return a
112 sidney 433917 single A record containing a bitmask of results.
113 jm 21406
114 felicity 54021 Note that, as with C<urirhsbl>, you must also define a body-eval rule calling
115 jm 21406 C<check_uridnsbl()> to use this.
116    
117     Example:
118    
119     urirhssub URIBL_RHSBL_4 rhsbl.example.org. A 127.0.0.4
120     urirhssub URIBL_RHSBL_8 rhsbl.example.org. A 8
121    
122 jm 720211 =item urinsrhsbl NAME_OF_RULE rhsbl_zone lookuptype
123    
124     Perform a RHSBL-style domain lookup against the contents of the NS records
125     for each URI. In other words, a URI using the domain C<foo.com> will cause
126     an NS lookup to take place; assuming that domain has an NS of C<ns0.bar.com>,
127     that will cause a lookup of C<bar.com.uriblzone.net>. Note that hostnames
128     are stripped from both the domain used in the URI, and the domain in the
129     lookup.
130    
131     C<NAME_OF_RULE> is the name of the rule to be used, C<rhsbl_zone> is the zone
132     to look up domain names in, and C<lookuptype> is the type of lookup (B<TXT> or
133     B<A>).
134    
135     Note that, as with C<urirhsbl>, you must also define a body-eval rule calling
136     C<check_uridnsbl()> to use this.
137    
138     =item urinsrhssub NAME_OF_RULE rhsbl_zone lookuptype subtest
139    
140     Specify a RHSBL-style domain-NS lookup, as above, with a sub-test.
141     C<NAME_OF_RULE> is the name of the rule to be used, C<rhsbl_zone> is the zone
142     to look up domain names in, and C<lookuptype> is the type of lookup (B<TXT> or
143     B<A>). C<subtest> is the sub-test to run against the returned data; see
144     <urirhssub>.
145    
146     Note that, as with C<urirhsbl>, you must also define a body-eval rule calling
147     C<check_uridnsbl()> to use this.
148    
149 jm 728782 =item urifullnsrhsbl NAME_OF_RULE rhsbl_zone lookuptype
150    
151     Perform a RHSBL-style domain lookup against the contents of the NS records for
152     each URI. In other words, a URI using the domain C<foo.com> will cause an NS
153     lookup to take place; assuming that domain has an NS of C<ns0.bar.com>, that
154     will cause a lookup of C<ns0.bar.com.uriblzone.net>. Note that hostnames are
155     stripped from the domain used in the URI.
156    
157     C<NAME_OF_RULE> is the name of the rule to be used, C<rhsbl_zone> is the zone
158     to look up domain names in, and C<lookuptype> is the type of lookup (B<TXT> or
159     B<A>).
160    
161     Note that, as with C<urirhsbl>, you must also define a body-eval rule calling
162     C<check_uridnsbl()> to use this.
163    
164     =item urifullnsrhssub NAME_OF_RULE rhsbl_zone lookuptype subtest
165    
166     Specify a RHSBL-style domain-NS lookup, as above, with a sub-test.
167     C<NAME_OF_RULE> is the name of the rule to be used, C<rhsbl_zone> is the zone
168     to look up domain names in, and C<lookuptype> is the type of lookup (B<TXT> or
169     B<A>). C<subtest> is the sub-test to run against the returned data; see
170     <urirhssub>.
171    
172     Note that, as with C<urirhsbl>, you must also define a body-eval rule calling
173     C<check_uridnsbl()> to use this.
174    
175 dos 527740 =back
176    
177     =head1 ADMINISTRATOR SETTINGS
178    
179     =over 4
180    
181 jm 6898 =item uridnsbl_max_domains N (default: 20)
182    
183     The maximum number of domains to look up.
184    
185     =back
186    
187 dos 527786 =head1 NOTES
188    
189     The C<uridnsbl_timeout> option has been obsoleted by the C<rbl_timeout>
190     option. See the C<Mail::SpamAssassin::Conf> POD for details on C<rbl_timeout>.
191    
192 jm 6898 =cut
193    
194     package Mail::SpamAssassin::Plugin::URIDNSBL;
195    
196     use Mail::SpamAssassin::Plugin;
197 quinlan 160273 use Mail::SpamAssassin::Constants qw(:ip);
198 quinlan 7001 use Mail::SpamAssassin::Util;
199 jm 720211 use Mail::SpamAssassin::Util::RegistrarBoundaries;
200 quinlan 162095 use Mail::SpamAssassin::Logger;
201 jm 6898 use strict;
202 quinlan 55260 use warnings;
203 jm 6898 use bytes;
204 mmartinec 574664 use re 'taint';
205 jm 6898
206     use vars qw(@ISA);
207     @ISA = qw(Mail::SpamAssassin::Plugin);
208    
209     use constant LOG_COMPLETION_TIMES => 0;
210    
211     # constructor
212     sub new {
213     my $class = shift;
214     my $samain = shift;
215    
216     # some boilerplate...
217     $class = ref($class) || $class;
218     my $self = $class->SUPER::new($samain);
219     bless ($self, $class);
220    
221 jm 161778 # this can be effectively global, at least in each process, safely
222 jm 6898
223 jm 161778 $self->{finished} = { };
224 jm 6898
225     $self->register_eval_rule ("check_uridnsbl");
226 felicity 56728 $self->set_config($samain->{conf});
227 jm 6898
228     return $self;
229     }
230    
231     # this is just a placeholder; in fact the results are dealt with later
232     sub check_uridnsbl {
233     return 0;
234     }
235    
236     # ---------------------------------------------------------------------------
237    
238     # once the metadata is parsed, we can access the URI list. So start off
239     # the lookups here!
240     sub parsed_metadata {
241     my ($self, $opts) = @_;
242     my $scanner = $opts->{permsgstatus};
243    
244 jm 161778 if (!$scanner->is_dns_available()) {
245 jm 6898 $self->{dns_not_available} = 1;
246     return;
247 dos 518664 } else {
248     # due to re-testing dns may become available after being unavailable
249     # DOS: I don't think dns_not_available is even used anymore
250     $self->{dns_not_available} = 0;
251 jm 6898 }
252    
253 felicity 439873 $scanner->{'uridnsbl_activerules'} = { };
254     $scanner->{'uridnsbl_hits'} = { };
255     $scanner->{'uridnsbl_seen_domain'} = { };
256 jm 6898
257 parker 9571 # only hit DNSBLs for active rules (defined and score != 0)
258 felicity 439873 $scanner->{'uridnsbl_active_rules_rhsbl'} = { };
259 jm 720211 $scanner->{'uridnsbl_active_rules_nsrhsbl'} = { };
260 jm 728782 $scanner->{'uridnsbl_active_rules_fullnsrhsbl'} = { };
261 felicity 439873 $scanner->{'uridnsbl_active_rules_revipbl'} = { };
262    
263 jm 6898 foreach my $rulename (keys %{$scanner->{conf}->{uridnsbls}}) {
264 felicity 47438 next unless ($scanner->{conf}->is_rule_active('body_evals',$rulename));
265 jm 9881
266 felicity 439873 my $rulecf = $scanner->{conf}->{uridnsbls}->{$rulename};
267 jm 9881 if ($rulecf->{is_rhsbl}) {
268 felicity 439873 $scanner->{uridnsbl_active_rules_rhsbl}->{$rulename} = 1;
269 jm 728782 } elsif ($rulecf->{is_fullnsrhsbl}) {
270     $scanner->{uridnsbl_active_rules_fullnsrhsbl}->{$rulename} = 1;
271 jm 720211 } elsif ($rulecf->{is_nsrhsbl}) {
272     $scanner->{uridnsbl_active_rules_nsrhsbl}->{$rulename} = 1;
273 jm 9881 } else {
274 felicity 439873 $scanner->{uridnsbl_active_rules_revipbl}->{$rulename} = 1;
275 jm 9881 }
276 jm 6898 }
277    
278     # get all domains in message
279 felicity 155151
280 felicity 170124 # don't keep dereferencing this
281     my $skip_domains = $scanner->{main}->{conf}->{uridnsbl_skip_domains};
282    
283 dos 573117 # list of hashes to use in order
284 mmartinec 571893 my @uri_ordered;
285 felicity 155151
286     # Generate the full list of html-parsed domains.
287 felicity 169564 my $uris = $scanner->get_uri_detail_list();
288 felicity 155151
289 felicity 157209 # go from uri => info to uri_ordered
290     # 0: a
291     # 1: form
292     # 2: img
293     # 3: !a_empty
294     # 4: parsed
295     # 5: a_empty
296 felicity 169564 while (my($uri, $info) = each %{$uris}) {
297 felicity 170124 # we want to skip mailto: uris
298     next if ($uri =~ /^mailto:/);
299    
300     # no domains were found via this uri, so skip
301     next unless ($info->{domains});
302    
303 felicity 157209 my $entry = 3;
304    
305     if ($info->{types}->{a}) {
306     $entry = 5;
307    
308     # determine a vs a_empty
309     foreach my $at (@{$info->{anchor_text}}) {
310     if (length $at) {
311     $entry = 0;
312     last;
313     }
314     }
315 felicity 155151 }
316 felicity 157209 elsif ($info->{types}->{form}) {
317     $entry = 1;
318     }
319     elsif ($info->{types}->{img}) {
320     $entry = 2;
321     }
322 felicity 169570 elsif ($info->{types}->{parsed} && (keys %{$info->{types}} == 1)) {
323 felicity 169564 $entry = 4;
324     }
325 felicity 155151
326 felicity 170124 # take the usable domains and add to the ordered list
327     foreach ( keys %{ $info->{domains} } ) {
328     if (exists $skip_domains->{$_}) {
329     dbg("uridnsbl: domain $_ in skip list");
330     next;
331     }
332     $uri_ordered[$entry]->{$_} = 1;
333     }
334 felicity 155151 }
335    
336 felicity 170124 # at this point, @uri_ordered is an ordered array of uri hashes
337 felicity 155151
338 mmartinec 571893 my %domlist;
339 felicity 439873 my $umd = $scanner->{main}->{conf}->{uridnsbl_max_domains};
340     while (keys %domlist < $umd && @uri_ordered) {
341 felicity 155151 my $array = shift @uri_ordered;
342 felicity 157209 next unless $array;
343    
344 felicity 170124 # run through and find the new domains in this grouping
345     my @domains = grep(!$domlist{$_}, keys %{$array});
346     next unless @domains;
347 felicity 155151
348     # the new domains are all useful, just add them in
349 felicity 439873 if (keys(%domlist) + @domains <= $umd) {
350 felicity 170124 foreach (@domains) {
351 felicity 155151 $domlist{$_} = 1;
352 felicity 47380 }
353 felicity 155151 }
354     else {
355     # trim down to a limited number - pick randomly
356     my $i;
357 felicity 439873 while (@domains && keys %domlist < $umd) {
358 felicity 170124 my $r = int rand (scalar @domains);
359     $domlist{splice (@domains, $r, 1)} = 1;
360 felicity 47380 }
361     }
362 jm 6898 }
363    
364     # and query
365 felicity 155151 dbg("uridnsbl: domains to query: ".join(' ',keys %domlist));
366     foreach my $dom (keys %domlist) {
367 felicity 439873 $self->query_domain ($scanner, $dom);
368 jm 6898 }
369    
370     return 1;
371     }
372    
373 felicity 56728 sub set_config {
374     my($self, $conf) = @_;
375 mmartinec 571893 my @cmds;
376 jm 6898
377 felicity 56728 push(@cmds, {
378     setting => 'uridnsbl_max_domains',
379 dos 527740 is_admin => 1,
380 felicity 56728 default => 20,
381     type => $Mail::SpamAssassin::Conf::CONF_TYPE_NUMERIC,
382     });
383 jm 6898
384 felicity 56728 push (@cmds, {
385     setting => 'uridnsbl',
386 dos 527740 is_priv => 1,
387 felicity 56728 code => sub {
388     my ($self, $key, $value, $line) = @_;
389     if ($value =~ /^(\S+)\s+(\S+)\s+(\S+)$/) {
390     my $rulename = $1;
391     my $zone = $2;
392     my $type = $3;
393     $self->{uridnsbls}->{$rulename} = {
394     zone => $zone, type => $type,
395     is_rhsbl => 0
396     };
397     }
398 dos 178359 elsif ($value =~ /^$/) {
399     return $Mail::SpamAssassin::Conf::MISSING_REQUIRED_VALUE;
400     }
401     else {
402     return $Mail::SpamAssassin::Conf::INVALID_VALUE;
403     }
404 jm 6898 }
405 felicity 56728 });
406 felicity 55981
407 felicity 56728 push (@cmds, {
408 jm 646805 setting => 'uridnssub',
409     is_priv => 1,
410     code => sub {
411     my ($self, $key, $value, $line) = @_;
412     if ($value =~ /^(\S+)\s+(\S+)\s+(\S+)\s+(\d{1,10}|\d{1,3}\.\d{1,3}\.\d{1,3}\.\d{1,3})$/) {
413     my $rulename = $1;
414     my $zone = $2;
415     my $type = $3;
416     my $subrule = $4;
417     $self->{uridnsbls}->{$rulename} = {
418     zone => $zone, type => $type,
419     is_rhsbl => 0, is_subrule => 1
420     };
421     $self->{uridnsbl_subs}->{$zone} ||= { };
422     push (@{$self->{uridnsbl_subs}->{$zone}->{$subrule}->{rulenames}}, $rulename);
423     }
424     elsif ($value =~ /^$/) {
425     return $Mail::SpamAssassin::Conf::MISSING_REQUIRED_VALUE;
426     }
427     else {
428     return $Mail::SpamAssassin::Conf::INVALID_VALUE;
429     }
430     }
431     });
432    
433     push (@cmds, {
434 felicity 56728 setting => 'urirhsbl',
435 dos 527740 is_priv => 1,
436 felicity 56728 code => sub {
437     my ($self, $key, $value, $line) = @_;
438     if ($value =~ /^(\S+)\s+(\S+)\s+(\S+)$/) {
439     my $rulename = $1;
440     my $zone = $2;
441     my $type = $3;
442     $self->{uridnsbls}->{$rulename} = {
443     zone => $zone, type => $type,
444     is_rhsbl => 1
445     };
446     }
447 dos 178359 elsif ($value =~ /^$/) {
448     return $Mail::SpamAssassin::Conf::MISSING_REQUIRED_VALUE;
449     }
450     else {
451     return $Mail::SpamAssassin::Conf::INVALID_VALUE;
452     }
453 jm 9881 }
454 felicity 56728 });
455 felicity 55981
456 felicity 56728 push (@cmds, {
457     setting => 'urirhssub',
458 dos 527740 is_priv => 1,
459 felicity 56728 code => sub {
460     my ($self, $key, $value, $line) = @_;
461 sidney 433917 if ($value =~ /^(\S+)\s+(\S+)\s+(\S+)\s+(\d{1,10}|\d{1,3}\.\d{1,3}\.\d{1,3}\.\d{1,3})$/) {
462 felicity 56728 my $rulename = $1;
463     my $zone = $2;
464     my $type = $3;
465     my $subrule = $4;
466     $self->{uridnsbls}->{$rulename} = {
467     zone => $zone, type => $type,
468     is_rhsbl => 1, is_subrule => 1
469     };
470     $self->{uridnsbl_subs}->{$zone} ||= { };
471 dos 543724 push (@{$self->{uridnsbl_subs}->{$zone}->{$subrule}->{rulenames}}, $rulename);
472 felicity 56728 }
473 dos 178359 elsif ($value =~ /^$/) {
474     return $Mail::SpamAssassin::Conf::MISSING_REQUIRED_VALUE;
475     }
476     else {
477     return $Mail::SpamAssassin::Conf::INVALID_VALUE;
478     }
479 jm 21406 }
480 felicity 56728 });
481 felicity 55981
482 felicity 56728 push (@cmds, {
483 jm 720211 setting => 'urinsrhsbl',
484     is_priv => 1,
485     code => sub {
486     my ($self, $key, $value, $line) = @_;
487     if ($value =~ /^(\S+)\s+(\S+)\s+(\S+)$/) {
488     my $rulename = $1;
489     my $zone = $2;
490     my $type = $3;
491     $self->{uridnsbls}->{$rulename} = {
492     zone => $zone, type => $type,
493     is_nsrhsbl => 1
494     };
495     }
496     elsif ($value =~ /^$/) {
497     return $Mail::SpamAssassin::Conf::MISSING_REQUIRED_VALUE;
498     }
499     else {
500     return $Mail::SpamAssassin::Conf::INVALID_VALUE;
501     }
502     }
503     });
504    
505     push (@cmds, {
506     setting => 'urinsrhssub',
507     is_priv => 1,
508     code => sub {
509     my ($self, $key, $value, $line) = @_;
510     if ($value =~ /^(\S+)\s+(\S+)\s+(\S+)\s+(\d{1,10}|\d{1,3}\.\d{1,3}\.\d{1,3}\.\d{1,3})$/) {
511     my $rulename = $1;
512     my $zone = $2;
513     my $type = $3;
514     my $subrule = $4;
515     $self->{uridnsbls}->{$rulename} = {
516     zone => $zone, type => $type,
517     is_nsrhsbl => 1, is_subrule => 1
518     };
519     $self->{uridnsbl_subs}->{$zone} ||= { };
520     push (@{$self->{uridnsbl_subs}->{$zone}->{$subrule}->{rulenames}}, $rulename);
521     }
522     elsif ($value =~ /^$/) {
523     return $Mail::SpamAssassin::Conf::MISSING_REQUIRED_VALUE;
524     }
525     else {
526     return $Mail::SpamAssassin::Conf::INVALID_VALUE;
527     }
528     }
529     });
530    
531     push (@cmds, {
532 jm 728782 setting => 'urifullnsrhsbl',
533     is_priv => 1,
534     code => sub {
535     my ($self, $key, $value, $line) = @_;
536     if ($value =~ /^(\S+)\s+(\S+)\s+(\S+)$/) {
537     my $rulename = $1;
538     my $zone = $2;
539     my $type = $3;
540     $self->{uridnsbls}->{$rulename} = {
541     zone => $zone, type => $type,
542     is_fullnsrhsbl => 1
543     };
544     }
545     elsif ($value =~ /^$/) {
546     return $Mail::SpamAssassin::Conf::MISSING_REQUIRED_VALUE;
547     }
548     else {
549     return $Mail::SpamAssassin::Conf::INVALID_VALUE;
550     }
551     }
552     });
553    
554     push (@cmds, {
555     setting => 'urifullnsrhssub',
556     is_priv => 1,
557     code => sub {
558     my ($self, $key, $value, $line) = @_;
559     if ($value =~ /^(\S+)\s+(\S+)\s+(\S+)\s+(\d{1,10}|\d{1,3}\.\d{1,3}\.\d{1,3}\.\d{1,3})$/) {
560     my $rulename = $1;
561     my $zone = $2;
562     my $type = $3;
563     my $subrule = $4;
564     $self->{uridnsbls}->{$rulename} = {
565     zone => $zone, type => $type,
566     is_fullnsrhsbl => 1, is_subrule => 1
567     };
568     $self->{uridnsbl_subs}->{$zone} ||= { };
569     push (@{$self->{uridnsbl_subs}->{$zone}->{$subrule}->{rulenames}}, $rulename);
570     }
571     elsif ($value =~ /^$/) {
572     return $Mail::SpamAssassin::Conf::MISSING_REQUIRED_VALUE;
573     }
574     else {
575     return $Mail::SpamAssassin::Conf::INVALID_VALUE;
576     }
577     }
578     });
579    
580     push (@cmds, {
581 felicity 56728 setting => 'uridnsbl_skip_domain',
582     default => {},
583     code => sub {
584     my ($self, $key, $value, $line) = @_;
585 dos 178359 if ($value =~ /^$/) {
586     return $Mail::SpamAssassin::Conf::MISSING_REQUIRED_VALUE;
587     }
588 felicity 56728 foreach my $domain (split(/\s+/, $value)) {
589     $self->{uridnsbl_skip_domains}->{lc $domain} = 1;
590     }
591 felicity 47380 }
592 felicity 56728 });
593 felicity 55981
594 jm 527859 # obsolete
595     push(@cmds, {
596     setting => 'uridnsbl_timeout',
597     code => sub {
598     # not a lint_warn(), since it's pretty harmless and we don't want
599     # to break stuff like sa-update
600     warn("config: 'uridnsbl_timeout' is obsolete, use 'rbl_timeout' instead");
601     return 0;
602     }
603     });
604    
605 felicity 56728 $conf->{parser}->register_commands(\@cmds);
606 jm 6898 }
607    
608     # ---------------------------------------------------------------------------
609    
610     sub query_domain {
611 felicity 439873 my ($self, $scanner, $dom) = @_;
612 jm 6898
613 quinlan 51813 #warn "uridnsbl: domain $dom\n";
614     #return;
615 jm 6898
616     $dom = lc $dom;
617 felicity 439873 return if $scanner->{uridnsbl_seen_domain}->{$dom};
618     $scanner->{uridnsbl_seen_domain}->{$dom} = 1;
619 quinlan 160273 $self->log_dns_result("querying domain $dom");
620 jm 6898
621 mmartinec 567684 my $obj = { dom => $dom };
622 jm 6898
623 quinlan 160273 my $single_dnsbl = 0;
624 jm 394351 if ($dom =~ /^\d+\.\d+\.\d+\.\d+$/) {
625 quinlan 160273 my $IPV4_ADDRESS = IPV4_ADDRESS;
626     my $IP_PRIVATE = IP_PRIVATE;
627     # only look up the IP if it is public and valid
628     if ($dom =~ /^$IPV4_ADDRESS$/ && $dom !~ /^$IP_PRIVATE$/) {
629 felicity 439873 $self->lookup_dnsbl_for_ip($scanner, $obj, $dom);
630 quinlan 160273 # and check the IP in RHSBLs too
631     if ($dom =~ /^(\d+)\.(\d+)\.(\d+)\.(\d+)$/) {
632     $dom = "$4.$3.$2.$1";
633     $single_dnsbl = 1;
634     }
635     }
636 jm 9881 }
637 felicity 47380 else {
638 quinlan 160273 $single_dnsbl = 1;
639     }
640    
641 jm 596095 my $rhsblrules = $scanner->{uridnsbl_active_rules_rhsbl};
642 jm 720211 my $nsrhsblrules = $scanner->{uridnsbl_active_rules_nsrhsbl};
643 jm 728782 my $fullnsrhsblrules = $scanner->{uridnsbl_active_rules_fullnsrhsbl};
644 jm 596095 my $reviprules = $scanner->{uridnsbl_active_rules_revipbl};
645    
646 quinlan 160273 if ($single_dnsbl) {
647 jm 9881 # look up the domain in the RHSBL subset
648 jm 596095 foreach my $rulename (keys %{$rhsblrules}) {
649 felicity 439873 my $rulecf = $scanner->{conf}->{uridnsbls}->{$rulename};
650     $self->lookup_single_dnsbl($scanner, $obj, $rulename,
651 quinlan 160273 $dom, $rulecf->{zone}, $rulecf->{type});
652 jm 394351
653     # see comment below
654 felicity 439873 $scanner->register_async_rule_start($rulename);
655 jm 9881 }
656    
657 jm 596095 # perform NS, A lookups to look up the domain in the non-RHSBL subset,
658     # but only if there are active reverse-IP-URIBL rules
659 jm 721524 if ($dom !~ /^\d+\.\d+\.\d+\.\d+$/ &&
660 jm 728782 (scalar keys %{$reviprules} ||
661     scalar keys %{$nsrhsblrules} ||
662     scalar keys %{$fullnsrhsblrules}))
663 jm 721524 {
664 felicity 439873 $self->lookup_domain_ns($scanner, $obj, $dom);
665 quinlan 160273 }
666 jm 6898 }
667 jm 394351
668     # note that these rules are now underway. important: unless the
669     # rule hits, in the current design, these will not be considered
670     # "finished" until harvest_dnsbl_queries() completes
671 jm 596095 foreach my $rulename (keys %{$reviprules}) {
672 felicity 439873 $scanner->register_async_rule_start($rulename);
673 jm 394351 }
674 jm 6898 }
675    
676     # ---------------------------------------------------------------------------
677    
678     sub lookup_domain_ns {
679 felicity 439873 my ($self, $scanner, $obj, $dom) = @_;
680 jm 6898
681     my $key = "NS:".$dom;
682 felicity 439873 return if $scanner->{async}->get_lookup($key);
683 jm 6898
684     # dig $dom ns
685 mmartinec 585292 my $ent = $self->start_lookup($scanner, $dom, 'NS',
686 mmartinec 564636 $self->res_bgsend($scanner, $dom, 'NS', $key),
687     $key);
688 jm 6898 $ent->{obj} = $obj;
689     }
690    
691     sub complete_ns_lookup {
692 felicity 439873 my ($self, $scanner, $ent, $dom) = @_;
693 jm 7002
694 jm 161157 my $packet = $ent->{response_packet};
695 mmartinec 582903 my @answer = !defined $packet ? () : $packet->answer;
696 jm 6898
697 quinlan 160273 my $IPV4_ADDRESS = IPV4_ADDRESS;
698     my $IP_PRIVATE = IP_PRIVATE;
699 jm 720211 my $nsrhsblrules = $scanner->{uridnsbl_active_rules_nsrhsbl};
700 jm 728782 my $fullnsrhsblrules = $scanner->{uridnsbl_active_rules_fullnsrhsbl};
701 quinlan 160273
702 jm 6898 foreach my $rr (@answer) {
703     my $str = $rr->string;
704     next unless (defined($str) && defined($dom));
705     $self->log_dns_result ("NSs for $dom: $str");
706    
707     if ($str =~ /IN\s+NS\s+(\S+)/) {
708 quinlan 160273 my $nsmatch = $1;
709 jm 720211 my $nsrhblstr = $nsmatch;
710 jm 728782 my $fullnsrhblstr = $nsmatch;
711     $fullnsrhblstr =~ s/\.$//;
712 quinlan 160273
713     if ($nsmatch =~ /^\d+\.\d+\.\d+\.\d+\.?$/) {
714     $nsmatch =~ s/\.$//;
715     # only look up the IP if it is public and valid
716     if ($nsmatch =~ /^$IPV4_ADDRESS$/ && $nsmatch !~ /^$IP_PRIVATE$/) {
717 felicity 439873 $self->lookup_dnsbl_for_ip($scanner, $ent->{obj}, $nsmatch);
718 quinlan 160273 }
719 jm 720211 $nsrhblstr = $nsmatch;
720 quinlan 160273 }
721     else {
722 felicity 439873 $self->lookup_a_record($scanner, $ent->{obj}, $nsmatch);
723 jm 720211 $nsrhblstr = Mail::SpamAssassin::Util::RegistrarBoundaries::trim_domain($nsmatch);
724 quinlan 160273 }
725 jm 720211
726     foreach my $rulename (keys %{$nsrhsblrules}) {
727     my $rulecf = $scanner->{conf}->{uridnsbls}->{$rulename};
728     $self->lookup_single_dnsbl($scanner, $ent->{obj}, $rulename,
729     $nsrhblstr, $rulecf->{zone}, $rulecf->{type});
730    
731     $scanner->register_async_rule_start($rulename);
732     }
733 jm 728782
734     foreach my $rulename (keys %{$fullnsrhsblrules}) {
735     my $rulecf = $scanner->{conf}->{uridnsbls}->{$rulename};
736     $self->lookup_single_dnsbl($scanner, $ent->{obj}, $rulename,
737     $fullnsrhblstr, $rulecf->{zone}, $rulecf->{type});
738    
739     $scanner->register_async_rule_start($rulename);
740     }
741 jm 6898 }
742     }
743     }
744    
745     # ---------------------------------------------------------------------------
746    
747     sub lookup_a_record {
748 felicity 439873 my ($self, $scanner, $obj, $hname) = @_;
749 jm 6898
750     my $key = "A:".$hname;
751 felicity 439873 return if $scanner->{async}->get_lookup($key);
752 jm 6898
753     # dig $hname a
754 mmartinec 585292 my $ent = $self->start_lookup($scanner, $hname, 'A',
755 mmartinec 564636 $self->res_bgsend($scanner, $hname, 'A', $key),
756     $key);
757 jm 6898 $ent->{obj} = $obj;
758     }
759    
760     sub complete_a_lookup {
761 felicity 439873 my ($self, $scanner, $ent, $hname) = @_;
762 jm 6898
763 mmartinec 582903 my $packet = $ent->{response_packet};
764     my @answer = !defined $packet ? () : $packet->answer;
765     foreach my $rr (@answer) {
766 jm 6898 my $str = $rr->string;
767     $self->log_dns_result ("A for NS $hname: $str");
768    
769     if ($str =~ /IN\s+A\s+(\S+)/) {
770 felicity 439873 $self->lookup_dnsbl_for_ip($scanner, $ent->{obj}, $1);
771 jm 6898 }
772     }
773     }
774    
775     # ---------------------------------------------------------------------------
776    
777     sub lookup_dnsbl_for_ip {
778 felicity 439873 my ($self, $scanner, $obj, $ip) = @_;
779 jm 6898
780     $ip =~ /^(\d+)\.(\d+)\.(\d+)\.(\d+)$/;
781     my $revip = "$4.$3.$2.$1";
782    
783 felicity 439873 my $cf = $scanner->{uridnsbl_active_rules_revipbl};
784 jm 6898 foreach my $rulename (keys %{$cf}) {
785 felicity 439873 my $rulecf = $scanner->{conf}->{uridnsbls}->{$rulename};
786     $self->lookup_single_dnsbl($scanner, $obj, $rulename,
787 quinlan 160273 $revip, $rulecf->{zone}, $rulecf->{type});
788 jm 6898 }
789     }
790    
791     sub lookup_single_dnsbl {
792 felicity 439873 my ($self, $scanner, $obj, $rulename, $lookupstr, $dnsbl, $qtype) = @_;
793 jm 6898
794 jm 9881 my $key = "DNSBL:".$dnsbl.":".$lookupstr;
795 felicity 439873 return if $scanner->{async}->get_lookup($key);
796 jm 9881 my $item = $lookupstr.".".$dnsbl;
797 jm 6898
798     # dig $ip txt
799 mmartinec 585292 my $ent = $self->start_lookup($scanner, $item, 'DNSBL',
800 mmartinec 564636 $self->res_bgsend($scanner, $item, $qtype, $key),
801     $key);
802 jm 6898 $ent->{obj} = $obj;
803     $ent->{rulename} = $rulename;
804 jm 21406 $ent->{zone} = $dnsbl;
805 jm 6898 }
806    
807     sub complete_dnsbl_lookup {
808 felicity 439873 my ($self, $scanner, $ent, $dnsblip) = @_;
809 jm 6898
810 felicity 439873 my $conf = $scanner->{conf};
811 mmartinec 571893 my @subtests;
812 jm 21406 my $rulename = $ent->{rulename};
813     my $rulecf = $conf->{uridnsbls}->{$rulename};
814    
815 jm 161157 my $packet = $ent->{response_packet};
816 mmartinec 582903 my @answer = !defined $packet ? () : $packet->answer;
817 jm 161157
818 quinlan 125719 my $uridnsbl_subs = $conf->{uridnsbl_subs}->{$ent->{zone}};
819 jm 21406 foreach my $rr (@answer)
820     {
821     next if ($rr->type ne 'A' && $rr->type ne 'TXT');
822    
823     my $rdatastr = $rr->rdatastr;
824 jm 6898 my $dom = $ent->{obj}->{dom};
825    
826 jm 21406 if (!$rulecf->{is_subrule}) {
827     # this zone is a simple rule, not a set of subrules
828 quinlan 125719 # skip any A record that isn't on 127/8
829 jm 158887 if ($rr->type eq 'A' && $rr->rdatastr !~ /^127\./) {
830 jm 165017 warn("uridnsbl: bogus rr for domain=$dom, rule=$rulename, id=" .
831     $packet->header->id." rr=".$rr->string);
832 quinlan 158898 next;
833 jm 158887 }
834 felicity 439873 $self->got_dnsbl_hit($scanner, $ent, $rdatastr, $dom, $rulename);
835 jm 21406 }
836     else {
837     foreach my $subtest (keys (%{$uridnsbl_subs}))
838     {
839     if ($subtest eq $rdatastr) {
840 dos 543724 foreach my $subrulename (@{$uridnsbl_subs->{$subtest}->{rulenames}}) {
841     $self->got_dnsbl_hit($scanner, $ent, $rdatastr, $dom, $subrulename);
842     }
843 jm 21406 }
844     # bitmask
845     elsif ($subtest =~ /^\d+$/) {
846 sidney 433917 if ($rdatastr =~ m/^\d{1,3}\.\d{1,3}\.\d{1,3}\.\d{1,3}$/ &&
847     Mail::SpamAssassin::Util::my_inet_aton($rdatastr) & $subtest)
848 jm 21406 {
849 dos 543724 foreach my $subrulename (@{$uridnsbl_subs->{$subtest}->{rulenames}}) {
850     $self->got_dnsbl_hit($scanner, $ent, $rdatastr, $dom, $subrulename);
851     }
852 jm 21406 }
853     }
854     }
855     }
856 jm 6898 }
857     }
858    
859 jm 21406 sub got_dnsbl_hit {
860 felicity 439873 my ($self, $scanner, $ent, $str, $dom, $rulename) = @_;
861 jm 21406
862     $str =~ s/\s+/ /gs; # long whitespace => short
863 quinlan 51813 dbg("uridnsbl: domain \"$dom\" listed ($rulename): $str");
864 jm 21406
865 felicity 439873 if (!defined $scanner->{uridnsbl_hits}->{$rulename}) {
866     $scanner->{uridnsbl_hits}->{$rulename} = { };
867 jm 21406 };
868 felicity 439873 $scanner->{uridnsbl_hits}->{$rulename}->{$dom} = 1;
869 jm 394348
870 felicity 439873 if ($scanner->{uridnsbl_active_rules_revipbl}->{$rulename}
871 jm 720211 || $scanner->{uridnsbl_active_rules_nsrhsbl}->{$rulename}
872 jm 728782 || $scanner->{uridnsbl_active_rules_fullnsrhsbl}->{$rulename}
873 felicity 439873 || $scanner->{uridnsbl_active_rules_rhsbl}->{$rulename})
874 jm 394348 {
875     # TODO: this needs to handle multiple domain hits per rule
876 felicity 439873 $scanner->clear_test_state();
877     my $uris = join (' ', keys %{$scanner->{uridnsbl_hits}->{$rulename}});
878     $scanner->test_log ("URIs: $uris");
879     $scanner->got_hit ($rulename, "");
880 jm 394351
881     # note that this rule has completed (since it got at least 1 hit)
882 felicity 439873 $scanner->register_async_rule_finish($rulename);
883 jm 394348 }
884 jm 21406 }
885    
886 jm 6898 # ---------------------------------------------------------------------------
887    
888     sub start_lookup {
889 mmartinec 585292 my ($self, $scanner, $zone, $type, $id, $key) = @_;
890 jm 394348
891 jm 6898 my $ent = {
892 jm 394348 key => $key,
893 mmartinec 585292 zone => $zone, # serves to fetch other per-zone settings
894 jm 394348 type => "URI-".$type,
895     id => $id,
896     completed_callback => sub {
897     my $ent = shift;
898 mmartinec 580055 if (defined $ent->{response_packet}) { # not aborted or empty
899     $self->completed_lookup_callback ($scanner, $ent);
900     }
901 jm 394348 }
902 jm 6898 };
903 felicity 439873 $scanner->{async}->start_lookup($ent);
904 jm 394348 return $ent;
905 jm 6898 }
906    
907 jm 394348 sub completed_lookup_callback {
908 felicity 439873 my ($self, $scanner, $ent) = @_;
909 jm 394348 my $type = $ent->{type};
910     my $key = $ent->{key};
911     $key =~ /:(\S+?)$/; my $val = $1;
912 jm 6898
913 jm 394348 if ($type eq 'URI-NS') {
914 felicity 439873 $self->complete_ns_lookup ($scanner, $ent, $val);
915 jm 6898 }
916 jm 394348 elsif ($type eq 'URI-A') {
917 felicity 439873 $self->complete_a_lookup ($scanner, $ent, $val);
918 jm 6898 }
919 jm 394348 elsif ($type eq 'URI-DNSBL') {
920 felicity 439873 $self->complete_dnsbl_lookup ($scanner, $ent, $val);
921 jm 6898 }
922     }
923    
924     # ---------------------------------------------------------------------------
925    
926 jm 161148 sub res_bgsend {
927 mmartinec 564636 my ($self, $scanner, $host, $type, $key) = @_;
928 jm 161778
929     return $self->{main}->{resolver}->bgsend($host, $type, undef, sub {
930 mmartinec 564636 my ($pkt, $id, $timestamp) = @_;
931     $scanner->{async}->set_response_packet($id, $pkt, $key, $timestamp);
932 jm 161778 });
933 jm 161148 }
934    
935 jm 6898 sub log_dns_result {
936 felicity 57557 #my $self = shift;
937 quinlan 51813 #Mail::SpamAssassin::dbg("uridnsbl: ".join (' ', @_));
938 jm 6898 }
939    
940     # ---------------------------------------------------------------------------
941    
942     1;

apache@apache.org
ViewVC Help
Powered by ViewVC 1.1.2