/[Apache-SVN]/spamassassin/trunk/lib/Mail/SpamAssassin/Plugin/URIDNSBL.pm
ViewVC logotype

Contents of /spamassassin/trunk/lib/Mail/SpamAssassin/Plugin/URIDNSBL.pm

Parent Directory Parent Directory | Revision Log Revision Log


Revision 721524 - (hide annotations)
Fri Nov 28 16:02:30 2008 UTC (11 months, 3 weeks ago) by jm
File size: 25492 byte(s)
bug 6020: if no URIBLs are already defined, no NSRHSBL lookup would happen. fix
1 quinlan 149177 # <@LICENSE>
2 felicity 431796 # Licensed to the Apache Software Foundation (ASF) under one or more
3     # contributor license agreements. See the NOTICE file distributed with
4     # this work for additional information regarding copyright ownership.
5     # The ASF licenses this file to you under the Apache License, Version 2.0
6     # (the "License"); you may not use this file except in compliance with
7     # the License. You may obtain a copy of the License at:
8 quinlan 149177 #
9     # http://www.apache.org/licenses/LICENSE-2.0
10     #
11     # Unless required by applicable law or agreed to in writing, software
12     # distributed under the License is distributed on an "AS IS" BASIS,
13     # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14     # See the License for the specific language governing permissions and
15     # limitations under the License.
16     # </@LICENSE>
17    
18 jm 6898 =head1 NAME
19    
20     URIDNSBL - look up URLs against DNS blocklists
21    
22 quinlan 149177 =head1 SYNOPSIS
23    
24     loadplugin Mail::SpamAssassin::Plugin::URIDNSBL
25     uridnsbl URIBL_SBLXBL sbl-xbl.spamhaus.org. TXT
26    
27     =head1 DESCRIPTION
28    
29 jm 6898 This works by analysing message text and HTML for URLs, extracting the
30     domain names from those, querying their NS records in DNS, resolving
31     the hostnames used therein, and querying various DNS blocklists for
32     those IP addresses. This is quite effective.
33    
34 dos 527740 =head1 USER SETTINGS
35 jm 6898
36     =over 4
37    
38 dos 527740 =item uridnsbl_skip_domain domain1 domain2 ...
39    
40     Specify a domain, or a number of domains, which should be skipped for the
41     URIBL checks. This is very useful to specify very common domains which are
42     not going to be listed in URIBLs.
43    
44     =back
45    
46     =head1 RULE DEFINITIONS AND PRIVILEGED SETTINGS
47    
48     =over 4
49    
50 jm 9881 =item uridnsbl NAME_OF_RULE dnsbl_zone lookuptype
51 jm 6898
52     Specify a lookup. C<NAME_OF_RULE> is the name of the rule to be
53     used, C<dnsbl_zone> is the zone to look up IPs in, and C<lookuptype>
54     is the type of lookup (B<TXT> or B<A>). Note that you must also
55 felicity 54021 define a body-eval rule calling C<check_uridnsbl()> to use this.
56 jm 6898
57     Example:
58    
59     uridnsbl URIBL_SBLXBL sbl-xbl.spamhaus.org. TXT
60 felicity 54021 body URIBL_SBLXBL eval:check_uridnsbl('URIBL_SBLXBL')
61 jm 6898 describe URIBL_SBLXBL Contains a URL listed in the SBL/XBL blocklist
62    
63 jm 646805 =item uridnssub NAME_OF_RULE dnsbl_zone lookuptype subtest
64    
65     Specify a DNSBL-style domain lookup with a sub-test. C<NAME_OF_RULE> is the
66     name of the rule to be used, C<dnsbl_zone> is the zone to look up IPs in,
67     and C<lookuptype> is the type of lookup (B<TXT> or B<A>).
68    
69     C<subtest> is the sub-test to run against the returned data. The sub-test may
70     either be an IPv4 dotted address for DNSBLs that return multiple A records or a
71     non-negative decimal number to specify a bitmask for DNSBLs that return a
72     single A record containing a bitmask of results.
73    
74     Note that, as with C<uridnsbl>, you must also define a body-eval rule calling
75     C<check_uridnsbl()> to use this.
76    
77     Example:
78    
79     uridnssub URIBL_DNSBL_4 dnsbl.example.org. A 127.0.0.4
80     uridnssub URIBL_DNSBL_8 dnsbl.example.org. A 8
81    
82 jm 9881 =item urirhsbl NAME_OF_RULE rhsbl_zone lookuptype
83    
84     Specify a RHSBL-style domain lookup. C<NAME_OF_RULE> is the name of the rule
85     to be used, C<rhsbl_zone> is the zone to look up domain names in, and
86     C<lookuptype> is the type of lookup (B<TXT> or B<A>). Note that you must also
87 felicity 54021 define a body-eval rule calling C<check_uridnsbl()> to use this.
88 jm 9881
89     An RHSBL zone is one where the domain name is looked up, as a string; e.g. a
90 quinlan 160276 URI using the domain C<foo.com> will cause a lookup of
91     C<foo.com.uriblzone.net>. Note that hostnames are stripped from the domain
92     used in the URIBL lookup, so the domain C<foo.bar.com> will look up
93     C<bar.com.uriblzone.net>, and C<foo.bar.co.uk> will look up
94     C<bar.co.uk.uriblzone.net>.
95 jm 9881
96 mmartinec 565567 If an URI consists of an IP address instead of a hostname, the IP address is
97     looked up (using the standard reversed quads method) in each C<rhsbl_zone>.
98 quinlan 160276
99 jm 21406 Example:
100    
101     urirhsbl URIBL_RHSBL rhsbl.example.org. TXT
102    
103     =item urirhssub NAME_OF_RULE rhsbl_zone lookuptype subtest
104    
105     Specify a RHSBL-style domain lookup with a sub-test. C<NAME_OF_RULE> is the
106     name of the rule to be used, C<rhsbl_zone> is the zone to look up domain names
107     in, and C<lookuptype> is the type of lookup (B<TXT> or B<A>).
108    
109     C<subtest> is the sub-test to run against the returned data. The sub-test may
110 sidney 433917 either be an IPv4 dotted address for RHSBLs that return multiple A records or a
111 jm 21406 non-negative decimal number to specify a bitmask for RHSBLs that return a
112 sidney 433917 single A record containing a bitmask of results.
113 jm 21406
114 felicity 54021 Note that, as with C<urirhsbl>, you must also define a body-eval rule calling
115 jm 21406 C<check_uridnsbl()> to use this.
116    
117     Example:
118    
119     urirhssub URIBL_RHSBL_4 rhsbl.example.org. A 127.0.0.4
120     urirhssub URIBL_RHSBL_8 rhsbl.example.org. A 8
121    
122 jm 720211 =item urinsrhsbl NAME_OF_RULE rhsbl_zone lookuptype
123    
124     Perform a RHSBL-style domain lookup against the contents of the NS records
125     for each URI. In other words, a URI using the domain C<foo.com> will cause
126     an NS lookup to take place; assuming that domain has an NS of C<ns0.bar.com>,
127     that will cause a lookup of C<bar.com.uriblzone.net>. Note that hostnames
128     are stripped from both the domain used in the URI, and the domain in the
129     lookup.
130    
131     C<NAME_OF_RULE> is the name of the rule to be used, C<rhsbl_zone> is the zone
132     to look up domain names in, and C<lookuptype> is the type of lookup (B<TXT> or
133     B<A>).
134    
135     Note that, as with C<urirhsbl>, you must also define a body-eval rule calling
136     C<check_uridnsbl()> to use this.
137    
138     =item urinsrhssub NAME_OF_RULE rhsbl_zone lookuptype subtest
139    
140     Specify a RHSBL-style domain-NS lookup, as above, with a sub-test.
141     C<NAME_OF_RULE> is the name of the rule to be used, C<rhsbl_zone> is the zone
142     to look up domain names in, and C<lookuptype> is the type of lookup (B<TXT> or
143     B<A>). C<subtest> is the sub-test to run against the returned data; see
144     <urirhssub>.
145    
146     Note that, as with C<urirhsbl>, you must also define a body-eval rule calling
147     C<check_uridnsbl()> to use this.
148    
149 dos 527740 =back
150    
151     =head1 ADMINISTRATOR SETTINGS
152    
153     =over 4
154    
155 jm 6898 =item uridnsbl_max_domains N (default: 20)
156    
157     The maximum number of domains to look up.
158    
159     =back
160    
161 dos 527786 =head1 NOTES
162    
163     The C<uridnsbl_timeout> option has been obsoleted by the C<rbl_timeout>
164     option. See the C<Mail::SpamAssassin::Conf> POD for details on C<rbl_timeout>.
165    
166 jm 6898 =cut
167    
168     package Mail::SpamAssassin::Plugin::URIDNSBL;
169    
170     use Mail::SpamAssassin::Plugin;
171 quinlan 160273 use Mail::SpamAssassin::Constants qw(:ip);
172 quinlan 7001 use Mail::SpamAssassin::Util;
173 jm 720211 use Mail::SpamAssassin::Util::RegistrarBoundaries;
174 quinlan 162095 use Mail::SpamAssassin::Logger;
175 jm 6898 use strict;
176 quinlan 55260 use warnings;
177 jm 6898 use bytes;
178 mmartinec 574664 use re 'taint';
179 jm 6898
180     use vars qw(@ISA);
181     @ISA = qw(Mail::SpamAssassin::Plugin);
182    
183     use constant LOG_COMPLETION_TIMES => 0;
184    
185     # constructor
186     sub new {
187     my $class = shift;
188     my $samain = shift;
189    
190     # some boilerplate...
191     $class = ref($class) || $class;
192     my $self = $class->SUPER::new($samain);
193     bless ($self, $class);
194    
195 jm 161778 # this can be effectively global, at least in each process, safely
196 jm 6898
197 jm 161778 $self->{finished} = { };
198 jm 6898
199     $self->register_eval_rule ("check_uridnsbl");
200 felicity 56728 $self->set_config($samain->{conf});
201 jm 6898
202     return $self;
203     }
204    
205     # this is just a placeholder; in fact the results are dealt with later
206     sub check_uridnsbl {
207     return 0;
208     }
209    
210     # ---------------------------------------------------------------------------
211    
212     # once the metadata is parsed, we can access the URI list. So start off
213     # the lookups here!
214     sub parsed_metadata {
215     my ($self, $opts) = @_;
216     my $scanner = $opts->{permsgstatus};
217    
218 jm 161778 if (!$scanner->is_dns_available()) {
219 jm 6898 $self->{dns_not_available} = 1;
220     return;
221 dos 518664 } else {
222     # due to re-testing dns may become available after being unavailable
223     # DOS: I don't think dns_not_available is even used anymore
224     $self->{dns_not_available} = 0;
225 jm 6898 }
226    
227 felicity 439873 $scanner->{'uridnsbl_activerules'} = { };
228     $scanner->{'uridnsbl_hits'} = { };
229     $scanner->{'uridnsbl_seen_domain'} = { };
230 jm 6898
231 parker 9571 # only hit DNSBLs for active rules (defined and score != 0)
232 felicity 439873 $scanner->{'uridnsbl_active_rules_rhsbl'} = { };
233 jm 720211 $scanner->{'uridnsbl_active_rules_nsrhsbl'} = { };
234 felicity 439873 $scanner->{'uridnsbl_active_rules_revipbl'} = { };
235    
236 jm 6898 foreach my $rulename (keys %{$scanner->{conf}->{uridnsbls}}) {
237 felicity 47438 next unless ($scanner->{conf}->is_rule_active('body_evals',$rulename));
238 jm 9881
239 felicity 439873 my $rulecf = $scanner->{conf}->{uridnsbls}->{$rulename};
240 jm 9881 if ($rulecf->{is_rhsbl}) {
241 felicity 439873 $scanner->{uridnsbl_active_rules_rhsbl}->{$rulename} = 1;
242 jm 720211 } elsif ($rulecf->{is_nsrhsbl}) {
243     $scanner->{uridnsbl_active_rules_nsrhsbl}->{$rulename} = 1;
244 jm 9881 } else {
245 felicity 439873 $scanner->{uridnsbl_active_rules_revipbl}->{$rulename} = 1;
246 jm 9881 }
247 jm 6898 }
248    
249     # get all domains in message
250 felicity 155151
251 felicity 170124 # don't keep dereferencing this
252     my $skip_domains = $scanner->{main}->{conf}->{uridnsbl_skip_domains};
253    
254 dos 573117 # list of hashes to use in order
255 mmartinec 571893 my @uri_ordered;
256 felicity 155151
257     # Generate the full list of html-parsed domains.
258 felicity 169564 my $uris = $scanner->get_uri_detail_list();
259 felicity 155151
260 felicity 157209 # go from uri => info to uri_ordered
261     # 0: a
262     # 1: form
263     # 2: img
264     # 3: !a_empty
265     # 4: parsed
266     # 5: a_empty
267 felicity 169564 while (my($uri, $info) = each %{$uris}) {
268 felicity 170124 # we want to skip mailto: uris
269     next if ($uri =~ /^mailto:/);
270    
271     # no domains were found via this uri, so skip
272     next unless ($info->{domains});
273    
274 felicity 157209 my $entry = 3;
275    
276     if ($info->{types}->{a}) {
277     $entry = 5;
278    
279     # determine a vs a_empty
280     foreach my $at (@{$info->{anchor_text}}) {
281     if (length $at) {
282     $entry = 0;
283     last;
284     }
285     }
286 felicity 155151 }
287 felicity 157209 elsif ($info->{types}->{form}) {
288     $entry = 1;
289     }
290     elsif ($info->{types}->{img}) {
291     $entry = 2;
292     }
293 felicity 169570 elsif ($info->{types}->{parsed} && (keys %{$info->{types}} == 1)) {
294 felicity 169564 $entry = 4;
295     }
296 felicity 155151
297 felicity 170124 # take the usable domains and add to the ordered list
298     foreach ( keys %{ $info->{domains} } ) {
299     if (exists $skip_domains->{$_}) {
300     dbg("uridnsbl: domain $_ in skip list");
301     next;
302     }
303     $uri_ordered[$entry]->{$_} = 1;
304     }
305 felicity 155151 }
306    
307 felicity 170124 # at this point, @uri_ordered is an ordered array of uri hashes
308 felicity 155151
309 mmartinec 571893 my %domlist;
310 felicity 439873 my $umd = $scanner->{main}->{conf}->{uridnsbl_max_domains};
311     while (keys %domlist < $umd && @uri_ordered) {
312 felicity 155151 my $array = shift @uri_ordered;
313 felicity 157209 next unless $array;
314    
315 felicity 170124 # run through and find the new domains in this grouping
316     my @domains = grep(!$domlist{$_}, keys %{$array});
317     next unless @domains;
318 felicity 155151
319     # the new domains are all useful, just add them in
320 felicity 439873 if (keys(%domlist) + @domains <= $umd) {
321 felicity 170124 foreach (@domains) {
322 felicity 155151 $domlist{$_} = 1;
323 felicity 47380 }
324 felicity 155151 }
325     else {
326     # trim down to a limited number - pick randomly
327     my $i;
328 felicity 439873 while (@domains && keys %domlist < $umd) {
329 felicity 170124 my $r = int rand (scalar @domains);
330     $domlist{splice (@domains, $r, 1)} = 1;
331 felicity 47380 }
332     }
333 jm 6898 }
334    
335     # and query
336 felicity 155151 dbg("uridnsbl: domains to query: ".join(' ',keys %domlist));
337     foreach my $dom (keys %domlist) {
338 felicity 439873 $self->query_domain ($scanner, $dom);
339 jm 6898 }
340    
341     return 1;
342     }
343    
344 felicity 56728 sub set_config {
345     my($self, $conf) = @_;
346 mmartinec 571893 my @cmds;
347 jm 6898
348 felicity 56728 push(@cmds, {
349     setting => 'uridnsbl_max_domains',
350 dos 527740 is_admin => 1,
351 felicity 56728 default => 20,
352     type => $Mail::SpamAssassin::Conf::CONF_TYPE_NUMERIC,
353     });
354 jm 6898
355 felicity 56728 push (@cmds, {
356     setting => 'uridnsbl',
357 dos 527740 is_priv => 1,
358 felicity 56728 code => sub {
359     my ($self, $key, $value, $line) = @_;
360     if ($value =~ /^(\S+)\s+(\S+)\s+(\S+)$/) {
361     my $rulename = $1;
362     my $zone = $2;
363     my $type = $3;
364     $self->{uridnsbls}->{$rulename} = {
365     zone => $zone, type => $type,
366     is_rhsbl => 0
367     };
368     }
369 dos 178359 elsif ($value =~ /^$/) {
370     return $Mail::SpamAssassin::Conf::MISSING_REQUIRED_VALUE;
371     }
372     else {
373     return $Mail::SpamAssassin::Conf::INVALID_VALUE;
374     }
375 jm 6898 }
376 felicity 56728 });
377 felicity 55981
378 felicity 56728 push (@cmds, {
379 jm 646805 setting => 'uridnssub',
380     is_priv => 1,
381     code => sub {
382     my ($self, $key, $value, $line) = @_;
383     if ($value =~ /^(\S+)\s+(\S+)\s+(\S+)\s+(\d{1,10}|\d{1,3}\.\d{1,3}\.\d{1,3}\.\d{1,3})$/) {
384     my $rulename = $1;
385     my $zone = $2;
386     my $type = $3;
387     my $subrule = $4;
388     $self->{uridnsbls}->{$rulename} = {
389     zone => $zone, type => $type,
390     is_rhsbl => 0, is_subrule => 1
391     };
392     $self->{uridnsbl_subs}->{$zone} ||= { };
393     push (@{$self->{uridnsbl_subs}->{$zone}->{$subrule}->{rulenames}}, $rulename);
394     }
395     elsif ($value =~ /^$/) {
396     return $Mail::SpamAssassin::Conf::MISSING_REQUIRED_VALUE;
397     }
398     else {
399     return $Mail::SpamAssassin::Conf::INVALID_VALUE;
400     }
401     }
402     });
403    
404     push (@cmds, {
405 felicity 56728 setting => 'urirhsbl',
406 dos 527740 is_priv => 1,
407 felicity 56728 code => sub {
408     my ($self, $key, $value, $line) = @_;
409     if ($value =~ /^(\S+)\s+(\S+)\s+(\S+)$/) {
410     my $rulename = $1;
411     my $zone = $2;
412     my $type = $3;
413     $self->{uridnsbls}->{$rulename} = {
414     zone => $zone, type => $type,
415     is_rhsbl => 1
416     };
417     }
418 dos 178359 elsif ($value =~ /^$/) {
419     return $Mail::SpamAssassin::Conf::MISSING_REQUIRED_VALUE;
420     }
421     else {
422     return $Mail::SpamAssassin::Conf::INVALID_VALUE;
423     }
424 jm 9881 }
425 felicity 56728 });
426 felicity 55981
427 felicity 56728 push (@cmds, {
428     setting => 'urirhssub',
429 dos 527740 is_priv => 1,
430 felicity 56728 code => sub {
431     my ($self, $key, $value, $line) = @_;
432 sidney 433917 if ($value =~ /^(\S+)\s+(\S+)\s+(\S+)\s+(\d{1,10}|\d{1,3}\.\d{1,3}\.\d{1,3}\.\d{1,3})$/) {
433 felicity 56728 my $rulename = $1;
434     my $zone = $2;
435     my $type = $3;
436     my $subrule = $4;
437     $self->{uridnsbls}->{$rulename} = {
438     zone => $zone, type => $type,
439     is_rhsbl => 1, is_subrule => 1
440     };
441     $self->{uridnsbl_subs}->{$zone} ||= { };
442 dos 543724 push (@{$self->{uridnsbl_subs}->{$zone}->{$subrule}->{rulenames}}, $rulename);
443 felicity 56728 }
444 dos 178359 elsif ($value =~ /^$/) {
445     return $Mail::SpamAssassin::Conf::MISSING_REQUIRED_VALUE;
446     }
447     else {
448     return $Mail::SpamAssassin::Conf::INVALID_VALUE;
449     }
450 jm 21406 }
451 felicity 56728 });
452 felicity 55981
453 felicity 56728 push (@cmds, {
454 jm 720211 setting => 'urinsrhsbl',
455     is_priv => 1,
456     code => sub {
457     my ($self, $key, $value, $line) = @_;
458     if ($value =~ /^(\S+)\s+(\S+)\s+(\S+)$/) {
459     my $rulename = $1;
460     my $zone = $2;
461     my $type = $3;
462     $self->{uridnsbls}->{$rulename} = {
463     zone => $zone, type => $type,
464     is_nsrhsbl => 1
465     };
466     }
467     elsif ($value =~ /^$/) {
468     return $Mail::SpamAssassin::Conf::MISSING_REQUIRED_VALUE;
469     }
470     else {
471     return $Mail::SpamAssassin::Conf::INVALID_VALUE;
472     }
473     }
474     });
475    
476     push (@cmds, {
477     setting => 'urinsrhssub',
478     is_priv => 1,
479     code => sub {
480     my ($self, $key, $value, $line) = @_;
481     if ($value =~ /^(\S+)\s+(\S+)\s+(\S+)\s+(\d{1,10}|\d{1,3}\.\d{1,3}\.\d{1,3}\.\d{1,3})$/) {
482     my $rulename = $1;
483     my $zone = $2;
484     my $type = $3;
485     my $subrule = $4;
486     $self->{uridnsbls}->{$rulename} = {
487     zone => $zone, type => $type,
488     is_nsrhsbl => 1, is_subrule => 1
489     };
490     $self->{uridnsbl_subs}->{$zone} ||= { };
491     push (@{$self->{uridnsbl_subs}->{$zone}->{$subrule}->{rulenames}}, $rulename);
492     }
493     elsif ($value =~ /^$/) {
494     return $Mail::SpamAssassin::Conf::MISSING_REQUIRED_VALUE;
495     }
496     else {
497     return $Mail::SpamAssassin::Conf::INVALID_VALUE;
498     }
499     }
500     });
501    
502     push (@cmds, {
503 felicity 56728 setting => 'uridnsbl_skip_domain',
504     default => {},
505     code => sub {
506     my ($self, $key, $value, $line) = @_;
507 dos 178359 if ($value =~ /^$/) {
508     return $Mail::SpamAssassin::Conf::MISSING_REQUIRED_VALUE;
509     }
510 felicity 56728 foreach my $domain (split(/\s+/, $value)) {
511     $self->{uridnsbl_skip_domains}->{lc $domain} = 1;
512     }
513 felicity 47380 }
514 felicity 56728 });
515 felicity 55981
516 jm 527859 # obsolete
517     push(@cmds, {
518     setting => 'uridnsbl_timeout',
519     code => sub {
520     # not a lint_warn(), since it's pretty harmless and we don't want
521     # to break stuff like sa-update
522     warn("config: 'uridnsbl_timeout' is obsolete, use 'rbl_timeout' instead");
523     return 0;
524     }
525     });
526    
527 felicity 56728 $conf->{parser}->register_commands(\@cmds);
528 jm 6898 }
529    
530     # ---------------------------------------------------------------------------
531    
532     sub query_domain {
533 felicity 439873 my ($self, $scanner, $dom) = @_;
534 jm 6898
535 quinlan 51813 #warn "uridnsbl: domain $dom\n";
536     #return;
537 jm 6898
538     $dom = lc $dom;
539 felicity 439873 return if $scanner->{uridnsbl_seen_domain}->{$dom};
540     $scanner->{uridnsbl_seen_domain}->{$dom} = 1;
541 quinlan 160273 $self->log_dns_result("querying domain $dom");
542 jm 6898
543 mmartinec 567684 my $obj = { dom => $dom };
544 jm 6898
545 quinlan 160273 my $single_dnsbl = 0;
546 jm 394351 if ($dom =~ /^\d+\.\d+\.\d+\.\d+$/) {
547 quinlan 160273 my $IPV4_ADDRESS = IPV4_ADDRESS;
548     my $IP_PRIVATE = IP_PRIVATE;
549     # only look up the IP if it is public and valid
550     if ($dom =~ /^$IPV4_ADDRESS$/ && $dom !~ /^$IP_PRIVATE$/) {
551 felicity 439873 $self->lookup_dnsbl_for_ip($scanner, $obj, $dom);
552 quinlan 160273 # and check the IP in RHSBLs too
553     if ($dom =~ /^(\d+)\.(\d+)\.(\d+)\.(\d+)$/) {
554     $dom = "$4.$3.$2.$1";
555     $single_dnsbl = 1;
556     }
557     }
558 jm 9881 }
559 felicity 47380 else {
560 quinlan 160273 $single_dnsbl = 1;
561     }
562    
563 jm 596095 my $rhsblrules = $scanner->{uridnsbl_active_rules_rhsbl};
564 jm 720211 my $nsrhsblrules = $scanner->{uridnsbl_active_rules_nsrhsbl};
565 jm 596095 my $reviprules = $scanner->{uridnsbl_active_rules_revipbl};
566    
567 quinlan 160273 if ($single_dnsbl) {
568 jm 9881 # look up the domain in the RHSBL subset
569 jm 596095 foreach my $rulename (keys %{$rhsblrules}) {
570 felicity 439873 my $rulecf = $scanner->{conf}->{uridnsbls}->{$rulename};
571     $self->lookup_single_dnsbl($scanner, $obj, $rulename,
572 quinlan 160273 $dom, $rulecf->{zone}, $rulecf->{type});
573 jm 394351
574     # see comment below
575 felicity 439873 $scanner->register_async_rule_start($rulename);
576 jm 9881 }
577    
578 jm 596095 # perform NS, A lookups to look up the domain in the non-RHSBL subset,
579     # but only if there are active reverse-IP-URIBL rules
580 jm 721524 if ($dom !~ /^\d+\.\d+\.\d+\.\d+$/ &&
581     (scalar keys %{$reviprules} || scalar keys %{$nsrhsblrules}))
582     {
583 felicity 439873 $self->lookup_domain_ns($scanner, $obj, $dom);
584 quinlan 160273 }
585 jm 6898 }
586 jm 394351
587     # note that these rules are now underway. important: unless the
588     # rule hits, in the current design, these will not be considered
589     # "finished" until harvest_dnsbl_queries() completes
590 jm 596095 foreach my $rulename (keys %{$reviprules}) {
591 felicity 439873 $scanner->register_async_rule_start($rulename);
592 jm 394351 }
593 jm 6898 }
594    
595     # ---------------------------------------------------------------------------
596    
597     sub lookup_domain_ns {
598 felicity 439873 my ($self, $scanner, $obj, $dom) = @_;
599 jm 6898
600     my $key = "NS:".$dom;
601 felicity 439873 return if $scanner->{async}->get_lookup($key);
602 jm 6898
603     # dig $dom ns
604 mmartinec 585292 my $ent = $self->start_lookup($scanner, $dom, 'NS',
605 mmartinec 564636 $self->res_bgsend($scanner, $dom, 'NS', $key),
606     $key);
607 jm 6898 $ent->{obj} = $obj;
608     }
609    
610     sub complete_ns_lookup {
611 felicity 439873 my ($self, $scanner, $ent, $dom) = @_;
612 jm 7002
613 jm 161157 my $packet = $ent->{response_packet};
614 mmartinec 582903 my @answer = !defined $packet ? () : $packet->answer;
615 jm 6898
616 quinlan 160273 my $IPV4_ADDRESS = IPV4_ADDRESS;
617     my $IP_PRIVATE = IP_PRIVATE;
618 jm 720211 my $nsrhsblrules = $scanner->{uridnsbl_active_rules_nsrhsbl};
619 quinlan 160273
620 jm 6898 foreach my $rr (@answer) {
621     my $str = $rr->string;
622     next unless (defined($str) && defined($dom));
623     $self->log_dns_result ("NSs for $dom: $str");
624    
625     if ($str =~ /IN\s+NS\s+(\S+)/) {
626 quinlan 160273 my $nsmatch = $1;
627 jm 720211 my $nsrhblstr = $nsmatch;
628 quinlan 160273
629     if ($nsmatch =~ /^\d+\.\d+\.\d+\.\d+\.?$/) {
630     $nsmatch =~ s/\.$//;
631     # only look up the IP if it is public and valid
632     if ($nsmatch =~ /^$IPV4_ADDRESS$/ && $nsmatch !~ /^$IP_PRIVATE$/) {
633 felicity 439873 $self->lookup_dnsbl_for_ip($scanner, $ent->{obj}, $nsmatch);
634 quinlan 160273 }
635 jm 720211 $nsrhblstr = $nsmatch;
636 quinlan 160273 }
637     else {
638 felicity 439873 $self->lookup_a_record($scanner, $ent->{obj}, $nsmatch);
639 jm 720211 $nsrhblstr = Mail::SpamAssassin::Util::RegistrarBoundaries::trim_domain($nsmatch);
640 quinlan 160273 }
641 jm 720211
642     foreach my $rulename (keys %{$nsrhsblrules}) {
643     my $rulecf = $scanner->{conf}->{uridnsbls}->{$rulename};
644     $self->lookup_single_dnsbl($scanner, $ent->{obj}, $rulename,
645     $nsrhblstr, $rulecf->{zone}, $rulecf->{type});
646    
647     $scanner->register_async_rule_start($rulename);
648     }
649 jm 6898 }
650     }
651     }
652    
653     # ---------------------------------------------------------------------------
654    
655     sub lookup_a_record {
656 felicity 439873 my ($self, $scanner, $obj, $hname) = @_;
657 jm 6898
658     my $key = "A:".$hname;
659 felicity 439873 return if $scanner->{async}->get_lookup($key);
660 jm 6898
661     # dig $hname a
662 mmartinec 585292 my $ent = $self->start_lookup($scanner, $hname, 'A',
663 mmartinec 564636 $self->res_bgsend($scanner, $hname, 'A', $key),
664     $key);
665 jm 6898 $ent->{obj} = $obj;
666     }
667    
668     sub complete_a_lookup {
669 felicity 439873 my ($self, $scanner, $ent, $hname) = @_;
670 jm 6898
671 mmartinec 582903 my $packet = $ent->{response_packet};
672     my @answer = !defined $packet ? () : $packet->answer;
673     foreach my $rr (@answer) {
674 jm 6898 my $str = $rr->string;
675     $self->log_dns_result ("A for NS $hname: $str");
676    
677     if ($str =~ /IN\s+A\s+(\S+)/) {
678 felicity 439873 $self->lookup_dnsbl_for_ip($scanner, $ent->{obj}, $1);
679 jm 6898 }
680     }
681     }
682    
683     # ---------------------------------------------------------------------------
684    
685     sub lookup_dnsbl_for_ip {
686 felicity 439873 my ($self, $scanner, $obj, $ip) = @_;
687 jm 6898
688     $ip =~ /^(\d+)\.(\d+)\.(\d+)\.(\d+)$/;
689     my $revip = "$4.$3.$2.$1";
690    
691 felicity 439873 my $cf = $scanner->{uridnsbl_active_rules_revipbl};
692 jm 6898 foreach my $rulename (keys %{$cf}) {
693 felicity 439873 my $rulecf = $scanner->{conf}->{uridnsbls}->{$rulename};
694     $self->lookup_single_dnsbl($scanner, $obj, $rulename,
695 quinlan 160273 $revip, $rulecf->{zone}, $rulecf->{type});
696 jm 6898 }
697     }
698    
699     sub lookup_single_dnsbl {
700 felicity 439873 my ($self, $scanner, $obj, $rulename, $lookupstr, $dnsbl, $qtype) = @_;
701 jm 6898
702 jm 9881 my $key = "DNSBL:".$dnsbl.":".$lookupstr;
703 felicity 439873 return if $scanner->{async}->get_lookup($key);
704 jm 9881 my $item = $lookupstr.".".$dnsbl;
705 jm 6898
706     # dig $ip txt
707 mmartinec 585292 my $ent = $self->start_lookup($scanner, $item, 'DNSBL',
708 mmartinec 564636 $self->res_bgsend($scanner, $item, $qtype, $key),
709     $key);
710 jm 6898 $ent->{obj} = $obj;
711     $ent->{rulename} = $rulename;
712 jm 21406 $ent->{zone} = $dnsbl;
713 jm 6898 }
714    
715     sub complete_dnsbl_lookup {
716 felicity 439873 my ($self, $scanner, $ent, $dnsblip) = @_;
717 jm 6898
718 felicity 439873 my $conf = $scanner->{conf};
719 mmartinec 571893 my @subtests;
720 jm 21406 my $rulename = $ent->{rulename};
721     my $rulecf = $conf->{uridnsbls}->{$rulename};
722    
723 jm 161157 my $packet = $ent->{response_packet};
724 mmartinec 582903 my @answer = !defined $packet ? () : $packet->answer;
725 jm 161157
726 quinlan 125719 my $uridnsbl_subs = $conf->{uridnsbl_subs}->{$ent->{zone}};
727 jm 21406 foreach my $rr (@answer)
728     {
729     next if ($rr->type ne 'A' && $rr->type ne 'TXT');
730    
731     my $rdatastr = $rr->rdatastr;
732 jm 6898 my $dom = $ent->{obj}->{dom};
733    
734 jm 21406 if (!$rulecf->{is_subrule}) {
735     # this zone is a simple rule, not a set of subrules
736 quinlan 125719 # skip any A record that isn't on 127/8
737 jm 158887 if ($rr->type eq 'A' && $rr->rdatastr !~ /^127\./) {
738 jm 165017 warn("uridnsbl: bogus rr for domain=$dom, rule=$rulename, id=" .
739     $packet->header->id." rr=".$rr->string);
740 quinlan 158898 next;
741 jm 158887 }
742 felicity 439873 $self->got_dnsbl_hit($scanner, $ent, $rdatastr, $dom, $rulename);
743 jm 21406 }
744     else {
745     foreach my $subtest (keys (%{$uridnsbl_subs}))
746     {
747     if ($subtest eq $rdatastr) {
748 dos 543724 foreach my $subrulename (@{$uridnsbl_subs->{$subtest}->{rulenames}}) {
749     $self->got_dnsbl_hit($scanner, $ent, $rdatastr, $dom, $subrulename);
750     }
751 jm 21406 }
752     # bitmask
753     elsif ($subtest =~ /^\d+$/) {
754 sidney 433917 if ($rdatastr =~ m/^\d{1,3}\.\d{1,3}\.\d{1,3}\.\d{1,3}$/ &&
755     Mail::SpamAssassin::Util::my_inet_aton($rdatastr) & $subtest)
756 jm 21406 {
757 dos 543724 foreach my $subrulename (@{$uridnsbl_subs->{$subtest}->{rulenames}}) {
758     $self->got_dnsbl_hit($scanner, $ent, $rdatastr, $dom, $subrulename);
759     }
760 jm 21406 }
761     }
762     }
763     }
764 jm 6898 }
765     }
766    
767 jm 21406 sub got_dnsbl_hit {
768 felicity 439873 my ($self, $scanner, $ent, $str, $dom, $rulename) = @_;
769 jm 21406
770     $str =~ s/\s+/ /gs; # long whitespace => short
771 quinlan 51813 dbg("uridnsbl: domain \"$dom\" listed ($rulename): $str");
772 jm 21406
773 felicity 439873 if (!defined $scanner->{uridnsbl_hits}->{$rulename}) {
774     $scanner->{uridnsbl_hits}->{$rulename} = { };
775 jm 21406 };
776 felicity 439873 $scanner->{uridnsbl_hits}->{$rulename}->{$dom} = 1;
777 jm 394348
778 felicity 439873 if ($scanner->{uridnsbl_active_rules_revipbl}->{$rulename}
779 jm 720211 || $scanner->{uridnsbl_active_rules_nsrhsbl}->{$rulename}
780 felicity 439873 || $scanner->{uridnsbl_active_rules_rhsbl}->{$rulename})
781 jm 394348 {
782     # TODO: this needs to handle multiple domain hits per rule
783 felicity 439873 $scanner->clear_test_state();
784     my $uris = join (' ', keys %{$scanner->{uridnsbl_hits}->{$rulename}});
785     $scanner->test_log ("URIs: $uris");
786     $scanner->got_hit ($rulename, "");
787 jm 394351
788     # note that this rule has completed (since it got at least 1 hit)
789 felicity 439873 $scanner->register_async_rule_finish($rulename);
790 jm 394348 }
791 jm 21406 }
792    
793 jm 6898 # ---------------------------------------------------------------------------
794    
795     sub start_lookup {
796 mmartinec 585292 my ($self, $scanner, $zone, $type, $id, $key) = @_;
797 jm 394348
798 jm 6898 my $ent = {
799 jm 394348 key => $key,
800 mmartinec 585292 zone => $zone, # serves to fetch other per-zone settings
801 jm 394348 type => "URI-".$type,
802     id => $id,
803     completed_callback => sub {
804     my $ent = shift;
805 mmartinec 580055 if (defined $ent->{response_packet}) { # not aborted or empty
806     $self->completed_lookup_callback ($scanner, $ent);
807     }
808 jm 394348 }
809 jm 6898 };
810 felicity 439873 $scanner->{async}->start_lookup($ent);
811 jm 394348 return $ent;
812 jm 6898 }
813    
814 jm 394348 sub completed_lookup_callback {
815 felicity 439873 my ($self, $scanner, $ent) = @_;
816 jm 394348 my $type = $ent->{type};
817     my $key = $ent->{key};
818     $key =~ /:(\S+?)$/; my $val = $1;
819 jm 6898
820 jm 394348 if ($type eq 'URI-NS') {
821 felicity 439873 $self->complete_ns_lookup ($scanner, $ent, $val);
822 jm 6898 }
823 jm 394348 elsif ($type eq 'URI-A') {
824 felicity 439873 $self->complete_a_lookup ($scanner, $ent, $val);
825 jm 6898 }
826 jm 394348 elsif ($type eq 'URI-DNSBL') {
827 felicity 439873 $self->complete_dnsbl_lookup ($scanner, $ent, $val);
828 jm 6898 }
829     }
830    
831     # ---------------------------------------------------------------------------
832    
833 jm 161148 sub res_bgsend {
834 mmartinec 564636 my ($self, $scanner, $host, $type, $key) = @_;
835 jm 161778
836     return $self->{main}->{resolver}->bgsend($host, $type, undef, sub {
837 mmartinec 564636 my ($pkt, $id, $timestamp) = @_;
838     $scanner->{async}->set_response_packet($id, $pkt, $key, $timestamp);
839 jm 161778 });
840 jm 161148 }
841    
842 jm 6898 sub log_dns_result {
843 felicity 57557 #my $self = shift;
844 quinlan 51813 #Mail::SpamAssassin::dbg("uridnsbl: ".join (' ', @_));
845 jm 6898 }
846    
847     # ---------------------------------------------------------------------------
848    
849     1;

apache@apache.org
ViewVC Help
Powered by ViewVC 1.1.2