/[Apache-SVN]/spamassassin/trunk/lib/Mail/SpamAssassin/Plugin/URIDNSBL.pm
ViewVC logotype

Contents of /spamassassin/trunk/lib/Mail/SpamAssassin/Plugin/URIDNSBL.pm

Parent Directory Parent Directory | Revision Log Revision Log


Revision 818443 - (hide annotations)
Thu Sep 24 10:57:17 2009 UTC (2 months ago) by jm
File size: 28973 byte(s)
bug 6205: add test to ensure that all config settings are correctly handled when switching between users; add more config setting type metadata to enable those tests to work; and fix URIDetail to store config on the {conf} object, not on the plugin.
1 quinlan 149177 # <@LICENSE>
2 felicity 431796 # Licensed to the Apache Software Foundation (ASF) under one or more
3     # contributor license agreements. See the NOTICE file distributed with
4     # this work for additional information regarding copyright ownership.
5     # The ASF licenses this file to you under the Apache License, Version 2.0
6     # (the "License"); you may not use this file except in compliance with
7     # the License. You may obtain a copy of the License at:
8 quinlan 149177 #
9     # http://www.apache.org/licenses/LICENSE-2.0
10     #
11     # Unless required by applicable law or agreed to in writing, software
12     # distributed under the License is distributed on an "AS IS" BASIS,
13     # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14     # See the License for the specific language governing permissions and
15     # limitations under the License.
16     # </@LICENSE>
17    
18 jm 6898 =head1 NAME
19    
20     URIDNSBL - look up URLs against DNS blocklists
21    
22 quinlan 149177 =head1 SYNOPSIS
23    
24     loadplugin Mail::SpamAssassin::Plugin::URIDNSBL
25     uridnsbl URIBL_SBLXBL sbl-xbl.spamhaus.org. TXT
26    
27     =head1 DESCRIPTION
28    
29 jm 6898 This works by analysing message text and HTML for URLs, extracting the
30     domain names from those, querying their NS records in DNS, resolving
31     the hostnames used therein, and querying various DNS blocklists for
32     those IP addresses. This is quite effective.
33    
34 dos 527740 =head1 USER SETTINGS
35 jm 6898
36     =over 4
37    
38 dos 527740 =item uridnsbl_skip_domain domain1 domain2 ...
39    
40     Specify a domain, or a number of domains, which should be skipped for the
41     URIBL checks. This is very useful to specify very common domains which are
42     not going to be listed in URIBLs.
43    
44     =back
45    
46     =head1 RULE DEFINITIONS AND PRIVILEGED SETTINGS
47    
48     =over 4
49    
50 jm 9881 =item uridnsbl NAME_OF_RULE dnsbl_zone lookuptype
51 jm 6898
52     Specify a lookup. C<NAME_OF_RULE> is the name of the rule to be
53     used, C<dnsbl_zone> is the zone to look up IPs in, and C<lookuptype>
54     is the type of lookup (B<TXT> or B<A>). Note that you must also
55 felicity 54021 define a body-eval rule calling C<check_uridnsbl()> to use this.
56 jm 6898
57     Example:
58    
59     uridnsbl URIBL_SBLXBL sbl-xbl.spamhaus.org. TXT
60 felicity 54021 body URIBL_SBLXBL eval:check_uridnsbl('URIBL_SBLXBL')
61 jm 6898 describe URIBL_SBLXBL Contains a URL listed in the SBL/XBL blocklist
62    
63 jm 646805 =item uridnssub NAME_OF_RULE dnsbl_zone lookuptype subtest
64    
65     Specify a DNSBL-style domain lookup with a sub-test. C<NAME_OF_RULE> is the
66     name of the rule to be used, C<dnsbl_zone> is the zone to look up IPs in,
67     and C<lookuptype> is the type of lookup (B<TXT> or B<A>).
68    
69     C<subtest> is the sub-test to run against the returned data. The sub-test may
70     either be an IPv4 dotted address for DNSBLs that return multiple A records or a
71     non-negative decimal number to specify a bitmask for DNSBLs that return a
72     single A record containing a bitmask of results.
73    
74     Note that, as with C<uridnsbl>, you must also define a body-eval rule calling
75     C<check_uridnsbl()> to use this.
76    
77     Example:
78    
79     uridnssub URIBL_DNSBL_4 dnsbl.example.org. A 127.0.0.4
80     uridnssub URIBL_DNSBL_8 dnsbl.example.org. A 8
81    
82 jm 9881 =item urirhsbl NAME_OF_RULE rhsbl_zone lookuptype
83    
84     Specify a RHSBL-style domain lookup. C<NAME_OF_RULE> is the name of the rule
85     to be used, C<rhsbl_zone> is the zone to look up domain names in, and
86     C<lookuptype> is the type of lookup (B<TXT> or B<A>). Note that you must also
87 felicity 54021 define a body-eval rule calling C<check_uridnsbl()> to use this.
88 jm 9881
89     An RHSBL zone is one where the domain name is looked up, as a string; e.g. a
90 quinlan 160276 URI using the domain C<foo.com> will cause a lookup of
91     C<foo.com.uriblzone.net>. Note that hostnames are stripped from the domain
92     used in the URIBL lookup, so the domain C<foo.bar.com> will look up
93     C<bar.com.uriblzone.net>, and C<foo.bar.co.uk> will look up
94     C<bar.co.uk.uriblzone.net>.
95 jm 9881
96 mmartinec 565567 If an URI consists of an IP address instead of a hostname, the IP address is
97     looked up (using the standard reversed quads method) in each C<rhsbl_zone>.
98 quinlan 160276
99 jm 21406 Example:
100    
101     urirhsbl URIBL_RHSBL rhsbl.example.org. TXT
102    
103     =item urirhssub NAME_OF_RULE rhsbl_zone lookuptype subtest
104    
105     Specify a RHSBL-style domain lookup with a sub-test. C<NAME_OF_RULE> is the
106     name of the rule to be used, C<rhsbl_zone> is the zone to look up domain names
107     in, and C<lookuptype> is the type of lookup (B<TXT> or B<A>).
108    
109     C<subtest> is the sub-test to run against the returned data. The sub-test may
110 sidney 433917 either be an IPv4 dotted address for RHSBLs that return multiple A records or a
111 jm 21406 non-negative decimal number to specify a bitmask for RHSBLs that return a
112 sidney 433917 single A record containing a bitmask of results.
113 jm 21406
114 felicity 54021 Note that, as with C<urirhsbl>, you must also define a body-eval rule calling
115 jm 21406 C<check_uridnsbl()> to use this.
116    
117     Example:
118    
119     urirhssub URIBL_RHSBL_4 rhsbl.example.org. A 127.0.0.4
120     urirhssub URIBL_RHSBL_8 rhsbl.example.org. A 8
121    
122 jm 720211 =item urinsrhsbl NAME_OF_RULE rhsbl_zone lookuptype
123    
124     Perform a RHSBL-style domain lookup against the contents of the NS records
125     for each URI. In other words, a URI using the domain C<foo.com> will cause
126     an NS lookup to take place; assuming that domain has an NS of C<ns0.bar.com>,
127     that will cause a lookup of C<bar.com.uriblzone.net>. Note that hostnames
128     are stripped from both the domain used in the URI, and the domain in the
129     lookup.
130    
131     C<NAME_OF_RULE> is the name of the rule to be used, C<rhsbl_zone> is the zone
132     to look up domain names in, and C<lookuptype> is the type of lookup (B<TXT> or
133     B<A>).
134    
135     Note that, as with C<urirhsbl>, you must also define a body-eval rule calling
136     C<check_uridnsbl()> to use this.
137    
138     =item urinsrhssub NAME_OF_RULE rhsbl_zone lookuptype subtest
139    
140     Specify a RHSBL-style domain-NS lookup, as above, with a sub-test.
141     C<NAME_OF_RULE> is the name of the rule to be used, C<rhsbl_zone> is the zone
142     to look up domain names in, and C<lookuptype> is the type of lookup (B<TXT> or
143     B<A>). C<subtest> is the sub-test to run against the returned data; see
144     <urirhssub>.
145    
146     Note that, as with C<urirhsbl>, you must also define a body-eval rule calling
147     C<check_uridnsbl()> to use this.
148    
149 jm 728782 =item urifullnsrhsbl NAME_OF_RULE rhsbl_zone lookuptype
150    
151     Perform a RHSBL-style domain lookup against the contents of the NS records for
152     each URI. In other words, a URI using the domain C<foo.com> will cause an NS
153     lookup to take place; assuming that domain has an NS of C<ns0.bar.com>, that
154     will cause a lookup of C<ns0.bar.com.uriblzone.net>. Note that hostnames are
155     stripped from the domain used in the URI.
156    
157     C<NAME_OF_RULE> is the name of the rule to be used, C<rhsbl_zone> is the zone
158     to look up domain names in, and C<lookuptype> is the type of lookup (B<TXT> or
159     B<A>).
160    
161     Note that, as with C<urirhsbl>, you must also define a body-eval rule calling
162     C<check_uridnsbl()> to use this.
163    
164     =item urifullnsrhssub NAME_OF_RULE rhsbl_zone lookuptype subtest
165    
166     Specify a RHSBL-style domain-NS lookup, as above, with a sub-test.
167     C<NAME_OF_RULE> is the name of the rule to be used, C<rhsbl_zone> is the zone
168     to look up domain names in, and C<lookuptype> is the type of lookup (B<TXT> or
169     B<A>). C<subtest> is the sub-test to run against the returned data; see
170     <urirhssub>.
171    
172     Note that, as with C<urirhsbl>, you must also define a body-eval rule calling
173     C<check_uridnsbl()> to use this.
174    
175 dos 527740 =back
176    
177     =head1 ADMINISTRATOR SETTINGS
178    
179     =over 4
180    
181 jm 6898 =item uridnsbl_max_domains N (default: 20)
182    
183     The maximum number of domains to look up.
184    
185     =back
186    
187 dos 527786 =head1 NOTES
188    
189     The C<uridnsbl_timeout> option has been obsoleted by the C<rbl_timeout>
190     option. See the C<Mail::SpamAssassin::Conf> POD for details on C<rbl_timeout>.
191    
192 jm 6898 =cut
193    
194     package Mail::SpamAssassin::Plugin::URIDNSBL;
195    
196     use Mail::SpamAssassin::Plugin;
197 quinlan 160273 use Mail::SpamAssassin::Constants qw(:ip);
198 quinlan 7001 use Mail::SpamAssassin::Util;
199 jm 720211 use Mail::SpamAssassin::Util::RegistrarBoundaries;
200 quinlan 162095 use Mail::SpamAssassin::Logger;
201 jm 6898 use strict;
202 quinlan 55260 use warnings;
203 jm 6898 use bytes;
204 mmartinec 574664 use re 'taint';
205 jm 6898
206     use vars qw(@ISA);
207     @ISA = qw(Mail::SpamAssassin::Plugin);
208    
209     use constant LOG_COMPLETION_TIMES => 0;
210    
211     # constructor
212     sub new {
213     my $class = shift;
214     my $samain = shift;
215    
216     # some boilerplate...
217     $class = ref($class) || $class;
218     my $self = $class->SUPER::new($samain);
219     bless ($self, $class);
220    
221 jm 161778 # this can be effectively global, at least in each process, safely
222 jm 6898
223 jm 161778 $self->{finished} = { };
224 jm 6898
225     $self->register_eval_rule ("check_uridnsbl");
226 felicity 56728 $self->set_config($samain->{conf});
227 jm 6898
228     return $self;
229     }
230    
231     # this is just a placeholder; in fact the results are dealt with later
232     sub check_uridnsbl {
233     return 0;
234     }
235    
236     # ---------------------------------------------------------------------------
237    
238     # once the metadata is parsed, we can access the URI list. So start off
239     # the lookups here!
240     sub parsed_metadata {
241     my ($self, $opts) = @_;
242     my $scanner = $opts->{permsgstatus};
243    
244 jm 161778 if (!$scanner->is_dns_available()) {
245 jm 6898 $self->{dns_not_available} = 1;
246     return;
247 dos 518664 } else {
248     # due to re-testing dns may become available after being unavailable
249     # DOS: I don't think dns_not_available is even used anymore
250     $self->{dns_not_available} = 0;
251 jm 6898 }
252    
253 felicity 439873 $scanner->{'uridnsbl_activerules'} = { };
254     $scanner->{'uridnsbl_hits'} = { };
255     $scanner->{'uridnsbl_seen_domain'} = { };
256 jm 6898
257 parker 9571 # only hit DNSBLs for active rules (defined and score != 0)
258 felicity 439873 $scanner->{'uridnsbl_active_rules_rhsbl'} = { };
259 jm 720211 $scanner->{'uridnsbl_active_rules_nsrhsbl'} = { };
260 jm 728782 $scanner->{'uridnsbl_active_rules_fullnsrhsbl'} = { };
261 felicity 439873 $scanner->{'uridnsbl_active_rules_revipbl'} = { };
262    
263 jm 6898 foreach my $rulename (keys %{$scanner->{conf}->{uridnsbls}}) {
264 felicity 47438 next unless ($scanner->{conf}->is_rule_active('body_evals',$rulename));
265 jm 9881
266 felicity 439873 my $rulecf = $scanner->{conf}->{uridnsbls}->{$rulename};
267 jm 9881 if ($rulecf->{is_rhsbl}) {
268 felicity 439873 $scanner->{uridnsbl_active_rules_rhsbl}->{$rulename} = 1;
269 jm 728782 } elsif ($rulecf->{is_fullnsrhsbl}) {
270     $scanner->{uridnsbl_active_rules_fullnsrhsbl}->{$rulename} = 1;
271 jm 720211 } elsif ($rulecf->{is_nsrhsbl}) {
272     $scanner->{uridnsbl_active_rules_nsrhsbl}->{$rulename} = 1;
273 jm 9881 } else {
274 felicity 439873 $scanner->{uridnsbl_active_rules_revipbl}->{$rulename} = 1;
275 jm 9881 }
276 jm 6898 }
277    
278     # get all domains in message
279 felicity 155151
280 felicity 170124 # don't keep dereferencing this
281     my $skip_domains = $scanner->{main}->{conf}->{uridnsbl_skip_domains};
282    
283 dos 573117 # list of hashes to use in order
284 mmartinec 571893 my @uri_ordered;
285 felicity 155151
286     # Generate the full list of html-parsed domains.
287 felicity 169564 my $uris = $scanner->get_uri_detail_list();
288 felicity 155151
289 felicity 157209 # go from uri => info to uri_ordered
290     # 0: a
291     # 1: form
292     # 2: img
293     # 3: !a_empty
294     # 4: parsed
295     # 5: a_empty
296 felicity 169564 while (my($uri, $info) = each %{$uris}) {
297 felicity 170124 # we want to skip mailto: uris
298     next if ($uri =~ /^mailto:/);
299    
300     # no domains were found via this uri, so skip
301     next unless ($info->{domains});
302    
303 felicity 157209 my $entry = 3;
304    
305     if ($info->{types}->{a}) {
306     $entry = 5;
307    
308     # determine a vs a_empty
309     foreach my $at (@{$info->{anchor_text}}) {
310     if (length $at) {
311     $entry = 0;
312     last;
313     }
314     }
315 felicity 155151 }
316 felicity 157209 elsif ($info->{types}->{form}) {
317     $entry = 1;
318     }
319     elsif ($info->{types}->{img}) {
320     $entry = 2;
321     }
322 felicity 169570 elsif ($info->{types}->{parsed} && (keys %{$info->{types}} == 1)) {
323 felicity 169564 $entry = 4;
324     }
325 felicity 155151
326 felicity 170124 # take the usable domains and add to the ordered list
327     foreach ( keys %{ $info->{domains} } ) {
328     if (exists $skip_domains->{$_}) {
329     dbg("uridnsbl: domain $_ in skip list");
330     next;
331     }
332     $uri_ordered[$entry]->{$_} = 1;
333     }
334 felicity 155151 }
335    
336 felicity 170124 # at this point, @uri_ordered is an ordered array of uri hashes
337 felicity 155151
338 mmartinec 571893 my %domlist;
339 felicity 439873 my $umd = $scanner->{main}->{conf}->{uridnsbl_max_domains};
340     while (keys %domlist < $umd && @uri_ordered) {
341 felicity 155151 my $array = shift @uri_ordered;
342 felicity 157209 next unless $array;
343    
344 felicity 170124 # run through and find the new domains in this grouping
345     my @domains = grep(!$domlist{$_}, keys %{$array});
346     next unless @domains;
347 felicity 155151
348     # the new domains are all useful, just add them in
349 felicity 439873 if (keys(%domlist) + @domains <= $umd) {
350 felicity 170124 foreach (@domains) {
351 felicity 155151 $domlist{$_} = 1;
352 felicity 47380 }
353 felicity 155151 }
354     else {
355     # trim down to a limited number - pick randomly
356     my $i;
357 felicity 439873 while (@domains && keys %domlist < $umd) {
358 felicity 170124 my $r = int rand (scalar @domains);
359     $domlist{splice (@domains, $r, 1)} = 1;
360 felicity 47380 }
361     }
362 jm 6898 }
363    
364     # and query
365 felicity 155151 dbg("uridnsbl: domains to query: ".join(' ',keys %domlist));
366     foreach my $dom (keys %domlist) {
367 felicity 439873 $self->query_domain ($scanner, $dom);
368 jm 6898 }
369    
370     return 1;
371     }
372    
373 felicity 56728 sub set_config {
374     my($self, $conf) = @_;
375 mmartinec 571893 my @cmds;
376 jm 6898
377 felicity 56728 push(@cmds, {
378     setting => 'uridnsbl_max_domains',
379 dos 527740 is_admin => 1,
380 felicity 56728 default => 20,
381     type => $Mail::SpamAssassin::Conf::CONF_TYPE_NUMERIC,
382     });
383 jm 6898
384 felicity 56728 push (@cmds, {
385     setting => 'uridnsbl',
386 dos 527740 is_priv => 1,
387 felicity 56728 code => sub {
388     my ($self, $key, $value, $line) = @_;
389     if ($value =~ /^(\S+)\s+(\S+)\s+(\S+)$/) {
390     my $rulename = $1;
391     my $zone = $2;
392     my $type = $3;
393     $self->{uridnsbls}->{$rulename} = {
394     zone => $zone, type => $type,
395     is_rhsbl => 0
396     };
397     }
398 dos 178359 elsif ($value =~ /^$/) {
399     return $Mail::SpamAssassin::Conf::MISSING_REQUIRED_VALUE;
400     }
401     else {
402     return $Mail::SpamAssassin::Conf::INVALID_VALUE;
403     }
404 jm 6898 }
405 felicity 56728 });
406 felicity 55981
407 felicity 56728 push (@cmds, {
408 jm 646805 setting => 'uridnssub',
409     is_priv => 1,
410     code => sub {
411     my ($self, $key, $value, $line) = @_;
412     if ($value =~ /^(\S+)\s+(\S+)\s+(\S+)\s+(\d{1,10}|\d{1,3}\.\d{1,3}\.\d{1,3}\.\d{1,3})$/) {
413     my $rulename = $1;
414     my $zone = $2;
415     my $type = $3;
416     my $subrule = $4;
417     $self->{uridnsbls}->{$rulename} = {
418     zone => $zone, type => $type,
419     is_rhsbl => 0, is_subrule => 1
420     };
421     $self->{uridnsbl_subs}->{$zone} ||= { };
422     push (@{$self->{uridnsbl_subs}->{$zone}->{$subrule}->{rulenames}}, $rulename);
423     }
424     elsif ($value =~ /^$/) {
425     return $Mail::SpamAssassin::Conf::MISSING_REQUIRED_VALUE;
426     }
427     else {
428     return $Mail::SpamAssassin::Conf::INVALID_VALUE;
429     }
430     }
431     });
432    
433     push (@cmds, {
434 felicity 56728 setting => 'urirhsbl',
435 dos 527740 is_priv => 1,
436 felicity 56728 code => sub {
437     my ($self, $key, $value, $line) = @_;
438     if ($value =~ /^(\S+)\s+(\S+)\s+(\S+)$/) {
439     my $rulename = $1;
440     my $zone = $2;
441     my $type = $3;
442     $self->{uridnsbls}->{$rulename} = {
443     zone => $zone, type => $type,
444     is_rhsbl => 1
445     };
446     }
447 dos 178359 elsif ($value =~ /^$/) {
448     return $Mail::SpamAssassin::Conf::MISSING_REQUIRED_VALUE;
449     }
450     else {
451     return $Mail::SpamAssassin::Conf::INVALID_VALUE;
452     }
453 jm 9881 }
454 felicity 56728 });
455 felicity 55981
456 felicity 56728 push (@cmds, {
457     setting => 'urirhssub',
458 dos 527740 is_priv => 1,
459 felicity 56728 code => sub {
460     my ($self, $key, $value, $line) = @_;
461 sidney 433917 if ($value =~ /^(\S+)\s+(\S+)\s+(\S+)\s+(\d{1,10}|\d{1,3}\.\d{1,3}\.\d{1,3}\.\d{1,3})$/) {
462 felicity 56728 my $rulename = $1;
463     my $zone = $2;
464     my $type = $3;
465     my $subrule = $4;
466     $self->{uridnsbls}->{$rulename} = {
467     zone => $zone, type => $type,
468     is_rhsbl => 1, is_subrule => 1
469     };
470     $self->{uridnsbl_subs}->{$zone} ||= { };
471 dos 543724 push (@{$self->{uridnsbl_subs}->{$zone}->{$subrule}->{rulenames}}, $rulename);
472 felicity 56728 }
473 dos 178359 elsif ($value =~ /^$/) {
474     return $Mail::SpamAssassin::Conf::MISSING_REQUIRED_VALUE;
475     }
476     else {
477     return $Mail::SpamAssassin::Conf::INVALID_VALUE;
478     }
479 jm 21406 }
480 felicity 56728 });
481 felicity 55981
482 felicity 56728 push (@cmds, {
483 jm 720211 setting => 'urinsrhsbl',
484     is_priv => 1,
485     code => sub {
486     my ($self, $key, $value, $line) = @_;
487     if ($value =~ /^(\S+)\s+(\S+)\s+(\S+)$/) {
488     my $rulename = $1;
489     my $zone = $2;
490     my $type = $3;
491     $self->{uridnsbls}->{$rulename} = {
492     zone => $zone, type => $type,
493     is_nsrhsbl => 1
494     };
495     }
496     elsif ($value =~ /^$/) {
497     return $Mail::SpamAssassin::Conf::MISSING_REQUIRED_VALUE;
498     }
499     else {
500     return $Mail::SpamAssassin::Conf::INVALID_VALUE;
501     }
502     }
503     });
504    
505     push (@cmds, {
506     setting => 'urinsrhssub',
507     is_priv => 1,
508     code => sub {
509     my ($self, $key, $value, $line) = @_;
510     if ($value =~ /^(\S+)\s+(\S+)\s+(\S+)\s+(\d{1,10}|\d{1,3}\.\d{1,3}\.\d{1,3}\.\d{1,3})$/) {
511     my $rulename = $1;
512     my $zone = $2;
513     my $type = $3;
514     my $subrule = $4;
515     $self->{uridnsbls}->{$rulename} = {
516     zone => $zone, type => $type,
517     is_nsrhsbl => 1, is_subrule => 1
518     };
519     $self->{uridnsbl_subs}->{$zone} ||= { };
520     push (@{$self->{uridnsbl_subs}->{$zone}->{$subrule}->{rulenames}}, $rulename);
521     }
522     elsif ($value =~ /^$/) {
523     return $Mail::SpamAssassin::Conf::MISSING_REQUIRED_VALUE;
524     }
525     else {
526     return $Mail::SpamAssassin::Conf::INVALID_VALUE;
527     }
528     }
529     });
530    
531     push (@cmds, {
532 jm 728782 setting => 'urifullnsrhsbl',
533     is_priv => 1,
534     code => sub {
535     my ($self, $key, $value, $line) = @_;
536     if ($value =~ /^(\S+)\s+(\S+)\s+(\S+)$/) {
537     my $rulename = $1;
538     my $zone = $2;
539     my $type = $3;
540     $self->{uridnsbls}->{$rulename} = {
541     zone => $zone, type => $type,
542     is_fullnsrhsbl => 1
543     };
544     }
545     elsif ($value =~ /^$/) {
546     return $Mail::SpamAssassin::Conf::MISSING_REQUIRED_VALUE;
547     }
548     else {
549     return $Mail::SpamAssassin::Conf::INVALID_VALUE;
550     }
551     }
552     });
553    
554     push (@cmds, {
555     setting => 'urifullnsrhssub',
556     is_priv => 1,
557     code => sub {
558     my ($self, $key, $value, $line) = @_;
559     if ($value =~ /^(\S+)\s+(\S+)\s+(\S+)\s+(\d{1,10}|\d{1,3}\.\d{1,3}\.\d{1,3}\.\d{1,3})$/) {
560     my $rulename = $1;
561     my $zone = $2;
562     my $type = $3;
563     my $subrule = $4;
564     $self->{uridnsbls}->{$rulename} = {
565     zone => $zone, type => $type,
566     is_fullnsrhsbl => 1, is_subrule => 1
567     };
568     $self->{uridnsbl_subs}->{$zone} ||= { };
569     push (@{$self->{uridnsbl_subs}->{$zone}->{$subrule}->{rulenames}}, $rulename);
570     }
571     elsif ($value =~ /^$/) {
572     return $Mail::SpamAssassin::Conf::MISSING_REQUIRED_VALUE;
573     }
574     else {
575     return $Mail::SpamAssassin::Conf::INVALID_VALUE;
576     }
577     }
578     });
579    
580     push (@cmds, {
581 felicity 56728 setting => 'uridnsbl_skip_domain',
582     default => {},
583 jm 818443 type => $Mail::SpamAssassin::Conf::CONF_TYPE_HASH_KEY_VALUE,
584 felicity 56728 code => sub {
585     my ($self, $key, $value, $line) = @_;
586 dos 178359 if ($value =~ /^$/) {
587     return $Mail::SpamAssassin::Conf::MISSING_REQUIRED_VALUE;
588     }
589 felicity 56728 foreach my $domain (split(/\s+/, $value)) {
590     $self->{uridnsbl_skip_domains}->{lc $domain} = 1;
591     }
592 felicity 47380 }
593 felicity 56728 });
594 felicity 55981
595 jm 527859 # obsolete
596     push(@cmds, {
597     setting => 'uridnsbl_timeout',
598     code => sub {
599     # not a lint_warn(), since it's pretty harmless and we don't want
600     # to break stuff like sa-update
601     warn("config: 'uridnsbl_timeout' is obsolete, use 'rbl_timeout' instead");
602     return 0;
603     }
604     });
605    
606 felicity 56728 $conf->{parser}->register_commands(\@cmds);
607 jm 6898 }
608    
609     # ---------------------------------------------------------------------------
610    
611     sub query_domain {
612 felicity 439873 my ($self, $scanner, $dom) = @_;
613 jm 6898
614 quinlan 51813 #warn "uridnsbl: domain $dom\n";
615     #return;
616 jm 6898
617     $dom = lc $dom;
618 felicity 439873 return if $scanner->{uridnsbl_seen_domain}->{$dom};
619     $scanner->{uridnsbl_seen_domain}->{$dom} = 1;
620 quinlan 160273 $self->log_dns_result("querying domain $dom");
621 jm 6898
622 mmartinec 567684 my $obj = { dom => $dom };
623 jm 6898
624 quinlan 160273 my $single_dnsbl = 0;
625 jm 394351 if ($dom =~ /^\d+\.\d+\.\d+\.\d+$/) {
626 quinlan 160273 my $IPV4_ADDRESS = IPV4_ADDRESS;
627     my $IP_PRIVATE = IP_PRIVATE;
628     # only look up the IP if it is public and valid
629     if ($dom =~ /^$IPV4_ADDRESS$/ && $dom !~ /^$IP_PRIVATE$/) {
630 felicity 439873 $self->lookup_dnsbl_for_ip($scanner, $obj, $dom);
631 quinlan 160273 # and check the IP in RHSBLs too
632     if ($dom =~ /^(\d+)\.(\d+)\.(\d+)\.(\d+)$/) {
633     $dom = "$4.$3.$2.$1";
634     $single_dnsbl = 1;
635     }
636     }
637 jm 9881 }
638 felicity 47380 else {
639 quinlan 160273 $single_dnsbl = 1;
640     }
641    
642 jm 596095 my $rhsblrules = $scanner->{uridnsbl_active_rules_rhsbl};
643 jm 720211 my $nsrhsblrules = $scanner->{uridnsbl_active_rules_nsrhsbl};
644 jm 728782 my $fullnsrhsblrules = $scanner->{uridnsbl_active_rules_fullnsrhsbl};
645 jm 596095 my $reviprules = $scanner->{uridnsbl_active_rules_revipbl};
646    
647 quinlan 160273 if ($single_dnsbl) {
648 jm 9881 # look up the domain in the RHSBL subset
649 jm 596095 foreach my $rulename (keys %{$rhsblrules}) {
650 felicity 439873 my $rulecf = $scanner->{conf}->{uridnsbls}->{$rulename};
651     $self->lookup_single_dnsbl($scanner, $obj, $rulename,
652 quinlan 160273 $dom, $rulecf->{zone}, $rulecf->{type});
653 jm 394351
654     # see comment below
655 felicity 439873 $scanner->register_async_rule_start($rulename);
656 jm 9881 }
657    
658 jm 596095 # perform NS, A lookups to look up the domain in the non-RHSBL subset,
659     # but only if there are active reverse-IP-URIBL rules
660 jm 721524 if ($dom !~ /^\d+\.\d+\.\d+\.\d+$/ &&
661 jm 728782 (scalar keys %{$reviprules} ||
662     scalar keys %{$nsrhsblrules} ||
663     scalar keys %{$fullnsrhsblrules}))
664 jm 721524 {
665 felicity 439873 $self->lookup_domain_ns($scanner, $obj, $dom);
666 quinlan 160273 }
667 jm 6898 }
668 jm 394351
669     # note that these rules are now underway. important: unless the
670     # rule hits, in the current design, these will not be considered
671     # "finished" until harvest_dnsbl_queries() completes
672 jm 596095 foreach my $rulename (keys %{$reviprules}) {
673 felicity 439873 $scanner->register_async_rule_start($rulename);
674 jm 394351 }
675 jm 6898 }
676    
677     # ---------------------------------------------------------------------------
678    
679     sub lookup_domain_ns {
680 felicity 439873 my ($self, $scanner, $obj, $dom) = @_;
681 jm 6898
682     my $key = "NS:".$dom;
683 felicity 439873 return if $scanner->{async}->get_lookup($key);
684 jm 6898
685     # dig $dom ns
686 mmartinec 585292 my $ent = $self->start_lookup($scanner, $dom, 'NS',
687 mmartinec 564636 $self->res_bgsend($scanner, $dom, 'NS', $key),
688     $key);
689 jm 6898 $ent->{obj} = $obj;
690     }
691    
692     sub complete_ns_lookup {
693 felicity 439873 my ($self, $scanner, $ent, $dom) = @_;
694 jm 7002
695 jm 161157 my $packet = $ent->{response_packet};
696 mmartinec 582903 my @answer = !defined $packet ? () : $packet->answer;
697 jm 6898
698 quinlan 160273 my $IPV4_ADDRESS = IPV4_ADDRESS;
699     my $IP_PRIVATE = IP_PRIVATE;
700 jm 720211 my $nsrhsblrules = $scanner->{uridnsbl_active_rules_nsrhsbl};
701 jm 728782 my $fullnsrhsblrules = $scanner->{uridnsbl_active_rules_fullnsrhsbl};
702 quinlan 160273
703 jm 6898 foreach my $rr (@answer) {
704     my $str = $rr->string;
705     next unless (defined($str) && defined($dom));
706     $self->log_dns_result ("NSs for $dom: $str");
707    
708     if ($str =~ /IN\s+NS\s+(\S+)/) {
709 quinlan 160273 my $nsmatch = $1;
710 jm 720211 my $nsrhblstr = $nsmatch;
711 jm 728782 my $fullnsrhblstr = $nsmatch;
712     $fullnsrhblstr =~ s/\.$//;
713 quinlan 160273
714     if ($nsmatch =~ /^\d+\.\d+\.\d+\.\d+\.?$/) {
715     $nsmatch =~ s/\.$//;
716     # only look up the IP if it is public and valid
717     if ($nsmatch =~ /^$IPV4_ADDRESS$/ && $nsmatch !~ /^$IP_PRIVATE$/) {
718 felicity 439873 $self->lookup_dnsbl_for_ip($scanner, $ent->{obj}, $nsmatch);
719 quinlan 160273 }
720 jm 720211 $nsrhblstr = $nsmatch;
721 quinlan 160273 }
722     else {
723 felicity 439873 $self->lookup_a_record($scanner, $ent->{obj}, $nsmatch);
724 jm 720211 $nsrhblstr = Mail::SpamAssassin::Util::RegistrarBoundaries::trim_domain($nsmatch);
725 quinlan 160273 }
726 jm 720211
727     foreach my $rulename (keys %{$nsrhsblrules}) {
728     my $rulecf = $scanner->{conf}->{uridnsbls}->{$rulename};
729     $self->lookup_single_dnsbl($scanner, $ent->{obj}, $rulename,
730     $nsrhblstr, $rulecf->{zone}, $rulecf->{type});
731    
732     $scanner->register_async_rule_start($rulename);
733     }
734 jm 728782
735     foreach my $rulename (keys %{$fullnsrhsblrules}) {
736     my $rulecf = $scanner->{conf}->{uridnsbls}->{$rulename};
737     $self->lookup_single_dnsbl($scanner, $ent->{obj}, $rulename,
738     $fullnsrhblstr, $rulecf->{zone}, $rulecf->{type});
739    
740     $scanner->register_async_rule_start($rulename);
741     }
742 jm 6898 }
743     }
744     }
745    
746     # ---------------------------------------------------------------------------
747    
748     sub lookup_a_record {
749 felicity 439873 my ($self, $scanner, $obj, $hname) = @_;
750 jm 6898
751     my $key = "A:".$hname;
752 felicity 439873 return if $scanner->{async}->get_lookup($key);
753 jm 6898
754     # dig $hname a
755 mmartinec 585292 my $ent = $self->start_lookup($scanner, $hname, 'A',
756 mmartinec 564636 $self->res_bgsend($scanner, $hname, 'A', $key),
757     $key);
758 jm 6898 $ent->{obj} = $obj;
759     }
760    
761     sub complete_a_lookup {
762 felicity 439873 my ($self, $scanner, $ent, $hname) = @_;
763 jm 6898
764 mmartinec 582903 my $packet = $ent->{response_packet};
765     my @answer = !defined $packet ? () : $packet->answer;
766     foreach my $rr (@answer) {
767 jm 6898 my $str = $rr->string;
768     $self->log_dns_result ("A for NS $hname: $str");
769    
770     if ($str =~ /IN\s+A\s+(\S+)/) {
771 felicity 439873 $self->lookup_dnsbl_for_ip($scanner, $ent->{obj}, $1);
772 jm 6898 }
773     }
774     }
775    
776     # ---------------------------------------------------------------------------
777    
778     sub lookup_dnsbl_for_ip {
779 felicity 439873 my ($self, $scanner, $obj, $ip) = @_;
780 jm 6898
781     $ip =~ /^(\d+)\.(\d+)\.(\d+)\.(\d+)$/;
782     my $revip = "$4.$3.$2.$1";
783    
784 felicity 439873 my $cf = $scanner->{uridnsbl_active_rules_revipbl};
785 jm 6898 foreach my $rulename (keys %{$cf}) {
786 felicity 439873 my $rulecf = $scanner->{conf}->{uridnsbls}->{$rulename};
787     $self->lookup_single_dnsbl($scanner, $obj, $rulename,
788 quinlan 160273 $revip, $rulecf->{zone}, $rulecf->{type});
789 jm 6898 }
790     }
791    
792     sub lookup_single_dnsbl {
793 felicity 439873 my ($self, $scanner, $obj, $rulename, $lookupstr, $dnsbl, $qtype) = @_;
794 jm 6898
795 jm 9881 my $key = "DNSBL:".$dnsbl.":".$lookupstr;
796 felicity 439873 return if $scanner->{async}->get_lookup($key);
797 jm 9881 my $item = $lookupstr.".".$dnsbl;
798 jm 6898
799     # dig $ip txt
800 mmartinec 585292 my $ent = $self->start_lookup($scanner, $item, 'DNSBL',
801 mmartinec 564636 $self->res_bgsend($scanner, $item, $qtype, $key),
802     $key);
803 jm 6898 $ent->{obj} = $obj;
804     $ent->{rulename} = $rulename;
805 jm 21406 $ent->{zone} = $dnsbl;
806 jm 6898 }
807    
808     sub complete_dnsbl_lookup {
809 felicity 439873 my ($self, $scanner, $ent, $dnsblip) = @_;
810 jm 6898
811 felicity 439873 my $conf = $scanner->{conf};
812 mmartinec 571893 my @subtests;
813 jm 21406 my $rulename = $ent->{rulename};
814     my $rulecf = $conf->{uridnsbls}->{$rulename};
815    
816 jm 161157 my $packet = $ent->{response_packet};
817 mmartinec 582903 my @answer = !defined $packet ? () : $packet->answer;
818 jm 161157
819 quinlan 125719 my $uridnsbl_subs = $conf->{uridnsbl_subs}->{$ent->{zone}};
820 jm 21406 foreach my $rr (@answer)
821     {
822     next if ($rr->type ne 'A' && $rr->type ne 'TXT');
823    
824     my $rdatastr = $rr->rdatastr;
825 jm 6898 my $dom = $ent->{obj}->{dom};
826    
827 jm 21406 if (!$rulecf->{is_subrule}) {
828     # this zone is a simple rule, not a set of subrules
829 quinlan 125719 # skip any A record that isn't on 127/8
830 jm 158887 if ($rr->type eq 'A' && $rr->rdatastr !~ /^127\./) {
831 jm 165017 warn("uridnsbl: bogus rr for domain=$dom, rule=$rulename, id=" .
832     $packet->header->id." rr=".$rr->string);
833 quinlan 158898 next;
834 jm 158887 }
835 felicity 439873 $self->got_dnsbl_hit($scanner, $ent, $rdatastr, $dom, $rulename);
836 jm 21406 }
837     else {
838     foreach my $subtest (keys (%{$uridnsbl_subs}))
839     {
840     if ($subtest eq $rdatastr) {
841 dos 543724 foreach my $subrulename (@{$uridnsbl_subs->{$subtest}->{rulenames}}) {
842     $self->got_dnsbl_hit($scanner, $ent, $rdatastr, $dom, $subrulename);
843     }
844 jm 21406 }
845     # bitmask
846     elsif ($subtest =~ /^\d+$/) {
847 sidney 433917 if ($rdatastr =~ m/^\d{1,3}\.\d{1,3}\.\d{1,3}\.\d{1,3}$/ &&
848     Mail::SpamAssassin::Util::my_inet_aton($rdatastr) & $subtest)
849 jm 21406 {
850 dos 543724 foreach my $subrulename (@{$uridnsbl_subs->{$subtest}->{rulenames}}) {
851     $self->got_dnsbl_hit($scanner, $ent, $rdatastr, $dom, $subrulename);
852     }
853 jm 21406 }
854     }
855     }
856     }
857 jm 6898 }
858     }
859    
860 jm 21406 sub got_dnsbl_hit {
861 felicity 439873 my ($self, $scanner, $ent, $str, $dom, $rulename) = @_;
862 jm 21406
863     $str =~ s/\s+/ /gs; # long whitespace => short
864 quinlan 51813 dbg("uridnsbl: domain \"$dom\" listed ($rulename): $str");
865 jm 21406
866 felicity 439873 if (!defined $scanner->{uridnsbl_hits}->{$rulename}) {
867     $scanner->{uridnsbl_hits}->{$rulename} = { };
868 jm 21406 };
869 felicity 439873 $scanner->{uridnsbl_hits}->{$rulename}->{$dom} = 1;
870 jm 394348
871 felicity 439873 if ($scanner->{uridnsbl_active_rules_revipbl}->{$rulename}
872 jm 720211 || $scanner->{uridnsbl_active_rules_nsrhsbl}->{$rulename}
873 jm 728782 || $scanner->{uridnsbl_active_rules_fullnsrhsbl}->{$rulename}
874 felicity 439873 || $scanner->{uridnsbl_active_rules_rhsbl}->{$rulename})
875 jm 394348 {
876     # TODO: this needs to handle multiple domain hits per rule
877 felicity 439873 $scanner->clear_test_state();
878     my $uris = join (' ', keys %{$scanner->{uridnsbl_hits}->{$rulename}});
879     $scanner->test_log ("URIs: $uris");
880     $scanner->got_hit ($rulename, "");
881 jm 394351
882     # note that this rule has completed (since it got at least 1 hit)
883 felicity 439873 $scanner->register_async_rule_finish($rulename);
884 jm 394348 }
885 jm 21406 }
886    
887 jm 6898 # ---------------------------------------------------------------------------
888    
889     sub start_lookup {
890 mmartinec 585292 my ($self, $scanner, $zone, $type, $id, $key) = @_;
891 jm 394348
892 jm 6898 my $ent = {
893 jm 394348 key => $key,
894 mmartinec 585292 zone => $zone, # serves to fetch other per-zone settings
895 jm 394348 type => "URI-".$type,
896     id => $id,
897     completed_callback => sub {
898     my $ent = shift;
899 mmartinec 580055 if (defined $ent->{response_packet}) { # not aborted or empty
900     $self->completed_lookup_callback ($scanner, $ent);
901     }
902 jm 394348 }
903 jm 6898 };
904 felicity 439873 $scanner->{async}->start_lookup($ent);
905 jm 394348 return $ent;
906 jm 6898 }
907    
908 jm 394348 sub completed_lookup_callback {
909 felicity 439873 my ($self, $scanner, $ent) = @_;
910 jm 394348 my $type = $ent->{type};
911     my $key = $ent->{key};
912     $key =~ /:(\S+?)$/; my $val = $1;
913 jm 6898
914 jm 394348 if ($type eq 'URI-NS') {
915 felicity 439873 $self->complete_ns_lookup ($scanner, $ent, $val);
916 jm 6898 }
917 jm 394348 elsif ($type eq 'URI-A') {
918 felicity 439873 $self->complete_a_lookup ($scanner, $ent, $val);
919 jm 6898 }
920 jm 394348 elsif ($type eq 'URI-DNSBL') {
921 felicity 439873 $self->complete_dnsbl_lookup ($scanner, $ent, $val);
922 jm 6898 }
923     }
924    
925     # ---------------------------------------------------------------------------
926    
927 jm 161148 sub res_bgsend {
928 mmartinec 564636 my ($self, $scanner, $host, $type, $key) = @_;
929 jm 161778
930     return $self->{main}->{resolver}->bgsend($host, $type, undef, sub {
931 mmartinec 564636 my ($pkt, $id, $timestamp) = @_;
932     $scanner->{async}->set_response_packet($id, $pkt, $key, $timestamp);
933 jm 161778 });
934 jm 161148 }
935    
936 jm 6898 sub log_dns_result {
937 felicity 57557 #my $self = shift;
938 quinlan 51813 #Mail::SpamAssassin::dbg("uridnsbl: ".join (' ', @_));
939 jm 6898 }
940    
941     # ---------------------------------------------------------------------------
942    
943     1;

apache@apache.org
ViewVC Help
Powered by ViewVC 1.1.2