/[Apache-SVN]/spamassassin/trunk/lib/Mail/SpamAssassin/Plugin/URIDNSBL.pm
ViewVC logotype

Contents of /spamassassin/trunk/lib/Mail/SpamAssassin/Plugin/URIDNSBL.pm

Parent Directory Parent Directory | Revision Log Revision Log


Revision 720211 - (show annotations)
Mon Nov 24 15:35:56 2008 UTC (12 months ago) by jm
File size: 25439 byte(s)
bug 6020: add 'urinsrhsbl' and 'urinsrhssub' rule types to URIDNSBL plugin, allowing listing of NS record strings in URIBL blocklists
1 # <@LICENSE>
2 # Licensed to the Apache Software Foundation (ASF) under one or more
3 # contributor license agreements. See the NOTICE file distributed with
4 # this work for additional information regarding copyright ownership.
5 # The ASF licenses this file to you under the Apache License, Version 2.0
6 # (the "License"); you may not use this file except in compliance with
7 # the License. You may obtain a copy of the License at:
8 #
9 # http://www.apache.org/licenses/LICENSE-2.0
10 #
11 # Unless required by applicable law or agreed to in writing, software
12 # distributed under the License is distributed on an "AS IS" BASIS,
13 # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14 # See the License for the specific language governing permissions and
15 # limitations under the License.
16 # </@LICENSE>
17
18 =head1 NAME
19
20 URIDNSBL - look up URLs against DNS blocklists
21
22 =head1 SYNOPSIS
23
24 loadplugin Mail::SpamAssassin::Plugin::URIDNSBL
25 uridnsbl URIBL_SBLXBL sbl-xbl.spamhaus.org. TXT
26
27 =head1 DESCRIPTION
28
29 This works by analysing message text and HTML for URLs, extracting the
30 domain names from those, querying their NS records in DNS, resolving
31 the hostnames used therein, and querying various DNS blocklists for
32 those IP addresses. This is quite effective.
33
34 =head1 USER SETTINGS
35
36 =over 4
37
38 =item uridnsbl_skip_domain domain1 domain2 ...
39
40 Specify a domain, or a number of domains, which should be skipped for the
41 URIBL checks. This is very useful to specify very common domains which are
42 not going to be listed in URIBLs.
43
44 =back
45
46 =head1 RULE DEFINITIONS AND PRIVILEGED SETTINGS
47
48 =over 4
49
50 =item uridnsbl NAME_OF_RULE dnsbl_zone lookuptype
51
52 Specify a lookup. C<NAME_OF_RULE> is the name of the rule to be
53 used, C<dnsbl_zone> is the zone to look up IPs in, and C<lookuptype>
54 is the type of lookup (B<TXT> or B<A>). Note that you must also
55 define a body-eval rule calling C<check_uridnsbl()> to use this.
56
57 Example:
58
59 uridnsbl URIBL_SBLXBL sbl-xbl.spamhaus.org. TXT
60 body URIBL_SBLXBL eval:check_uridnsbl('URIBL_SBLXBL')
61 describe URIBL_SBLXBL Contains a URL listed in the SBL/XBL blocklist
62
63 =item uridnssub NAME_OF_RULE dnsbl_zone lookuptype subtest
64
65 Specify a DNSBL-style domain lookup with a sub-test. C<NAME_OF_RULE> is the
66 name of the rule to be used, C<dnsbl_zone> is the zone to look up IPs in,
67 and C<lookuptype> is the type of lookup (B<TXT> or B<A>).
68
69 C<subtest> is the sub-test to run against the returned data. The sub-test may
70 either be an IPv4 dotted address for DNSBLs that return multiple A records or a
71 non-negative decimal number to specify a bitmask for DNSBLs that return a
72 single A record containing a bitmask of results.
73
74 Note that, as with C<uridnsbl>, you must also define a body-eval rule calling
75 C<check_uridnsbl()> to use this.
76
77 Example:
78
79 uridnssub URIBL_DNSBL_4 dnsbl.example.org. A 127.0.0.4
80 uridnssub URIBL_DNSBL_8 dnsbl.example.org. A 8
81
82 =item urirhsbl NAME_OF_RULE rhsbl_zone lookuptype
83
84 Specify a RHSBL-style domain lookup. C<NAME_OF_RULE> is the name of the rule
85 to be used, C<rhsbl_zone> is the zone to look up domain names in, and
86 C<lookuptype> is the type of lookup (B<TXT> or B<A>). Note that you must also
87 define a body-eval rule calling C<check_uridnsbl()> to use this.
88
89 An RHSBL zone is one where the domain name is looked up, as a string; e.g. a
90 URI using the domain C<foo.com> will cause a lookup of
91 C<foo.com.uriblzone.net>. Note that hostnames are stripped from the domain
92 used in the URIBL lookup, so the domain C<foo.bar.com> will look up
93 C<bar.com.uriblzone.net>, and C<foo.bar.co.uk> will look up
94 C<bar.co.uk.uriblzone.net>.
95
96 If an URI consists of an IP address instead of a hostname, the IP address is
97 looked up (using the standard reversed quads method) in each C<rhsbl_zone>.
98
99 Example:
100
101 urirhsbl URIBL_RHSBL rhsbl.example.org. TXT
102
103 =item urirhssub NAME_OF_RULE rhsbl_zone lookuptype subtest
104
105 Specify a RHSBL-style domain lookup with a sub-test. C<NAME_OF_RULE> is the
106 name of the rule to be used, C<rhsbl_zone> is the zone to look up domain names
107 in, and C<lookuptype> is the type of lookup (B<TXT> or B<A>).
108
109 C<subtest> is the sub-test to run against the returned data. The sub-test may
110 either be an IPv4 dotted address for RHSBLs that return multiple A records or a
111 non-negative decimal number to specify a bitmask for RHSBLs that return a
112 single A record containing a bitmask of results.
113
114 Note that, as with C<urirhsbl>, you must also define a body-eval rule calling
115 C<check_uridnsbl()> to use this.
116
117 Example:
118
119 urirhssub URIBL_RHSBL_4 rhsbl.example.org. A 127.0.0.4
120 urirhssub URIBL_RHSBL_8 rhsbl.example.org. A 8
121
122 =item urinsrhsbl NAME_OF_RULE rhsbl_zone lookuptype
123
124 Perform a RHSBL-style domain lookup against the contents of the NS records
125 for each URI. In other words, a URI using the domain C<foo.com> will cause
126 an NS lookup to take place; assuming that domain has an NS of C<ns0.bar.com>,
127 that will cause a lookup of C<bar.com.uriblzone.net>. Note that hostnames
128 are stripped from both the domain used in the URI, and the domain in the
129 lookup.
130
131 C<NAME_OF_RULE> is the name of the rule to be used, C<rhsbl_zone> is the zone
132 to look up domain names in, and C<lookuptype> is the type of lookup (B<TXT> or
133 B<A>).
134
135 Note that, as with C<urirhsbl>, you must also define a body-eval rule calling
136 C<check_uridnsbl()> to use this.
137
138 =item urinsrhssub NAME_OF_RULE rhsbl_zone lookuptype subtest
139
140 Specify a RHSBL-style domain-NS lookup, as above, with a sub-test.
141 C<NAME_OF_RULE> is the name of the rule to be used, C<rhsbl_zone> is the zone
142 to look up domain names in, and C<lookuptype> is the type of lookup (B<TXT> or
143 B<A>). C<subtest> is the sub-test to run against the returned data; see
144 <urirhssub>.
145
146 Note that, as with C<urirhsbl>, you must also define a body-eval rule calling
147 C<check_uridnsbl()> to use this.
148
149 =back
150
151 =head1 ADMINISTRATOR SETTINGS
152
153 =over 4
154
155 =item uridnsbl_max_domains N (default: 20)
156
157 The maximum number of domains to look up.
158
159 =back
160
161 =head1 NOTES
162
163 The C<uridnsbl_timeout> option has been obsoleted by the C<rbl_timeout>
164 option. See the C<Mail::SpamAssassin::Conf> POD for details on C<rbl_timeout>.
165
166 =cut
167
168 package Mail::SpamAssassin::Plugin::URIDNSBL;
169
170 use Mail::SpamAssassin::Plugin;
171 use Mail::SpamAssassin::Constants qw(:ip);
172 use Mail::SpamAssassin::Util;
173 use Mail::SpamAssassin::Util::RegistrarBoundaries;
174 use Mail::SpamAssassin::Logger;
175 use strict;
176 use warnings;
177 use bytes;
178 use re 'taint';
179
180 use vars qw(@ISA);
181 @ISA = qw(Mail::SpamAssassin::Plugin);
182
183 use constant LOG_COMPLETION_TIMES => 0;
184
185 # constructor
186 sub new {
187 my $class = shift;
188 my $samain = shift;
189
190 # some boilerplate...
191 $class = ref($class) || $class;
192 my $self = $class->SUPER::new($samain);
193 bless ($self, $class);
194
195 # this can be effectively global, at least in each process, safely
196
197 $self->{finished} = { };
198
199 $self->register_eval_rule ("check_uridnsbl");
200 $self->set_config($samain->{conf});
201
202 return $self;
203 }
204
205 # this is just a placeholder; in fact the results are dealt with later
206 sub check_uridnsbl {
207 return 0;
208 }
209
210 # ---------------------------------------------------------------------------
211
212 # once the metadata is parsed, we can access the URI list. So start off
213 # the lookups here!
214 sub parsed_metadata {
215 my ($self, $opts) = @_;
216 my $scanner = $opts->{permsgstatus};
217
218 if (!$scanner->is_dns_available()) {
219 $self->{dns_not_available} = 1;
220 return;
221 } else {
222 # due to re-testing dns may become available after being unavailable
223 # DOS: I don't think dns_not_available is even used anymore
224 $self->{dns_not_available} = 0;
225 }
226
227 $scanner->{'uridnsbl_activerules'} = { };
228 $scanner->{'uridnsbl_hits'} = { };
229 $scanner->{'uridnsbl_seen_domain'} = { };
230
231 # only hit DNSBLs for active rules (defined and score != 0)
232 $scanner->{'uridnsbl_active_rules_rhsbl'} = { };
233 $scanner->{'uridnsbl_active_rules_nsrhsbl'} = { };
234 $scanner->{'uridnsbl_active_rules_revipbl'} = { };
235
236 foreach my $rulename (keys %{$scanner->{conf}->{uridnsbls}}) {
237 next unless ($scanner->{conf}->is_rule_active('body_evals',$rulename));
238
239 my $rulecf = $scanner->{conf}->{uridnsbls}->{$rulename};
240 if ($rulecf->{is_rhsbl}) {
241 $scanner->{uridnsbl_active_rules_rhsbl}->{$rulename} = 1;
242 } elsif ($rulecf->{is_nsrhsbl}) {
243 $scanner->{uridnsbl_active_rules_nsrhsbl}->{$rulename} = 1;
244 } else {
245 $scanner->{uridnsbl_active_rules_revipbl}->{$rulename} = 1;
246 }
247 }
248
249 # get all domains in message
250
251 # don't keep dereferencing this
252 my $skip_domains = $scanner->{main}->{conf}->{uridnsbl_skip_domains};
253
254 # list of hashes to use in order
255 my @uri_ordered;
256
257 # Generate the full list of html-parsed domains.
258 my $uris = $scanner->get_uri_detail_list();
259
260 # go from uri => info to uri_ordered
261 # 0: a
262 # 1: form
263 # 2: img
264 # 3: !a_empty
265 # 4: parsed
266 # 5: a_empty
267 while (my($uri, $info) = each %{$uris}) {
268 # we want to skip mailto: uris
269 next if ($uri =~ /^mailto:/);
270
271 # no domains were found via this uri, so skip
272 next unless ($info->{domains});
273
274 my $entry = 3;
275
276 if ($info->{types}->{a}) {
277 $entry = 5;
278
279 # determine a vs a_empty
280 foreach my $at (@{$info->{anchor_text}}) {
281 if (length $at) {
282 $entry = 0;
283 last;
284 }
285 }
286 }
287 elsif ($info->{types}->{form}) {
288 $entry = 1;
289 }
290 elsif ($info->{types}->{img}) {
291 $entry = 2;
292 }
293 elsif ($info->{types}->{parsed} && (keys %{$info->{types}} == 1)) {
294 $entry = 4;
295 }
296
297 # take the usable domains and add to the ordered list
298 foreach ( keys %{ $info->{domains} } ) {
299 if (exists $skip_domains->{$_}) {
300 dbg("uridnsbl: domain $_ in skip list");
301 next;
302 }
303 $uri_ordered[$entry]->{$_} = 1;
304 }
305 }
306
307 # at this point, @uri_ordered is an ordered array of uri hashes
308
309 my %domlist;
310 my $umd = $scanner->{main}->{conf}->{uridnsbl_max_domains};
311 while (keys %domlist < $umd && @uri_ordered) {
312 my $array = shift @uri_ordered;
313 next unless $array;
314
315 # run through and find the new domains in this grouping
316 my @domains = grep(!$domlist{$_}, keys %{$array});
317 next unless @domains;
318
319 # the new domains are all useful, just add them in
320 if (keys(%domlist) + @domains <= $umd) {
321 foreach (@domains) {
322 $domlist{$_} = 1;
323 }
324 }
325 else {
326 # trim down to a limited number - pick randomly
327 my $i;
328 while (@domains && keys %domlist < $umd) {
329 my $r = int rand (scalar @domains);
330 $domlist{splice (@domains, $r, 1)} = 1;
331 }
332 }
333 }
334
335 # and query
336 dbg("uridnsbl: domains to query: ".join(' ',keys %domlist));
337 foreach my $dom (keys %domlist) {
338 $self->query_domain ($scanner, $dom);
339 }
340
341 return 1;
342 }
343
344 sub set_config {
345 my($self, $conf) = @_;
346 my @cmds;
347
348 push(@cmds, {
349 setting => 'uridnsbl_max_domains',
350 is_admin => 1,
351 default => 20,
352 type => $Mail::SpamAssassin::Conf::CONF_TYPE_NUMERIC,
353 });
354
355 push (@cmds, {
356 setting => 'uridnsbl',
357 is_priv => 1,
358 code => sub {
359 my ($self, $key, $value, $line) = @_;
360 if ($value =~ /^(\S+)\s+(\S+)\s+(\S+)$/) {
361 my $rulename = $1;
362 my $zone = $2;
363 my $type = $3;
364 $self->{uridnsbls}->{$rulename} = {
365 zone => $zone, type => $type,
366 is_rhsbl => 0
367 };
368 }
369 elsif ($value =~ /^$/) {
370 return $Mail::SpamAssassin::Conf::MISSING_REQUIRED_VALUE;
371 }
372 else {
373 return $Mail::SpamAssassin::Conf::INVALID_VALUE;
374 }
375 }
376 });
377
378 push (@cmds, {
379 setting => 'uridnssub',
380 is_priv => 1,
381 code => sub {
382 my ($self, $key, $value, $line) = @_;
383 if ($value =~ /^(\S+)\s+(\S+)\s+(\S+)\s+(\d{1,10}|\d{1,3}\.\d{1,3}\.\d{1,3}\.\d{1,3})$/) {
384 my $rulename = $1;
385 my $zone = $2;
386 my $type = $3;
387 my $subrule = $4;
388 $self->{uridnsbls}->{$rulename} = {
389 zone => $zone, type => $type,
390 is_rhsbl => 0, is_subrule => 1
391 };
392 $self->{uridnsbl_subs}->{$zone} ||= { };
393 push (@{$self->{uridnsbl_subs}->{$zone}->{$subrule}->{rulenames}}, $rulename);
394 }
395 elsif ($value =~ /^$/) {
396 return $Mail::SpamAssassin::Conf::MISSING_REQUIRED_VALUE;
397 }
398 else {
399 return $Mail::SpamAssassin::Conf::INVALID_VALUE;
400 }
401 }
402 });
403
404 push (@cmds, {
405 setting => 'urirhsbl',
406 is_priv => 1,
407 code => sub {
408 my ($self, $key, $value, $line) = @_;
409 if ($value =~ /^(\S+)\s+(\S+)\s+(\S+)$/) {
410 my $rulename = $1;
411 my $zone = $2;
412 my $type = $3;
413 $self->{uridnsbls}->{$rulename} = {
414 zone => $zone, type => $type,
415 is_rhsbl => 1
416 };
417 }
418 elsif ($value =~ /^$/) {
419 return $Mail::SpamAssassin::Conf::MISSING_REQUIRED_VALUE;
420 }
421 else {
422 return $Mail::SpamAssassin::Conf::INVALID_VALUE;
423 }
424 }
425 });
426
427 push (@cmds, {
428 setting => 'urirhssub',
429 is_priv => 1,
430 code => sub {
431 my ($self, $key, $value, $line) = @_;
432 if ($value =~ /^(\S+)\s+(\S+)\s+(\S+)\s+(\d{1,10}|\d{1,3}\.\d{1,3}\.\d{1,3}\.\d{1,3})$/) {
433 my $rulename = $1;
434 my $zone = $2;
435 my $type = $3;
436 my $subrule = $4;
437 $self->{uridnsbls}->{$rulename} = {
438 zone => $zone, type => $type,
439 is_rhsbl => 1, is_subrule => 1
440 };
441 $self->{uridnsbl_subs}->{$zone} ||= { };
442 push (@{$self->{uridnsbl_subs}->{$zone}->{$subrule}->{rulenames}}, $rulename);
443 }
444 elsif ($value =~ /^$/) {
445 return $Mail::SpamAssassin::Conf::MISSING_REQUIRED_VALUE;
446 }
447 else {
448 return $Mail::SpamAssassin::Conf::INVALID_VALUE;
449 }
450 }
451 });
452
453 push (@cmds, {
454 setting => 'urinsrhsbl',
455 is_priv => 1,
456 code => sub {
457 my ($self, $key, $value, $line) = @_;
458 if ($value =~ /^(\S+)\s+(\S+)\s+(\S+)$/) {
459 my $rulename = $1;
460 my $zone = $2;
461 my $type = $3;
462 $self->{uridnsbls}->{$rulename} = {
463 zone => $zone, type => $type,
464 is_nsrhsbl => 1
465 };
466 }
467 elsif ($value =~ /^$/) {
468 return $Mail::SpamAssassin::Conf::MISSING_REQUIRED_VALUE;
469 }
470 else {
471 return $Mail::SpamAssassin::Conf::INVALID_VALUE;
472 }
473 }
474 });
475
476 push (@cmds, {
477 setting => 'urinsrhssub',
478 is_priv => 1,
479 code => sub {
480 my ($self, $key, $value, $line) = @_;
481 if ($value =~ /^(\S+)\s+(\S+)\s+(\S+)\s+(\d{1,10}|\d{1,3}\.\d{1,3}\.\d{1,3}\.\d{1,3})$/) {
482 my $rulename = $1;
483 my $zone = $2;
484 my $type = $3;
485 my $subrule = $4;
486 $self->{uridnsbls}->{$rulename} = {
487 zone => $zone, type => $type,
488 is_nsrhsbl => 1, is_subrule => 1
489 };
490 $self->{uridnsbl_subs}->{$zone} ||= { };
491 push (@{$self->{uridnsbl_subs}->{$zone}->{$subrule}->{rulenames}}, $rulename);
492 }
493 elsif ($value =~ /^$/) {
494 return $Mail::SpamAssassin::Conf::MISSING_REQUIRED_VALUE;
495 }
496 else {
497 return $Mail::SpamAssassin::Conf::INVALID_VALUE;
498 }
499 }
500 });
501
502 push (@cmds, {
503 setting => 'uridnsbl_skip_domain',
504 default => {},
505 code => sub {
506 my ($self, $key, $value, $line) = @_;
507 if ($value =~ /^$/) {
508 return $Mail::SpamAssassin::Conf::MISSING_REQUIRED_VALUE;
509 }
510 foreach my $domain (split(/\s+/, $value)) {
511 $self->{uridnsbl_skip_domains}->{lc $domain} = 1;
512 }
513 }
514 });
515
516 # obsolete
517 push(@cmds, {
518 setting => 'uridnsbl_timeout',
519 code => sub {
520 # not a lint_warn(), since it's pretty harmless and we don't want
521 # to break stuff like sa-update
522 warn("config: 'uridnsbl_timeout' is obsolete, use 'rbl_timeout' instead");
523 return 0;
524 }
525 });
526
527 $conf->{parser}->register_commands(\@cmds);
528 }
529
530 # ---------------------------------------------------------------------------
531
532 sub query_domain {
533 my ($self, $scanner, $dom) = @_;
534
535 #warn "uridnsbl: domain $dom\n";
536 #return;
537
538 $dom = lc $dom;
539 return if $scanner->{uridnsbl_seen_domain}->{$dom};
540 $scanner->{uridnsbl_seen_domain}->{$dom} = 1;
541 $self->log_dns_result("querying domain $dom");
542
543 my $obj = { dom => $dom };
544
545 my $single_dnsbl = 0;
546 if ($dom =~ /^\d+\.\d+\.\d+\.\d+$/) {
547 my $IPV4_ADDRESS = IPV4_ADDRESS;
548 my $IP_PRIVATE = IP_PRIVATE;
549 # only look up the IP if it is public and valid
550 if ($dom =~ /^$IPV4_ADDRESS$/ && $dom !~ /^$IP_PRIVATE$/) {
551 $self->lookup_dnsbl_for_ip($scanner, $obj, $dom);
552 # and check the IP in RHSBLs too
553 if ($dom =~ /^(\d+)\.(\d+)\.(\d+)\.(\d+)$/) {
554 $dom = "$4.$3.$2.$1";
555 $single_dnsbl = 1;
556 }
557 }
558 }
559 else {
560 $single_dnsbl = 1;
561 }
562
563 my $rhsblrules = $scanner->{uridnsbl_active_rules_rhsbl};
564 my $nsrhsblrules = $scanner->{uridnsbl_active_rules_nsrhsbl};
565 my $reviprules = $scanner->{uridnsbl_active_rules_revipbl};
566
567 if ($single_dnsbl) {
568 # look up the domain in the RHSBL subset
569 foreach my $rulename (keys %{$rhsblrules}) {
570 my $rulecf = $scanner->{conf}->{uridnsbls}->{$rulename};
571 $self->lookup_single_dnsbl($scanner, $obj, $rulename,
572 $dom, $rulecf->{zone}, $rulecf->{type});
573
574 # see comment below
575 $scanner->register_async_rule_start($rulename);
576 }
577
578 # perform NS, A lookups to look up the domain in the non-RHSBL subset,
579 # but only if there are active reverse-IP-URIBL rules
580 if ($dom !~ /^\d+\.\d+\.\d+\.\d+$/ && (scalar keys %{$reviprules})) {
581 $self->lookup_domain_ns($scanner, $obj, $dom);
582 }
583 }
584
585 # note that these rules are now underway. important: unless the
586 # rule hits, in the current design, these will not be considered
587 # "finished" until harvest_dnsbl_queries() completes
588 foreach my $rulename (keys %{$reviprules}) {
589 $scanner->register_async_rule_start($rulename);
590 }
591 }
592
593 # ---------------------------------------------------------------------------
594
595 sub lookup_domain_ns {
596 my ($self, $scanner, $obj, $dom) = @_;
597
598 my $key = "NS:".$dom;
599 return if $scanner->{async}->get_lookup($key);
600
601 # dig $dom ns
602 my $ent = $self->start_lookup($scanner, $dom, 'NS',
603 $self->res_bgsend($scanner, $dom, 'NS', $key),
604 $key);
605 $ent->{obj} = $obj;
606 }
607
608 sub complete_ns_lookup {
609 my ($self, $scanner, $ent, $dom) = @_;
610
611 my $packet = $ent->{response_packet};
612 my @answer = !defined $packet ? () : $packet->answer;
613
614 my $IPV4_ADDRESS = IPV4_ADDRESS;
615 my $IP_PRIVATE = IP_PRIVATE;
616 my $nsrhsblrules = $scanner->{uridnsbl_active_rules_nsrhsbl};
617
618 foreach my $rr (@answer) {
619 my $str = $rr->string;
620 next unless (defined($str) && defined($dom));
621 $self->log_dns_result ("NSs for $dom: $str");
622
623 if ($str =~ /IN\s+NS\s+(\S+)/) {
624 my $nsmatch = $1;
625 my $nsrhblstr = $nsmatch;
626
627 if ($nsmatch =~ /^\d+\.\d+\.\d+\.\d+\.?$/) {
628 $nsmatch =~ s/\.$//;
629 # only look up the IP if it is public and valid
630 if ($nsmatch =~ /^$IPV4_ADDRESS$/ && $nsmatch !~ /^$IP_PRIVATE$/) {
631 $self->lookup_dnsbl_for_ip($scanner, $ent->{obj}, $nsmatch);
632 }
633 $nsrhblstr = $nsmatch;
634 }
635 else {
636 $self->lookup_a_record($scanner, $ent->{obj}, $nsmatch);
637 $nsrhblstr = Mail::SpamAssassin::Util::RegistrarBoundaries::trim_domain($nsmatch);
638 }
639
640 foreach my $rulename (keys %{$nsrhsblrules}) {
641 my $rulecf = $scanner->{conf}->{uridnsbls}->{$rulename};
642 $self->lookup_single_dnsbl($scanner, $ent->{obj}, $rulename,
643 $nsrhblstr, $rulecf->{zone}, $rulecf->{type});
644
645 $scanner->register_async_rule_start($rulename);
646 }
647 }
648 }
649 }
650
651 # ---------------------------------------------------------------------------
652
653 sub lookup_a_record {
654 my ($self, $scanner, $obj, $hname) = @_;
655
656 my $key = "A:".$hname;
657 return if $scanner->{async}->get_lookup($key);
658
659 # dig $hname a
660 my $ent = $self->start_lookup($scanner, $hname, 'A',
661 $self->res_bgsend($scanner, $hname, 'A', $key),
662 $key);
663 $ent->{obj} = $obj;
664 }
665
666 sub complete_a_lookup {
667 my ($self, $scanner, $ent, $hname) = @_;
668
669 my $packet = $ent->{response_packet};
670 my @answer = !defined $packet ? () : $packet->answer;
671 foreach my $rr (@answer) {
672 my $str = $rr->string;
673 $self->log_dns_result ("A for NS $hname: $str");
674
675 if ($str =~ /IN\s+A\s+(\S+)/) {
676 $self->lookup_dnsbl_for_ip($scanner, $ent->{obj}, $1);
677 }
678 }
679 }
680
681 # ---------------------------------------------------------------------------
682
683 sub lookup_dnsbl_for_ip {
684 my ($self, $scanner, $obj, $ip) = @_;
685
686 $ip =~ /^(\d+)\.(\d+)\.(\d+)\.(\d+)$/;
687 my $revip = "$4.$3.$2.$1";
688
689 my $cf = $scanner->{uridnsbl_active_rules_revipbl};
690 foreach my $rulename (keys %{$cf}) {
691 my $rulecf = $scanner->{conf}->{uridnsbls}->{$rulename};
692 $self->lookup_single_dnsbl($scanner, $obj, $rulename,
693 $revip, $rulecf->{zone}, $rulecf->{type});
694 }
695 }
696
697 sub lookup_single_dnsbl {
698 my ($self, $scanner, $obj, $rulename, $lookupstr, $dnsbl, $qtype) = @_;
699
700 my $key = "DNSBL:".$dnsbl.":".$lookupstr;
701 return if $scanner->{async}->get_lookup($key);
702 my $item = $lookupstr.".".$dnsbl;
703
704 # dig $ip txt
705 my $ent = $self->start_lookup($scanner, $item, 'DNSBL',
706 $self->res_bgsend($scanner, $item, $qtype, $key),
707 $key);
708 $ent->{obj} = $obj;
709 $ent->{rulename} = $rulename;
710 $ent->{zone} = $dnsbl;
711 }
712
713 sub complete_dnsbl_lookup {
714 my ($self, $scanner, $ent, $dnsblip) = @_;
715
716 my $conf = $scanner->{conf};
717 my @subtests;
718 my $rulename = $ent->{rulename};
719 my $rulecf = $conf->{uridnsbls}->{$rulename};
720
721 my $packet = $ent->{response_packet};
722 my @answer = !defined $packet ? () : $packet->answer;
723
724 my $uridnsbl_subs = $conf->{uridnsbl_subs}->{$ent->{zone}};
725 foreach my $rr (@answer)
726 {
727 next if ($rr->type ne 'A' && $rr->type ne 'TXT');
728
729 my $rdatastr = $rr->rdatastr;
730 my $dom = $ent->{obj}->{dom};
731
732 if (!$rulecf->{is_subrule}) {
733 # this zone is a simple rule, not a set of subrules
734 # skip any A record that isn't on 127/8
735 if ($rr->type eq 'A' && $rr->rdatastr !~ /^127\./) {
736 warn("uridnsbl: bogus rr for domain=$dom, rule=$rulename, id=" .
737 $packet->header->id." rr=".$rr->string);
738 next;
739 }
740 $self->got_dnsbl_hit($scanner, $ent, $rdatastr, $dom, $rulename);
741 }
742 else {
743 foreach my $subtest (keys (%{$uridnsbl_subs}))
744 {
745 if ($subtest eq $rdatastr) {
746 foreach my $subrulename (@{$uridnsbl_subs->{$subtest}->{rulenames}}) {
747 $self->got_dnsbl_hit($scanner, $ent, $rdatastr, $dom, $subrulename);
748 }
749 }
750 # bitmask
751 elsif ($subtest =~ /^\d+$/) {
752 if ($rdatastr =~ m/^\d{1,3}\.\d{1,3}\.\d{1,3}\.\d{1,3}$/ &&
753 Mail::SpamAssassin::Util::my_inet_aton($rdatastr) & $subtest)
754 {
755 foreach my $subrulename (@{$uridnsbl_subs->{$subtest}->{rulenames}}) {
756 $self->got_dnsbl_hit($scanner, $ent, $rdatastr, $dom, $subrulename);
757 }
758 }
759 }
760 }
761 }
762 }
763 }
764
765 sub got_dnsbl_hit {
766 my ($self, $scanner, $ent, $str, $dom, $rulename) = @_;
767
768 $str =~ s/\s+/ /gs; # long whitespace => short
769 dbg("uridnsbl: domain \"$dom\" listed ($rulename): $str");
770
771 if (!defined $scanner->{uridnsbl_hits}->{$rulename}) {
772 $scanner->{uridnsbl_hits}->{$rulename} = { };
773 };
774 $scanner->{uridnsbl_hits}->{$rulename}->{$dom} = 1;
775
776 if ($scanner->{uridnsbl_active_rules_revipbl}->{$rulename}
777 || $scanner->{uridnsbl_active_rules_nsrhsbl}->{$rulename}
778 || $scanner->{uridnsbl_active_rules_rhsbl}->{$rulename})
779 {
780 # TODO: this needs to handle multiple domain hits per rule
781 $scanner->clear_test_state();
782 my $uris = join (' ', keys %{$scanner->{uridnsbl_hits}->{$rulename}});
783 $scanner->test_log ("URIs: $uris");
784 $scanner->got_hit ($rulename, "");
785
786 # note that this rule has completed (since it got at least 1 hit)
787 $scanner->register_async_rule_finish($rulename);
788 }
789 }
790
791 # ---------------------------------------------------------------------------
792
793 sub start_lookup {
794 my ($self, $scanner, $zone, $type, $id, $key) = @_;
795
796 my $ent = {
797 key => $key,
798 zone => $zone, # serves to fetch other per-zone settings
799 type => "URI-".$type,
800 id => $id,
801 completed_callback => sub {
802 my $ent = shift;
803 if (defined $ent->{response_packet}) { # not aborted or empty
804 $self->completed_lookup_callback ($scanner, $ent);
805 }
806 }
807 };
808 $scanner->{async}->start_lookup($ent);
809 return $ent;
810 }
811
812 sub completed_lookup_callback {
813 my ($self, $scanner, $ent) = @_;
814 my $type = $ent->{type};
815 my $key = $ent->{key};
816 $key =~ /:(\S+?)$/; my $val = $1;
817
818 if ($type eq 'URI-NS') {
819 $self->complete_ns_lookup ($scanner, $ent, $val);
820 }
821 elsif ($type eq 'URI-A') {
822 $self->complete_a_lookup ($scanner, $ent, $val);
823 }
824 elsif ($type eq 'URI-DNSBL') {
825 $self->complete_dnsbl_lookup ($scanner, $ent, $val);
826 }
827 }
828
829 # ---------------------------------------------------------------------------
830
831 sub res_bgsend {
832 my ($self, $scanner, $host, $type, $key) = @_;
833
834 return $self->{main}->{resolver}->bgsend($host, $type, undef, sub {
835 my ($pkt, $id, $timestamp) = @_;
836 $scanner->{async}->set_response_packet($id, $pkt, $key, $timestamp);
837 });
838 }
839
840 sub log_dns_result {
841 #my $self = shift;
842 #Mail::SpamAssassin::dbg("uridnsbl: ".join (' ', @_));
843 }
844
845 # ---------------------------------------------------------------------------
846
847 1;

apache@apache.org
ViewVC Help
Powered by ViewVC 1.1.2