# SpamAssassin rules file: header tests
#
# Please don't modify this file as your changes will be overwritten with
# the next update. Use @@LOCAL_RULES_DIR@@/local.cf instead.
# See 'perldoc Mail::SpamAssassin::Conf' for details.
#
# This program is free software; you can redistribute it and/or modify
# it under the terms of either the Artistic License or the GNU General
# Public License as published by the Free Software Foundation; either
# version 1 of the License, or (at your option) any later version.
#
# See the file "License" in the top level of the SpamAssassin source
# distribution for more details.
#
###########################################################################

require_version @@VERSION@@

# Fires when From: is nothing but an address, optionally wrapped in angle
# brackets and optionally preceded by stray quotes or whitespace.
# NOTE(review): the pattern had been reduced to /^["\s]*\?\s*$/ (a literal
# question mark), which contradicts the description; the "<?\S+\@\S+\>"
# portion appears to have been eaten by a tag stripper.  Restored here --
# confirm against the upstream rules file.
header    NO_REAL_NAME  From =~ /^["\s]*\<?\S+\@\S+\>?\s*$/
describe  NO_REAL_NAME  From: does not include a real name

# Two digits immediately before the @ sign.
header    FROM_ENDS_IN_NUMS  From =~ /\d\d\@/
describe  FROM_ENDS_IN_NUMS  From: ends in numbers

# Two digits at the very start of the header value.
header    FROM_STARTS_WITH_NUMS  From =~ /^\d\d/
describe  FROM_STARTS_WITH_NUMS  From: starts with nums

# digits, letters, digits (case-insensitive) somewhere before the @ sign.
header    FROM_HAS_MIXED_NUMS  From =~ /\d+[a-z]+\d+\S*\@/i
describe  FROM_HAS_MIXED_NUMS  From: contains numbers mixed in with letters

# Faked addresses tend to come from big public sites. Stats show that
# 5 digits is enough to get a 1.0 s/o ratio; 4 is too low (probably due
# to folks called "jmason2002@yahoo.com" for example).
# NOTE(review): the ADDR_NUMS_AT_BIGSITE pattern below is truncated -- it has
# an unbalanced ")" and no closing "/" -- and its describe line is missing.
# The lost text cannot be recovered from this file; restore it from the
# upstream rules before relying on this rule.  Left verbatim on purpose.
header ADDR_NUMS_AT_BIGSITE ALL =~ /^(To|From|Cc|Reply-To):\s*)

# Address with nothing before the @ sign, or an empty "<>".
# A missing header is treated as unset@unset.unset.
header    FROM_NO_USER  From =~ /(?:^\@|<\@| \@[^<]*$|<>)/ [if-unset: unset@unset.unset]
describe  FROM_NO_USER  From: has no local-part before @ sign

# Same pattern and same if-unset default, applied to To:.
header    TO_NO_USER  To =~ /(?:^\@|<\@| \@[^<]*$|<>)/ [if-unset: unset@unset.unset]
describe  TO_NO_USER  To: has no local-part before @ sign

# Any whitespace inside the To:addr value.
header    TO_HAS_SPACES  To:addr =~ /\s/
describe  TO_HAS_SPACES  To: address contains spaces

# Header value is empty or whitespace only; a missing header is treated as
# the literal UNSET, which this pattern does not match.
header    TO_EMPTY  To =~ /^\s*$/ [if-unset: UNSET]
describe  TO_EMPTY  To: is empty

header    REPLY_TO_EMPTY  Reply-To =~ /^\s*$/ [if-unset: UNSET]
describe  REPLY_TO_EMPTY  Reply-To: is empty

# Underscore followed by a letters-then-digits or digits-then-letters run
# before the @ sign.
header    REPLY_TO_HAS_UNDERLINE_NUMS  Reply-To =~ /_\S?(?:[a-z]+\w*?\d+|\d+\w*?[a-z]+)\w*\@/i [if-unset: UNSET]
describe  REPLY_TO_HAS_UNDERLINE_NUMS  Reply-To: contains an underline and numbers/letters

# You don't want to let this match "'user@foo.bar'"
# (note single quotes) because some legitimate mailers seem to do that.
# The \1 backreference requires the double-quoted text to equal the address
# inside the angle brackets.
header    TO_ADDRESS_EQ_REAL  To =~ /^\s*"([^"@]+\@[^"@]+)"\s+<\1>\s*$/i
describe  TO_ADDRESS_EQ_REAL  To: repeats address as real name

# NOTE: this is what 100% valid undisclosed-recipients mails look like.
# If this gets a high score, that's a bug!
header    UNDISC_RECIPS  To =~ /^undisclosed-recipients?:\s*;$/
describe  UNDISC_RECIPS  Valid-looking To "undisclosed-recipients"

# also 100% valid
header    FAKED_UNDISC_RECIPS  To =~ /undisclosed[_ ]*recipient(?:s[^:]|[^s])/i
describe  FAKED_UNDISC_RECIPS  Faked To "Undisclosed-Recipients"

# A "?" followed later by "!", or the other way round.
header    PLING_QUERY  Subject =~ /\?.*!|!.*\?/
describe  PLING_QUERY  Subject has exclamation mark and question mark

header    SUBJ_HAS_UNIQ_ID  eval:check_for_unique_subject_id()
describe  SUBJ_HAS_UNIQ_ID  Subject contains a unique ID

# Six whitespace characters, or a tab next to other whitespace, followed by
# a non-space character.
header    SUBJ_HAS_SPACES  Subject =~ /(?:\s{6}|\t\s|\s\t)\S/
describe  SUBJ_HAS_SPACES  Subject contains lots of white space

header    SUBJ_ALL_CAPS  eval:subject_is_all_caps()
describe  SUBJ_ALL_CAPS  Subject is all capitals

# (allow this test to pass if there's no Message-Id header)
# The if-unset default contains an @, so the negated match does not fire.
header    MSGID_HAS_NO_AT  MESSAGEID !~ /\@/ [if-unset: NO@MSGID]
describe  MSGID_HAS_NO_AT  Message-Id has no @ sign

# 12 hex digits, "$", 8 hex, "$", 8 hex, then "@" with nothing after it.
header    MSGID_SPAMSIGN_1  MESSAGEID =~ /<[0-9a-f]{12,12}\$[0-9a-f]{8,8}\$[0-9a-f]{8,8}\@>/
describe  MSGID_SPAMSIGN_1  Message-Id generated by a spam tool

# a good spamsign from another list
header    MSGID_SPAMSIGN_ZEROES  MESSAGEID =~ /<0000[0-9a-f]{8}\$0000[0-9a-f]{4}\$0000[0-9a-f]{4}\@/
describe  MSGID_SPAMSIGN_ZEROES  Message-Id generated by spam tool (zeroes variant)

# a good spamsign from another list
header    MSGID_SPAMSIGN_6LETTER  MESSAGEID =~ /<[0-9][0-9][0-9][a-f]..[a-f]..[a-f].[a-f]\$[0-9a-f]{4}[a-f].[a-f].\$.[a-f][a-f]..[a-f][a-f].\@[a-z]{6}>/
describe  MSGID_SPAMSIGN_6LETTER  Message-Id generated by spam tool (6-letter variant)

header    MSGID_OE_SPAM_4ZERO  MESSAGEID =~ /<[a-f0-9]{12}\$[a-f0-9]{8}\$0000[a-f0-9]{4}\@/
describe  MSGID_OE_SPAM_4ZERO  Message-Id generated by spam tool (4-zeroes variant)

# NOTE(review): the next two lines look like the remains of two separate
# rules: the header is named MSGID_3_DOLLARS but the describe belongs to
# MSGID_NO_HOST, and the pattern matches neither description.  The text in
# between appears to have been lost; restore both rules from the upstream
# rules file.  Left verbatim on purpose.
header MSGID_3_DOLLARS MESSAGEID =~ /^(?:$|\s)/m
describe MSGID_NO_HOST Message-Id has no hostname

# killer test, this one!
header    MSGID_OUTLOOK_TIME  eval:check_outlook_timestamp_token()
describe  MSGID_OUTLOOK_TIME  Message-Id is fake (in Outlook Express format)

# Negated match: fires when Date: does NOT look like
# "[Day, ]DD Mon [CC]YY HH:MM[:SS] zone [(comment)]".
# A missing Date: is replaced by a well-formed sample so this rule stays quiet.
header    INVALID_DATE  Date !~ /^\s*(?:(?:Mon|Tue|Wed|Thu|Fri|Sat|Sun), )?[0-3 ]?[0-9] (?:Jan|Feb|Ma[ry]|Apr|Ju[nl]|Aug|Sep|Oct|Nov|Dec) (?:[12][901])?[0-9]{2} [0-2][0-9](?:\:[0-5][0-9]){1,2} (?:[+-][0-9]{4}|UT|[A-Z]{2,3}T)(?:\s+\(.*\))?\s*$/ [if-unset: Wed, 31 Jul 2002 16:41:57 +0200]
describe  INVALID_DATE  Invalid Date: header (not RFC 2822)

# allow +1300, NZ timezone
# (offsets of 1400 and above, in either direction, are flagged)
header    INVALID_DATE_TZ_ABSURD  Date =~ /[-+](?:1[4-9]\d\d|[2-9]\d\d\d)$/
describe  INVALID_DATE_TZ_ABSURD  Invalid Date: header (timezone does not exist)

# The character class holds the final letters of the month abbreviations.
header    DATE_YEAR_ZERO_FIRST  Date =~ /[nbrylgptvc]\s+0\d\d\d(?:\s|$)/
describe  DATE_YEAR_ZERO_FIRST  Invalid Date: year begins with zero

# Date: earlier than Received:, bucketed by how many hours it is off.
header    DATE_IN_PAST_03_06  eval:check_for_shifted_date('-6', '-3')
describe  DATE_IN_PAST_03_06  Date: is 3 to 6 hours before Received: date

header    DATE_IN_PAST_06_12  eval:check_for_shifted_date('-12', '-6')
describe  DATE_IN_PAST_06_12  Date: is 6 to 12 hours before Received: date

header    DATE_IN_PAST_12_24  eval:check_for_shifted_date('-24', '-12')
describe  DATE_IN_PAST_12_24  Date: is 12 to 24 hours before Received: date

header    DATE_IN_PAST_24_48  eval:check_for_shifted_date('-48', '-24')
describe  DATE_IN_PAST_24_48  Date: is 24 to 48 hours before Received: date

header    DATE_IN_PAST_48_96  eval:check_for_shifted_date('-96', '-48')
describe  DATE_IN_PAST_48_96  Date: is 48 to 96 hours before Received: date

header    DATE_IN_PAST_96_XX  eval:check_for_shifted_date('undef', '-96')
describe  DATE_IN_PAST_96_XX  Date: is 96 hours or more before Received: date

# Date: later than Received:, same bucketing.
header    DATE_IN_FUTURE_03_06  eval:check_for_shifted_date('3', '6')
describe  DATE_IN_FUTURE_03_06  Date: is 3 to 6 hours after Received: date

header    DATE_IN_FUTURE_06_12  eval:check_for_shifted_date('6', '12')
describe  DATE_IN_FUTURE_06_12  Date: is 6 to 12 hours after Received: date

header    DATE_IN_FUTURE_12_24  eval:check_for_shifted_date('12', '24')
describe  DATE_IN_FUTURE_12_24  Date: is 12 to 24 hours after Received: date

header    DATE_IN_FUTURE_24_48  eval:check_for_shifted_date('24', '48')
describe  DATE_IN_FUTURE_24_48  Date: is 24 to 48 hours after Received: date

header    DATE_IN_FUTURE_48_96  eval:check_for_shifted_date('48', '96')
describe  DATE_IN_FUTURE_48_96  Date: is 48 to 96 hours after Received: date

header    DATE_IN_FUTURE_96_XX  eval:check_for_shifted_date('96', 'undef')
describe  DATE_IN_FUTURE_96_XX  Date: is 96 hours or more after Received: date

# bug 681: Tony L. Svanstrom: rewrote ADVERT_CODE tests
header    ADVERT_CODE  Subject =~ /^\W*ADV\b/i
describe  ADVERT_CODE  Subject: starts with advertising tag

header    ADVERT_CODE2  Subject =~ /\bA\s*D\s*V\b/i
describe  ADVERT_CODE2  Subject: contains advertising tag

# Korean UCE Subject: lines are usually 8-bit, but are occasionally encoded
# with quoted-printable or base64.
#
# \xbc\xba\xc0\xce means "adult"
# \xb1\xa4\xb0\xed means "advertisement"
# \xc1\xa4\xba\xb8 means "information"
# \xc8\xab\xba\xb8 means "publicity"
#
# Each two byte sequence is one Korean letter; the spaces and periods are
# sometimes used to obscure the words.  \xb1\xa4\xb0\xed is the most common
# tag and is sometimes very obscured so we look harder.
#
header    KOREAN_UCE_SUBJECT  Subject =~ /[({[<][. ]*(?:\xbc\xba[. ]*\xc0\xce[. ]*)?(?:\xb1\xa4(?:[. ]*|[\x00-\x7f]{0,3})\xb0\xed|\xc1\xa4[. ]*\xba\xb8|\xc8\xab[. ]*\xba\xb8)[. ]*[)}\]>]/
describe  KOREAN_UCE_SUBJECT  Subject: contains Korean unsolicited email tag

header    FRIEND_AT_PUBLIC  To =~ /(?:yourdomain|you|your|public)\.(?:com|org|net)/i
describe  FRIEND_AT_PUBLIC  sent to you@you.com or similar

header    FRIEND_PUBLIC  ALL =~ /^(?:to|cc|from):.*friend\@public\.com/im
describe  FRIEND_PUBLIC  sent from or to friend@public.com

header    DOMAINS_CHEAP  Subject =~ /(?:new extensions|domain names) now only \$\s*\d+/i
describe  DOMAINS_CHEAP  Subject: domain names are cheap

header    DOMAIN_SUBJECT  Subject =~ /(?:\s(?:\.|dot\s+)(?:info|biz|name)|domain)\b.*\b(?:extension|info|regist(?:ry|ration|er)|submission)/i
describe  DOMAIN_SUBJECT  Subject: domain registration spam subject

header    NO_DNS_FOR_FROM  eval:check_for_from_dns()
describe  NO_DNS_FOR_FROM  Domain in From header has no MX or A DNS records
tflags    NO_DNS_FOR_FROM  net

header    FROM_AND_TO_SAME  eval:check_for_from_to_same()
describe  FROM_AND_TO_SAME  From and To are the same, but not exactly

header    BAD_HELO_WARNING  eval:check_for_bad_helo()
describe  BAD_HELO_WARNING  Fake name used in SMTP HELO command
tflags    BAD_HELO_WARNING  net

header    SUBJ_FULL_OF_8BITS  eval:check_subject_for_lotsa_8bit_chars()
describe  SUBJ_FULL_OF_8BITS  Subject is full of 8-bit characters

header    HEADER_8BITS  ALL =~ /[\x80-\xff]{3,}/
describe  HEADER_8BITS  Headers include 3 consecutive 8-bit characters

header    MDAEMON_2_7_4  Received =~ /with SMTP .MDaemon.v2.7.SP4.R./
describe  MDAEMON_2_7_4  Received via buggy SMTP server (MDaemon 2.7.4SP4R)

header    FAKED_IP_IN_RCVD  Received =~ /from [-0-9a-z\._]+_\[\d+\.\d+\.\d+\.\d+\] /i
describe  FAKED_IP_IN_RCVD  Received: contains a name with a faked IP-address

header    SMTPD_IN_RCVD  Received =~ /\(SMTPD32-\d+\..+\)/
describe  SMTPD_IN_RCVD  Received via SMTPD32 server (SMTPD32-n.n)

# Multizone / Multi meaning BLs first

# Osirusoft, like MAPS RBL+ is a multi-meaning BL, so it is treated separately
header    RCVD_IN_OSIRUSOFT_COM  rbleval:check_rbl('osirusoft', 'relays.osirusoft.com.')
describe  RCVD_IN_OSIRUSOFT_COM  Received via a relay in relays.osirusoft.com
tflags    RCVD_IN_OSIRUSOFT_COM  net

# X prefix was used to ensure that it was run at the end, but it's not needed
# anymore since we run the rule with rblreseval -- Marc
header    X_OSIRU_OPEN_RELAY  rbleval:check_rbl_results_for('osirusoft', '127.0.0.2')
describe  X_OSIRU_OPEN_RELAY  DNSBL: sender is Confirmed Open Relay
tflags    X_OSIRU_OPEN_RELAY  net

# Description previously read "in in a dialup block" (duplicated word).
header    X_OSIRU_DUL  rbleval:check_rbl_results_for('osirusoft', '127.0.0.3')
describe  X_OSIRU_DUL  DNSBL: sender ip address is in a dialup block
tflags    X_OSIRU_DUL  net

header    X_OSIRU_SPAM_SRC  rbleval:check_rbl_results_for('osirusoft', '127.0.0.4')
describe  X_OSIRU_SPAM_SRC  DNSBL: sender is Confirmed Spam Source
tflags    X_OSIRU_SPAM_SRC  net

header    X_OSIRU_SPAMWARE_SITE  rbleval:check_rbl_results_for('osirusoft', '127.0.0.6')
describe  X_OSIRU_SPAMWARE_SITE  DNSBL: sender is a Spamware site or vendor
tflags    X_OSIRU_SPAMWARE_SITE  net

header    X_OSIRU_DUL_FH  rbleval:check_rbl('osirusoft-dul-firsthop', 'relays.osirusoft.com.')
describe  X_OSIRU_DUL_FH  Received from first hop dialup listed in relays.osirusoft.com
tflags    X_OSIRU_DUL_FH  net nice

# Now, single zone BLs follow:

# the new first arg for check_rbl() indicates what type of check it is;
# each type of check is stored in a separate set, and if an IP has already
# been hit in that set, it will not be checked with any other zone in
# that set.
header    RCVD_IN_RELAYS_ORDB_ORG  rbleval:check_rbl('relay', 'relays.ordb.org.')
describe  RCVD_IN_RELAYS_ORDB_ORG  Received via a relay in relays.ordb.org
tflags    RCVD_IN_RELAYS_ORDB_ORG  net

# SBL is the Spamhaus Block List: http://www.spamhaus.org/sbl/ .
header    RCVD_IN_SBL  rbleval:check_rbl('relay', 'sbl.spamhaus.org.')
describe  RCVD_IN_SBL  Received via SBLed relay, see http://www.spamhaus.org/sbl/
tflags    RCVD_IN_SBL  net

header    RCVD_IN_ORBS  rbleval:check_rbl('relay', 'orbs.dorkslayers.com.')
describe  RCVD_IN_ORBS  Received via a relay in orbs.dorkslayers.com
tflags    RCVD_IN_ORBS  net

# http://www.blitzed.org/opm/ , a recommended open-proxies list.
header    RCVD_IN_OPM  rbleval:check_rbl('relay', 'opm.blitzed.org.')
describe  RCVD_IN_OPM  Received via a relay in opm.blitzed.org
tflags    RCVD_IN_OPM  net

# DSBL catches open relays, badly-installed CGI scripts and open SOCKS and
# HTTP proxies.  list.dsbl.org lists servers tested by "trusted" users,
# multihop.dsbl.org lists servers which open SMTP servers relay through,
# unconfirmed.dsbl.org lists servers tested by "untrusted" users.
# See http://dsbl.org/ for full details.
# This is effectively an open relay BL, put it in the relay set too -- Marc
header    RCVD_IN_DSBL  rbleval:check_rbl('relay', 'list.dsbl.org.')
describe  RCVD_IN_DSBL  Received via a relay in list.dsbl.org
tflags    RCVD_IN_DSBL  net

# Warning, several big ISP's mail relays (not open to outside people)
# are listed on multihop. Do not set a high score on this. Note too
# that those IPs often are listed in unconfirmed.dsbl.org at the same
# time.
header    RCVD_IN_MULTIHOP_DSBL  rbleval:check_rbl('multihop', 'multihop.dsbl.org.')
describe  RCVD_IN_MULTIHOP_DSBL  Received via a relay in multihop.dsbl.org
tflags    RCVD_IN_MULTIHOP_DSBL  net

# We want to count this in the open relay set so that someone doesn't get scored
# twice (at least by default) for being listed there and in some other relay BL.
# Users can request a double hit and double score by changing 'relay' with
# 'unconfirmed_dsbl' or something like that, but I don't think it should be
# a default -- Marc
header    RCVD_IN_UNCONFIRMED_DSBL  rbleval:check_rbl('relay', 'unconfirmed.dsbl.org.')
describe  RCVD_IN_UNCONFIRMED_DSBL  Received via a relay in unconfirmed.dsbl.org
tflags    RCVD_IN_UNCONFIRMED_DSBL  net

# Other miscellaneous RBLs are listed here:
header    RCVD_IN_RFCI  rbleval:check_rbl('rfci', 'ipwhois.rfc-ignorant.org.')
describe  RCVD_IN_RFCI  Received via a relay in ipwhois.rfc-ignorant.org
tflags    RCVD_IN_RFCI  net

header    HABEAS_HIL  rbleval:check_rbl('hil', 'hil.habeas.com.')
describe  HABEAS_HIL  Sender is on www.habeas.com Habeas Infringer List
tflags    HABEAS_HIL  net

# HABEAS_SWE is not defined in this part of the file.
meta      HABEAS_VIOLATOR  (HABEAS_SWE && HABEAS_HIL)
describe  HABEAS_VIOLATOR  On Habeas Infringer List and using Habeas warrant mark
tflags    HABEAS_VIOLATOR  net

# bondedsender.org provides an RBL-style whitelist for trusted relays
header    RCVD_IN_BONDEDSENDER  rbleval:check_rbl('relay', 'query.bondedsender.org.')
describe  RCVD_IN_BONDEDSENDER  Bonded sender, see http://www.bondedsender.org/referred.html
tflags    RCVD_IN_BONDEDSENDER  net nice

# NOTE: commercial test, see README file for details
header    RCVD_IN_BL_SPAMCOP_NET  rbleval:check_rbl('spamcop', 'bl.spamcop.net.')
describe  RCVD_IN_BL_SPAMCOP_NET  Received via a relay in bl.spamcop.net
tflags    RCVD_IN_BL_SPAMCOP_NET  net

# NOTE: commercial tests, see README file for details
header    RCVD_IN_RBL  rbleval:check_rbl('rbl', 'blackholes.mail-abuse.org.')
describe  RCVD_IN_RBL  Received via RBLed relay, see http://www.mail-abuse.org/rbl/
tflags    RCVD_IN_RBL  net

header    RCVD_IN_RSS  rbleval:check_rbl('relay', 'relays.mail-abuse.org.')
describe  RCVD_IN_RSS  Received via RSSed relay, see http://www.mail-abuse.org/rss/
tflags    RCVD_IN_RSS  net

header    RCVD_IN_DUL  rbleval:check_rbl('dialup', 'dialups.mail-abuse.org.')
describe  RCVD_IN_DUL  Received from dialup, see http://www.mail-abuse.org/dul/
tflags    RCVD_IN_DUL  net
header    RCVD_IN_DUL_FH  rbleval:check_rbl('dialup-firsthop', 'dialups.mail-abuse.org.')
describe  RCVD_IN_DUL_FH  Received from first hop dialup, see http://www.mail-abuse.org/dul/
tflags    RCVD_IN_DUL_FH  net

header    RCVD_IN_NJABL  rbleval:check_rbl('njabl', 'dnsbl.njabl.org.')
describe  RCVD_IN_NJABL  Received via a relay in dnsbl.njabl.org
tflags    RCVD_IN_NJABL  net

# note: X_ prefix so X_NJABL_OPEN_PROXY happens after RCVD_IN_NJABL.
header    X_NJABL_OPEN_PROXY  rbleval:check_rbl_results_for('njabl', '127.0.0.2')
describe  X_NJABL_OPEN_PROXY  NJABL: sender is proxy/relay/formmail/spam-source
tflags    X_NJABL_OPEN_PROXY  net

header    X_NJABL_DIALUP  rbleval:check_rbl_results_for('njabl', '127.0.0.3')
describe  X_NJABL_DIALUP  NJABL: sender is on dialup/dynamic IP
tflags    X_NJABL_DIALUP  net

header    LOTS_OF_CC_LINES  eval:check_lots_of_cc_lines()
describe  LOTS_OF_CC_LINES  Lots and lots of Cc: headers

header    FORGED_RCVD_TRAIL  eval:check_for_forged_received_trail()
describe  FORGED_RCVD_TRAIL  trail of Received: headers seems to be forged

header    FORGED_AOL_RCVD  eval:check_for_fake_aol_relay_in_rcvd()
describe  FORGED_AOL_RCVD  Received forged, contains fake AOL relays

# The (?!br) lookahead excludes names ending in ".br" ahead of the
# parenthesised telesp.net.br DSL hostname.
header    FORGED_TELESP_RCVD  Received =~ /\.(?!br).. \(\d+-\d+-\d+-\d+\.dsl\.telesp\.net\.br /
describe  FORGED_TELESP_RCVD  Contains forged hostname for a DSL IP in Brazil

# a forged Hotmail message; host HELO'd as hotmail.com, but it wasn't
header    FORGED_HOTMAIL_RCVD  eval:check_for_forged_hotmail_received_headers()
describe  FORGED_HOTMAIL_RCVD  Forged hotmail.com 'Received:' header found

# this, by comparison is more common: from was @hotmail.com, but it wasn't
header    SEMIFORGED_HOTMAIL_RCVD  eval:check_for_no_hotmail_received_headers()
describe  SEMIFORGED_HOTMAIL_RCVD  hotmail.com 'From' address, but no 'Received:'

header    FORGED_EUDORAMAIL_RCVD  eval:check_for_forged_eudoramail_received_headers()
describe  FORGED_EUDORAMAIL_RCVD  Forged eudoramail.com 'Received:' header found

header    FORGED_YAHOO_RCVD  eval:check_for_forged_yahoo_received_headers()
describe  FORGED_YAHOO_RCVD  'From' yahoo.com does not match 'Received' headers

header    FORGED_JUNO_RCVD  eval:check_for_forged_juno_received_headers()
describe  FORGED_JUNO_RCVD  'From' juno.com does not match 'Received' headers

header    FORGED_GW05_RCVD  eval:check_for_forged_gw05_received_headers()
describe  FORGED_GW05_RCVD  Forged 'by gw05' 'Received:' header found

# real hotmail outgoings use 'mc1-s3.law16...' or 'f206..' etc.
header    FORGED_MX_HOTMAIL  Received =~ /^from mx\d+\.hotmail\.com /m
describe  FORGED_MX_HOTMAIL  Forged hotmail.com Received 'from mx' header

# forgery meta-rules: more reliable than their inputs

# Forged trail plus at least one provider-specific forgery rule.
meta      CONFIRMED_FORGED  (FORGED_RCVD_TRAIL && (FORGED_AOL_RCVD || FORGED_HOTMAIL_RCVD || FORGED_EUDORAMAIL_RCVD || FORGED_YAHOO_RCVD || FORGED_JUNO_RCVD || FORGED_GW05_RCVD || FORGED_MX_HOTMAIL))
describe  CONFIRMED_FORGED  Received headers are forged

# More than one provider-specific forgery rule hit.
# NOTE(review): FORGED_MX_HOTMAIL is counted in CONFIRMED_FORGED but not
# here -- confirm whether that is intentional.
meta      MULTI_FORGED  ((FORGED_AOL_RCVD + FORGED_HOTMAIL_RCVD + FORGED_EUDORAMAIL_RCVD + FORGED_YAHOO_RCVD + FORGED_JUNO_RCVD + FORGED_GW05_RCVD) > 1)
describe  MULTI_FORGED  Received headers indicate multiple forgeries

# Sep 23 2002 jm: another spamhaus rule
header    RCVD_BY_QVES_COM  Received =~ /by email.qves.com with Microsoft/
describe  RCVD_BY_QVES_COM  Sent by a known spamhaus (qves)

header    NONEXISTENT_CHARSET  Content-Type =~ /charset=.?DEFAULT/
describe  NONEXISTENT_CHARSET  Character set doesn't exist

header    CHARSET_FARAWAY_HEADERS  eval:check_for_faraway_charset_in_headers()
describe  CHARSET_FARAWAY_HEADERS  A foreign language charset used in headers
tflags    CHARSET_FARAWAY_HEADERS  userconf

# The whole value is 48 or more hex digits and dots.
header    X_MAILER_GIBBERISH  X-Mailer =~ /^[A-Fa-f0-9\.]{48,}$/
describe  X_MAILER_GIBBERISH  'X-Mailer' line contains gibberish

header    X_PRIORITY_HIGH  X-Priority =~ /^1/
describe  X_PRIORITY_HIGH  Sent with 'X-Priority' set to high

header    X_MSMAIL_PRIORITY_HIGH  X-Msmail-Priority =~ /^High/
describe  X_MSMAIL_PRIORITY_HIGH  Sent with 'X-Msmail-Priority' set to high

header    MSG_ID_ADDED_BY_MTA_2  eval:check_for_mta_message_id_later()
describe  MSG_ID_ADDED_BY_MTA_2  'Message-Id' was added by a relay (2)

header    MSG_ID_ADDED_BY_MTA_3  eval:check_for_mta_message_id_first()
describe  MSG_ID_ADDED_BY_MTA_3  'Message-Id' was added by a relay (3)

# Two comma-separated parts, neither containing a quote, "<" or "(".
header    MANY_FROMS  From =~ /^[^\"\<\(]+, [^\"\<\(]+$/
describe  MANY_FROMS  'From' contains more than one address

# *so* many spams come from here.
header    BTAMAIL_HEADER  ALL =~ /\bbtamail\.net\.cn/i
describe  BTAMAIL_HEADER  Header contains an address from btamail.net.cn

header    USER_IN_BLACKLIST  eval:check_from_in_blacklist()
describe  USER_IN_BLACKLIST  From: address is in the user's black-list
tflags    USER_IN_BLACKLIST  userconf

header    USER_IN_WHITELIST  eval:check_from_in_whitelist()
describe  USER_IN_WHITELIST  From: address is in the user's white-list
tflags    USER_IN_WHITELIST  userconf nice

# noticed this implied in passing in Dan's CVS messages. ;) no nonspam hits
# but only a small number of spam ones, for me.
# (Case-sensitive: only the upper-case spelling matches.)
header    HTML_ALL_CAPS  Content-Type =~ /TEXT\/HTML/
describe  HTML_ALL_CAPS  Content type is "TEXT/HTML" in all caps

# this variant is local, using the Received hdr itself...
header    ROUND_THE_WORLD_LOCAL  eval:check_for_round_the_world_received_helo()
describe  ROUND_THE_WORLD_LOCAL  Received: says mail bounced around the world (HELO)

# and this one uses a DNS reverse lookup.  so now we can use a version
# of this test without a net connection, or in mass-check etc.
header    ROUND_THE_WORLD  eval:check_for_round_the_world_received_revdns()
describe  ROUND_THE_WORLD  Received: says mail bounced around the world (DNS)
tflags    ROUND_THE_WORLD  net

# this is a quite common false positive, as it's legal to remove a To but leave
# a CC.  so don't score it high.
header    MISSING_HEADERS  eval:check_for_missing_to_header()
describe  MISSING_HEADERS  Missing To: header

header    SUSPICIOUS_RECIPS  eval:similar_recipients('0.6','1.2')
describe  SUSPICIOUS_RECIPS  Similar addresses in recipient list

header    VERY_SUSP_RECIPS  eval:similar_recipients('1.2','undef')
describe  VERY_SUSP_RECIPS  Very similar addresses in recipient list

header    SORTED_RECIPS  eval:sorted_recipients()
describe  SORTED_RECIPS  Recipient list is sorted by address

header    USER_IN_WHITELIST_TO  eval:check_to_in_whitelist()
describe  USER_IN_WHITELIST_TO  User is listed in 'whitelist_to'
tflags    USER_IN_WHITELIST_TO  userconf nice

header    USER_IN_MORE_SPAM_TO  eval:check_to_in_more_spam()
describe  USER_IN_MORE_SPAM_TO  User is listed in 'more_spam_to'
tflags    USER_IN_MORE_SPAM_TO  userconf nice

header    USER_IN_ALL_SPAM_TO  eval:check_to_in_all_spam()
describe  USER_IN_ALL_SPAM_TO  User is listed in 'all_spam_to'
tflags    USER_IN_ALL_SPAM_TO  userconf nice

# the old version allowed more non-letter characters to be considered
# spacing, but suffered from somewhat higher false positive rates
header    GAPPY_SUBJECT  Subject =~ /\b(?:[a-z][-_. ]{1,3}){4,}/i
describe  GAPPY_SUBJECT  Subject: contains G.a.p.p.y-T.e.x.t

### header existence tests (description is added automatically)

# X-Fix example: NTMail fixed non RFC822 compliant EMail message
#
# X-PMFLAGS is all caps
#
# Headers that seem to only be used by a single spamming software and
# are found together in the same message:
#   1. X-MailingID and X-ServerHost
#   2. X-Stormpost-To and X-List-Unsubscribe
#
# not spammish: X-EM-Registration, X-EM-Version, X-Antiabuse, X-List-Host,
#               X-Message-Id
# bad FP rate: Comment, Date-warning

# these headers have very high correlation with spam
header    X_ENC_PRESENT        exists:X-Encoding
header    X_ESMTP              exists:x-esmtp
header    X_LIBRARY            exists:X-Library
header    X_LIST_UNSUBSCRIBE   exists:X-List-Unsubscribe
header    X_MAIL_ID_PRESENT    exists:X-MailingID
header    X_PMFLAGS_PRESENT    exists:X-PMFLAGS
header    X_PRECEDENCE_REF     exists:X-Precedence-Ref
header    X_SERV_HOST_PRESENT  exists:X-ServerHost
header    X_STORMPOST_TO       exists:X-Stormpost-To
header    X_X_PRESENT          exists:X-x
header    X_FIX_PRESENT        exists:X-Fix
header    COMPLAIN_TO          exists:Complain-To
header    X_VMP_TEXT           exists:X-VMP-Text
header    X_GCMULTI            exists:X-GCMulti
header    X_MIME_KEY           exists:X-Mime-Key
header    MICROSOFT            exists:microsoft

describe  X_ENC_PRESENT        Message has X-Encoding header
describe  X_ESMTP              Message has x-esmtp header
describe  X_LIBRARY            Message has X-Library header
describe  X_LIST_UNSUBSCRIBE   Message has X-List-Unsubscribe header
describe  X_MAIL_ID_PRESENT    Message has X-MailingID header
describe  X_PMFLAGS_PRESENT    Message has X-PMFLAGS header
describe  X_PRECEDENCE_REF     Message has X-Precedence-Ref header
describe  X_SERV_HOST_PRESENT  Message has X-ServerHost header
describe  X_STORMPOST_TO       Message has X-Stormpost-To header
describe  X_X_PRESENT          Message has X-x header
describe  X_FIX_PRESENT        Message has X-Fix header
describe  COMPLAIN_TO          Message has Complain-To header
describe  X_VMP_TEXT           Message has X-VMP-Text header
describe  X_GCMULTI            Message has X-GCMulti header
describe  X_MIME_KEY           Message has X-Mime-Key header
describe  MICROSOFT            Message has microsoft header

# Matches only this exact mixed-case spelling at the start of a header line.
header    MIME_ODD_CASE  ALL =~ /\nMiME-Version: /s
describe  MIME_ODD_CASE  MiME-Version header (oddly capitalized)

# Double-underscore rules are inputs to the meta rules that follow.
header    __HAS_MIMEOLE     exists:X-MimeOLE
header    __HAS_MSMAIL_PRI  exists:X-MSMail-Priority
meta      MISSING_MIMEOLE   (__HAS_MSMAIL_PRI && !__HAS_MIMEOLE)
describe  MISSING_MIMEOLE   Message has X-MSMail-Priority, but no X-MimeOLE

header    __HAS_X_MAILER           exists:X-Mailer
header    __HAS_OUTLOOK_IN_MAILER  X-Mailer =~ /Microsoft (CDO|Outlook)\b/
meta      MISSING_OUTLOOK_NAME     ((__HAS_MIMEOLE || __HAS_MSMAIL_PRI) && __HAS_X_MAILER && !__HAS_OUTLOOK_IN_MAILER)
describe  MISSING_OUTLOOK_NAME     Message looks like Outlook, but isn't

header    __HAS_X_PRIORITY  exists:X-Priority
meta      PRIORITY_NO_NAME  ((__HAS_X_PRIORITY || __HAS_MSMAIL_PRI) && !__HAS_X_MAILER)
describe  PRIORITY_NO_NAME  Message has priority setting, but no X-Mailer

# Subject phrase tests.
header    SUBJ_AS_SEEN  Subject =~ /\bAs Seen/i
describe  SUBJ_AS_SEEN  Subject contains "As Seen"

header    SUBJ_DOLLARS  Subject =~ /^\$[0-9.,]+\b/
describe  SUBJ_DOLLARS  Subject starts with dollar amount

header    SUBJ_DOUBLE_YOUR  Subject =~ /Double Your/i
describe  SUBJ_DOUBLE_YOUR  Subject contains "Double Your"

header    SUBJ_FOR_ONLY  Subject =~ /For Only/i
describe  SUBJ_FOR_ONLY  Subject contains "For Only"

# Case-sensitive on purpose (no /i flag).
header    SUBJ_FREE_CAP  Subject =~ /FRE{2,}|F.R.E.E\b/
describe  SUBJ_FREE_CAP  Subject contains "FREE" in CAPS

header    SUBJ_FREE_INSTANT  Subject =~ /Free Instant/i
describe  SUBJ_FREE_INSTANT  Subject contains "Free Instant"

header    SUB_FREE_OFFER  Subject =~ /^fre{2,}\b/i
describe  SUB_FREE_OFFER  Subject starts with "Free"

# "guaranteed" at the start in any case, or upper-case GUARANTEE anywhere.
header    SUBJ_GUARANTEED  Subject =~ /^guaranteed|(?-i:GUARANTEE)/i
describe  SUBJ_GUARANTEED  Subject GUARANTEED

header    SUB_HELLO  Subject =~ /^hello\b/i
describe  SUB_HELLO  Subject starts with "Hello"

header    SUBJ_LIFE_INSURANCE  Subject =~ /life\s+insurance/i
describe  SUBJ_LIFE_INSURANCE  Subject includes "life insurance"

header    SUBJ_NOW_ONLY  Subject =~ /\bNow Only/i
describe  SUBJ_NOW_ONLY  Subject contains "Now Only"

header    SUBJ_RIPPED  Subject =~ /Ripped & Strong/i
describe  SUBJ_RIPPED  Subject contains "Ripped & Strong"

header    SUBJ_VIAGRA  Subject =~ /viagra/i
describe  SUBJ_VIAGRA  Subject includes "viagra"

header    SUBJ_YOUR_DEBT  Subject =~ /Your (?:Bills|Debt|Credit)/i
describe  SUBJ_YOUR_DEBT  Subject contains "Your Bills" or similar

header    SUBJ_YOUR_FAMILY  Subject =~ /Your Family/i
describe  SUBJ_YOUR_FAMILY  Subject contains "Your Family"

header    SUBJ_YOUR_OWN  Subject =~ /Your Own/i
describe  SUBJ_YOUR_OWN  Subject contains "Your Own"

# An unexpanded "$name" where the relay host name should be.
header    VAR_REF_IN_RECEIVED  Received =~ /from \$\S+ \(/
describe  VAR_REF_IN_RECEIVED  Received contains a $variable reference

# the real services never HELO as 'foo.com', instead 'mail.foo.com' or
# something like that.  Note: be careful when expanding this... legit dotcom
# HELOers include: hotmail.com, drizzle.com, lockergnome.com.
# ("excite" used to be listed twice in the alternation; the duplicate was
# redundant and has been dropped -- the set of matched names is unchanged.)
header    RCVD_FAKE_HELO_DOTCOM  Received =~ /^from (?:msn|yahoo|yourwebsite|lycos|excite|cs|aol|localhost|koreanmail|allexecs|mydomain|juno|eudoramail|compuserve|desertmail|caramail)\.com \(/m
describe  RCVD_FAKE_HELO_DOTCOM  Received contains a faked HELO hostname

# catch spammers who HELO as a host at a big site (aol, hotmail etc.) but whose
# rDNS lists them at some dialup ISP or another.
header    RCVD_FAKE_HELO_DOTCOM_2  eval:check_for_fake_dotcom_helo()
describe  RCVD_FAKE_HELO_DOTCOM_2  Received contains a faked HELO hostname (2)

header    USERNAME_IN_SUBJECT  eval:check_for_to_in_subject()
describe  USERNAME_IN_SUBJECT  To: username at front of subject

header    LOSE_POUNDS  Subject =~ /\bLose .*(?:pounds|lbs|weight)/i
describe  LOSE_POUNDS  Subject talks about losing pounds

header    EXTRA_MPART_TYPE  Content-Type =~ /(?:\s*multipart\/)?.* type=/i
describe  EXTRA_MPART_TYPE  Message with extraneous Content-type:...type= header

header    TO_RECIP_MARKER  To =~ /\#recipient\#/
describe  TO_RECIP_MARKER  To header contains 'recipient' marker

header    SAVINGS  Subject =~ /\bsave\s+(?:on\s+your|up\s+to|big|over|at\s+least|\d+\%|you)(?:\s|\b|$)/i
describe  SAVINGS  Subject talks about savings

# MIME boundary tests; spam tools use distinctive patterns.
header    MIME_BOUND_DASH_DIGIT  Content-Type =~ /boundary="_-{10}=_\d{19,22}"/
describe  MIME_BOUND_DASH_DIGIT  Spam tool pattern in MIME boundary

header    MIME_BOUND_HASHES  Content-type =~ /boundary=\"\#{10}\"/
describe  MIME_BOUND_HASHES  Spam tool pattern in MIME boundary

header    MIME_BOUND_DIGITS_4  Content-Type =~ /boundary=\"_----------=_\d{18}\"/
describe  MIME_BOUND_DIGITS_4  Spam tool pattern in MIME boundary

header    MIME_BOUND_DIGITS_7  Content-Type =~ /boundary=\d{9}\.\d{13}/
describe  MIME_BOUND_DIGITS_7  Spam tool pattern in MIME boundary

header    MIME_BOUND_HEX_24  Content-Type =~ /boundary=\"[\dA-F]{24}\"/
describe  MIME_BOUND_HEX_24  Spam tool pattern in MIME boundary

header    MIME_BOUND_MA  Content-Type =~ /boundary=\"----=_[a-zA-Z0-9]{8}_[a-zA-Z0-9]{8}_MA\"/
describe  MIME_BOUND_MA  Spam tool pattern in MIME boundary

header    MIME_BOUND_MANY_HEX  Content-Type =~ /boundary="[\da-f]{8}(?:-[\da-f]{4}){3}-[\da-f]{12}"/
describe  MIME_BOUND_MANY_HEX  Spam tool pattern in MIME boundary

# Fires when "NextPart" is present but not in the form accepted by
# __NEXTPART_NORMAL.
header    __NEXTPART_ALL       Content-Type =~ /NextPart/
header    __NEXTPART_NORMAL    Content-Type =~ /="(?:----_?=_)?NextPart_[\dA-F]{3}(_[\dA-F]{3,8})?_[\dA-F]{8}\.[\dA-F]{8}"/
meta      MIME_BOUND_NEXTPART  (__NEXTPART_ALL && !__NEXTPART_NORMAL)
describe  MIME_BOUND_NEXTPART  Spam tool pattern in MIME boundary

header    MIME_BOUND_OPTIN  Content-Type =~ /boundary=\"[A-F\d]{8}-[A-F\d]{4}-[A-F\d]{4}-[A-F\d]{4}-[A-F\d]{12}OPTIN\"/
describe  MIME_BOUND_OPTIN  Spam tool pattern in MIME boundary

header    MIME_BOUND_MAIL_BOUND  Content-Type =~ /boundary=\"____MAIL_BOUNDARY____\"/
describe  MIME_BOUND_MAIL_BOUND  Spam tool pattern in MIME boundary

header    MIME_BOUND_TEP  Content-Type =~ /boundary="TEP-\d{9,10}\.\d{10}\.\d{10}"/
describe  MIME_BOUND_TEP  Spam tool pattern in MIME boundary

header    MIME_BOUND_RKFINDY  Content-Type =~ /boundary=\"=_NextPart_2rfkindysadvnqw3nerasdf\"/
describe  MIME_BOUND_RKFINDY  Spam tool pattern in MIME boundary (rfkindy)

# bug 637
# more specific FROM_HAS_MIXED_NUMS, less FPs
header    FROM_HAS_MIXED_NUMS2  From =~ /\w{2,}\d{4,}[a-z]{1,2}\d{2,}\@/i
describe  FROM_HAS_MIXED_NUMS2  From address matches known spammer format

# A missing Date: header is replaced by the literal UNSET, which the
# pattern then matches.
header    DATE_MISSING  Date =~ /^UNSET$/ [if-unset: UNSET]
describe  DATE_MISSING  Missing Date: header

# freqs: 0.001 0.003 0.000 1.00 2.66 POST_IN_RCVD
header    POST_IN_RCVD  Received =~ / Post\.(?:sk|cz)/
describe  POST_IN_RCVD  Received contains fake 'Post.cz' hostname

header    TO_INVESTORS  To =~ /\bInvestors\@/
describe  TO_INVESTORS  To: non-existent 'Investors' address

# Negated match: fires when To: contains none of the accepted forms
# (user@host.tld, undisclosed-recipients:, a bang path in angle brackets,
# or an empty value).
header    TO_MALFORMED  To !~ /(?:(?:\"[^\"]+\"|\S+)\@\S+\.\S+|undisclosed-recipients:|<\S+(?:\!\S+){1,}>|^\s*$)/ [if-unset: unset@unset.unset]
describe  TO_MALFORMED  To: has a malformed address

# jm: somehow these guys keep slipping through, and they're very persistent.
# they use both azoogle.com and azogle.com, but this header is always
# in this format.
header    AZOOGLE  X-Info =~ /service to abuse\@azoogle\.com$/
describe  AZOOGLE  From azoogle.com, azogle.com, etc.

header    SUBJECT_APPROVED  Subject =~ /approv(?:ed|al).?[.!*]/i
describe  SUBJECT_APPROVED  Subject talks about being approved

header    SUBJ_HAS_TIME_ID  Subject =~ /\sTime[: ]+\d+:\d+:\d+ [AP]M\s*$/i
describe  SUBJ_HAS_TIME_ID  Subject has a Time ID

# An "opt"-style phrase somewhere in the headers, provided the Subject:
# (or Resent-Subject:) line does not itself match.
header    __OPT_HEADER_SUBJ  ALL =~ /^(?:Resent-)?Subject:.*opt.?(in|out|oem|ed|ion-in|[\d@])(?:\b|\d|\@)/im
header    __OPT_HEADER_ALL   ALL =~ /opt.?(?:in|out|oem|ed|ion-in|[\d@])(?:\b|\d|\@)/i
meta      OPT_HEADER         (__OPT_HEADER_ALL && !__OPT_HEADER_SUBJ)
describe  OPT_HEADER         Headers include an "opt"ed phrase

# Most/all of these require that From addresses do not start with numbers.
header    FROM_NUM_AT_WEBMAIL  From:addr =~ /^\d\S+\@(?:msn\.com|flashmail\.com|mailexcite\.com|prodigy\.net|yahoo\.\S+|hotmail\.com|eudoramail\.com|aol\.com|excite\.com|email\.com|earthlink\.net|geocities\.com|hknetmail\.com|angelfire\.com)/i
describe  FROM_NUM_AT_WEBMAIL  From address is webmail, but starts with a number

# Six digits immediately before the @ of one of the listed providers.
header    FROM_WEBMAIL_ENDS_IN_NUMS6  From:addr =~ /\d\d\d\d\d\d\@(?:aol|msn|bigfoot|compuserve|excite|hotmail|juno|prodigy|yahoo)\.(?:com|net|org)/i
describe  FROM_WEBMAIL_ENDS_IN_NUMS6  From address is webmail, and ends in lots of numbers

header ADDR_FREE From =~ /\b(?-i:F)ree(?-i:[ A-Z]).*