# SpamAssassin rules file: tests.
###########################################################################

require_version 2.40

# ---------------------------------------------------------------------------
# From: / To: / Reply-To: address sanity tests.
# Each rule is a "header" line (what to match) plus a "describe" line (the
# text shown in the report).  One directive per physical line.

# Hits when From: consists solely of a bare address (optionally quoted and/or
# wrapped in angle brackets), i.e. there is no display name next to it.
header   NO_REAL_NAME  From =~ /^["\s]*\<?\S+\@\S+\>?\s*$/
describe NO_REAL_NAME  From: does not include a real name

header   FROM_ENDS_IN_NUMS  From =~ /\d\d\@/
describe FROM_ENDS_IN_NUMS  From: ends in numbers

header   FROM_STARTS_WITH_NUMS  From =~ /^\d\d/
describe FROM_STARTS_WITH_NUMS  From: starts with nums

header   FROM_HAS_MIXED_NUMS  From =~ /\d+[a-z]+\d+\S*\@/i
describe FROM_HAS_MIXED_NUMS  From: contains numbers mixed in with letters

# (contrib: David Hull - fixed to support "foo @ foo . com" )
# The if-unset placeholder has a local-part, so a missing header cannot hit.
header   FROM_NO_USER  From =~ /(?:^\@|<\@| \@[^<]*$|<>)/ [if-unset: unset@unset.unset]
describe FROM_NO_USER  From: has no local-part before @ sign

header   TO_NO_USER  To =~ /(?:^\@|<\@| \@[^<]*$|<>)/ [if-unset: unset@unset.unset]
describe TO_NO_USER  To: has no local-part before @ sign

header   TO_HAS_SPACES  To:addr =~ /\s/
describe TO_HAS_SPACES  To: address contains spaces

# "UNSET" is substituted for a missing header so that only a header which is
# present but blank matches.
header   TO_EMPTY  To =~ /^\s*$/ [if-unset: UNSET]
describe TO_EMPTY  To: is empty

header   REPLY_TO_EMPTY  Reply-To =~ /^\s*$/ [if-unset: UNSET]
describe REPLY_TO_EMPTY  Reply-To: is empty

# You don't want to let this match "'user@foo.bar'"
# (note single quotes) because some legitimate mailers seem to do that.
header   TO_ADDRESS_EQ_REAL  To =~ /^\s*"([^"@]+\@[^"@]+)"\s+<\1>\s*$/i
describe TO_ADDRESS_EQ_REAL  To: repeats address as real name

# NOTE: this is what 100% valid undisclosed-recipients mails look like.
# If this gets a high score, that's a bug!
header   UNDISC_RECIPS  To =~ /^undisclosed-recipients?:\s*;$/
describe UNDISC_RECIPS  Valid-looking To "undisclosed-recipients"

# also 100% valid
header   FAKED_UNDISC_RECIPS  To =~ /undisclosed[_ ]*recipient(?:s[^:]|[^s])/i
describe FAKED_UNDISC_RECIPS  Faked To "Undisclosed-Recipients"

# ---------------------------------------------------------------------------
# Subject: tests

header   PLING_QUERY  Subject =~ /\?.*!|!.*\?/
describe PLING_QUERY  Subject has exclamation mark and question mark

header   SUBJ_HAS_UNIQ_ID  eval:check_for_unique_subject_id()
describe SUBJ_HAS_UNIQ_ID  Subject contains a unique ID

header   SUBJ_HAS_SPACES  Subject =~ /(?: {6}|\t)\S/
describe SUBJ_HAS_SPACES  Subject contains lots of white space

header   SUBJ_ENDS_IN_SPACE  Subject =~ /(?:[ ]{6}|\t)$/
describe SUBJ_ENDS_IN_SPACE  Subject ends with lots of white space

header   SUBJ_ALL_CAPS  eval:subject_is_all_caps()
describe SUBJ_ALL_CAPS  Subject is all capitals

header   SUBJ_MISSING  Subject !~ /\S/
describe SUBJ_MISSING  Subject: is empty or missing

# ---------------------------------------------------------------------------
# Message-Id: tests

# (allow this test to pass if there's no Message-Id header)
header   MSGID_HAS_NO_AT  Message-Id !~ /\@/ [if-unset: NO@MSGID]
describe MSGID_HAS_NO_AT  Message-Id has no @ sign

header   MSGID_SPAMSIGN_1  Message-Id =~ /^<[0-9a-f]{12,12}\$[0-9a-f]{8,8}\$[0-9a-f]{8,8}\@>$/
describe MSGID_SPAMSIGN_1  Message-Id generated by a spam tool

# \# is used by quite a few legit mailers; lockergnome for one
header   MSGID_CHARS_SPAM  Message-Id =~ /[:}{,!\/]/
describe MSGID_CHARS_SPAM  Message-Id has characters indicating spam

header   MSGID_CHARS_WEIRD  Message-Id =~ /["=\\~]/
describe MSGID_CHARS_WEIRD  Message-Id has characters often found in spam

# Negated test: the if-unset placeholder is itself a well-formed Message-Id,
# so a message with no Message-Id header at all does not hit this rule.
header   INVALID_MSGID  Message-Id !~ /^<(?:\S+|\".+\")\@(?:\S+|\[[0-9.]{7,15}\])>(?:\s*\([^()]*\))?\s*$/ [if-unset: <no@msgid>]
describe INVALID_MSGID  Message-Id is not valid, according to RFC 2822

# ---------------------------------------------------------------------------
# Date: tests

# Negated test: the if-unset placeholder is a valid date, so a missing Date:
# header does not hit this rule.
header   INVALID_DATE  Date !~ /^\s*(?:(?:Mon|Tue|Wed|Thu|Fri|Sat|Sun), )?[0-3 ]?[0-9] (?:Jan|Feb|Ma[ry]|Apr|Ju[nl]|Aug|Sep|Oct|Nov|Dec) (?:[12][901])?[0-9]{2} [0-2][0-9](?:\:[0-5][0-9]){1,2} (?:[+-][0-9]{4}|UT|[A-Z]{2,3}T)(?:\s+\(.*\))?\s*$/ [if-unset: Wed, 31 Jul 2002 16:41:57 +0200]
describe INVALID_DATE  Invalid Date: header (not RFC 2822)

# allow +1300, NZ timezone
header   INVALID_DATE_TZ_ABSURD  Date =~ /[-+](?:1[4-9]\d\d|[2-9]\d\d\d)$/
describe INVALID_DATE_TZ_ABSURD  Invalid Date: header (timezone does not exist)

header   DATE_YEAR_ZERO_FIRST  Date =~ /[nbrylgptvc]\s+0\d\d\d(?:\s|$)/
describe DATE_YEAR_ZERO_FIRST  Invalid Date: year begins with zero

# Date: shifted relative to the Received: date.  The two arguments are the
# bounds (in hours) of each bucket; 'undef' leaves that side open-ended.
# Adjacent buckets share their boundary values so the ranges tile without gaps.
header   DATE_IN_PAST_03_06  eval:check_for_shifted_date('-6', '-3')
describe DATE_IN_PAST_03_06  Date: is 3 to 6 hours before Received: date

header   DATE_IN_PAST_06_12  eval:check_for_shifted_date('-12', '-6')
describe DATE_IN_PAST_06_12  Date: is 6 to 12 hours before Received: date

header   DATE_IN_PAST_12_24  eval:check_for_shifted_date('-24', '-12')
describe DATE_IN_PAST_12_24  Date: is 12 to 24 hours before Received: date

header   DATE_IN_PAST_24_48  eval:check_for_shifted_date('-48', '-24')
describe DATE_IN_PAST_24_48  Date: is 24 to 48 hours before Received: date

header   DATE_IN_PAST_48_96  eval:check_for_shifted_date('-96', '-48')
describe DATE_IN_PAST_48_96  Date: is 48 to 96 hours before Received: date

header   DATE_IN_PAST_96_XX  eval:check_for_shifted_date('undef', '-96')
describe DATE_IN_PAST_96_XX  Date: is 96 hours or more before Received: date

header   DATE_IN_FUTURE_03_06  eval:check_for_shifted_date('3', '6')
describe DATE_IN_FUTURE_03_06  Date: is 3 to 6 hours after Received: date

header   DATE_IN_FUTURE_06_12  eval:check_for_shifted_date('6', '12')
describe DATE_IN_FUTURE_06_12  Date: is 6 to 12 hours after Received: date

# Upper bound is 24 (not 18) so that this bucket meets DATE_IN_FUTURE_24_48
# and matches its own description.
header   DATE_IN_FUTURE_12_24  eval:check_for_shifted_date('12', '24')
describe DATE_IN_FUTURE_12_24  Date: is 12 to 24 hours after Received: date

header   DATE_IN_FUTURE_24_48  eval:check_for_shifted_date('24', '48')
describe DATE_IN_FUTURE_24_48  Date: is 24 to 48 hours after Received: date

header   DATE_IN_FUTURE_48_96  eval:check_for_shifted_date('48', '96')
describe DATE_IN_FUTURE_48_96  Date: is 48 to 96 hours after Received: date

header   DATE_IN_FUTURE_96_XX  eval:check_for_shifted_date('96', 'undef')
describe DATE_IN_FUTURE_96_XX  Date: is 96 hours or more after Received: date

# ---------------------------------------------------------------------------
# Advertising tags in Subject:

# bug 681: Tony L. Svanstrom: rewrote ADVERT_CODE tests
header   ADVERT_CODE  Subject =~ /^\W*ADV\b/i
describe ADVERT_CODE  Subject: starts with advertising tag

header   ADVERT_CODE2  Subject =~ /\bADV\b/i
describe ADVERT_CODE2  Subject: contains advertising tag

# Korean UCE Subject: lines are usually 8-bit, but are occasionally encoded
# with quoted-printable or base64.
#
# \xbc\xba\xc0\xce means "adult"
# \xb1\xa4\xb0\xed means "advertisement"
# \xc1\xa4\xba\xb8 means "information"
# \xc8\xab\xba\xb8 means "publicity"
#
# Each two byte sequence is one Korean letter; the spaces and periods are
# sometimes used to obscure the words. \xb1\xa4\xb0\xed is the most common
# tag and is sometimes very obscured so we look harder.
#
header   KOREAN_UCE_SUBJECT  Subject =~ /[({[<][. ]*(?:\xbc\xba[. ]*\xc0\xce[. ]*)?(?:\xb1\xa4(?:[. ]*|[\x00-\x7f]{0,3})\xb0\xed|\xc1\xa4[. ]*\xba\xb8|\xc8\xab[. ]*\xba\xb8)[. ]*[)}\]>]/
describe KOREAN_UCE_SUBJECT  Subject: contains Korean unsolicited email tag

# ---------------------------------------------------------------------------
# Miscellaneous header tests

header   FRIEND_AT_PUBLIC  To =~ /(?:yourdomain|you|your|public)\.(?:com|org|net)/i
describe FRIEND_AT_PUBLIC  sent to you@you.com or similar

header   DOMAIN_SUBJECT  Subject =~ /(?:\s(?:\.|dot\s+)(?:info|biz|name)|domain)\b.*\b(?:extension|info|regist(?:ry|ration|er)|submission)/i
describe DOMAIN_SUBJECT  Subject: domain registration spam subject

header   ALL_CAPS_HEADER  ALL =~ /\n(?:TO|FROM|SUBJECT|DATE):/s
describe ALL_CAPS_HEADER  Header with all capitals found

header   NO_MX_FOR_FROM  eval:check_for_from_mx()
describe NO_MX_FOR_FROM  No MX records for the From: domain
tflags   NO_MX_FOR_FROM  net

header   FORGED_AOL_RCVD  eval:check_for_fake_aol_relay_in_rcvd()
describe FORGED_AOL_RCVD  Received forged, contains fake AOL relays

header   FROM_AND_TO_SAME_1  eval:check_for_from_to_same('exact_both')
describe FROM_AND_TO_SAME_1  From and To the same (1)

header   FROM_AND_TO_SAME_2  eval:check_for_from_to_same('exact_none')
describe FROM_AND_TO_SAME_2  From and To are same (2)

header   FROM_AND_TO_SAME_3  eval:check_for_from_to_same('rough_both')
describe FROM_AND_TO_SAME_3  From and To are same (3)

header   FROM_AND_TO_SAME_5  eval:check_for_from_to_same('rough_to')
describe FROM_AND_TO_SAME_5  From and To are same (5)

header   FROM_AND_TO_SAME_6  eval:check_for_from_to_same('rough_none')
describe FROM_AND_TO_SAME_6  From and To are same (6)

header   BAD_HELO_WARNING  eval:check_for_bad_helo()
describe BAD_HELO_WARNING  Fake name used in SMTP HELO command
tflags   BAD_HELO_WARNING  net

header   SUBJ_FULL_OF_8BITS  eval:check_subject_for_lotsa_8bit_chars()
describe SUBJ_FULL_OF_8BITS  Subject is full of 8-bit characters

header   HEADER_8BITS  ALL =~ /[\x80-\xff]{3,}/
describe HEADER_8BITS  Headers include 3 consecutive 8-bit characters

header   MDAEMON_2_7_4  Received =~ /with SMTP .MDaemon.v2.7.SP4.R./
describe MDAEMON_2_7_4  Received via buggy SMTP server (MDaemon 2.7.4SP4R)

header   FAKED_IP_IN_RCVD  Received =~ /from [-0-9a-z\._]+_\[\d+\.\d+\.\d+\.\d+\] /i
describe FAKED_IP_IN_RCVD  Received: contains a name with a faked IP-address

header   SMTPD_IN_RCVD  Received =~ /\(SMTPD32-\d+\..+\)/
describe SMTPD_IN_RCVD  Received via SMTPD32 server (SMTPD32-n.n)

# ---------------------------------------------------------------------------
# DNS blacklist tests

# Multizone / Multi meaning BLs first

# Osirusoft, like MAPS RBL+ is a multi-meaning BL, so it is treated separately
header   RCVD_IN_OSIRUSOFT_COM  rbleval:check_rbl('osirusoft', 'relays.osirusoft.com.')
describe RCVD_IN_OSIRUSOFT_COM  Received via a relay in relays.osirusoft.com
tflags   RCVD_IN_OSIRUSOFT_COM  net

# X prefix was used to ensure that it was run at the end, but it's not needed
# anymore since we run the rule with rblreseval -- Marc
header   X_OSIRU_SPAM_SRC  rblreseval:check_rbl_results_for('osirusoft', '127.0.0.4')
describe X_OSIRU_SPAM_SRC  DNSBL: sender is Confirmed Spam Source
tflags   X_OSIRU_SPAM_SRC  net

header   X_OSIRU_SPAMWARE_SITE  rblreseval:check_rbl_results_for('osirusoft', '127.0.0.6')
describe X_OSIRU_SPAMWARE_SITE  DNSBL: sender is a Spamware site or vendor
tflags   X_OSIRU_SPAMWARE_SITE  net

# Queries the Osirusoft zone named in the description; the MAPS DUL first-hop
# lookup is a separate rule using the 'dialup-firsthop' set.
header   X_OSIRU_DUL_FH  rbleval:check_rbl('osirusoft-dul-firsthop', 'relays.osirusoft.com.')
describe X_OSIRU_DUL_FH  Received from first hop dialup listed in relays.osirusoft.com
tflags   X_OSIRU_DUL_FH  net

# Now, single zone BLs follow:

# the new first arg for check_rbl() indicates what type of check it is;
# each type of check is stored in a separate set, and if an IP has already
# been hit in that set, it will not be checked with any other zone in
# that set.
header   RCVD_IN_RELAYS_ORDB_ORG  rbleval:check_rbl('relay', 'relays.ordb.org.')
describe RCVD_IN_RELAYS_ORDB_ORG  Received via a relay in relays.ordb.org
tflags   RCVD_IN_RELAYS_ORDB_ORG  net

header   RCVD_IN_VISI  rbleval:check_rbl('relay', 'relays.visi.com.')
describe RCVD_IN_VISI  Received via a relay in relays.visi.com
tflags   RCVD_IN_VISI  net

# SBL is the Spamhaus Block List: http://www.spamhaus.org/sbl/ .
header   RCVD_IN_SBL  rbleval:check_rbl('relay', 'sbl.spamhaus.org.')
describe RCVD_IN_SBL  Received via SBLed relay, see http://www.spamhaus.org/sbl/
tflags   RCVD_IN_SBL  net

header   RCVD_IN_ORBS  rbleval:check_rbl('relay', 'orbs.dorkslayers.com.')
describe RCVD_IN_ORBS  Received via a relay in orbs.dorkslayers.com
tflags   RCVD_IN_ORBS  net

# DSBL catches open relays, badly-installed CGI scripts and open SOCKS and
# HTTP proxies.  list.dsbl.org lists servers tested by "trusted" users,
# multihop.dsbl.org lists servers which open SMTP servers relay through,
# unconfirmed.dsbl.org lists servers tested by "untrusted" users.
# See http://dsbl.org/ for full details.
# This is effectively an open relay BL, put it in the relay set too -- Marc
header   RCVD_IN_DSBL  rbleval:check_rbl('relay', 'list.dsbl.org')
describe RCVD_IN_DSBL  Received via a relay in list.dsbl.org
tflags   RCVD_IN_DSBL  net

# Warning, several big ISP's mail relays (not open to outside people) are
# listed on multihop.  Do not set a high score on this.
# Note too that those IPs often are listed in unconfirmed.dsbl.org at the same
# time, so make sure that the score FUDGE_MULTIHOP_RELAY cancels one of the
# two scores -- Marc
header   RCVD_IN_MULTIHOP_DSBL  rbleval:check_rbl('multihop', 'multihop.dsbl.org')
describe RCVD_IN_MULTIHOP_DSBL  Received via a relay in multihop.dsbl.org
tflags   RCVD_IN_MULTIHOP_DSBL  net

# We want to count this in the open relay set so that someone doesn't get scored
# twice (at least by default) for being listed there and in some other relay BL.
# Users can request a double hit and double score by changing 'relay' with
# 'unconfirmed_dsbl' or something like that, but I don't think it should be
# a default -- Marc
header   RCVD_IN_UNCONFIRMED_DSBL  rbleval:check_rbl('relay', 'unconfirmed.dsbl.org')
describe RCVD_IN_UNCONFIRMED_DSBL  Received via a relay in unconfirmed.dsbl.org
tflags   RCVD_IN_UNCONFIRMED_DSBL  net

# Other miscellaneous RBLs are listed here:
header   RCVD_IN_RFCI  rbleval:check_rbl('rfci', 'ipwhois.rfc-ignorant.org.')
describe RCVD_IN_RFCI  Received via a relay in ipwhois.rfc-ignorant.org
tflags   RCVD_IN_RFCI  net

header   HABEAS_HIL  rbleval:check_rbl('hil', 'hil.habeas.com.')
describe HABEAS_HIL  Sender is on www.habeas.com Habeas Infringer List
tflags   HABEAS_HIL  net

# NOTE: commercial test, see README file for details
header   RCVD_IN_BL_SPAMCOP_NET  rbleval:check_rbl('spamcop', 'bl.spamcop.net.')
describe RCVD_IN_BL_SPAMCOP_NET  Received via a relay in bl.spamcop.net
tflags   RCVD_IN_BL_SPAMCOP_NET  net

# NOTE: commercial tests, see README file for details
header   RCVD_IN_RBL  rbleval:check_rbl('rbl', 'blackholes.mail-abuse.org.')
describe RCVD_IN_RBL  Received via RBLed relay, see http://www.mail-abuse.org/rbl/
tflags   RCVD_IN_RBL  net

header   RCVD_IN_RSS  rbleval:check_rbl('relay', 'relays.mail-abuse.org.')
describe RCVD_IN_RSS  Received via RSSed relay, see http://www.mail-abuse.org/rss/
tflags   RCVD_IN_RSS  net

header   RCVD_IN_DUL  rbleval:check_rbl('dialup', 'dialups.mail-abuse.org.')
describe RCVD_IN_DUL  Received from dialup, see http://www.mail-abuse.org/dul/
tflags   RCVD_IN_DUL  net

header   RCVD_IN_DUL_FH  rbleval:check_rbl('dialup-firsthop', 'dialups.mail-abuse.org.')
describe RCVD_IN_DUL_FH  Received from first hop dialup, see http://www.mail-abuse.org/dul/
tflags   RCVD_IN_DUL_FH  net

# Now, you can apply rules to counter for the effect of two similar BLs matching
# together -- Marc
# Each rule names two (set, result) pairs; the set names are the first
# argument given to check_rbl() in the lookup rules.
header   FUDGE_DUL_MAPS_OSIRU  rblreseval:check_two_rbl_results('osirusoft', "127.0.0.3", 'dialup', "127.0.0.3")
describe FUDGE_DUL_MAPS_OSIRU  Do not double penalize for MAPS DUL and Osirusoft DUL
tflags   FUDGE_DUL_MAPS_OSIRU  net

header   FUDGE_RELAY_OSIRU  rblreseval:check_two_rbl_results('osirusoft', "127.0.0.2", 'relay', "127.0.0.2")
describe FUDGE_RELAY_OSIRU  Do not double penalize for being an open relay on Osirusoft and another RBL
tflags   FUDGE_RELAY_OSIRU  net

header   FUDGE_DUL_OSIRU_FH  rblreseval:check_two_rbl_results('osirusoft-dul-firsthop', "127.0.0.3", 'dialup-firsthop', "127.0.0.3")
describe FUDGE_DUL_OSIRU_FH  Do not double compensate for MAPS DUL and Osirusoft DUL first hop dialup
tflags   FUDGE_DUL_OSIRU_FH  net

header   FUDGE_MULTIHOP_RELAY  rblreseval:check_two_rbl_results('multihop', "127.0.0.2", 'relay', "127.0.0.2")
describe FUDGE_MULTIHOP_RELAY  Do not double penalize if an IP is a multihop and an open relay
tflags   FUDGE_MULTIHOP_RELAY  net

# don't add headers without testing for false positives (usually Unix MTAs and
# list software) and especially don't add From:, Reply-To:, Date:, Message-ID:
#
# Jul 11 2002 jm: removed some headers, as it now gets a 5:1
# false-positive-to-spam ratio! headers: Content-Transfer-Encoding,
# Content-Type, X-Mailer, X-Sender.  v important to test this against a good
# corpus of mailing list hosts like Yahoo, or webmail systems.
#
header   FORGED_RCVD_FOUND  ALL =~ /\n(?:To|Importance|Subject|X-MSMail-Priority):.*\nReceived: /s
describe FORGED_RCVD_FOUND  Possibly-forged 'Received:' header found

header   FORGED_RCVD_TRAIL  eval:check_for_forged_received_trail()
describe FORGED_RCVD_TRAIL  trail of Received: headers seems to be forged

# obsolete; hotmail have changed their header format.
#header   FROM_FORGED_HOTMAIL  From =~ /^[^"]+\S+\@hotmail\.com/i
#describe FROM_FORGED_HOTMAIL  From: claims to be hotmail but not in their format

header   LOTS_OF_CC_LINES  eval:check_lots_of_cc_lines()
describe LOTS_OF_CC_LINES  Lots and lots of Cc: headers

# gets very bad FP rate
header   FORGED_HOTMAIL_RCVD  eval:check_for_forged_hotmail_received_headers()
describe FORGED_HOTMAIL_RCVD  Forged hotmail.com 'Received:' header found

header   FORGED_EUDORAMAIL_RCVD  eval:check_for_forged_eudoramail_received_headers()
describe FORGED_EUDORAMAIL_RCVD  Forged eudoramail.com 'Received:' header found

header   FORGED_YAHOO_RCVD  eval:check_for_forged_yahoo_received_headers()
describe FORGED_YAHOO_RCVD  'From' yahoo.com does not match 'Received' headers

header   FORGED_JUNO_RCVD  eval:check_for_forged_juno_received_headers()
describe FORGED_JUNO_RCVD  'From' juno.com does not match 'Received' headers

header   FORGED_GW05_RCVD  eval:check_for_forged_gw05_received_headers()
describe FORGED_GW05_RCVD  Forged 'by gw05' 'Received:' header found

header   NONEXISTENT_CHARSET  Content-Type =~ /charset=.?DEFAULT/
describe NONEXISTENT_CHARSET  Character set doesn't exist

header   CHARSET_FARAWAY_HEADERS  eval:check_for_faraway_charset_in_headers()
describe CHARSET_FARAWAY_HEADERS  A foreign language charset used in headers

header   X_MAILER_GIBBERISH  X-Mailer =~ /^[A-Fa-f0-9\.]{48,}$/
describe X_MAILER_GIBBERISH  'X-Mailer' line contains gibberish

header   X_PRIORITY_HIGH  X-Priority =~ /^1/
describe X_PRIORITY_HIGH  Sent with 'X-Priority' set to high

header   X_MSMAIL_PRIORITY_HIGH  X-Msmail-Priority =~ /^High/
describe X_MSMAIL_PRIORITY_HIGH  Sent with 'X-Msmail-Priority' set to high

header   MSG_ID_ADDED_BY_MTA  Message-Id =~ / \(added by /
describe MSG_ID_ADDED_BY_MTA  'Message-Id' was added by a relay

header   MSG_ID_ADDED_BY_MTA_2  eval:check_for_mta_message_id_later()
describe MSG_ID_ADDED_BY_MTA_2  'Message-Id' was added by a relay (2)

header   MSG_ID_ADDED_BY_MTA_3  eval:check_for_mta_message_id_first()
describe MSG_ID_ADDED_BY_MTA_3  'Message-Id' was added by a relay (3)

header   MANY_FROMS  From =~ /^[^\"\<\(]+, [^\"\<\(]+$/
describe MANY_FROMS  'From' contains more than one address

# *so* many spams come from here.
header   FROM_BTAMAIL  From =~ /\@btamail\.net\.cn/i
describe FROM_BTAMAIL  From an address @btamail.net.cn

header   USER_IN_BLACKLIST  eval:check_from_in_blacklist()
describe USER_IN_BLACKLIST  From: address is in the user's black-list
tflags   USER_IN_BLACKLIST  userconf

header   USER_IN_WHITELIST  eval:check_from_in_whitelist()
describe USER_IN_WHITELIST  From: address is in the user's white-list
tflags   USER_IN_WHITELIST  userconf

# bad test: no hits
#header   DATE_MISSING  Date =~ /^UNSET$/ [if-unset: UNSET]
#describe DATE_MISSING  Missing Date: header

# Steve Linford via Charlie Watts: good test!
# The two "__" rules are sub-rules that feed the meta rule below.
header   __RCVD_BY_HOTMAIL  Received =~ / by hotmail\.com /
header   __CTYPE_IS_HTML    Content-Type =~ /^text\/html\b/
meta     CTYPE_JUST_HTML    (!__RCVD_BY_HOTMAIL && __CTYPE_IS_HTML)
describe CTYPE_JUST_HTML    HTML-only mail, with no text version

header   ROUND_THE_WORLD  eval:check_for_round_the_world_received()
describe ROUND_THE_WORLD  Received: says mail bounced all around the world
tflags   ROUND_THE_WORLD  net

# this is a quite common false positive, as it's legal to remove a To but leave
# a CC.  so don't score it high.
header   MISSING_HEADERS  eval:check_for_missing_to_header()
describe MISSING_HEADERS  Missing To: header

# Back-references (\1, \2) require the same captured domain / username prefix
# to recur across the To: and Cc: recipient list.
header   SUSPICIOUS_RECIPS  ToCc =~ /(@[-a-z0-9_.]{2,}).*(?:\1.*){8,}/is
describe SUSPICIOUS_RECIPS  To: and Cc: contain similar domains at least 8 times

header   VERY_SUSP_RECIPS  ToCc =~ /\b([a-z][a-z])[^@,<>\(\) ]{0,20}(@[-a-z0-9_\.]{3}).{0,80}?(?:\1[^@,<>\(\) ]{0,20}\2.{0,80}?){5,}/is
describe VERY_SUSP_RECIPS  To: and Cc: contain similar usernames at least 5 times

header   USER_IN_WHITELIST_TO  eval:check_to_in_whitelist()
describe USER_IN_WHITELIST_TO  User is listed in 'whitelist_to'
tflags   USER_IN_WHITELIST_TO  userconf

header   USER_IN_MORE_SPAM_TO  eval:check_to_in_more_spam()
describe USER_IN_MORE_SPAM_TO  User is listed in 'more_spam_to'
tflags   USER_IN_MORE_SPAM_TO  userconf

header   USER_IN_ALL_SPAM_TO  eval:check_to_in_all_spam()
describe USER_IN_ALL_SPAM_TO  User is listed in 'all_spam_to'
tflags   USER_IN_ALL_SPAM_TO  userconf

header   GAPPY_SUBJECT  Subject =~ /\b(?:[a-z][-_\.\,\:\;\'\~\s]{1,3}){4,}/i
describe GAPPY_SUBJECT  'Subject' contains G.a.p.p.y-T.e.x.t

### header existence tests (description is added automatically)

# X-Fix example: NTMail fixed non RFC822 compliant EMail message
#
# X-PMFLAGS is all caps
#
# Headers that seem to only be used by a single spamming software and
# are found together in the same message:
# 1. X-MailingID and X-ServerHost
# 2. X-Stormpost-To and X-List-Unsubscribe
#
# not spammish: X-EM-Registration, X-EM-Version, X-Antiabuse, X-List-Host,
# X-Message-Id

# these headers have very high correlation with spam
header   COMMENT                 exists:Comment
header   DATE_WARNING            exists:Date-warning
header   X_ENC_PRESENT           exists:X-Encoding
header   X_ESMTP                 exists:x-esmtp
header   X_LIBRARY               exists:X-Library
header   X_LIST_UNSUBSCRIBE      exists:X-List-Unsubscribe
header   X_MAIL_ID_PRESENT       exists:X-MailingID
header   X_PMFLAGS_PRESENT       exists:X-PMFLAGS
header   X_PRECEDENCE_REF        exists:X-Precedence-Ref
header   X_SERV_HOST_PRESENT     exists:X-ServerHost
header   X_SMTPEXP_REGISTRATION  exists:X-SMTPExp-Registration
header   X_SMTPEXP_VERSION       exists:X-SMTPExp-Version
header   X_STORMPOST_TO          exists:X-Stormpost-To
header   X_X_PRESENT             exists:X-x
header   X_FIX_PRESENT           exists:X-Fix
header   COMPLAIN_TO             exists:Complain-To

header   MIME_ODD_CASE  ALL =~ /\nMiME-Version: /s
describe MIME_ODD_CASE  MiME-Version header (oddly capitalized)

header   __HAS_MIMEOLE     exists:X-MimeOLE
header   __HAS_MSMAIL_PRI  exists:X-MSMail-Priority
meta     MISSING_MIMEOLE   (__HAS_MSMAIL_PRI && !__HAS_MIMEOLE)
describe MISSING_MIMEOLE   Message has X-MSMail-Priority, but no X-MimeOLE

header   __HAS_X_MAILER           exists:X-Mailer
# NOTE: despite its name this sub-rule uses "!~", so it is true when X-Mailer
# does NOT contain "Microsoft Outlook"; the meta rule below therefore uses it
# un-negated.
header   __HAS_OUTLOOK_IN_MAILER  X-Mailer !~ /Microsoft Outlook/
meta     MISSING_OUTLOOK_NAME     ((__HAS_MIMEOLE || __HAS_MSMAIL_PRI) && __HAS_X_MAILER && __HAS_OUTLOOK_IN_MAILER)
describe MISSING_OUTLOOK_NAME     Message looks like Outlook, but isn't

header   __HAS_X_PRIORITY  exists:X-Priority
meta     PRIORITY_NO_NAME  ((__HAS_X_PRIORITY || __HAS_MSMAIL_PRI) && !__HAS_X_MAILER)
describe PRIORITY_NO_NAME  Message has priority setting, but no X-Mailer

# commented out due to huge number of FPs, bug #689
# is this test worth keeping for the future (multi-match or decision tree)?
header   SUBJ_FREE_CAP  Subject =~ /FRE{2,}|F.R.E.E\b/
describe SUBJ_FREE_CAP  Subject contains "FREE" in CAPS

header   SUBJ_DOLLARS  Subject =~ /^\$[0-9.,]+\b/
describe SUBJ_DOLLARS  Subject starts with dollar amount

# (?-i:...) switches case-insensitivity off for the upper-case alternative.
header   SUBJ_GUARANTEED  Subject =~ /^guaranteed|(?-i:GUARANTEE)/i
describe SUBJ_GUARANTEED  Subject GUARANTEED

header   SUB_FREE_OFFER  Subject =~ /^fre{2,}\b/i
describe SUB_FREE_OFFER  Subject starts with "Free"

header   SUB_HELLO  Subject =~ /^hello\b/i
describe SUB_HELLO  Subject starts with "Hello"

header   VAR_REF_IN_RECEIVED  Received =~ /from \$\S+ \(/
describe VAR_REF_IN_RECEIVED  Received contains a $variable reference

# the real services never HELO as 'foo.com', instead 'mail.foo.com' or
# something like that.  Note: be careful when expanding this... hotmail.com is
# legit; so is lockergnome.com.
header   RCVD_FAKE_HELO_DOTCOM  Received =~ /^from (msn|yahoo|yourwebsite|lycos|excite|aol|drizzle|localhost|koreanmail|allexecs|mydomain|juno|eudoramail|compuserve|desertmail)\.com \(/
describe RCVD_FAKE_HELO_DOTCOM  Received contains a faked HELO hostname

header   USERNAME_IN_SUBJECT  eval:check_for_to_in_subject(1)
describe USERNAME_IN_SUBJECT  To: username at front of subject

header   USERNAME_IN_SUBJECT_2  eval:check_for_to_in_subject(2)
describe USERNAME_IN_SUBJECT_2  To: username at front of subject (2)

header   USERNAME_IN_SUBJECT_3  eval:check_for_to_in_subject(3)
describe USERNAME_IN_SUBJECT_3  To: username at front of subject (3)

header   USERNAME_IN_SUBJECT_4  eval:check_for_to_in_subject(4)
describe USERNAME_IN_SUBJECT_4  To: username at front of subject (4)

header   USERNAME_IN_SUBJECT_5  eval:check_for_to_in_subject(5)
describe USERNAME_IN_SUBJECT_5  To: username at front of subject (5)

header   USERNAME_IN_SUBJECT_6  eval:check_for_to_in_subject(6)
describe USERNAME_IN_SUBJECT_6  To: username at front of subject (6)

header   LOSE_POUNDS  Subject =~ /\bLose .*(?:pounds|lbs|weight)/i
describe LOSE_POUNDS  Subject talks about losing pounds

header   EXTRA_MPART_TYPE  Content-Type =~ /(?:\s*multipart\/)?.* type=/i
describe EXTRA_MPART_TYPE  Message with extraneous Content-type:...type= header

header   TO_RECIP_MARKER  To =~ /\#recipient\#/
describe TO_RECIP_MARKER  To header contains 'recipient' marker

header   SAVINGS  Subject =~ /\bsave\s+(?:on\s+your|up\s+to|big|over|at\s+least|\d+\%|you)\b/i
describe SAVINGS  Subject talks about savings

# MIME boundary tests; spam tools use distinctive patterns.
header   MIME_BOUND_HASHES  Content-type =~ /boundary=\"\#{10}\"/
describe MIME_BOUND_HASHES  Spam tool pattern in MIME boundary

header   MIME_BOUND_DIGITS_1  Content-Type =~ /boundary=\"----------=_\d{10}-\d{4}-\d{2}\"/
describe MIME_BOUND_DIGITS_1  Spam tool pattern in MIME boundary

header   MIME_BOUND_DIGITS_2  Content-Type =~ /boundary=\"----------=_\d{10}-\d{5}-\d\"/
describe MIME_BOUND_DIGITS_2  Spam tool pattern in MIME boundary

header   MIME_BOUND_DIGITS_3  Content-Type =~ /boundary=\"----0{63}\"/
describe MIME_BOUND_DIGITS_3  Spam tool pattern in MIME boundary

header   MIME_BOUND_DIGITS_4  Content-Type =~ /boundary=\"_----------=_\d{18}\"/
describe MIME_BOUND_DIGITS_4  Spam tool pattern in MIME boundary

header   MIME_BOUND_DIGITS_5  Content-Type =~ /boundary=\"\d-\d{9}-\d{10}=:\d{5}\"/
describe MIME_BOUND_DIGITS_5  Spam tool pattern in MIME boundary

header   MIME_BOUND_DIGITS_7  Content-Type =~ /boundary=\d{9}\.\d{13}/
describe MIME_BOUND_DIGITS_7  Spam tool pattern in MIME boundary

header   MIME_BOUND_HEX_24  Content-Type =~ /boundary=\"[\dA-F]{24}\"/
describe MIME_BOUND_HEX_24  Spam tool pattern in MIME boundary

header   MIME_BOUND_MA  Content-Type =~ /boundary=\"----=_[a-zA-Z0-9]{8}_[a-zA-Z0-9]{8}_MA\"/
describe MIME_BOUND_MA  Spam tool pattern in MIME boundary

header   MIME_BOUND_SEP1  Content-Type =~ /boundary=\"--==--==_Separator1\"/
describe MIME_BOUND_SEP1  Spam tool pattern in MIME boundary

header   MIME_BOUND_HEX14  Content-Type =~ /boundary=\"==[0-9a-f]{14}\"/
describe MIME_BOUND_HEX14  Spam tool pattern in MIME boundary

header   MIME_BOUND_OPTIN  Content-Type =~ /boundary=\"[A-F\d]{8}-[A-F\d]{4}-[A-F\d]{4}-[A-F\d]{4}-[A-F\d]{12}OPTIN\"/
describe MIME_BOUND_OPTIN  Spam tool pattern in MIME boundary

header   MIME_BOUND_EQS_DASHES  Content-Type =~ /boundary=\"==-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-==\"/
describe MIME_BOUND_EQS_DASHES  Spam tool pattern in MIME boundary

header   MIME_BOUND_MIME_BOUND  Content-Type =~ /boundary=\"MIME_BOUNDARY-\d{4}-\d-\d{10}\"/
describe MIME_BOUND_MIME_BOUND  Spam tool pattern in MIME boundary

header   MIME_BOUND_MAIL_BOUND  Content-Type =~ /boundary=\"____MAIL_BOUNDARY____\"/
describe MIME_BOUND_MAIL_BOUND  Spam tool pattern in MIME boundary

# http://www.hughes-family.org/bugzilla/show_bug.cgi?id=699
header   MICROSOFT  exists:microsoft
describe MICROSOFT  A "microsoft" header was found

# bug 637
# more specific FROM_HAS_MIXED_NUMS, less FPs
header   FROM_HAS_MIXED_NUMS2  From =~ /\w{2,}\d{4,}[a-z]{1,2}\d{2,}\@/i
describe FROM_HAS_MIXED_NUMS2  From address matches known spammer format

header   MAY_BE_FORGED  Received =~ /\(may be forged\)/i
describe MAY_BE_FORGED  'Received:' has 'may be forged' warning

# A missing Date: header is replaced by the if-unset value "UNSET", which is
# exactly what the pattern looks for.
header   DATE_MISSING  Date =~ /^UNSET$/ [if-unset: UNSET]
describe DATE_MISSING  Missing Date: header

# freqs: 0.001 0.003 0.000 1.00 2.66 POST_IN_RCVD
header   POST_IN_RCVD  Received =~ / Post\.(?:sk|cz)/
describe POST_IN_RCVD  Received contains fake 'Post.cz' hostname

# Dots are escaped so they only match a literal "." in the domain.
header   FROM_UGETMORE  From =~ /\@ugetmore4less\.net/i
describe FROM_UGETMORE  From an address @ugetmore4less.net

# (?:\w+\.)* allows any number of sub-domain labels in front of the domain.
header   FROM_TOPICA  From =~ /\@(?:\w+\.)*email-publisher\.com/i
describe FROM_TOPICA  From an address @email-publisher.com