# SpamAssassin rules file: header tests
#
# Please don't modify this file as your changes will be overwritten with
# the next update. Use @@LOCAL_RULES_DIR@@/local.cf instead.
# See 'perldoc Mail::SpamAssassin::Conf' for details.
#
# This program is free software; you can redistribute it and/or modify
# it under the terms of either the Artistic License or the GNU General
# Public License as published by the Free Software Foundation; either
# version 1 of the License, or (at your option) any later version.
#
# See the file "License" in the top level of the SpamAssassin source
# distribution for more details.
#
###########################################################################

require_version @@VERSION@@

###########################################################################
# From: address shape tests

# NOTE(review): as written, this pattern only matches a From: value that
# consists of a single literal "?" (after optional quotes/whitespace),
# which does not fit the description below.  Part of the pattern may have
# been lost; verify against a pristine copy of this file.
header   NO_REAL_NAME From =~ /^["\s]*\?\s*$/
describe NO_REAL_NAME From: does not include a real name

# Two digits immediately before the "@".
header   FROM_ENDS_IN_NUMS From =~ /\d\d\@/
describe FROM_ENDS_IN_NUMS From: ends in numbers

# Two digits at the very start of the header value.
header   FROM_STARTS_WITH_NUMS From =~ /^\d\d/
describe FROM_STARTS_WITH_NUMS From: starts with nums

# digits, letters, digits ... somewhere before the "@".
header   FROM_HAS_MIXED_NUMS From =~ /\d+[a-z]+\d+\S*\@/i
describe FROM_HAS_MIXED_NUMS From: contains numbers mixed in with letters

# bug 637, more specific FROM_HAS_MIXED_NUMS, less FPs
header   FROM_HAS_MIXED_NUMS2 From =~ /\w{2,}\d{4,}[a-z]{1,2}\d{2,}\@/i
describe FROM_HAS_MIXED_NUMS2 From address matches known spammer format

# idea from Robert Menschel
header   FROM_HAS_MIXED_NUMS3 From:addr =~ /^[a-z]+\d+[a-z]+\d+[a-z]+\w*\@/i
describe FROM_HAS_MIXED_NUMS3 From: contains numbers mixed in with letters

# Faked addresses tend to come from big public sites. Stats show that
# 5 digits is enough to get a 1.0 s/o ratio; 4 is too low (probably due
# to folks called "jmason2002@yahoo.com" for example).
# NOTE(review): the next two directives look damaged.  The
# ADDR_NUMS_AT_BIGSITE pattern has an unbalanced ")" and there is a
# describe for FROM_NO_USER with no matching "header FROM_NO_USER" line.
# Text appears to be missing between them; they are kept exactly as found
# and should be restored from a pristine copy of this file.
header   ADDR_NUMS_AT_BIGSITE ALL =~ /^(To|From|Cc|Reply-To):\s*)/ [if-unset: unset@unset.unset]
describe FROM_NO_USER From: has no local-part before @ sign

###########################################################################
# To: / Reply-To: shape tests

# "@" with nothing in front of it, or an empty "<>".
header   TO_NO_USER To =~ /(?:^\@|<\@| \@[^\)<]*$|<>)/ [if-unset: unset@unset.unset]
describe TO_NO_USER To: has no local-part before @ sign

header   TO_HAS_SPACES To:addr =~ /\s/
describe TO_HAS_SPACES To: address contains spaces

# A missing header is substituted with "UNSET", so only a header that is
# present but blank matches.
header   TO_EMPTY To =~ /^\s*$/ [if-unset: UNSET]
describe TO_EMPTY To: is empty

header   REPLY_TO_EMPTY Reply-To =~ /^\s*$/ [if-unset: UNSET]
describe REPLY_TO_EMPTY Reply-To: is empty

header   REPLY_TO_ULINE_NUMS Reply-To =~ /_\S?(?:[a-z]+\w*?\d+|\d+\w*?[a-z]+)\w*\@/i [if-unset: UNSET]
describe REPLY_TO_ULINE_NUMS Reply-To: has an underline and numbers/letters

# You don't want to let this match "'user@foo.bar'"
# (note single quotes) because some legitimate mailers seem to do that.
header   TO_ADDRESS_EQ_REAL To =~ /^\s*"([^"@]+\@[^"@]+)"\s+<\1>\s*$/i
describe TO_ADDRESS_EQ_REAL To: repeats address as real name

# NOTE: this is what 100% valid undisclosed-recipients mails look like.
# If this gets a high score, that's a bug!
header   UNDISC_RECIPS To =~ /^undisclosed-recipients?:\s*;$/
describe UNDISC_RECIPS Valid-looking To "undisclosed-recipients"

# also 100% valid
header   FAKED_UNDISC_RECIPS To =~ /undisclosed[_ ]*recipient(?:s[^:]|[^s])/i
describe FAKED_UNDISC_RECIPS Faked To "Undisclosed-Recipients"

###########################################################################
# Subject: shape tests

# "?" followed by "!" or "!" followed by "?", anywhere in the Subject.
header   PLING_QUERY Subject =~ /\?.*!|!.*\?/
describe PLING_QUERY Subject has exclamation mark and question mark

header   SUBJ_HAS_UNIQ_ID eval:check_for_unique_subject_id()
describe SUBJ_HAS_UNIQ_ID Subject contains a unique ID

# Six whitespace characters, or a tab next to other whitespace, followed
# by more text.
header   SUBJ_HAS_SPACES Subject =~ /(?:\s{6}|\t\s|\s\t)\S/
describe SUBJ_HAS_SPACES Subject contains lots of white space

header   SUBJ_ALL_CAPS eval:subject_is_all_caps()
describe SUBJ_ALL_CAPS Subject is all capitals

###########################################################################
# Message-Id tests

# (allow this test to pass if there's no Message-Id header)
header   MSGID_HAS_NO_AT MESSAGEID !~ /\@/ [if-unset: NO@MSGID]
describe MSGID_HAS_NO_AT Message-Id has no @ sign

header   MSGID_SPAM_1 MESSAGEID =~ /<[0-9a-f]{12,12}\$[0-9a-f]{8,8}\$[0-9a-f]{8,8}\@>/
describe MSGID_SPAM_1 Message-Id generated by a spam tool

# a good spamsign from another list
header   MSGID_SPAM_6LETTER MESSAGEID =~ /<[0-9][0-9][0-9][a-f]..[a-f]..[a-f].[a-f]\$[0-9a-f]{4}[a-f].[a-f].\$.[a-f][a-f]..[a-f][a-f].\@[a-z]{6}>/
describe MSGID_SPAM_6LETTER Spam tool Message-Id: (6-letter variant)

header   MSGID_SPAM_99X9XX99 MESSAGEID =~ /^<\d\d\d\d\d\d[a-z]\d[a-z][a-z]\d\d\$[a-z][a-z][a-z]\d\d\d\d\d\$\d\d\d\d\d\d\d\d\@/
describe MSGID_SPAM_99X9XX99 Spam tool Message-Id: (99x9xx99 variant)

header   MSGID_SPAM_ZEROES MESSAGEID =~ /<0000[0-9a-f]{8}\$0000[0-9a-f]{4}\$0000[0-9a-f]{4}\@/
describe MSGID_SPAM_ZEROES Spam tool Message-Id: (12-zeroes variant)

# NOTE(review): the next two directives look damaged.  The header line
# defines MSGID_3_DOLLARS but the describe line names MSGID_NO_HOST, and
# the pattern does not look for "$" characters at all.  Text appears to
# be missing between them; kept exactly as found, restore from a pristine
# copy of this file.
header   MSGID_3_DOLLARS MESSAGEID =~ /^(?:$|\s)/m
describe MSGID_NO_HOST Message-Id has no hostname

header   MSGID_OUTLOOK_INVALID eval:check_outlook_message_id()
describe MSGID_OUTLOOK_INVALID Message-Id is fake (in Outlook Express format)

###########################################################################

header   MSGID_FROM_MTA_SHORT eval:mta_added_message_id('short')
describe MSGID_FROM_MTA_SHORT Message-Id was added by a relay

header   MSGID_FROM_MTA_LATER eval:mta_added_message_id('later')
describe MSGID_FROM_MTA_LATER Message-Id was added by a relay

header   MSGID_FROM_MTA_BACKUP eval:mta_added_message_id('backup')
describe MSGID_FROM_MTA_BACKUP Message-Id was added by a relay
tflags   MSGID_FROM_MTA_BACKUP net

# Fires when a Message-Id: line is directly followed by a Received: line,
# unless the Message-Id host is one of the exempted domains.
header   __MSGID_BEFORE_RECEIVED ALL =~ /\nMessage-Id:.*\nReceived:/si
header   __MSGID_BEFORE_OKAY Message-Id =~ /\@[a-z0-9.-]+\.(?:yahoo|wanadoo)(?:\.[a-z]{2,3}){1,2}>/
meta     MSGID_FROM_MTA_HEADER (__MSGID_BEFORE_RECEIVED && !__MSGID_BEFORE_OKAY)
describe MSGID_FROM_MTA_HEADER Message-Id was added by a relay

# NOTE(review): the pattern below is empty ("//") and would match every
# Message-Id.  Its contents appear to have been lost; kept exactly as
# found, restore from a pristine copy of this file.
header   MSGID_FROM_MTA_HOTMAIL Message-Id =~ //
describe MSGID_FROM_MTA_HOTMAIL Message-Id was added by a hotmail.com relay

###########################################################################
# Date: tests

header   DATE_SPAMWARE_Y2K Date =~ /^[A-Z][a-z]{2}, \d\d [A-Z][a-z]{2} [0-6]\d \d\d:\d\d:\d\d [A-Z]{3}$/
describe DATE_SPAMWARE_Y2K Date header uses unusual Y2K formatting

# Negated match: fires when the Date does NOT have the expected layout.
# A missing Date is substituted with a well-formed value so it cannot hit.
header   INVALID_DATE Date !~ /^\s*(?:(?:Mon|Tue|Wed|Thu|Fri|Sat|Sun), )?[0-3 ]?[0-9] (?:Jan|Feb|Ma[ry]|Apr|Ju[nl]|Aug|Sep|Oct|Nov|Dec) (?:[12][901])?[0-9]{2} [0-2][0-9](?:\:[0-5][0-9]){1,2} (?:[+-][0-9]{4}|UT|[A-Z]{2,3}T)(?:\s+\(.*\))?\s*$/ [if-unset: Wed, 31 Jul 2002 16:41:57 +0200]
describe INVALID_DATE Invalid Date: header (not RFC 2822)

# allow +1300, NZ timezone
header   INVALID_DATE_TZ_ABSURD Date =~ /[-+](?:1[4-9]\d\d|[2-9]\d\d\d)$/
describe INVALID_DATE_TZ_ABSURD Invalid Date: header (timezone does not exist)

header   DATE_YEAR_ZERO_FIRST Date =~ /[nbrylgptvc]\s+0\d\d\d(?:\s|$)/
describe DATE_YEAR_ZERO_FIRST Invalid Date: year begins with zero

# Date: earlier than the Received: date, bucketed by number of hours.
header   DATE_IN_PAST_03_06 eval:check_for_shifted_date('-6', '-3')
describe DATE_IN_PAST_03_06 Date: is 3 to 6 hours before Received: date

header   DATE_IN_PAST_06_12 eval:check_for_shifted_date('-12', '-6')
describe DATE_IN_PAST_06_12 Date: is 6 to 12 hours before Received: date

header   DATE_IN_PAST_12_24 eval:check_for_shifted_date('-24', '-12')
describe DATE_IN_PAST_12_24 Date: is 12 to 24 hours before Received: date

header   DATE_IN_PAST_24_48 eval:check_for_shifted_date('-48', '-24')
describe DATE_IN_PAST_24_48 Date: is 24 to 48 hours before Received: date

header   DATE_IN_PAST_48_96 eval:check_for_shifted_date('-96', '-48')
describe DATE_IN_PAST_48_96 Date: is 48 to 96 hours before Received: date

header   DATE_IN_PAST_96_XX eval:check_for_shifted_date('undef', '-96')
describe DATE_IN_PAST_96_XX Date: is 96 hours or more before Received: date

# Date: later than the Received: date, bucketed by number of hours.
header   DATE_IN_FUTURE_03_06 eval:check_for_shifted_date('3', '6')
describe DATE_IN_FUTURE_03_06 Date: is 3 to 6 hours after Received: date

header   DATE_IN_FUTURE_06_12 eval:check_for_shifted_date('6', '12')
describe DATE_IN_FUTURE_06_12 Date: is 6 to 12 hours after Received: date

header   DATE_IN_FUTURE_12_24 eval:check_for_shifted_date('12', '24')
describe DATE_IN_FUTURE_12_24 Date: is 12 to 24 hours after Received: date

header   DATE_IN_FUTURE_24_48 eval:check_for_shifted_date('24', '48')
describe DATE_IN_FUTURE_24_48 Date: is 24 to 48 hours after Received: date

header   DATE_IN_FUTURE_48_96 eval:check_for_shifted_date('48', '96')
describe DATE_IN_FUTURE_48_96 Date: is 48 to 96 hours after Received: date

header   DATE_IN_FUTURE_96_XX eval:check_for_shifted_date('96', 'undef')
describe DATE_IN_FUTURE_96_XX Date: is 96 hours or more after Received: date

###########################################################################
# Advertising tags in the Subject

header   ADVERT_CODE Subject =~ /^\W*ADV\b/i
describe ADVERT_CODE Subject: starts with advertising tag

header   ADVERT_CODE2 Subject =~ /\w.*\b(?!ADV\.)A\s*D\s*V\b/i
describe ADVERT_CODE2 Subject: contains advertising tag

###########################################################################
# illegal characters that should be MIME encoded
# might want to exempt users using languages that don't use Latin
# alphabets, but do it in the eval

header   SUBJ_ILLEGAL_CHARS eval:check_illegal_chars('Subject','0.00','2')
describe SUBJ_ILLEGAL_CHARS Subject contains too many raw illegal characters

header   FROM_ILLEGAL_CHARS eval:check_illegal_chars('From','0.20','2')
describe FROM_ILLEGAL_CHARS From contains too many raw illegal characters

header   HEAD_ILLEGAL_CHARS eval:check_illegal_chars('ALL','0.005','2')
describe HEAD_ILLEGAL_CHARS Header contains too many raw illegal characters

###########################################################################
# ADV tags in non-English languages

# alan premselaar , see SpamAssassin-talk list 2003-03
# quinlan: 2003-03-23 here are more generic Japanese iso-2022-jp codes
# ("not yet acceptance" or "email") + "announcement"
# FWIW, according to Peter Evans, this should be sufficient to catch the
# UCE tag and a common attempt at evasion (using the "sue" instead of
# "mi" Chinese character).
header   JAPANESE_UCE_SUBJECT Subject =~ /\e\$B.*(?:L\$>5Bz|EE;R%a!<%k)9-9p/
describe JAPANESE_UCE_SUBJECT Subject contains a Japanese UCE tag

# quinlan: "advertisement" in Russian KOI8-R
# NOTE(review): the byte sequence below appears to spell the Russian word
# ("reklamu") when decoded as Windows-1251, not as KOI8-R -- confirm which
# charset is intended before relying on the comment above.
header   RUSSIAN_UCE_SUBJECT Subject =~ /\xf0\xe5\xea\xeb\xe0\xec\xf3/
describe RUSSIAN_UCE_SUBJECT Subject contains a Russian UCE tag

# Korean UCE Subject: lines are usually 8-bit, but are occasionally encoded
# with quoted-printable or base64.
#
# \xbc\xba\xc0\xce means "adult"
# \xb1\xa4\xb0\xed means "advertisement"
# \xc1\xa4\xba\xb8 means "information"
# \xc8\xab\xba\xb8 means "publicity"
#
# Each two byte sequence is one Korean letter; the spaces and periods are
# sometimes used to obscure the words. \xb1\xa4\xb0\xed is the most common
# tag and is sometimes very obscured so we look harder.
#
header   KOREAN_UCE_SUBJECT Subject =~ /[({[<][. ]*(?:\xbc\xba[. ]*\xc0\xce[. ]*)?(?:\xb1\xa4(?:[. ]*|[\x00-\x7f]{0,3})\xb0\xed|\xc1\xa4[. ]*\xba\xb8|\xc8\xab[. ]*\xba\xb8)[. ]*[)}\]>]/
describe KOREAN_UCE_SUBJECT Subject: contains Korean unsolicited email tag

###########################################################################

# NOTE(review): the FRIEND_AT_PUBLIC pattern below is unterminated and is
# followed directly by a describe for MULTI_FORGED, for which no rule
# definition is visible.  Text appears to be missing here; kept exactly as
# found, restore from a pristine copy of this file.
header   FRIEND_AT_PUBLIC To =~ /(?:yourdomain|you|your|(? 1)
describe MULTI_FORGED Received headers indicate multiple forgeries

# Sep 23 2002 jm: another spamhaus rule
# The dots in the hostname are escaped so that only the literal host
# "email.qves.com" matches, rather than any character in those positions.
header   RCVD_BY_QVES_COM Received =~ /by email\.qves\.com with Microsoft/
describe RCVD_BY_QVES_COM Sent by a known spamhaus (qves)

header   NONEXISTENT_CHARSET Content-Type =~ /charset=.?DEFAULT/
describe NONEXISTENT_CHARSET Character set doesn't exist

header   CHARSET_FARAWAY_HEADER eval:check_for_faraway_charset_in_headers()
describe CHARSET_FARAWAY_HEADER A foreign language charset used in headers
tflags   CHARSET_FARAWAY_HEADER userconf

# 48 or more hex digits / dots and nothing else.
header   X_MAILER_GIBBERISH X-Mailer =~ /^[A-Fa-f0-9\.]{48,}$/
describe X_MAILER_GIBBERISH 'X-Mailer' line contains gibberish

header   X_PRIORITY_HIGH X-Priority =~ /^1/
describe X_PRIORITY_HIGH Sent with 'X-Priority' set to high

header   X_MSMAIL_PRIORITY_HIGH X-Msmail-Priority =~ /^High/
describe X_MSMAIL_PRIORITY_HIGH Sent with 'X-Msmail-Priority' set to high

header   MANY_FROMS From =~ /^[^\"\<\(]+, [^\"\<\(]+$/
describe MANY_FROMS 'From' contains more than one address

# *so* many spams come from here.
header   BTAMAIL_HEADER ALL =~ /\bbtamail\.net\.cn/i
describe BTAMAIL_HEADER Header contains an address from btamail.net.cn

###########################################################################
# User black-/white-lists (From: side)

header   USER_IN_BLACKLIST eval:check_from_in_blacklist()
describe USER_IN_BLACKLIST From: address is in the user's black-list
tflags   USER_IN_BLACKLIST userconf

header   USER_IN_WHITELIST eval:check_from_in_whitelist()
describe USER_IN_WHITELIST From: address is in the user's white-list
tflags   USER_IN_WHITELIST userconf nice

header   USER_IN_DEF_WHITELIST eval:check_from_in_default_whitelist()
describe USER_IN_DEF_WHITELIST From: address is in the default white-list
tflags   USER_IN_DEF_WHITELIST userconf nice

# noticed this implied in passing in Dan's CVS messages. ;) no nonspam hits
# but only a small number of spam ones, for me.
header   HTML_ALL_CAPS Content-Type =~ /TEXT\/HTML/
describe HTML_ALL_CAPS Content type is "TEXT/HTML" in all caps

# this variant is local, using the Received hdr itself...
header   ROUND_THE_WORLD_LOCAL eval:check_for_round_the_world_received_helo()
describe ROUND_THE_WORLD_LOCAL Received: says mail sent around the world (HELO)

# and this one uses a DNS reverse lookup. so now we can use a version
# of this test without a net connection, or in mass-check etc.
header   ROUND_THE_WORLD eval:check_for_round_the_world_received_revdns()
describe ROUND_THE_WORLD Received: says mail sent around the world (DNS)
tflags   ROUND_THE_WORLD net

# this is a quite common false positive, as it's legal to remove a To but leave
# a CC. so don't score it high.
header   MISSING_HEADERS eval:check_for_missing_to_header()
describe MISSING_HEADERS Missing To: header

###########################################################################
# Recipient list tests

header   SUSPICIOUS_RECIPS eval:similar_recipients('0.6','1.2')
describe SUSPICIOUS_RECIPS Similar addresses in recipient list

header   VERY_SUSP_RECIPS eval:similar_recipients('1.2','undef')
describe VERY_SUSP_RECIPS Very similar addresses in recipient list

header   SORTED_RECIPS eval:sorted_recipients()
describe SORTED_RECIPS Recipient list is sorted by address

###########################################################################
# User black-/white-lists (To: side)

# A blacklist hit is a spam indicator, so this rule is not flagged "nice"
# (that flag is for rules meant to lower the score).  This matches how
# USER_IN_BLACKLIST is flagged elsewhere in this file.
header   USER_IN_BLACKLIST_TO eval:check_to_in_blacklist()
describe USER_IN_BLACKLIST_TO User is listed in 'blacklist_to'
tflags   USER_IN_BLACKLIST_TO userconf

header   USER_IN_WHITELIST_TO eval:check_to_in_whitelist()
describe USER_IN_WHITELIST_TO User is listed in 'whitelist_to'
tflags   USER_IN_WHITELIST_TO userconf nice

header   USER_IN_MORE_SPAM_TO eval:check_to_in_more_spam()
describe USER_IN_MORE_SPAM_TO User is listed in 'more_spam_to'
tflags   USER_IN_MORE_SPAM_TO userconf nice

header   USER_IN_ALL_SPAM_TO eval:check_to_in_all_spam()
describe USER_IN_ALL_SPAM_TO User is listed in 'all_spam_to'
tflags   USER_IN_ALL_SPAM_TO userconf nice

# Four or more letters, each followed by one to three copies of the same
# separator character.
header   GAPPY_SUBJECT Subject =~ /\b(?:[a-z]([-_. =~\/:,*!\@\#\$\%\^&+;\"\'<>\\])\1{0,2}){4,}/i
describe GAPPY_SUBJECT Subject: contains G.a.p.p.y-T.e.x.t

### header existence tests (description is added automatically)

# X-Fix example: NTMail fixed non RFC822 compliant EMail message
#
# X-PMFLAGS is all caps
#
# Headers that seem to only be used by a single spamming software and
# are found together in the same message:
# 1. X-MailingID and X-ServerHost
# 2. X-Stormpost-To and X-List-Unsubscribe
#
# not spammish: X-EM-Registration, X-EM-Version, X-Antiabuse, X-List-Host,
# X-Message-Id
# bad FP rate: Comment, Date-warning

# slower version with same stats as of 2003-05-12
# header X_LIST_UNSUBSCRIBE X-List-Unsubscribe:addr =~ /\d{3}.*\d{3}.*\@/
header   X_LIST_UNSUBSCRIBE exists:X-List-Unsubscribe
describe X_LIST_UNSUBSCRIBE Message has X-List-Unsubscribe header

# these headers have very high correlation with spam
header   X_ENC_PRESENT exists:X-Encoding
describe X_ENC_PRESENT Message has X-Encoding header

header   X_ESMTP exists:x-esmtp
describe X_ESMTP Message has x-esmtp header

header   X_LIBRARY exists:X-Library
describe X_LIBRARY Message has X-Library header

header   X_MAIL_ID_PRESENT exists:X-MailingID
describe X_MAIL_ID_PRESENT Message has X-MailingID header

header   X_PMFLAGS_PRESENT exists:X-PMFLAGS
describe X_PMFLAGS_PRESENT Message has X-PMFLAGS header

header   X_PRECEDENCE_REF exists:X-Precedence-Ref
describe X_PRECEDENCE_REF Message has X-Precedence-Ref header

header   X_SERV_HOST_PRESENT exists:X-ServerHost
describe X_SERV_HOST_PRESENT Message has X-ServerHost header

header   X_STORMPOST_TO exists:X-Stormpost-To
describe X_STORMPOST_TO Message has X-Stormpost-To header

header   X_X_PRESENT exists:X-x
describe X_X_PRESENT Message has X-x header

header   X_FIX_PRESENT exists:X-Fix
describe X_FIX_PRESENT Message has X-Fix header

header   COMPLAIN_TO exists:Complain-To
describe COMPLAIN_TO Message has Complain-To header

header   X_VMP_TEXT exists:X-VMP-Text
describe X_VMP_TEXT Message has X-VMP-Text header

header   X_GCMULTI exists:X-GCMulti
describe X_GCMULTI Message has X-GCMulti header

header   X_MIME_KEY exists:X-Mime-Key
describe X_MIME_KEY Message has X-Mime-Key header

header   MICROSOFT exists:microsoft
describe MICROSOFT Message has microsoft header

# Case-sensitive match on the raw header block.
header   MIME_ODD_CASE ALL =~ /\nMiME-Version: /s
describe MIME_ODD_CASE MiME-Version header (oddly capitalized)

###########################################################################
# Outlook-style header consistency checks

header   __HAS_MIMEOLE exists:X-MimeOLE
header   __HAS_MSMAIL_PRI exists:X-MSMail-Priority
header   __HAS_SQUIRRELMAIL_IN_MAILER X-Mailer =~ /SquirrelMail\b/
meta     MISSING_MIMEOLE (__HAS_MSMAIL_PRI && !__HAS_MIMEOLE && !__HAS_SQUIRRELMAIL_IN_MAILER)
describe MISSING_MIMEOLE Message has X-MSMail-Priority, but no X-MimeOLE

header   __HAS_X_MAILER exists:X-Mailer
header   __HAS_OUTLOOK_IN_MAILER X-Mailer =~ /Microsoft (CDO|Outlook|Office Outlook)\b/
meta     MISSING_OUTLOOK_NAME ((__HAS_MIMEOLE || __HAS_MSMAIL_PRI) && __HAS_X_MAILER && !__HAS_OUTLOOK_IN_MAILER && !__HAS_SQUIRRELMAIL_IN_MAILER)
describe MISSING_OUTLOOK_NAME Message looks like Outlook, but isn't

header   __HAS_X_PRIORITY exists:X-Priority
meta     PRIORITY_NO_NAME ((__HAS_X_PRIORITY || __HAS_MSMAIL_PRI) && !__HAS_X_MAILER)
describe PRIORITY_NO_NAME Message has priority setting, but no X-Mailer

###########################################################################
# Subject: phrase tests

header   SUBJ_AS_SEEN Subject =~ /\bAs Seen/i
describe SUBJ_AS_SEEN Subject contains "As Seen"

header   SUBJ_DOLLARS Subject =~ /^\$[0-9.,]+\b/
describe SUBJ_DOLLARS Subject starts with dollar amount

header   SUBJ_DOUBLE_YOUR Subject =~ /Double Your/i
describe SUBJ_DOUBLE_YOUR Subject contains "Double Your"

header   SUBJ_FOR_ONLY Subject =~ /For Only/i
describe SUBJ_FOR_ONLY Subject contains "For Only"

header   SUBJ_FREE_CAP Subject =~ /FRE{2,}|F.R.E.E\b/
describe SUBJ_FREE_CAP Subject contains "FREE" in CAPS

header   SUBJ_FREE_INSTANT Subject =~ /Free Instant/i
describe SUBJ_FREE_INSTANT Subject contains "Free Instant"

header   SUB_FREE_OFFER Subject =~ /^fre{2,}\b/i
describe SUB_FREE_OFFER Subject starts with "Free"

# Leading "guaranteed" in any case, or upper-case "GUARANTEE" anywhere.
header   SUBJ_GUARANTEED Subject =~ /^guaranteed|(?-i:GUARANTEE)/i
describe SUBJ_GUARANTEED Subject GUARANTEED

header   SUB_HELLO Subject =~ /^hello\b/i
describe SUB_HELLO Subject starts with "Hello"

header   SUBJ_LIFE_INSURANCE Subject =~ /life\s+insurance/i
describe SUBJ_LIFE_INSURANCE Subject includes "life insurance"

header   SUBJ_NOW_ONLY Subject =~ /\bNow Only/i
describe SUBJ_NOW_ONLY Subject contains "Now Only"

header   SUBJ_RIPPED Subject =~ /Ripped & Strong/i
describe SUBJ_RIPPED Subject contains "Ripped & Strong"

header   SUBJ_VIAGRA Subject =~ /viagra/i
describe SUBJ_VIAGRA Subject includes "viagra"

header   SUBJ_YOUR_DEBT Subject =~ /Your (?:Bills|Debt|Credit)/i
describe SUBJ_YOUR_DEBT Subject contains "Your Bills" or similar

header   SUBJ_YOUR_FAMILY Subject =~ /Your Family/i
describe SUBJ_YOUR_FAMILY Subject contains "Your Family"

header   SUBJ_YOUR_OWN Subject =~ /Your Own/i
describe SUBJ_YOUR_OWN Subject contains "Your Own"

header   VAR_REF_IN_RECEIVED Received =~ /from \$\S+ \(/
describe VAR_REF_IN_RECEIVED Received contains a $variable reference

# the real services never HELO as 'foo.com', instead 'mail.foo.com' or
# something like that. Note: be careful when expanding this... legit dotcom
# HELOers include: hotmail.com, drizzle.com, lockergnome.com.
# Each name is listed once in the alternation; a repeated alternative can
# never match anything the first occurrence did not.
header   RCVD_FAKE_HELO_DOTCOM Received =~ /^from (?:msn|yahoo|yourwebsite|lycos|excite|cs|aol|localhost|koreanmail|allexecs|mydomain|juno|eudoramail|compuserve|desertmail|caramail)\.com \(/m
describe RCVD_FAKE_HELO_DOTCOM Received contains a faked HELO hostname

header   USERNAME_IN_SUBJECT eval:check_for_to_in_subject()
describe USERNAME_IN_SUBJECT To: username at front of subject

header   LOSE_POUNDS Subject =~ /\bLose .*(?:pounds|lbs|weight)/i
describe LOSE_POUNDS Subject talks about losing pounds

header   EXTRA_MPART_TYPE Content-Type =~ /(?:\s*multipart\/)?.* type=/i
describe EXTRA_MPART_TYPE Header has extraneous Content-type:...type= entry

# Literal "#recipient#" placeholder left in the To: header.
header   TO_RECIP_MARKER To =~ /\#recipient\#/
describe TO_RECIP_MARKER To header contains 'recipient' marker

header   SAVINGS Subject =~ /\bsave\s+(?:on\s+your|up\s+to|big|over|at\s+least|\d+\%|you)(?:\s|\b|$)/i
describe SAVINGS Subject talks about savings

###########################################################################
# MIME boundary tests; spam tools use distinctive patterns.

header   MIME_BOUND_DASH_DIGIT Content-Type =~ /boundary="_-{10}=_\d{19,22}"/
describe MIME_BOUND_DASH_DIGIT Spam tool pattern in MIME boundary

header   MIME_BOUND_HASHES Content-type =~ /boundary=\"\#{10}\"/
describe MIME_BOUND_HASHES Spam tool pattern in MIME boundary

header   MIME_BOUND_DIGITS_4 Content-Type =~ /boundary=\"_----------=_\d{18}\"/
describe MIME_BOUND_DIGITS_4 Spam tool pattern in MIME boundary

header   MIME_BOUND_DIGITS_7 Content-Type =~ /boundary=\d{9}\.\d{13}/
describe MIME_BOUND_DIGITS_7 Spam tool pattern in MIME boundary

header   MIME_BOUND_HEX_24 Content-Type =~ /boundary=\"[\dA-F]{24}\"/
describe MIME_BOUND_HEX_24 Spam tool pattern in MIME boundary

header   MIME_BOUND_MA Content-Type =~ /boundary=\"----=_[a-zA-Z0-9]{8}_[a-zA-Z0-9]{8}_MA\"/
describe MIME_BOUND_MA Spam tool pattern in MIME boundary

header   MIME_BOUND_MANY_HEX Content-Type =~ /boundary="[\da-f]{8}(?:-[\da-f]{4}){3}-[\da-f]{12}"/
describe MIME_BOUND_MANY_HEX Spam tool pattern in MIME boundary

# Sub-rule: any Content-Type mentioning "NextPart".
header   __NEXTPART_ALL Content-Type =~ /NextPart/
# Sub-rule: the "NextPart" boundary layout that is treated as normal.
# MIME_BOUND_NEXTPART fires for "NextPart" boundaries that do not fit it.
header   __NEXTPART_NORMAL Content-Type =~ /="(?:----_?=_)?NextPart_[\dA-F]{3}(_[\dA-F]{3,8})?_[\dA-F]{8}\.[\dA-F]{8}"/
meta     MIME_BOUND_NEXTPART (__NEXTPART_ALL && !__NEXTPART_NORMAL)
describe MIME_BOUND_NEXTPART Spam tool pattern in MIME boundary

header   MIME_BOUND_OPTIN Content-Type =~ /boundary=\"[A-F\d]{8}-[A-F\d]{4}-[A-F\d]{4}-[A-F\d]{4}-[A-F\d]{12}OPTIN\"/
describe MIME_BOUND_OPTIN Spam tool pattern in MIME boundary

header   MIME_BOUND_MAIL_BOUND Content-Type =~ /boundary=\"____MAIL_BOUNDARY____\"/
describe MIME_BOUND_MAIL_BOUND Spam tool pattern in MIME boundary

header   MIME_BOUND_TEP Content-Type =~ /boundary="TEP-\d{9,10}\.\d{10}\.\d{10}"/
describe MIME_BOUND_TEP Spam tool pattern in MIME boundary

header   MIME_BOUND_RKFINDY Content-Type =~ /boundary=\"=_NextPart_2rfkindysadvnqw3nerasdf\"/
describe MIME_BOUND_RKFINDY Spam tool pattern in MIME boundary (rfkindy)

###########################################################################

# A missing Date: header is substituted with "UNSET", which is what the
# pattern looks for.
header   DATE_MISSING Date =~ /^UNSET$/ [if-unset: UNSET]
describe DATE_MISSING Missing Date: header

# freqs: 0.001 0.003 0.000 1.00 2.66 POST_IN_RCVD
header   POST_IN_RCVD Received =~ / Post\.(?:sk|cz)/
describe POST_IN_RCVD Received contains fake 'Post.cz' hostname

header   TO_INVESTORS To =~ /\bInvestors\@/
describe TO_INVESTORS To: non-existent 'Investors' address

# Negated match: fires when To: fits none of the accepted address forms.
header   TO_MALFORMED To !~ /(?:(?:\"[^\"]+\"|\S+)\@\S+\.\S+|^\s*.+:\s*;|^\s*\"[^\"]+\":\s*;|^\s*\([^\)]*\)\s*$|<\S+(?:\!\S+){1,}>|^\s*$)/ [if-unset: unset@unset.unset]
describe TO_MALFORMED To: has a malformed address

# jm: somehow these guys keep slipping through, and they're very persistent.
# they use both azoogle.com and azogle.com, but this header is always
# in this format.
header   AZOOGLE X-Info =~ /service to abuse\@azoogle\.com$/
describe AZOOGLE From azoogle.com, azogle.com, etc.
header   SUBJECT_APPROVED Subject =~ /approv(?:ed|al).?[.!*]/i
describe SUBJECT_APPROVED Subject talks about being approved

header   SUBJ_HAS_TIME_ID Subject =~ /\sTime[: ]+\d+:\d+:\d+ [AP]M\s*$/i
describe SUBJ_HAS_TIME_ID Subject has a Time ID

# OPT_HEADER fires when an "opt-in"/"opt-out" style phrase occurs somewhere
# in the headers but not on a Subject: (or Resent-Subject:) line.  Both
# sub-rules use the same phrase pattern; the alternation is non-capturing
# in each because the captured text is never used.
header   __OPT_HEADER_SUBJ ALL =~ /^(?:Resent-)?Subject:.*opt.?(?:in|out|oem|ed|ion-in|[\d@])(?:\b|\d|\@)/im
header   __OPT_HEADER_ALL ALL =~ /opt.?(?:in|out|oem|ed|ion-in|[\d@])(?:\b|\d|\@)/i
meta     OPT_HEADER (__OPT_HEADER_ALL && !__OPT_HEADER_SUBJ)
describe OPT_HEADER Headers include an "opt"ed phrase

# Most/all of these require that From addresses do not start with numbers.
header   FROM_NUM_AT_WEBMAIL From:addr =~ /^\d\S+\@(?:msn\.com|flashmail\.com|mailexcite\.com|prodigy\.net|yahoo\.\S+|hotmail\.com|eudoramail\.com|aol\.com|excite\.com|email\.com|earthlink\.net|geocities\.com|hknetmail\.com|angelfire\.com)/i
describe FROM_NUM_AT_WEBMAIL From address is webmail, but starts with a number

# Six digits immediately before the "@" at one of the listed services.
header   FROM_WEBMAIL_END_NUMS6 From:addr =~ /\d\d\d\d\d\d\@(?:aol|msn|bigfoot|compuserve|excite|hotmail|juno|prodigy|yahoo)\.(?:com|net|org)/i
describe FROM_WEBMAIL_END_NUMS6 From webmail service and address ends in numbers

# NOTE(review): the ADDR_FREE pattern below is unterminated and has no
# describe line.  Text appears to be missing after it; kept exactly as
# found, restore from a pristine copy of this file.
header   ADDR_FREE From =~ /\b(?-i:F)ree(?-i:[ A-Z]).*h

# usually also has INVALID_DATE and NO_REAL_NAME
# this rule excludes about 0.02% of matches since some low random
# numbers in the second or third part will be missed.
header   MSGID_THREESIXSIX Message-Id =~ /<\d{1,3}\.\d{3,6}\.\d{3,6}\@\S+>/
describe MSGID_THREESIXSIX Message-Id header indicates message is spam

header   __USER_AGENT_MSN X-Mailer =~ /^MSN Explorer /

# hmm, interesting. Ensure the MUA is using consistent priorities.
# http://www3.cds.ne.jp/~marimo/oka/research/em/data0002.html
# Fires when X-Priority starts with 1 or 2 and an X-MSMail-Priority header
# exists but does not start with "High".
header   __X_PRI_HI X-Priority =~ /^[12]/
header   __X_MSPRI_HI X-Msmail-Priority =~ /^High/
meta     X_PRI_MISMATCH_HI (__X_PRI_HI && __HAS_MSMAIL_PRI && !__X_MSPRI_HI)
describe X_PRI_MISMATCH_HI 'X-Priority' does not match 'X-MSMail-Priority'

header   FORGED_RCVD_NET_HELO eval:check_for_forged_received_ip_helo()
describe FORGED_RCVD_NET_HELO Host HELO'd using the wrong IP network

header   NO_RDNS_DOTCOM_HELO eval:check_for_no_rdns_dotcom_helo()
describe NO_RDNS_DOTCOM_HELO Host HELO'd as a big ISP, but had no rDNS

header   X_ORIG_HOST X-Originating-Host =~ /^\[/
describe X_ORIG_HOST Message has X-Originating-Host header

# Hotmail's DAV interface uses this and it's heavily exploited right now. As
# far as I can tell, it requires an msn.com or hotmail.com X-Originating-Email:
# but allows anything for From: so use that as a spamsign.
header   __HAS_MSN_RCVD_DAV Received =~ / by \S+\.(?:hotmail|msn)\.com with (?:HTTP|DAV)\;/
header   __HAS_MSN_ORIG_EMAIL X-Originating-Email =~ /(?:hotmail|msn)\.com\b/
header   __HAS_MSN_FROM From =~ /(?:hotmail|msn)\.com\b/
meta     FAKED_HOTMAIL_DAV (__HAS_MSN_RCVD_DAV && __HAS_MSN_ORIG_EMAIL && !__HAS_MSN_FROM)
describe FAKED_HOTMAIL_DAV X-Originating-Email header does not match From

###########################################################################

header   X_MESSAGE_INFO exists:X-Message-Info
describe X_MESSAGE_INFO Bulk email fingerprint (X-Message-Info) found

header   MIME_BOUND_DD_DIGITS Content-Type =~ /boundary=\"--\d+\"/
describe MIME_BOUND_DD_DIGITS Spam tool pattern in MIME boundary

# Received: lines naming a bare IP address on both the "from" and "by" side.
header   __DOUBLE_IP_SPAM_1 Received =~ /from \[\d{1,3}\.\d{1,3}\.\d{1,3}\.\d{1,3}\] by \d{1,3}\.\d{1,3}\.\d{1,3}\.\d{1,3} with/
header   __DOUBLE_IP_SPAM_2 Received =~ /from\s+\d{1,3}\.\d{1,3}\.\d{1,3}\.\d{1,3}\s+by\s+\d{1,3}\.\d{1,3}\.\d{1,3}\.\d{1,3};/
meta     RCVD_DOUBLE_IP_SPAM (__DOUBLE_IP_SPAM_1 || __DOUBLE_IP_SPAM_2)
describe RCVD_DOUBLE_IP_SPAM Bulk email fingerprint (double IP) found

# NOTE(review): the rule below is cut off mid-pattern at the end of the
# visible text; kept exactly as found.
header   MSGID_SPAM_CAPS Message-ID =~ /^\s*