# experiments based on masscheck results

# Fires when FREEMAIL_FROM hits together with MISSING_HEADERS or __HDRS_LCASE.
meta      MALFORMED_FREEMAIL  (MISSING_HEADERS||__HDRS_LCASE) && FREEMAIL_FROM
describe  MALFORMED_FREEMAIL  Bad headers on message from free email service
#score    MALFORMED_FREEMAIL  0.1

# NOTE(review): as found, this pattern opened with m' but ended with /, so the
# regexp was never terminated; it is closed with ' here.  The pattern and the
# FROM_NOT_DOTCOM description below look like the remains of two separate
# rules (FROM_WEBSITE and FROM_NOT_DOTCOM) whose middle part was lost; no
# FROM_NOT_DOTCOM test is defined in this part of the file.
# TODO confirm both rules against the upstream copy.
header    FROM_WEBSITE        From:raw =~ m'\b(?:f|ht)tps?://[^\s"]+\.(?!com|COM)[^.>]+>'
describe  FROM_NOT_DOTCOM     Sender NAME has .com but sender ADDRESS does not

# Ten consecutive "something@something," entries in the To/Cc headers.
header    KHOP_BIG_TO_CC      ToCc =~ /(?:[^,\@]{1,60}\@[^,]{4,25},){10}/
describe  KHOP_BIG_TO_CC      Sent to 10+ recipients instead of Bcc or a list

# see also 70_sare_spoof.cf's __EBAY_ADDRESS:
# header __EBAY_ADDRESS From:addr =~ /[\@\.]ebay\.(?:com(?:\.au|\.cn|\.hk|\.my|\.sg)?|co\.uk|at|be|ca|fr|de|in|ie|it|nl|ph|pl|es|se|ch)/i
# NOTE(review): .{3,5}$ needs 3-5 characters after "ebay.", so two-letter
# TLDs such as ebay.de (listed in the commented-out variant above) do not
# match -- confirm whether that is intended.
header    __EBAY_ADDRESS      From:addr =~ /[\@.]ebay\..{3,5}$/i
meta      KHOP_FAKE_EBAY      __EBAY_ADDRESS && !__NOT_SPOOFED
describe  KHOP_FAKE_EBAY      Sender falsely claims to be from eBay
#score    KHOP_FAKE_EBAY      2.25 # 20090408

# masscheck doesn't cover ifplugin lines
#ifplugin Mail::SpamAssassin::Plugin::URIDetail
# uri_detail doesn't support m{foo}i notation
#uri_detail KHOP_FOREIGN_CLICK text =~ /\b(?:cli(?:quez\W|ck\Wa)ici\b|cli(?:cca\W|c\Wa|que\Wa)qu[^<.,a ]|klie?k(?:\Whi?er|ni(?:j|nite)\Wtu[tk]aj)\b)/i
#else
# Fallback without URIDetail: look for the same phrases inside the text of an
# href link in the raw body.
rawbody   KHOP_FOREIGN_CLICK  m{\bhref=[^>]{9,199}>[^<]{0,80}(?:<(?!/a\b)[^>]{0,299}>[^<]{0,80}){0,9}[^<]{0,80}\b(?:cli(?:quez\W|ck\Wa)ici\b|cli(?:cca\W|c\Wa|que\Wa)qu[^<.,a ]|klie?k(?:\Whi?er|ni(?:j|nite)\Wtu[tk]aj)\b)}si
#endif
# includes fr, es, it, pt, nl, da, ca, sl, af, and probably others
describe  KHOP_FOREIGN_CLICK  Click here link in non-English Latin text
#score    KHOP_FOREIGN_CLICK  0.1 # 20090526 see also SARE_UN7
tflags    KHOP_FOREIGN_CLICK  nopublish # re-do ifplugin to publish

# http:// URL whose host is 3-6 characters plus a two-character TLD, followed
# by a single 3-8 character path component.
uri       __SHORT_URL         /^http:\/\/[^\/]{3,6}\.\w\w\/[^\/]{3,8}\/?$/

# list from http://techcrunch.com/2010/01/06/bit-ly-market-share/ containing
# anything that ranked 0.1% or more of twitter's traffic that day, plus anything
# bold below that threshold.
uri       URL_SHORTENER       /^http:\/\/(?:bit\.ly|tinyurl\.com|ow\.ly|is\.gd|tumblr\.com|formspring\.me|ff\.im|youtu\.be|tl\.gd|plurk\.com|migre\.me|j\.mp|cli\.gs|goo\.gl|yfrog\.com|lnk\.ms|su\.pr|fb\.me|alturl\.com|wp\.me|ping\.fm|chatter\.com|post\.ly|twurl\.nl|tiny\.cc|4sq\.com|ustre\.am|short\.to|u\.nu|flic\.kr|budurl\.com|digg\.com|twitvid\.com|gowal\.la|om\.ly|justin\.tv|icio\.us|p\.gs|loopt\.us|tcrn\.ch|xrl\.us|wpo\.st|bkite\.com)\/[^\/]{3}\/?/
describe  URL_SHORTENER       Has a shortened URL (can hide a blacklisted link)
tflags    URL_SHORTENER       nopublish

# Single-regexp variant, kept for reference only.  The negative lookahead was
# written "(!?" in the original comment; corrected to "(?!" here.
#uri SHORT_URL /^http:\/\/(?!(?:bit\.ly|tinyurl\.com|ow\.ly|is\.gd|tumblr\.com|formspring\.me|ff\.im|youtu\.be|tl\.gd|plurk\.com|migre\.me|j\.mp|cli\.gs|goo\.gl|yfrog\.com|lnk\.ms|su\.pr|fb\.me|alturl\.com|wp\.me|ping\.fm|chatter\.com|post\.ly|twurl\.nl|tiny\.cc|4sq\.com|ustre\.am|short\.to|u\.nu|flic\.kr|budurl\.com|digg\.com|twitvid\.com|gowal\.la|om\.ly|justin\.tv|icio\.us|p\.gs|loopt\.us|tcrn\.ch|xrl\.us|wpo\.st|bkite\.com)\/)[^\/]{3,6}\.\w\w\/[^\/]{3,8}\/?$/
meta      SHORT_URL           __SHORT_URL && !URL_SHORTENER && !ALL_TRUSTED
describe  SHORT_URL           Has a short URL without a shortening service

# I don't think this ever fires
uri       URI_HIDDEN          m'.{7}\/\.\.?/?\w'
describe  URI_HIDDEN          Contains a hidden directory
#score    URI_HIDDEN          0.7

# 20090515 13:29 by Adam Katz (me) on sa-users list
# no subdomain; sent by example.com rather than server.example.com
header    __RDNS_NO_SUBDOM    X-Spam-Relays-External =~ /^[^\]]+ rdns=[^. ]*\.\w+ /

# Relays with 5+ subdomains in their rDNS.
header    __5_SUBDOM          X-Spam-Relays-External =~ /^[^\]]+ rdns=(?:[^. ]*\.){6,}\w+ /
# 5.7617/0.0344 spam/ham, 0.994 s/o @ 20091214
# 6.8885/0.0129 spam/ham, 0.998 s/o @ 20100417
# 6.8984/0.0251 spam/ham, 0.996 s/o @ 20100420
# 20100420 results for 4_subdom=6.8577/2.0856, 3=15.3590/8.4512
# which means 4+ was 13.7561/2.1106@0.866 and 3+ was 29.1151/10.5619@0.734

# IP address in relay's rDNS or HELO
# (the four captured octets must reappear, in forward or reverse order,
# separated by single non-digit characters)
header    __IP_IN_RELAY       X-Spam-Relays-External =~ /^\[ ip=(\d+)\.(\d+)\.(\d+)\.(\d+) (?:[^\]]* )?(?:rdns|helo)=\S*(?:\1\D\2\D\3\D\4|\4\D\3\D\2\D\1)/

# Rough nonplugin regexs from John Rudd's Botnet plugin (v0.8, 2007-08-05).
# http://people.ucsc.edu/~jrudd/spamassassin/Botnet-0.8.tar
# I've purposefully removed anything already in RDNS_DYNAMIC
# NOTE: this is GPLv2, which is incompatible with the Apache License.
# My *brief* read is that Apache->GPLv2 is taboo but GPLv2->Apache is fine.
# I wrote the next four rules from his base words, so these are fine.
#
# These three patterns previously began with /^[\]]+ (one or more literal "]"),
# which cannot match because the relay pseudo-header starts with "[" (compare
# __IP_IN_RELAY above).  They now use /^[^\]]+ like the other rDNS rules in
# this file, i.e. "anything inside the first [ ... ] relay entry".
header    __BOTNET_CLIENT1    X-Spam-Relays-External =~ /^[^\]]+ rdns=\S+\b(?:ddns|dial-?(?:in|up)|dyn(?:amic)?ip|resident(?:ial)?|bredband)[^a-z]/i
header    __BOTNET_CLIENT2    X-Spam-Relays-External =~ /^[^\]]+ rdns=\S+(?:\b(?:pool|user)[^a-z]|[-.]ip[-.])/i
header    __BOTNET_SERVER     X-Spam-Relays-External =~ /^[^\]]+ rdns=\S+\b(?:e?mail(?:out)?|mta|mx(?:pool)?|relay|smtp|exch(?:ange)?)[^a-z]/i
meta      BOTNET_NOPLUGIN     !__BOTNET_SERVER && (__BOTNET_CLIENT1||__BOTNET_CLIENT2)

# shawcable.net uses customer hostnames that don't match other botnet patterns
describe  BOTNET_SHAWCABLE    Shawcable.net customer address
meta      BOTNET_SHAWCABLE    (__BOTNET_SHAWCABLE && __BOTNET_NOTRUST)
header    __BOTNET_SHAWCABLE  X-Spam-Relays-Untrusted =~ /^[^\]]+ rdns=s[0-9a-f]*\...\.shawcable\.net\b/i
#score    BOTNET_SHAWCABLE    5.0
tflags    BOTNET_SHAWCABLE    nopublish # confirm license with author first

# ocn.ne.jp uses customer hostnames that don't match other botnet patterns
describe  BOTNET_OCNNEJP      Ocn.ne.jp customer address
meta      BOTNET_OCNNEJP      (__BOTNET_OCNNEJP && __BOTNET_NOTRUST)
header    __BOTNET_OCNNEJP    X-Spam-Relays-Untrusted =~ /^[^\]]+ rdns=p\d{4}-ip\S*\.ocn\.ne\.jp\b/i
#score    BOTNET_OCNNEJP      5.0
tflags    BOTNET_OCNNEJP      nopublish # confirm license with author first

# If the message was authenticated or hit a trusted host, then we want to
# exempt these 'non-module' rules.
describe  __BOTNET_NOTRUST    Message has no trusted relays
header    __BOTNET_NOTRUST    X-Spam-Relays-Trusted !~ /ip=/i
tflags    __BOTNET_NOTRUST    nopublish # confirm license with author first

# license to this one (same author as botnet) is unclear.
# http://people.ucsc.edu/~jrudd/spamassassin/jr_rfc1912.cf
describe  JR_RCVD_TOO_FEW_HOPS  Just one hop means direct untrusted client
header    JR_RCVD_TOO_FEW_HOPS  X-Spam-Relays-Untrusted =~ /^\[[^\]]+\]$/
#score    JR_RCVD_TOO_FEW_HOPS  1.0
tflags    JR_RCVD_TOO_FEW_HOPS  nopublish # confirm license with author first

header    __MSGID_JAVAMAIL    Message-ID =~ /\.JavaMail\./
tflags    __MSGID_JAVAMAIL    nice

header    __TAB_IN_SUBJ       Subject =~ /\t/

# "i owe u 4 something b4 tomorrow" hits this four times:
header    __SUBJ_INFORMAL     Subject =~ /(?:^| )(?:[iuU4]|[Bb]4)(?: |$)/

# these two ignore leading things like "Re:" and "[foo list]"
header    __SUBJ_4LOWER       Subject =~ /^(?:[a-zA-Z]{2,4}: +)?(?:\[[ \w]+\] +)?(?:.*[a-z]){4}/
header    __SUBJ_2UPPER       Subject =~ /^(?:[a-zA-Z]{2,4}: +)?(?:\[[ \w]+\] +)?(?:.*[A-Z]){2}/
header    __SUBJ_SHORT        Subject =~ /^.{0,8}$/
header    __SUBJ_IMPORTANT    Subject =~ /\b(?:important|IMPORTANT)\b/

# attempts to fix SUBJ_ALL_CAPS, which has an S/O of 0.563 yet is published
meta      SUBJ_ALL_CAPS2      SUBJ_ALL_CAPS && __SUBJ_4LOWER && __SUBJ_2UPPER && !__SUBJ_SHORT
meta      SUBJ_ALL_CAPS3      SUBJ_ALL_CAPS && __SUBJ_4LOWER && __SUBJ_2UPPER && !(__SUBJ_SHORT||__SUBJ_IMPORTANT)

meta      AOL_ALL_CAPS        __AT_AOL_MSGID && UPPERCASE_75_100
describe  AOL_ALL_CAPS        AOL users sometimes write mail in all uppercase
tflags    AOL_ALL_CAPS        nice

# testing warren's theory from "Uppercase E-mail in Latin America" 2009-10-06
# http://old.nabble.com/Uppercase-E-mail-in-Latin-America-td25748291.html
# note, masscheck probably doesn't have enough LACNIC ham for this test
meta      LACNIC_ALL_CAPS     __RCVD_VIA_LACNIC && UPPERCASE_75_100
describe  LACNIC_ALL_CAPS     Latino users sometimes write mail in all uppercase
tflags    LACNIC_ALL_CAPS     nice nopublish

# The three-letter prefix captured from the "from" host must be repeated twice
# in the "by ... .hotmail.com" host name.
header    __HOTMAIL_HELO      Received =~ /from ([A-Z]{3})\d[^.]+ [^\n]+ by \1\d+-[^\n ]+\.\1\d+\.hotmail\.com with Microsoft/i
tflags    __HOTMAIL_HELO      nice
# 1 & 2 are in 20_head_tests.cf ... this one doesn't use eval rules
meta      FORGED_HOTMAIL_RCVD3  __HOST_HOTMAIL && (!__HOTMAIL_HELO || __DOS_SINGLE_EXT_RELAY)

# A colon inside the by= or ip= field of the first untrusted relay entry.
header    RCVD_VIA_IPV6       X-Spam-Relays-Untrusted =~ /^[^\]]+ (?:by|ip)=[^\] ]+:[^\] .]+ /
describe  RCVD_VIA_IPV6       Received by the last trusted relay via IPv6
tflags    RCVD_VIA_IPV6       nice

# A blend of sidney's UPPERCASE_HTTP and jhardin's URI_UC for bug 6408
# This one avoids Http: which I think is the biggest problem.
uri       UPPERCASE_URI       /^[^:A-Z]+[A-Z]/
describe  UPPERCASE_URI       Link protocol has unexpected mixed case