# ---------------------------------------------------------------------------
# Experimental SpamAssassin rules.
#
# Layout: one directive per line.  SpamAssassin reads its configuration line
# by line and treats an unescaped '#' as the start of a comment that runs to
# the end of the line, so a directive must never share a line with a preceding
# comment and must never be wrapped across lines.
#
# Rules whose names start with "__" are sub-rules: they carry no score and
# exist only to be referenced from meta rules.
# ---------------------------------------------------------------------------

# experiments based on masscheck results
# Fires when the message has missing headers or lower-cased header names
# (either sub-condition) AND the sender is a freemail address.
meta MALFORMED_FREEMAIL (MISSING_HEADERS||__HDRS_LCASE) && FREEMAIL_FROM
describe MALFORMED_FREEMAIL Bad headers on message from free email service
#score MALFORMED_FREEMAIL 0.1

# NOTE(review): the FROM_WEBSITE directive below is corrupt in this copy of the
# file.  The pattern opens with the m' delimiter but ends with }si, and its
# tail (a list of non-English "click here" phrases) appears to belong to
# KHOP_FOREIGN_CLICK rather than to a From-header test.  Two rules seem to have
# been fused and the text between them lost.  The directive cannot compile as
# written, so it is preserved verbatim but disabled; restore both rule
# definitions from a known-good copy before re-enabling.
#header FROM_WEBSITE From =~ m'\b(?:f|ht)tps?://[^\s"]{9,199}>[^<]{0,80}(?:<(?!/a\b)[^>]{0,299}>[^<]{0,80}){0,9}[^<]{0,80}\b(?:cli(?:quez\W|ck\Wa)ici\b|cli(?:cca\W|c\Wa|que\Wa)qu[^<.,a ]|klie?k(?:\Whi?er|ni(?:j|nite)\Wtu[tk]aj)\b)}si
#endif
# includes fr, es, it, pt, nl, da, ca, sl, af, and probably others
# NOTE(review): the test definition for KHOP_FOREIGN_CLICK is not visible in
# this section (see the note above); only its description and flags are.
describe KHOP_FOREIGN_CLICK Click here link in non-English Latin text
#score KHOP_FOREIGN_CLICK 0.1 # 20090526 see also SARE_UN7
tflags KHOP_FOREIGN_CLICK nopublish # re-do ifplugin to publish

# Sub-rule: an http URL whose host is 3-6 non-slash characters, a dot and a
# two-character suffix, followed by a single path segment of 3-8 characters.
uri __SHORT_URL /^http:\/\/[^\/]{3,6}\.\w\w\/[^\/]{3,8}\/?$/

# list from http://techcrunch.com/2010/01/06/bit-ly-market-share/ containing
# anything that ranked 0.1% or more of twitter's traffic that day, plus anything
# bold below that threshold.
# Matches an http URL on one of the listed hosts whose path starts with at
# least three non-slash characters.
uri URL_SHORTENER /^http:\/\/(?:bit\.ly|tinyurl\.com|ow\.ly|is\.gd|tumblr\.com|formspring\.me|ff\.im|youtu\.be|tl\.gd|plurk\.com|migre\.me|j\.mp|cli\.gs|goo\.gl|yfrog\.com|lnk\.ms|su\.pr|fb\.me|alturl\.com|wp\.me|ping\.fm|chatter\.com|post\.ly|twurl\.nl|tiny\.cc|4sq\.com|ustre\.am|short\.to|u\.nu|flic\.kr|budurl\.com|digg\.com|twitvid\.com|gowal\.la|om\.ly|justin\.tv|icio\.us|p\.gs|loopt\.us|tcrn\.ch|xrl\.us|wpo\.st|bkite\.com)\/[^\/]{3}\/?/
describe URL_SHORTENER Has a shortened URL (can hide a blacklisted link)

# Earlier single-regex form of SHORT_URL, kept disabled; the meta rule below
# expresses the same idea by combining the two uri rules above.
#uri SHORT_URL /^http:\/\/(!?(?:bit\.ly|tinyurl\.com|ow\.ly|is\.gd|tumblr\.com|formspring\.me|ff\.im|youtu\.be|tl\.gd|plurk\.com|migre\.me|j\.mp|cli\.gs|goo\.gl|yfrog\.com|lnk\.ms|su\.pr|fb\.me|alturl\.com|wp\.me|ping\.fm|chatter\.com|post\.ly|twurl\.nl|tiny\.cc|4sq\.com|ustre\.am|short\.to|u\.nu|flic\.kr|budurl\.com|digg\.com|twitvid\.com|gowal\.la|om\.ly|justin\.tv|icio\.us|p\.gs|loopt\.us|tcrn\.ch|xrl\.us|wpo\.st|bkite\.com)\/)[^\/]{3,6}\.\w\w\/[^\/]{3,8}\/?$/
# A short-looking URL that is not on the known-shortener list, ignored when
# ALL_TRUSTED fires.  The whole expression must stay on this one line.
meta SHORT_URL __SHORT_URL && !URL_SHORTENER && !ALL_TRUSTED
describe SHORT_URL Has a short URL without a shortening service
# I don't think this ever fires
# URI containing a path component that begins with one or two dots followed by
# a word character, at least seven characters into the URI.
uri URI_HIDDEN m'.{7}\/\.\.?/?\w'
describe URI_HIDDEN Contains a hidden directory
#score URI_HIDDEN 0.7

# 20090515 13:29 by Adam Katz (me) on sa-users list
# no subdomain; sent by example.com rather than server.example.com
# The "^[^\]]+" prefix keeps the match inside the first bracketed relay entry.
header __RDNS_NO_SUBDOM X-Spam-Relays-External =~ /^[^\]]+ rdns=[^. ]*\.\w+ /

# Relays with 5+ subdomains.
# My data (post-greylisting) is 1.7869/0.0682 spam/ham, s/o = .897
# Those should be significantly better sans-greylisting (they can't get worse).
# @ 20091214, 5.7617/0.0344 spam/ham, 0.994 s/o.
# Requires six or more dot-terminated labels before the final label.
header __5_SUBDOM X-Spam-Relays-External =~ /^[^\]]+ rdns=(?:[^. ]*\.){6,}\w+ /

# IP address in relay's rDNS or HELO
# All four octets are captured and must reappear in the rdns= or helo= value,
# in forward or reverse order, each pair separated by one non-digit.
header __IP_IN_RELAY X-Spam-Relays-External =~ /^\[ ip=(\d+)\.(\d+)\.(\d+)\.(\d+) (?:[^\]]* )?(?:rdns|helo)=\S*(?:\1\D\2\D\3\D\4|\4\D\3\D\2\D\1)/
# Weaker variant: only the last two octets are captured and looked for.
header __IP_PART_IN_RELAY X-Spam-Relays-External =~ /^\[ ip=\d+\.\d+\.(\d+)\.(\d+) (?:[^\]]* )?(?:rdns|helo)=\S*(?:\1\W\2\W|\2\W\1)\b/

# Message-ID shape sub-rules: one for ".0.0." and one for ".JavaMail.".
header __MSGID_DOTZERO Message-ID =~ /\.0\.0\./
header __MSGID_JAVAMAIL Message-ID =~ /\.JavaMail\./
tflags __MSGID_JAVAMAIL nice

# Mostly-uppercase body combined with an AOL Message-ID; flagged "nice".
meta AOL_ALL_CAPS __AT_AOL_MSGID && UPPERCASE_75_100
describe AOL_ALL_CAPS AOL users sometimes write mail in all uppercase
tflags AOL_ALL_CAPS nice

# testing warren's theory from "Uppercase E-mail in Latin America" 2009-10-06
# http://old.nabble.com/Uppercase-E-mail-in-Latin-America-td25748291.html
# note, masscheck probably doesn't have enough LACNIC ham for this test
meta LACNIC_ALL_CAPS __RCVD_VIA_LACNIC && UPPERCASE_75_100
describe LACNIC_ALL_CAPS Latino users sometimes write mail in all uppercase
tflags LACNIC_ALL_CAPS nice nopublish

# Sender address containing a run of 20 or more letters/digits immediately
# before the "@".
header __LONG_NOBR_ADDR From:addr =~ /[a-zA-Z0-9]{20,}\@/i
meta LONG_FREEMAIL_ADDR __LONG_NOBR_ADDR && FREEMAIL_FROM && !__freemail_safe
describe LONG_FREEMAIL_ADDR Freemail address has 20+ unbroken characters

# Received header in which the three-letter prefix of the "from" host is
# repeated (via backreference \1) in the "by ... .hotmail.com" host name.
# NOTE(review): this pattern was broken across two lines inside the
# "[^\n ]" character class and has been rejoined; confirm that no character
# was lost from the class at the break.
header __HOTMAIL_HELO Received =~ /from ([A-Z]{3})\d[^.]+ [^\n]+ by \1\d+-[^\n ]+\.\1\d+\.hotmail\.com with Microsoft/i
tflags __HOTMAIL_HELO nice
# 1 & 2 are in 20_head_tests.cf ... this one doesn't use eval rules
meta FORGED_HOTMAIL_RCVD3 __HOST_HOTMAIL && (!__HOTMAIL_HELO || __DOS_SINGLE_EXT_RELAY)

# A by= or ip= value in the first untrusted relay entry that contains a colon.
header RCVD_VIA_IPV6 X-Spam-Relays-Untrusted =~ /^[^\]]+ (?:by|ip)=[^\] ]+:[^\] .]+ /
describe RCVD_VIA_IPV6 Received by the last trusted relay via IPv6
tflags RCVD_VIA_IPV6 nice