# experiments based on masscheck results

# Free-mail sender (FREEMAIL_FROM) combined with either MISSING_HEADERS or
# __HDRS_LCASE.
meta MALFORMED_FREEMAIL (MISSING_HEADERS||__HDRS_LCASE) && FREEMAIL_FROM
describe MALFORMED_FREEMAIL Bad headers on message from free email service
#score MALFORMED_FREEMAIL 0.1

# NOTE(review): the pattern below opens with m' but closes with }si, and its
# tail is the same "click here" alternation as the commented-out uri_detail
# KHOP_FOREIGN_CLICK rule further down.  It looks like the end of FROM_WEBSITE
# and the start of a KHOP_FOREIGN_CLICK rule were lost and the two rules fused.
# Restore both from the upstream copy before relying on this line.
header FROM_WEBSITE From:raw =~ m'\b(?:f|ht)tps?://[^\s"]{9,199}>[^<]{0,80}(?:<(?!/a\b)[^>]{0,299}>[^<]{0,80}){0,9}[^<]{0,80}\b(?:cli(?:quez\W|ck\Wa)ici\b|cli(?:cca\W|c\Wa|que\Wa)qu[^<.,a ]|klie?k(?:\Whi?er|ni(?:j|nite)\Wtu[tk]aj)\b)}si
#else
# uri_detail doesn't support m{foo}i notation
# uri_detail KHOP_FOREIGN_CLICK text =~ /\b(?:cli(?:quez\W|ck\Wa)ici\b|cli(?:cca\W|c\Wa|que\Wa)qu[^<.,a ]|klie?k(?:\Whi?er|ni(?:j|nite)\Wtu[tk]aj)\b)/i
#endif
# includes fr, es, it, pt, nl, da, ca, sl, af, and probably others
#describe KHOP_FOREIGN_CLICK Click here link in non-English Latin text
#score KHOP_FOREIGN_CLICK 0.01 # 20090526 see also SARE_UN7
#tflags KHOP_FOREIGN_CLICK nopublish # re-do ifplugin to publish
# 20100319. 0.8 -> 0.3, re-enabled in masscheck w/out ifplugin, output pending
# 0.0097/0.0100 spam/ham, 0.493 s/o @ 20100330 0.3 -> 0.1
# 0.0176/0.0116 spam/ham, 0.603 s/o @ 20100417 net. 0.1 -> 0.01
# 0.0175/0.0120 spam/ham, 0.594 s/o @ 20100425 net. fired from khop-general.

# uri_detail lacks support for carrying matches across consecutive regexps
#uri_detail SPOOFED_URL raw =~ /^https?:..(.{6,50})/ text =~ /\bhttps?:..(?!$1).{5}/
# reduced to 30 chars (35 w/ http:) for URL wrapping, e.g. LeadLander wraps @35
# Captures the href target as \1, then requires link text that starts with
# "http"/"https" but does not start with that same captured URL.
# NOTE(review): the pattern starts with a bare "]{0,2048}", which looks like
# the remainder of an opening-tag prefix (presumably "<a ... [^>") that was
# lost; confirm against the upstream rule.
rawbody __SPOOFED_URL m/]{0,2048}\bhref=(?:3D)?.?(https?:[^>"'\# ]{8,29}[^>"'\# :\/?&=])[^>]{0,2048}>(?:[^<]{0,1024}<(?!\/a)[^>]{1,1024}>){0,99}\s{0,10}(?!\1)https?[^\w<]{1,3}[^<]{5}/i
# even with scrubbing, probably can't handle 'legit' tracking redirectors
meta SPOOFED_URL __SPOOFED_URL && !(__VIA_ML || __SENDER_BOT || __YAHOO_BULK || __UNSUB_LINK || __THREADED || __URL_SHORTENER)
describe SPOOFED_URL Has a link whose text is a different URL

# Short http:// URL: 3-6 character host label, two-character TLD, and a single
# 3-8 character path component.
uri __SHORT_URL /^http:\/\/[^\/]{3,6}\.\w\w\/[^\/]{3,8}\/?$/

# list from http://techcrunch.com/2010/01/06/bit-ly-market-share/ containing
# anything that ranked 0.1% or more of twitter's traffic that day, plus anything
# bold below that threshold.
# 20130113 JHardin: split into __subrule + scored rule to fix dependencies broken by non-promotion
uri __URL_SHORTENER /^http:\/\/(?:bit\.ly|tinyurl\.com|ow\.ly|is\.gd|tumblr\.com|formspring\.me|ff\.im|youtu\.be|tl\.gd|plurk\.com|migre\.me|j\.mp|cli\.gs|goo\.gl|yfrog\.com|lnk\.ms|su\.pr|fb\.me|alturl\.com|wp\.me|ping\.fm|chatter\.com|post\.ly|twurl\.nl|tiny\.cc|4sq\.com|ustre\.am|short\.to|u\.nu|flic\.kr|budurl\.com|digg\.com|twitvid\.com|gowal\.la|om\.ly|justin\.tv|icio\.us|p\.gs|loopt\.us|tcrn\.ch|xrl\.us|wpo\.st|bkite\.com)\/[^\/]{3}\/?/
meta URL_SHORTENER __URL_SHORTENER
describe URL_SHORTENER Has a shortened URL (can hide a blacklisted link)
tflags URL_SHORTENER nopublish

# Earlier single-rule form, kept for reference.
# NOTE(review): "(!?" in it looks like a typo for the negative lookahead "(?!".
#uri SHORT_URL /^http:\/\/(!?(?:bit\.ly|tinyurl\.com|ow\.ly|is\.gd|tumblr\.com|formspring\.me|ff\.im|youtu\.be|tl\.gd|plurk\.com|migre\.me|j\.mp|cli\.gs|goo\.gl|yfrog\.com|lnk\.ms|su\.pr|fb\.me|alturl\.com|wp\.me|ping\.fm|chatter\.com|post\.ly|twurl\.nl|tiny\.cc|4sq\.com|ustre\.am|short\.to|u\.nu|flic\.kr|budurl\.com|digg\.com|twitvid\.com|gowal\.la|om\.ly|justin\.tv|icio\.us|p\.gs|loopt\.us|tcrn\.ch|xrl\.us|wpo\.st|bkite\.com)\/)[^\/]{3,6}\.\w\w\/[^\/]{3,8}\/?$/
# Short URL that is not one of the listed shorteners and not from a fully
# trusted path.
meta SHORT_URL __SHORT_URL && !__URL_SHORTENER && !ALL_TRUSTED
describe SHORT_URL Has a short URL without a shortening service
# Raw-HTML variants of the shortener list: a tag whose src= (first rule) or
# href= (second rule) attribute points at one of the listed services.
rawbody SHORTENED_URL_SRC /<[^>]{1,99}\ssrc=\W?http:\/\/(?:bit\.ly|tinyurl\.com|ow\.ly|is\.gd|tumblr\.com|formspring\.me|ff\.im|youtu\.be|tl\.gd|plurk\.com|migre\.me|j\.mp|cli\.gs|goo\.gl|yfrog\.com|lnk\.ms|su\.pr|fb\.me|alturl\.com|wp\.me|ping\.fm|chatter\.com|post\.ly|twurl\.nl|tiny\.cc|4sq\.com|ustre\.am|short\.to|u\.nu|flic\.kr|budurl\.com|digg\.com|twitvid\.com|gowal\.la|om\.ly|justin\.tv|icio\.us|p\.gs|loopt\.us|tcrn\.ch|xrl\.us|wpo\.st|bkite\.com)\/[^\/]{3}/
rawbody SHORTENED_URL_HREF /<[^>]{1,99}\shref=\W?http:\/\/(?:bit\.ly|tinyurl\.com|ow\.ly|is\.gd|tumblr\.com|formspring\.me|ff\.im|youtu\.be|tl\.gd|plurk\.com|migre\.me|j\.mp|cli\.gs|goo\.gl|yfrog\.com|lnk\.ms|su\.pr|fb\.me|alturl\.com|wp\.me|ping\.fm|chatter\.com|post\.ly|twurl\.nl|tiny\.cc|4sq\.com|ustre\.am|short\.to|u\.nu|flic\.kr|budurl\.com|digg\.com|twitvid\.com|gowal\.la|om\.ly|justin\.tv|icio\.us|p\.gs|loopt\.us|tcrn\.ch|xrl\.us|wpo\.st|bkite\.com)\/[^\/]{3}/
score SHORTENED_URL_HREF 1.0

# I don't think this ever fires
# URI containing "/." or "/.." followed by a word character, at least seven
# characters into the URI.
uri URI_HIDDEN m'.{7}\/\.\.?/?\w'
describe URI_HIDDEN Contains a hidden directory
#score URI_HIDDEN 0.7

# 20090515 13:29 by Adam Katz (me) on sa-users list
# no subdomain; sent by example.com rather than server.example.com
header __RDNS_NO_SUBDOM X-Spam-Relays-External =~ /^[^\]]+ rdns=[^. ]*\.\w+ /
# rDNS of the first external relay begins with "www."
header __RDNS_IS_WWW X-Spam-Relays-External =~ /^[^\]]+ rdns=www\./
# A Received header names a "www." host (optionally prefixed by user@)
header __RELAY_THRU_WWW Received =~ /from (?:[^ \@]+\@)?www\./
# From: local part looks like a web-server account
header __FROM_WEB_DAEMON From:addr =~ /(?:apache|www|web|tomcat|\biis\b).*\@/i
# Relayed via a www host, unless the sender looks like a web daemon or the
# Message-ID is JavaMail-style.
meta KHOP_FROM_WWW (__RDNS_IS_WWW || __RELAY_THRU_WWW) && ! (__FROM_WEB_DAEMON || __MSGID_JAVAMAIL)

# a REALLY simple test...
# rDNS name of 30+ characters / of 4-14 characters, respectively
header __RDNS_LONG X-Spam-Relays-External =~ /^[^\]]+ rdns=\S{30}/
header __RDNS_SHORT X-Spam-Relays-External =~ /^[^\]]+ rdns=\S{4,14} /

# Rough nonplugin regexs from John Rudd's Botnet plugin (v0.8, 2007-08-05).
# http://people.ucsc.edu/~jrudd/spamassassin/Botnet-0.8.tar
# I've purposefully removed anything already in RDNS_DYNAMIC
# NOTE: this is GPLv2, which is incompatible with the Apache License.
# My *brief* read is that Apache->GPLv2 is taboo but GPLv2->Apache is fine.
# I wrote the next four rules from his base words, so these are fine.
#
# The three subrules inspect the rdns= field of the first external relay.
# They are anchored with ^[^\]]+ like every other relay rule in this file:
# relay entries are bracketed ("[ ip=... rdns=... ]"), so the class must
# skip non-"]" characters up to rdns= inside the first entry.  The previous
# form, ^[\]]+, required the value to begin with "]" and so could not match.

# Client-looking rDNS: dynamic/dial-up/residential keywords
header __BOTNET_CLIENT1 X-Spam-Relays-External =~ /^[^\]]+ rdns=\S*\b(?:ddns|dial-?(?:in|up)|dyn(?:amic)?ip|resident(?:ial)?|bredband)[^a-z]/i
# Client-looking rDNS: "pool"/"user" words or an "ip" label between - or .
header __BOTNET_CLIENT2 X-Spam-Relays-External =~ /^[^\]]+ rdns=\S*(?:\b(?:pool|user)[^a-z]|[-.]ip[-.])/i
# Server-looking rDNS: mail/mta/mx/relay/smtp/exchange keywords
header __BOTNET_SERVER X-Spam-Relays-External =~ /^[^\]]+ rdns=\S*\b(?:e?mail(?:out)?|mta|mx(?:pool)?|relay|smtp|exch(?:ange)?)[^a-z]/i
# Client-style rDNS that does not also look like a mail server
meta BOTNET_NOPLUGIN !__BOTNET_SERVER && (__BOTNET_CLIENT1||__BOTNET_CLIENT2)

# shawcable.net uses customer hostnames that don't match other botnet patterns
describe BOTNET_SHAWCABLE Shawcable.net customer address
meta BOTNET_SHAWCABLE (__BOTNET_SHAWCABLE && __BOTNET_NOTRUST)
header __BOTNET_SHAWCABLE X-Spam-Relays-Untrusted =~ /^[^\]]+ rdns=s[0-9a-f]*\...\.shawcable\.net\b/i
#score BOTNET_SHAWCABLE 5.0
tflags BOTNET_SHAWCABLE nopublish # confirm license with author first

# ocn.ne.jp uses customer hostnames that don't match other botnet patterns
describe BOTNET_OCNNEJP Ocn.ne.jp customer address
meta BOTNET_OCNNEJP (__BOTNET_OCNNEJP && __BOTNET_NOTRUST)
header __BOTNET_OCNNEJP X-Spam-Relays-Untrusted =~ /^[^\]]+ rdns=p\d{4}-ip\S*\.ocn\.ne\.jp\b/i
#score BOTNET_OCNNEJP 5.0
tflags BOTNET_OCNNEJP nopublish # confirm license with author first

# If the message was authenticated or hit a trusted host, then we want to
# exempt these 'non-module' rules.
# True when the X-Spam-Relays-Trusted pseudo-header contains no "ip=" entry.
describe __BOTNET_NOTRUST Message has no trusted relays
header __BOTNET_NOTRUST X-Spam-Relays-Trusted !~ /ip=/i
tflags __BOTNET_NOTRUST nopublish # confirm license with author first

# Note: unfair regarding RFC 2821, see http://en.wikipedia.org/wiki/FCrDNS#Uses
# Captures the first external relay's rdns= value and requires the helo=
# value not to start with that same string.
header __HELO_NOT_RDNS X-Spam-Relays-External =~ /^[^\]]+ rdns=(\S+) helo=(?!\1)\S/
meta KHOP_HELO_FCRDNS __HELO_NOT_RDNS && !(__VIA_ML || __freemail_safe || __RCVD_IN_DNSWL || __NOT_SPOOFED || __RDNS_SHORT)
describe KHOP_HELO_FCRDNS Relay HELO differs from its IP's reverse DNS
score KHOP_HELO_FCRDNS 0.4 # 20090603
# 33.9858/7.3415 spam/ham, 0.822 s/o @ 20100203
# 40.5025/2.1738 spam/ham, 0.949 s/o @ 20100417 net
# 44.5263/5.6620 spam/ham, 0.887 s/o @ 20100514. Added rdns_short 5/17.
# 38.8872/2.9614 spam/ham, 0.929 s/o @ 20110812 @302k spam

# inspired by SPF HELO portion of http://www.chaosreigns.com/mtx/#comparisons
meta FORGED_SPF_HELO __HELO_NOT_RDNS && SPF_HELO_PASS && !SPF_PASS
# nice theory, expecting poor practice, maybe 0.0453/0.1227 s/h @ 0.270 s/o

# This *really* needs properly defined trusted networks.
# The relay's helo= value is repeated as the by= value of the same entry.
header __HELO_AS_VICTIM X-Spam-Relays-External =~ /^[^\]]+ helo=(\S+) [^\]]*\bby=\1 /
meta KHOP_HELO_AS_VICTIM __HELO_AS_VICTIM && !__BOTNET_NOTRUST
tflags KHOP_HELO_AS_VICTIM nopublish # confirm license with author first for __BOTNET_NOTRUST

meta HELO_NO_DOMAIN __HELO_NO_DOMAIN && !HELO_LOCALHOST
describe HELO_NO_DOMAIN Relay reports its domain incorrectly
#score HELO_NO_DOMAIN 2.375 0.327 1.497 0.884
# scores derived from 90% of RDNS_DYNAMIC's sa3.3 proposal (attachment 4565)
# because they have such similar definitions, numbers, and merits

# Display name is itself an email address (__NAME_IS_EMAIL) but is not the
# same address as the one inside the angle brackets (__NAME_EQ_EMAIL).
header __NAME_IS_EMAIL From:raw =~ /\w\@[\w.-]+\.\w\w+["'`]*\s*<\w+\@\w/
header __NAME_EQ_EMAIL From:raw =~ /([\w+.-]+\@[\w.-]+\.\w\w+)["'`\s]*<\s*\1>/i
meta NAME_EMAIL_DIFF __NAME_IS_EMAIL && ! __NAME_EQ_EMAIL
describe NAME_EMAIL_DIFF Sender NAME is an unrelated email address
#score NAME_EMAIL_DIFF 0.375 # tot=0.5, low for noreply@dom 20090811

header ADV_SUBJ Subject =~ /\[ ?(?:ADV|A D V) ?\]/i
describe ADV_SUBJ Marked by sender as an advertisement
#score ADV_SUBJ 1.5 # 20090304

header __MSGID_JAVAMAIL Message-ID =~ /\.JavaMail\./
tflags __MSGID_JAVAMAIL nice

header __TAB_IN_SUBJ Subject =~ /\t/
# "i owe u 4 something b4 tomorrow" hits this four times:
header __SUBJ_INFORMAL Subject =~ /(?:^| )(?:[iuU4]|[Bb]4)(?: |$)/
# Whole subject consists of lowercase letters, spaces and hyphens.  The "+"
# is required: without it the pattern only matched one-character subjects.
header __SUBJ_ALL_LOWER Subject =~ /^[a-z -]+$/
# these two ignore leading things like "Re:" and "[foo list]"
header __SUBJ_4LOWER Subject =~ /^(?:[a-zA-Z]{2,4}: +)?(?:\[[ \w]+\] +)?(?:.*[a-z]){4}/
header __SUBJ_2UPPER Subject =~ /^(?:[a-zA-Z]{2,4}: +)?(?:\[[ \w]+\] +)?(?:.*[A-Z]){2}/
header __SUBJ_SHORT Subject =~ /^.{0,8}$/
header __SUBJ_IMPORTANT Subject =~ /\b(?:[Ii]mportant|IMPORTANT)\b/
# attempts to fix SUBJ_ALL_CAPS, which has an S/O of 0.563 yet is published
meta SUBJ_ALL_CAPS2 SUBJ_ALL_CAPS && __SUBJ_4LOWER && __SUBJ_2UPPER && !__SUBJ_SHORT
meta SUBJ_ALL_CAPS3 SUBJ_ALL_CAPS && __SUBJ_4LOWER && __SUBJ_2UPPER && !(__SUBJ_SHORT||__SUBJ_IMPORTANT)

# Received line whose three-letter "from" host prefix reappears twice in the
# receiving *.hotmail.com host name.
header __HOTMAIL_HELO Received =~ /from ([A-Z]{3})\d[^.]+ [^\n]+ by \1\d+-[^\n ]+\.\1\d+\.hotmail\.com with Microsoft/i
tflags __HOTMAIL_HELO nice
# 1 & 2 are in 20_head_tests.cf ... this one doesn't use eval rules
meta FORGED_HOTMAIL_RCVD3 __HOST_HOTMAIL && (!__HOTMAIL_HELO || __DOS_SINGLE_EXT_RELAY)

# A blend of sidney's UPPERCASE_HTTP and jhardin's URI_UC for bug 6408
# This one avoids Http: which I think is the biggest problem.
uri UPPERCASE_URI /^[^:A-Z]+[A-Z]/
describe UPPERCASE_URI Link protocol has unexpected mixed case

# doesn't match enough, has a higher S/O than __GREYLISTING, therefore useless.
# header __GREYLISTED ALL =~ /(?:^|\n)X-(?:Scam-Grey|Greylist(?:ing)?):\s+delay(?:ed)? (?:for )?\d+(?: ?s(?:ec(?:ond)?s?)?|:\d\d)/im
# meta KHOP_GREYED __GREYLISTED && (RDNS_NONE||RDNS_DYNAMIC||__HELO_NO_DOMAIN)
# describe KHOP_GREYED Greylisted and sent from dynamically-named relay
# score KHOP_GREYED 0.1

# This is useless while __DKIM_EXISTS continues to perform so well.
# This is useless while ruleqa continues to lack DKIM support.
ifplugin Mail::SpamAssassin::Plugin::DKIM
meta DKIM_INVALID __DKIM_EXISTS && !DKIM_VALID
describe DKIM_INVALID DKIM-Signature header exists but is not valid
# masscheck ignores ifplugins and thus always has DKIM_INVALID==__DKIM_EXISTS
endif

ifplugin Mail::SpamAssassin::Plugin::MIMEHeader # {
# probably all over the place, indicating only a MS document is attached,
# which might hit only ham and viruses
full __BASE64_MDAW /^(?:MDAw){12}/
meta __REMOTE_IMAGE (__HTML_IMG_ONLY || __HTML_LINK_IMAGE) && !(__SUBSCRIPTION_INFO || __VIA_ML || __SENDER_BOT || __ANY_IMAGE_ATTACH)
meta REMOTE_IMAGE __REMOTE_IMAGE
describe REMOTE_IMAGE Message contains an external image
endif # }

# NOTE(review): the disabled pattern below starts with a bare "]{0,100}",
# which looks like the remainder of a lost opening-tag prefix; confirm
# against upstream before re-enabling.
# rawbody __BUGGED_IMG m{]{0,100}\ssrc=.?https?://[^>]{6,80}(?:\?[^>]{8}|[^a-z](?![a-f]{3}|20\d\d[01]\d[0-3]\d)[0-9a-f]{8})}i
# inspired by retired SA 2.x HTML_WEB_BUGS rule's eval:html_test('web_bugs')
# rawbody __WEB_BUGS m{<[^>]{4,100}\ssrc=[^>]{8,200}\.(?:pl|cgi|php\d?|asp|jsp|cfm|py|\?[^>]{4})\b}i

# Idea from Michael Scheidell of SECNAP on SpamAssassin Users List 2010-05-08
# http://old.nabble.com/yahoo-X-YMail-OSG-tp28496110p28496110.html
#header LONG_HEADER_LINE_80 ALL:raw =~ /^(?-xis:(?=(?!X-Spam|X-MailScan|Received).{80,159}$))/m
#header LONG_HEADER_LINE_160 ALL:raw =~ /^(?-xis:(?=(?!X-Spam|X-MailScan|Received).{160,239}$))/m
#header LONG_HEADER_LINE_240 ALL:raw =~ /^(?-xis:(?=(?!X-Spam|X-MailScan|Received).{240,319}$))/m
#header LONG_HEADER_LINE_320 ALL:raw =~ /^(?-xis:(?=(?!X-Spam|X-MailScan|Received).{320,399}$))/m
#header LONG_HEADER_LINE_400 ALL:raw =~ /^(?-xis:(?=(?!X-Spam|X-MailScan|Received).{400,499}$))/m
#header LONG_HEADER_LINE_500 ALL:raw =~ /^(?-xis:(?=(?!X-Spam|X-MailScan|Received).{500}))/m
#describe LONG_HEADER_LINE_80 A header line contains 80-159 characters
#describe LONG_HEADER_LINE_160 A header line contains 160-239 characters
#describe LONG_HEADER_LINE_240 A header line contains 240-319 characters
#describe LONG_HEADER_LINE_320 A header line contains 320-399 characters
#describe LONG_HEADER_LINE_400 A header line contains 400-499 characters
#describe LONG_HEADER_LINE_500 A header line contains 500+ characters

#header __SINGLE_HEADER_1K ALL:raw =~ /(?-xim:(?=(?!X-Spam|X-MailScan|D\w{3,8}-Signature)(?:^|\n)[^\s\n]+:(?:.(?!\n\S)){1024,2047}.(?:\n\S|$)))/s
#meta SINGLE_HEADER_1K __SINGLE_HEADER_1K && !__VIA_ML && !__THREADED
#header SINGLE_HEADER_2K ALL:raw =~ /(?-xim:(?=(?:^|\n)[^\s\n]+:(?:.(?!\n\S)){2048,3071}.(?:\n\S|$)))/s
#header SINGLE_HEADER_3K ALL:raw =~ /(?-xim:(?=(?:^|\n)[^\s\n]+:(?:.(?!\n\S)){3072,4095}.(?:\n\S|$)))/s
#header SINGLE_HEADER_4K ALL:raw =~ /(?-xim:(?=(?:^|\n)[^\s\n]+:(?:.(?!\n\S)){4096,5119}.(?:\n\S|$)))/s
#header SINGLE_HEADER_5K ALL:raw =~ /(?-xim:(?=(?:^|\n)[^\s\n]+:(?:.(?!\n\S)){5120}))/s
#describe SINGLE_HEADER_1K A single header contains 1K-2K characters
#describe SINGLE_HEADER_2K A single header contains 2K-3K characters
#describe SINGLE_HEADER_3K A single header contains 3K-4K characters
#describe SINGLE_HEADER_4K A single header contains 4K-5K characters
#describe SINGLE_HEADER_5K A single header contains 5K+ characters

#header BIG_HEADERS_2K ALL:raw =~ /^(?=.{2048,3071}$)/s
#header BIG_HEADERS_3K ALL:raw =~ /^(?=.{3072,4095}$)/s
#header BIG_HEADERS_4K ALL:raw =~ /^(?=.{4096,5119}$)/s
#header BIG_HEADERS_5K ALL:raw =~ /^(?=.{5120})/s
#describe BIG_HEADERS_2K Headers contain 2K-3K characters total
#describe BIG_HEADERS_3K Headers contain 3K-4K characters total
#describe BIG_HEADERS_4K Headers contain 4K-5K characters total
#describe BIG_HEADERS_5K Headers contain 5K+ characters total

# (5K threshold below corrected from 4096 to 5120 to agree with the other
# disabled *_5K rules; 4096 duplicated the 4K rule's lower bound.)
#header MS_XYMOSG_1K X-YMail-OSG =~ /^(?=.{1024,2047}$)/s
#header MS_XYMOSG_2K X-YMail-OSG =~ /^(?=.{2048,3071}$)/s
#header MS_XYMOSG_3K X-YMail-OSG =~ /^(?=.{3072,4095}$)/s
#header MS_XYMOSG_4K X-YMail-OSG =~ /^(?=.{4096,5119}$)/s
#header MS_XYMOSG_5K X-YMail-OSG =~ /^(?=.{5120})/s

# from mailing list, subject "Tonns of russian DOT info spam" 2011-02-21
#header __LISTID_DEBIAN List-Id =~ /\.lists\.debian\.org>/
#body __RAJONAA_INFO m' rajonaa: http://www\.[\w-]{0,50}\.info !$'
#meta DEB_RAJONAA __LISTID_DEBIAN && __RAJONAA_INFO
#describe DEB_RAJONAA Latvian text with .info URI on Debian List
#score DEB_RAJONAA 3.0

# Requested on sa-users list
# See http://old.nabble.com/username-in-from-address-tp31213779p31213779.html
# See also __TO_EQ_FROM_DOM
# _1 handles From: appearing before To: in the header block, _2 the reverse;
# each captures the local part of the first address and looks for it again
# at the start of the second.
header __TO_EQ_FROM_USR_1 ALL =~ /\nFrom:\s+(?:[^\n<]{0,80}<)?([^\n\s\@>]+)\@[^\n\s]+>?\n(?:[^\n]{1,100}\n)*To:\s+(?:[^\n]{0,80}<)?\1[\@>,\s\n]/ism
header __TO_EQ_FROM_USR_2 ALL =~ /\nTo:\s+(?:[^\n<]{0,80}<)?([^\n\s\@>]+)\@[^\n\s]+>?\n(?:[^\n]{1,100}\n)*From:\s+(?:[^\n]{0,80}<)?\1[\@>,\s\n]/ism
meta __TO_EQ_FROM_USR (__TO_EQ_FROM_USR_1 || __TO_EQ_FROM_USR_2) && !(__FROM_DNS || __FROM_INFO || __SENDER_BOT)
describe __TO_EQ_FROM_USR To: username same as From: username

# Same comparison, but trailing digits on either local part are ignored and
# the shared stem must be at least four characters.
header __TO_EQ_FROM_USR_NN_1 ALL =~ /\nFrom:\s+(?:[^\n<]{0,80}<)?([^\n\s\@>]{4,80}?)\d*\@[^\n\s]+>?\n(?:[^\n]{1,100}\n)*To:\s+(?:[^\n]{0,80}<)?\1\d*[\@>,\s\n]/ism
header __TO_EQ_FROM_USR_NN_2 ALL =~ /\nTo:\s+(?:[^\n<]{0,80}<)?([^\n\s\@>]{4,80}?)\d*\@[^\n\s]+>?\n(?:[^\n]{1,100}\n)*From:\s+(?:[^\n]{0,80}<)?\1\d*[\@>,\s\n]/ism
meta __TO_EQ_FROM_USR_NN (__TO_EQ_FROM_USR_NN_1 || __TO_EQ_FROM_USR_NN_2) && !(__FROM_DNS || __FROM_INFO || __SENDER_BOT)
describe __TO_EQ_FROM_USR_NN To: username same as From: username sans trailing nums

# JHardin:
# __TO_EQ_FROM_USR_NN recent S/O is 0.992 on a large corpus
# with most hits at <= 5 points
# let's see if we can get those low-scored spams some more points
# FP observation: __TO_EQ_FROM_USR overlaps ~99% of _USER_NN ham hits and no spam;
# this suggests the primary spam indicator is having *different* suffixes
# Many of the rest are fairly reliable ham indicators
# suggested scored FP avoidance rule:
meta __TO_EQ_FROM_USR_NN_MINFP __TO_EQ_FROM_USR_NN && !__TO_EQ_FROM_USR_1 && !__TO_EQ_FROM && !__TO_EQ_FROM_DOM && !__LCL__ENV_AND_HDR_FROM_MATCH && !__DKIM_EXISTS && !__NOT_SPOOFED && !__RCD_RDNS_SMTP && !__RCD_RDNS_MX_MESSY && !__THREADED

# Subject of 16+ characters containing no standalone 3-15 letter word and
# not flagged as base64-encoded.
header __SUBJ_NOT_SHORT Subject =~ /^.{16}/
header __SUBJ_HAS_WORDS Subject =~ /(?:^|\s)[^\W0-9_]{3,15}(?:\s|$)/
meta SUBJ_LACKS_WORDS __SUBJ_NOT_SHORT && !__SUBJ_HAS_WORDS && !__SUBJECT_ENCODED_B64
describe SUBJ_LACKS_WORDS Subject is not short yet lacks words