# experiments based on masscheck results

# MALFORMED_FREEMAIL: hits when FREEMAIL_FROM fires together with at least
# one of MISSING_HEADERS or __HDRS_LCASE (none of those sub-rules are
# defined in this section of the file).
meta      MALFORMED_FREEMAIL  FREEMAIL_FROM && (__HDRS_LCASE || MISSING_HEADERS)
describe  MALFORMED_FREEMAIL  Bad headers on message from free email service
# Explicit score is currently disabled:
#score    MALFORMED_FREEMAIL  0.1

# FROM_WEBSITE: the From header contains an ftp://, ftps://, http:// or
# https:// URL (case-insensitive) followed by 3-60 characters that are not
# whitespace, '"', '<', '/' or '@', then a dot and two word characters.
# NOTE(review): this tests the whole From header, whereas FROM_WWW tests
# only From:name; the description says "Sender name" -- confirm which scope
# is intended.
header    FROM_WEBSITE  From =~ m'\b(?:f|ht)tps?://[^\s"</\@]{3,60}\.\w\w'i
describe  FROM_WEBSITE  Sender name appears to be a link
# FROM_WWW: the display-name part of From contains "www." (case-insensitive)
# followed by 4-60 characters that are not whitespace, '"', '<', '/' or '@',
# then a dot and two word characters.
header    FROM_WWW  From:name =~ /\bwww\.[^\s"<\/\@]{4,60}\.\w\w/i
describe  FROM_WWW  Sender name appears to be a website

# FROM_2_EMAILS: the From header contains something shaped like an address
# (captured as \1: text, '@', text, a dot, two word characters) and, later
# on, a second "text@text" sequence.
# NOTE(review): the (?!\1) lookahead only rejects a second match that starts
# exactly where a copy of \1 starts; because the second match may begin at
# a neighbouring character, this probably also fires when the same address
# appears twice (e.g. as both display name and address) -- verify against
# masscheck data.
header    FROM_2_EMAILS  From =~ /([^\@]{2,}\@[^\@]{2,60}\.\w\w).*(?!\1)[^\@]{2,}\@[^\@]/
describe  FROM_2_EMAILS  Sender claims to have a different email

# __FROM_THE: the From display name contains the standalone word "THE",
# "The" or "the".
header    __FROM_THE  From:name =~ /\b(?:THE|[Tt]he)\b/
# FROM_THE: __FROM_THE with none of the mailing-list / bot / list-unsubscribe
# / Reply-To sub-rules hitting (those sub-rules are defined elsewhere).
meta      FROM_THE    __FROM_THE && !__VIA_ML && !__SENDER_BOT && !__DOS_HAS_LIST_UNSUB && !__REPLYTO_EXISTS
describe  FROM_THE    Non-bulk sender is "The" something
# Not actually expected to perform well; perhaps an S/O of around 0.450?
# One could also argue that Bayes should handle this, since (IIRC) it
# codifies tokens like "from:the" to indicate that "the" occurs in the From
# header.

# KHOP_BIG_TO_CC: the combined To/Cc recipient list contains ten consecutive
# comma-terminated "local@rest," entries, where the local part is 1-60
# characters without ',' or '@' and the part after '@' is 4-25 characters
# without ','.
header	 KHOP_BIG_TO_CC      ToCc =~ /(?:[^,\@]{1,60}\@[^,]{4,25},){10}/
# Description typo fixed ("instaed" -> "instead"); this text is shown to
# users in reports.
describe KHOP_BIG_TO_CC      Sent to 10+ recipients instead of Bcc or a list
score	 KHOP_BIG_TO_CC      0.3 # 20090527

# __KHOP_EBAY_ADDY: the From address is at "ebay.<suffix>" or any subdomain
# of it (case-insensitive), where <suffix> is any 3-5 characters up to the
# end of the address.
# NOTE(review): because of {3,5}, two-character suffixes such as "ebay.de"
# and six-character ones such as "ebay.com.au" do not match -- confirm this
# is intended.
header    __KHOP_EBAY_ADDY  From:addr =~ /\@(?:.+\.)?ebay\..{3,5}$/i
# KHOP_FAKE_EBAY: an eBay-looking sender for which __NOT_SPOOFED (defined
# elsewhere) did not hit.
meta      KHOP_FAKE_EBAY    __KHOP_EBAY_ADDY && !__NOT_SPOOFED
describe  KHOP_FAKE_EBAY    Sender falsely claims to be from eBay
# Explicit score is currently disabled:
#score    KHOP_FAKE_EBAY    2.25 # 20090408

# KHOP_FOREIGN_CLICK: looks for the phrases "cliquez ici" (any single
# non-word character between the two words) or "clic aqu" followed by one
# character other than '<', '.', ',', 'a' or space, case-insensitively.
# Two alternative definitions are provided depending on plugin availability.
ifplugin Mail::SpamAssassin::Plugin::URIDetail
  # Plugin available: test the "text" key of each URI detail record.
  uri_detail KHOP_FOREIGN_CLICK	text =~ /\b(?:cliquez\Wici\b|clic aqu[^<.,a ])/i
else
  # Fallback on the raw body: an "href=" attribute (9-199 characters up to
  # the closing '>'), then up to 80 characters of text, up to nine further
  # tags that are not "</a" each followed by up to 80 characters of text,
  # and finally the phrase itself.  /s lets '.'-free classes span newlines.
  rawbody    KHOP_FOREIGN_CLICK	m{\bhref=[^>]{9,199}>[^<]{0,80}(?:<(?!/a\b)[^>]{0,299}>[^<]{0,80}){0,9}[^<]{0,80}\b(?:cliquez\Wici\b|clic aqu[^<.,a ])}si
endif
describe KHOP_FOREIGN_CLICK	Click here link in French or Spanish
# Explicit score is currently disabled:
#score	 KHOP_FOREIGN_CLICK	1.1	# 20090526 see also SARE_UN7

# URI_HIDDEN: a URI that, at least seven characters in, contains "/." or
# "/..", optionally followed by "/", and then a word character.
# (Author's remark: I don't think this ever fires.)
uri       URI_HIDDEN  m'.{7}\/\.\.?/?\w'
describe  URI_HIDDEN  Contains a hidden directory
# Explicit score is currently disabled:
#score    URI_HIDDEN  0.7 # 20090515 13:29 by Adam Katz (me) on sa-users list

# No subdomain: sent by example.com rather than server.example.com.
# The pattern is anchored at the start of X-Spam-Relays-External and
# [^\]]+ cannot cross a ']', so only the first relay entry is inspected.
# It requires the rdns value to contain exactly one dot, followed by word
# characters and a space.
header __RDNS_NO_SUBDOM	X-Spam-Relays-External =~ /^[^\]]+ rdns=[^. ]*\.\w+ /

# Relays with 5+ subdomains: the rdns value of the first relay entry in
# X-Spam-Relays-External must consist of six or more dot-terminated labels
# followed by a final label of word characters.
# My data (post-greylisting) is 1.7869/0.0682 spam/ham, s/o = .897
# Those figures should be significantly better sans-greylisting (they can't
# get worse).
# @ 20091214, 5.7617/0.0344 spam/ham, 0.994 s/o.
header __5_SUBDOM X-Spam-Relays-External =~ /^[^\]]+ rdns=(?:[^. ]*\.){6,}\w+ /

# IP address in relay's rDNS or HELO.
# Both rules are anchored to the first relay entry of X-Spam-Relays-External
# and capture octets from its ip= field, then look for them inside the
# rdns= or helo= value of that same entry.
#
# __IP_IN_RELAY: all four octets appear, in forward or reverse order, with
# exactly one non-digit character between neighbouring octets.
header __IP_IN_RELAY  X-Spam-Relays-External =~ /^\[ ip=(\d+)\.(\d+)\.(\d+)\.(\d+) (?:[^\]]* )?(?:rdns|helo)=\S*(?:\1\D\2\D\3\D\4|\4\D\3\D\2\D\1)/
# __IP_PART_IN_RELAY: only the last two octets are captured; they must
# appear either forward (each followed by one non-word character) or
# reversed (separated by one non-word character), ending at a word boundary.
header __IP_PART_IN_RELAY X-Spam-Relays-External =~ /^\[ ip=\d+\.\d+\.(\d+)\.(\d+) (?:[^\]]* )?(?:rdns|helo)=\S*(?:\1\W\2\W|\2\W\1)\b/

# __MSGID_DOTZERO: the Message-ID contains the literal sequence ".0.0.".
header  __MSGID_DOTZERO   Message-ID =~ /\.0\.0\./

# __MSGID_JAVAMAIL: the Message-ID contains ".JavaMail."; flagged "nice".
header  __MSGID_JAVAMAIL  Message-ID =~ /\.JavaMail\./
tflags  __MSGID_JAVAMAIL  nice

# AOL_ALL_CAPS: UPPERCASE_75_100 hits on a message for which __AT_AOL_MSGID
# also hits (both sub-rules are defined elsewhere); flagged "nice".
meta      AOL_ALL_CAPS  UPPERCASE_75_100 && __AT_AOL_MSGID
describe  AOL_ALL_CAPS  AOL users sometimes write mail in all uppercase
tflags    AOL_ALL_CAPS  nice

# LACNIC_ALL_CAPS: tests Warren's theory from the thread
# "Uppercase E-mail in Latin America" (2009-10-06):
# http://old.nabble.com/Uppercase-E-mail-in-Latin-America-td25748291.html
# Note: masscheck probably doesn't have enough LACNIC ham for this test.
# Hits when UPPERCASE_75_100 and __RCVD_VIA_LACNIC both hit (both sub-rules
# are defined elsewhere); flagged "nice" and excluded from publishing.
meta      LACNIC_ALL_CAPS  UPPERCASE_75_100 && __RCVD_VIA_LACNIC
describe  LACNIC_ALL_CAPS  Latino users sometimes write mail in all uppercase
tflags    LACNIC_ALL_CAPS  nice nopublish

# __LONG_NOBR_ADDR: the From address has a run of 20 or more ASCII letters
# or digits immediately before the '@'.
header    __LONG_NOBR_ADDR    From:addr =~ /[a-zA-Z0-9]{20,}\@/i
# LONG_FREEMAIL_ADDR: such an address on a FREEMAIL_FROM message for which
# __freemail_safe did not hit (both sub-rules are defined elsewhere).
meta      LONG_FREEMAIL_ADDR  FREEMAIL_FROM && !__freemail_safe && __LONG_NOBR_ADDR
describe  LONG_FREEMAIL_ADDR  Freemail address has 20+ unbroken characters

# __HOTMAIL_HELO: a Received header whose "from" host starts with three
# letters and a digit, and whose "by" host repeats those three letters (as
# <letters><digits>-...<dot><letters><digits>.hotmail.com) followed by
# "with Microsoft".  Case-insensitive; flagged "nice".
header  __HOTMAIL_HELO        Received =~ /from ([A-Z]{3})\d[^.]+ [^\n]+ by \1\d+-[^\n ]+\.\1\d+\.hotmail\.com with Microsoft/i
tflags  __HOTMAIL_HELO        nice
# FORGED_HOTMAIL_RCVD 1 & 2 are in 20_head_tests.cf; this third variant
# doesn't use eval rules.  It hits when __HOST_HOTMAIL fires and either the
# Hotmail HELO pattern is absent or __DOS_SINGLE_EXT_RELAY fires (those two
# sub-rules are defined elsewhere).
meta    FORGED_HOTMAIL_RCVD3  __HOST_HOTMAIL && (__DOS_SINGLE_EXT_RELAY || !__HOTMAIL_HELO)

# As cross-posted between the sa-users list and the sare-users list at
# http://old.nabble.com/forum/ViewPost.jtp?post=27358692&framed=y
# SARE_RECV_SPAM_DOMN0b examines all Received headers for a dynamic host on
# hinet, which is unfair and likely unnecessary given we can do about as well
# with the safer B2/B3 variants, which only look at the rdns= value of the
# first relay entry in X-Spam-Relays-External.  It also appears that people
# think this rule is useful even today, so I'm testing it here.  The rDNS
# dynamic tests will likely trump.
#
# The dot between "dynamic" and "hinet" is escaped in all three patterns so
# that it matches only a literal '.', not an arbitrary character.
header SARE_RECV_SPAM_DOMN0B Received =~ /\bdynamic\.hinet\.(?:com|net|org|info)/
tflags SARE_RECV_SPAM_DOMN0B	nopublish
# B2: same TLD list, optionally followed by ".tw", limited to the rDNS of
# the first external relay (at most 25 characters before "dynamic").
header SARE_RECV_SPAM_DOMN0B2	X-Spam-Relays-External =~ /^[^\]]+ rdns=[^\] ]{0,25}\bdynamic\.hinet\.(?:com|net|org|info)(?:\.tw)? /
tflags SARE_RECV_SPAM_DOMN0B2	nopublish
# B3: as B2 but only for rDNS ending in exactly "dynamic.hinet.net".
header SARE_RECV_SPAM_DOMN0B3	X-Spam-Relays-External =~ /^[^\]]+ rdns=[^\] ]{0,25}\bdynamic\.hinet\.net /
tflags SARE_RECV_SPAM_DOMN0B3	nopublish

# RCVD_VIA_IPV6: in the first relay entry of X-Spam-Relays-Untrusted, the
# by= or ip= value contains a ':' followed by characters that are not ']',
# space or '.'.  Flagged "nice".
header    RCVD_VIA_IPV6  X-Spam-Relays-Untrusted =~ /^[^\]]+ (?:by|ip)=[^\] ]+:[^\] .]+ /
describe  RCVD_VIA_IPV6  Received by the last trusted relay via IPv6
tflags    RCVD_VIA_IPV6  nice