# SpamAssassin rules file: body tests
#
# Please don't modify this file as your changes will be overwritten with
# the next update. Use @@LOCAL_RULES_DIR@@/local.cf instead.
# See 'perldoc Mail::SpamAssassin::Conf' for details.
#
# Note: body tests are run with long lines, so be sure to limit the
# size of searches; use /.{0,30}/ instead of /.*/ to avoid huge
# search times.
#
# Note: If you are adding a rule which looks for a phrase in the body
# (as most of them do), please add it to rules/20_phrases.cf instead.
#
###########################################################################

require_version @@VERSION@@

###########################################################################
# GTUBE test - the generic test for UBE.
# The pattern is the fixed GTUBE marker string; each "*" and "." in it is
# backslash-escaped so that it is matched literally rather than as a regex
# metacharacter.
body     GTUBE  /XJS\*C4JDBQADN1\.NSBN3\*2IDNEN\*GTUBE-STANDARD-ANTI-UBE-TEST-EMAIL\*C\.34X/
describe GTUBE  Generic Test for Unsolicited Bulk Email

###########################################################################
# Message digest tests

# Razor (version 1) lookup.  Declared as a "full" rule and flagged "net".
full     RAZOR_CHECK   eval:check_razor1()
tflags   RAZOR_CHECK   net
describe RAZOR_CHECK   Listed in Razor1, see http://razor.sf.net/

# Razor (version 2) lookup.  Declared as a "full" rule and flagged "net".
# The RAZOR2_CF_RANGE_* rules depend on this rule being enabled.
full     RAZOR2_CHECK  eval:check_razor2()
tflags   RAZOR2_CHECK  net
describe RAZOR2_CHECK  Listed in Razor2, see http://razor.sf.net/

# Razor2 confidence-level (cf) range rules.
#
# cf (confidence level) is how likely the message is spam.  RAZOR2_CHECK
# returns true if cf>=min_cf (as defined by user/config).  The rules below
# return true depending on which band the message's cf value falls into.
# The algorithm goes: check the message via razor, then go through each
# mime part and check how razor scored it.  If the part is contested
# (i.e. it has been reported as both ham and spam) it is ignored.  SA takes
# the highest non-contested part cf score and returns it for the range
# rules.  In other words, this is essentially Razor 2's logic_method 4.
#
# Note: Disabling RAZOR2_CHECK (score RAZOR2_CHECK 0) will also disable
# these checks.
#
# Note: The scores are set to 0 on these tests right now until they get
# better integrated with SA overall.
#
# NOTE(review): these rules are declared "body" whereas RAZOR2_CHECK is
# declared "full"; confirm that check_razor2_range() is handed the kind of
# message data it expects when invoked as a body eval.
#
# Each rule passes its inclusive lower and upper cf bound to
# check_razor2_range() as two quoted strings.

body     RAZOR2_CF_RANGE_01_10   eval:check_razor2_range('01','10')
describe RAZOR2_CF_RANGE_01_10   Razor2 gives a spam confidence level between 1 and 10
tflags   RAZOR2_CF_RANGE_01_10   net

body     RAZOR2_CF_RANGE_11_20   eval:check_razor2_range('11','20')
describe RAZOR2_CF_RANGE_11_20   Razor2 gives a spam confidence level between 11 and 20
tflags   RAZOR2_CF_RANGE_11_20   net

body     RAZOR2_CF_RANGE_21_30   eval:check_razor2_range('21','30')
describe RAZOR2_CF_RANGE_21_30   Razor2 gives a spam confidence level between 21 and 30
tflags   RAZOR2_CF_RANGE_21_30   net

body     RAZOR2_CF_RANGE_31_40   eval:check_razor2_range('31','40')
describe RAZOR2_CF_RANGE_31_40   Razor2 gives a spam confidence level between 31 and 40
tflags   RAZOR2_CF_RANGE_31_40   net

body     RAZOR2_CF_RANGE_41_50   eval:check_razor2_range('41','50')
describe RAZOR2_CF_RANGE_41_50   Razor2 gives a spam confidence level between 41 and 50
tflags   RAZOR2_CF_RANGE_41_50   net

body     RAZOR2_CF_RANGE_51_60   eval:check_razor2_range('51','60')
describe RAZOR2_CF_RANGE_51_60   Razor2 gives a spam confidence level between 51 and 60
tflags   RAZOR2_CF_RANGE_51_60   net

body     RAZOR2_CF_RANGE_61_70   eval:check_razor2_range('61','70')
describe RAZOR2_CF_RANGE_61_70   Razor2 gives a spam confidence level between 61 and 70
tflags   RAZOR2_CF_RANGE_61_70   net

body     RAZOR2_CF_RANGE_71_80   eval:check_razor2_range('71','80')
describe RAZOR2_CF_RANGE_71_80   Razor2 gives a spam confidence level between 71 and 80
tflags   RAZOR2_CF_RANGE_71_80   net

body     RAZOR2_CF_RANGE_81_90   eval:check_razor2_range('81','90')
describe RAZOR2_CF_RANGE_81_90   Razor2 gives a spam confidence level between 81 and 90
tflags   RAZOR2_CF_RANGE_81_90   net

body     RAZOR2_CF_RANGE_91_100  eval:check_razor2_range('91','100')
describe RAZOR2_CF_RANGE_91_100  Razor2 gives a spam confidence level between 91 and 100
tflags   RAZOR2_CF_RANGE_91_100  net

# DCC lookup.  Declared as a "full" rule and flagged "net".
full     DCC_CHECK    eval:check_dcc()
tflags   DCC_CHECK    net
describe DCC_CHECK    Listed in DCC, see http://rhyolite.com/anti-spam/dcc/

# Pyzor lookup.  Declared as a "full" rule and flagged "net".
full     PYZOR_CHECK  eval:check_pyzor()
tflags   PYZOR_CHECK  net
describe PYZOR_CHECK  Listed in Pyzor, see http://pyzor.sf.net/

###########################################################################

# The word "remove" wrapped in double quotes, in any letter case.
body     REMOVE_IN_QUOTES  /\"remove\"/i
describe REMOVE_IN_QUOTES  List removal information

# Sub-rule: "301", then "a", "2", "c" (each optionally separated by
# non-word characters), then "1618".  \D on either side of the numbers
# keeps them from matching inside longer digit runs; \D+ between the
# pieces allows any non-digit text in between.  Case-insensitive.
body     __BILL_1618       /\D301\D+a\W*2\W*c\D+1618\D/i
describe __BILL_1618       Possible mention of bill 1618 (anti-spam bill)

# Fires only when both of the rules above hit on the same message.
meta     ANTISPAM_BILL     REMOVE_IN_QUOTES && __BILL_1618
describe ANTISPAM_BILL     Very likely mention of anti-spam bill

###########################################################################
# CLICK rules
# Note: the HTML_LINK_CLICK* rules are in the HTML parser section.

# "mailto:" followed within 50 characters by "click", itself followed
# within 50 characters by "remove".  /i ignores case; /s lets "." also
# match newlines.
body     CLICK_TO_REMOVE_2  /mailto:.{0,50}click.{0,50}remove/is
describe CLICK_TO_REMOVE_2  Click-to-remove with mailto: found beforehand

# Eval test on the raw body; the logic lives in check_carriage_returns().
rawbody  CARRIAGE_RETURNS  eval:check_carriage_returns()
describe CARRIAGE_RETURNS  Message contains a lot of ^M characters

# fixed by ms to not allow entry field onto next line
#
# Pattern, left to right: one character other than "<", a word of at least
# two ASCII letters, 1-15 further characters (non-greedy), optional tabs
# or spaces, then a run of 30 or more underscores.  No /s flag is used, so
# "." does not match a newline.
rawbody  ASCII_FORM_ENTRY  /[^<][A-Za-z][A-Za-z]+.{1,15}?[\x09\x20]*_{30,}/
describe ASCII_FORM_ENTRY  Contains an ASCII-formatted form

# this seems to be the new fashion (as of Jul 5 2002).  base64-encoded
# parts need to be stripped before this match
#
# Anchored at both ends (^ ... \z): 6-24 alphanumerics, then 12-36
# characters that may also include "-" and "_", then 6-24 alphanumerics,
# with only optional trailing whitespace before the end.  Case-insensitive.
body     TRACKER_ID  /^[a-z0-9]{6,24}[-_a-z0-9]{12,36}[a-z0-9]{6,24}\s*\z/is
describe TRACKER_ID  Incorporates a tracking ID number

# heh heh heh... tags used to (presumably) generate those anti-Razor random IDs

# Literal "{%RAND%}" placeholder (braces escaped), case-sensitive.
body     MARKUP_RAND  /\{%RAND%\}/
describe MARKUP_RAND  RAND found, spammer forgot to run the random-ID generator

# Literal "SSPLTM" placeholder, case-sensitive.
body     MARKUP_SSPL  /SSPLTM/
describe MARKUP_SSPL  SSPL found, spammer forgot to run the random-ID generator

# (contrib: Matt Sergeant)
# An unbroken run of 70 or more hexadecimal digits (either letter case).
body     LARGE_HEX  /[0-9a-fA-F]{70,}/
describe LARGE_HEX  Contains a large block of hexadecimal code

# converted to use eval method by Matthew Cline
#
# Three eval-based yelling tests.  The first calls check_for_yelling() with
# no arguments; the other two call check_for_num_yelling_lines() with the
# line count passed as a quoted string.
body     LINES_OF_YELLING    eval:check_for_yelling()
describe LINES_OF_YELLING    A WHOLE LINE OF YELLING DETECTED

body     LINES_OF_YELLING_2  eval:check_for_num_yelling_lines("2")
describe LINES_OF_YELLING_2  2 WHOLE LINES OF YELLING DETECTED

body     LINES_OF_YELLING_3  eval:check_for_num_yelling_lines("3")
describe LINES_OF_YELLING_3  3 WHOLE LINES OF YELLING DETECTED

# Two quote-class characters, up to 16 non-whitespace characters, then two
# more quote-class characters.  The class is written in octal: \042 is the
# ASCII double quote; \223 \224 \262 \263 \271 are the bytes 0x93, 0x94,
# 0xB2, 0xB3 and 0xB9 (presumably quote-like glyphs in some 8-bit
# character sets -- TODO confirm which).
body     WEIRD_QUOTING  /[\042\223\224\262\263\271]{2}\S{0,16}[\042\223\224\262\263\271]{2}/
describe WEIRD_QUOTING  Weird repeated double-quotation marks in body

###########################################################################
# these tests don't actually use rawbody since rawbody isn't raw enough;
# they must be written very carefully to avoid modifying the original content

# MIME structure tests.  All but one call check_for_mime() with the name of
# the property to test; MIME_EXCESSIVE_QP instead calls
# check_for_mime_excessive_qp() with a numeric argument (presumably a
# threshold -- confirm against the eval implementation).

rawbody  BASE64_ENC_TEXT        eval:check_for_mime('mime_base64_encoded_text')
describe BASE64_ENC_TEXT        Message text disguised using base-64 encoding

rawbody  MIME_EXCESSIVE_QP      eval:check_for_mime_excessive_qp('0.011')
describe MIME_EXCESSIVE_QP      Excessive quoted-printable encoding in body

rawbody  MIME_HTML_NO_CHARSET   eval:check_for_mime('mime_html_no_charset')
describe MIME_HTML_NO_CHARSET   Message text in HTML without specified charset

rawbody  MIME_LONG_LINE_QP      eval:check_for_mime('mime_long_line_qp')
describe MIME_LONG_LINE_QP      Quoted-printable line longer than 76 characters

rawbody  MIME_MISSING_BOUNDARY  eval:check_for_mime('mime_missing_boundary')
describe MIME_MISSING_BOUNDARY  MIME section missing boundary

# The next two rules typically indicate viruses rather than spam; they are
# only used here to clean corpora.
# todo: find a better tflags category than "userconf" for these tests.

rawbody  MICROSOFT_EXECUTABLE  eval:check_for_mime('microsoft_executable')
tflags   MICROSOFT_EXECUTABLE  userconf
describe MICROSOFT_EXECUTABLE  Message includes Microsoft executable program

rawbody  MIME_SUSPECT_NAME     eval:check_for_mime('mime_suspect_name')
tflags   MIME_SUSPECT_NAME     userconf
describe MIME_SUSPECT_NAME     MIME filename does not match content

# note: __HIGHBITS is used by HTML_CHARSET_FARAWAY

# Sub-rule: eval check of the 'mime_faraway_charset' property.
rawbody  __MIME_CHARSET_FARAWAY  eval:check_for_mime('mime_faraway_charset')

# Sub-rule: at least four bytes in the range 0x80-0xFF, each of which may
# be followed by one arbitrary character.
body     __HIGHBITS              /(?:[\x80-\xff].?){4,}/

# Fires only when both sub-rules above hit.
meta     MIME_CHARSET_FARAWAY    (__MIME_CHARSET_FARAWAY && __HIGHBITS)
describe MIME_CHARSET_FARAWAY    MIME character set indicates foreign language
tflags   MIME_CHARSET_FARAWAY    userconf

###########################################################################

# Language / character-set eval tests; both are flagged "userconf".

body     CHARSET_FARAWAY          eval:check_for_faraway_charset()
tflags   CHARSET_FARAWAY          userconf
describe CHARSET_FARAWAY          Character set indicates a foreign language

body     UNDESIRED_LANGUAGE_BODY  eval:check_language()
tflags   UNDESIRED_LANGUAGE_BODY  userconf
describe UNDESIRED_LANGUAGE_BODY  Written in an undesired language

# A run of eight or more consecutive bytes in the range 0x80-0xFF.
body     BODY_8BITS         /[\x80-\xff]{8,}/
describe BODY_8BITS         Body includes 8 consecutive 8-bit characters

# Eval check of the 'mime_qp_illegal' property via check_for_mime().
rawbody  MIME_DEFICIENT_QP  eval:check_for_mime('mime_qp_illegal')
describe MIME_DEFICIENT_QP  Deficient quoted-printable encoding in body