# SpamAssassin rules file: body tests
#
# Please don't modify this file as your changes will be overwritten with
# the next update. Use @@LOCAL_RULES_DIR@@/local.cf instead.
# See 'perldoc Mail::SpamAssassin::Conf' for details.
#
# Note: body tests are run with long lines, so be sure to limit the
# size of searches; use /.{0,30}/ instead of /.*/ to avoid huge
# search times.
#
# Note: If you are adding a rule which looks for a phrase in the body
# (as most of them do), please add it to rules/20_phrases.cf instead.
#
###########################################################################

require_version @@VERSION@@

###########################################################################
# GTUBE test - the generic test for UBE.

body     GTUBE          /XJS\*C4JDBQADN1\.NSBN3\*2IDNEN\*GTUBE-STANDARD-ANTI-UBE-TEST-EMAIL\*C\.34X/
describe GTUBE          Generic Test for Unsolicited Bulk Email

###########################################################################
# Message digest tests

full     RAZOR_CHECK    eval:check_razor1()
describe RAZOR_CHECK    Listed in Razor1, see http://razor.sf.net/
tflags   RAZOR_CHECK    net

full     RAZOR2_CHECK   eval:check_razor2()
describe RAZOR2_CHECK   Listed in Razor2, see http://razor.sf.net/
tflags   RAZOR2_CHECK   net

# cf (confidence level) is how likely the message is spam.  RAZOR2_CHECK
# returns true if cf>=min_cf (as defined by user/config).  These return
# true depending on what cf value the message has.  The algorithm goes:
# check the message via razor, then go through each mime part and check
# how razor scored it.  If the part is contested (i.e. it's been reported
# as both ham and spam) it's ignored.  SA takes the highest non-contested
# part cf score and returns it for the range rules.  I.e. this is
# essentially Razor 2's logic_method 4.
#
# Note: Disabling RAZOR2_CHECK (score RAZOR2_CHECK 0) will also disable
# these checks.
#
# Note: The scores are set to 0 on these tests right now until they get
# better integrated with SA overall.
#
body     RAZOR2_CF_RANGE_01_10    eval:check_razor2_range('01','10')
body     RAZOR2_CF_RANGE_11_20    eval:check_razor2_range('11','20')
body     RAZOR2_CF_RANGE_21_30    eval:check_razor2_range('21','30')
body     RAZOR2_CF_RANGE_31_40    eval:check_razor2_range('31','40')
body     RAZOR2_CF_RANGE_41_50    eval:check_razor2_range('41','50')
body     RAZOR2_CF_RANGE_51_60    eval:check_razor2_range('51','60')
body     RAZOR2_CF_RANGE_61_70    eval:check_razor2_range('61','70')
body     RAZOR2_CF_RANGE_71_80    eval:check_razor2_range('71','80')
body     RAZOR2_CF_RANGE_81_90    eval:check_razor2_range('81','90')
body     RAZOR2_CF_RANGE_91_100   eval:check_razor2_range('91','100')
describe RAZOR2_CF_RANGE_01_10    Razor2 gives a spam confidence level between 1 and 10
describe RAZOR2_CF_RANGE_11_20    Razor2 gives a spam confidence level between 11 and 20
describe RAZOR2_CF_RANGE_21_30    Razor2 gives a spam confidence level between 21 and 30
describe RAZOR2_CF_RANGE_31_40    Razor2 gives a spam confidence level between 31 and 40
describe RAZOR2_CF_RANGE_41_50    Razor2 gives a spam confidence level between 41 and 50
describe RAZOR2_CF_RANGE_51_60    Razor2 gives a spam confidence level between 51 and 60
describe RAZOR2_CF_RANGE_61_70    Razor2 gives a spam confidence level between 61 and 70
describe RAZOR2_CF_RANGE_71_80    Razor2 gives a spam confidence level between 71 and 80
describe RAZOR2_CF_RANGE_81_90    Razor2 gives a spam confidence level between 81 and 90
describe RAZOR2_CF_RANGE_91_100   Razor2 gives a spam confidence level between 91 and 100
tflags   RAZOR2_CF_RANGE_01_10    net
tflags   RAZOR2_CF_RANGE_11_20    net
tflags   RAZOR2_CF_RANGE_21_30    net
tflags   RAZOR2_CF_RANGE_31_40    net
tflags   RAZOR2_CF_RANGE_41_50    net
tflags   RAZOR2_CF_RANGE_51_60    net
tflags   RAZOR2_CF_RANGE_61_70    net
tflags   RAZOR2_CF_RANGE_71_80    net
tflags   RAZOR2_CF_RANGE_81_90    net
tflags   RAZOR2_CF_RANGE_91_100   net

full     DCC_CHECK      eval:check_dcc()
describe DCC_CHECK      Listed in DCC, see http://rhyolite.com/anti-spam/dcc/
tflags   DCC_CHECK      net

full     PYZOR_CHECK    eval:check_pyzor()
describe PYZOR_CHECK    Listed in Pyzor, see http://pyzor.sf.net/
tflags   PYZOR_CHECK    net

###########################################################################

body     REMOVE_IN_QUOTES   /\"remove\"/i
describe REMOVE_IN_QUOTES   List removal information

body     __BILL_1618        /\D301\D+a\W*2\W*c\D+1618\D/i
describe __BILL_1618        Possible mention of bill 1618 (anti-spam bill)

# Fires only when both sub-tests above hit on the same message.
meta     ANTISPAM_BILL      REMOVE_IN_QUOTES && __BILL_1618
describe ANTISPAM_BILL      Very likely mention of anti-spam bill

###########################################################################
# CLICK rules
# note HTML_LINK_CLICK* rules in HTML parser section

body     CLICK_TO_REMOVE_2  /mailto:.{0,50}click.{0,50}remove/is
describe CLICK_TO_REMOVE_2  Click-to-remove with mailto: found beforehand

rawbody  CARRIAGE_RETURNS   eval:check_carriage_returns()
describe CARRIAGE_RETURNS   Message contains a lot of ^M characters

# fixed by ms to not allow entry field onto next line
rawbody  ASCII_FORM_ENTRY   /[^<][A-Za-z][A-Za-z]+.{1,15}?[\x09\x20]*_{30,}/
describe ASCII_FORM_ENTRY   Contains an ASCII-formatted form

# this seems to be the new fashion (as of Jul 5 2002). base64-encoded parts
# need to be stripped before this match
body     TRACKER_ID         /^[a-z0-9]{6,24}[-_a-z0-9]{12,36}[a-z0-9]{6,24}\s*\z/is
describe TRACKER_ID         Incorporates a tracking ID number

# heh heh heh... tags used to (presumably) generate those anti-Razor random IDs
body     MARKUP_RAND        /\{%RAND%\}/
describe MARKUP_RAND        RAND found, spammer forgot to run the random-ID generator

body     MARKUP_SSPL        /SSPLTM/
describe MARKUP_SSPL        SSPL found, spammer forgot to run the random-ID generator

# (contrib: Matt Sergeant)
body     LARGE_HEX          /[0-9a-fA-F]{70,}/
describe LARGE_HEX          Contains a large block of hexadecimal code

# converted to use eval method by Matthew Cline
body     LINES_OF_YELLING   eval:check_for_yelling()
describe LINES_OF_YELLING   A WHOLE LINE OF YELLING DETECTED

body     LINES_OF_YELLING_2 eval:check_for_num_yelling_lines("2")
describe LINES_OF_YELLING_2 2 WHOLE LINES OF YELLING DETECTED

body     LINES_OF_YELLING_3 eval:check_for_num_yelling_lines("3")
describe LINES_OF_YELLING_3 3 WHOLE LINES OF YELLING DETECTED

body     WEIRD_QUOTING      /[\042\223\224\262\263\271]{2}\S{0,16}[\042\223\224\262\263\271]{2}/
describe WEIRD_QUOTING      Weird repeated double-quotation marks in body

###########################################################################
# these tests don't actually use rawbody since rawbody isn't raw enough;
# they must be written very carefully to avoid modifying the original content

rawbody  BASE64_ENC_TEXT        eval:check_for_mime('mime_base64_encoded_text')
describe BASE64_ENC_TEXT        Message text disguised using base-64 encoding

rawbody  MIME_EXCESSIVE_QP      eval:check_for_mime_excessive_qp('0.011')
describe MIME_EXCESSIVE_QP      Excessive quoted-printable encoding in body

rawbody  MIME_HTML_NO_CHARSET   eval:check_for_mime('mime_html_no_charset')
describe MIME_HTML_NO_CHARSET   Message text in HTML without specified charset

rawbody  MIME_LONG_LINE_QP      eval:check_for_mime('mime_long_line_qp')
describe MIME_LONG_LINE_QP      Quoted-printable line longer than 76 characters

rawbody  MIME_MISSING_BOUNDARY  eval:check_for_mime('mime_missing_boundary')
describe MIME_MISSING_BOUNDARY  MIME section missing boundary

# actually indicates viruses, typically; just used here to clean corpora.
rawbody  MICROSOFT_EXECUTABLE   eval:check_for_mime('microsoft_executable')
describe MICROSOFT_EXECUTABLE   Message includes Microsoft executable program
# todo: better tflags category for these tests
tflags   MICROSOFT_EXECUTABLE   userconf

# actually indicates viruses, typically; just used here to clean corpora.
rawbody  MIME_SUSPECT_NAME      eval:check_for_mime('mime_suspect_name')
describe MIME_SUSPECT_NAME      MIME filename does not match content
# todo: better tflags category for these tests
tflags   MIME_SUSPECT_NAME      userconf

# note: __HIGHBITS is used by HTML_CHARSET_FARAWAY
rawbody  __MIME_CHARSET_FARAWAY eval:check_for_mime('mime_faraway_charset')
body     __HIGHBITS             /(?:[\x80-\xff].?){4,}/
# Requires both the MIME charset sub-test and the high-bit sub-test to hit.
meta     MIME_CHARSET_FARAWAY   (__MIME_CHARSET_FARAWAY && __HIGHBITS)
describe MIME_CHARSET_FARAWAY   MIME character set indicates foreign language
tflags   MIME_CHARSET_FARAWAY   userconf

###########################################################################

body     CHARSET_FARAWAY          eval:check_for_faraway_charset()
describe CHARSET_FARAWAY          Character set indicates a foreign language
tflags   CHARSET_FARAWAY          userconf

body     UNDESIRED_LANGUAGE_BODY  eval:check_language()
describe UNDESIRED_LANGUAGE_BODY  Written in an undesired language
tflags   UNDESIRED_LANGUAGE_BODY  userconf

body     BODY_8BITS               /[\x80-\xff]{8,}/
describe BODY_8BITS               Body includes 8 consecutive 8-bit characters

rawbody  MIME_DEFICIENT_QP        eval:check_for_mime('mime_qp_illegal')
describe MIME_DEFICIENT_QP        Deficient quoted-printable encoding in body