# SpamAssassin rules file: body tests
#
# Please don't modify this file as your changes will be overwritten with
# the next update. Use @@LOCAL_RULES_DIR@@/local.cf instead.
# See 'perldoc Mail::SpamAssassin::Conf' for details.
#
# Note: body tests are run with long lines, so be sure to limit the
# size of searches; use /.{0,30}/ instead of /.*/ to avoid huge
# search times.
#
# Note: If you are adding a rule which looks for a phrase in the body
# (as most of them do), please add it to rules/20_phrases.cf instead.
#
# <@LICENSE>
# Licensed to the Apache Software Foundation (ASF) under one or more
# contributor license agreements. See the NOTICE file distributed with
# this work for additional information regarding copyright ownership.
# The ASF licenses this file to you under the Apache License, Version 2.0
# (the "License"); you may not use this file except in compliance with
# the License. You may obtain a copy of the License at:
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
#
#
###########################################################################

###########################################################################
# GTUBE test - the generic test for UBE.
# The pattern is a fixed literal string; '*' and '.' are backslash-escaped
# so that they only match themselves.
body     GTUBE                  /XJS\*C4JDBQADN1\.NSBN3\*2IDNEN\*GTUBE-STANDARD-ANTI-UBE-TEST-EMAIL\*C\.34X/
describe GTUBE                  Generic Test for Unsolicited Bulk Email
tflags   GTUBE                  userconf noautolearn

###########################################################################

# this seems to be the new fashion (as of Jul 5 2002). base64-encoded
# parts need to be stripped before this match
#
# Pattern, anchored at both ends and case-insensitive: 6-24 alphanumerics,
# then 12-36 characters that may also include '-' and '_', then another
# 6-24 alphanumerics, followed only by optional whitespace.
body     TRACKER_ID             /^[a-z0-9]{6,24}[-_a-z0-9]{12,36}[a-z0-9]{6,24}\s*\z/is
describe TRACKER_ID             Incorporates a tracking ID number

# Two consecutive bytes from the quote set, at most 16 non-whitespace
# characters, then two more bytes from the same set. \042 is the ASCII
# double quote; the other octal escapes are high-bit bytes (presumably
# quote look-alikes in 8-bit character sets -- TODO confirm).
body     WEIRD_QUOTING          /[\042\223\224\262\263\271]{2}\S{0,16}[\042\223\224\262\263\271]{2}/
describe WEIRD_QUOTING          Weird repeated double-quotation marks

###########################################################################
# these tests don't actually use rawbody since rawbody isn't raw enough;
# they must be written very carefully to avoid modifying the original content

# MIME Content-Transfer-Encoding control rules
# Each rule below hands a single property name to the check_for_mime eval.
rawbody  __MIME_BASE64          eval:check_for_mime('mime_base64_count')
describe __MIME_BASE64          Includes a base64 attachment

rawbody  __MIME_QP              eval:check_for_mime('mime_qp_count')
describe __MIME_QP              Includes a quoted-printable attachment

rawbody  MIME_BASE64_BLANKS     eval:check_for_mime('mime_base64_blanks')
describe MIME_BASE64_BLANKS     Extra blank lines in base64 encoding

rawbody  MIME_BASE64_NO_NAME    eval:check_for_mime('mime_base64_no_name')
describe MIME_BASE64_NO_NAME    base64 attachment does not have a file name

rawbody  MIME_BASE64_TEXT       eval:check_for_mime('mime_base64_encoded_text')
describe MIME_BASE64_TEXT       Message text disguised using base64 encoding

rawbody  MIME_MISSING_BOUNDARY  eval:check_for_mime('mime_missing_boundary')
describe MIME_MISSING_BOUNDARY  MIME section missing boundary

body     MISSING_MIME_HB_SEP    eval:check_msg_parse_flags('missing_mime_head_body_separator')
describe MISSING_MIME_HB_SEP    Missing blank line between MIME header and body

body     MIME_HTML_MOSTLY       eval:check_mime_multipart_ratio('0.00','0.01')
describe MIME_HTML_MOSTLY       Multipart message mostly text/html MIME

# Steve Linford via Charlie Watts: good test!
body     MIME_HTML_ONLY          eval:check_for_mime_html_only()
describe MIME_HTML_ONLY          Message only has text/html MIME parts

# multipart/alternative has very good accuracy, other multipart types are
# similar to MIME_HTML_ONLY so they don't need a separate rule
#
# The meta rule fires only when the Content-Type header matches
# "multipart/alternative" (case-insensitive) and MIME_HTML_ONLY also hit.
header   __CTYPE_MULTIPART_ALT   Content-Type =~ /multipart\/alternative/i
meta     MIME_HTML_ONLY_MULTI    (__CTYPE_MULTIPART_ALT && MIME_HTML_ONLY)
describe MIME_HTML_ONLY_MULTI    Multipart message only has text/html MIME parts

rawbody  MIME_QP_LONG_LINE       eval:check_for_mime('mime_qp_long_line')
describe MIME_QP_LONG_LINE       Quoted-printable line longer than 76 chars

# note: __HIGHBITS is used by HTML_CHARSET_FARAWAY
# __HIGHBITS: four bytes in the range 0x80-0xff, each optionally followed
# by one arbitrary character.
rawbody  __MIME_CHARSET_FARAWAY  eval:check_for_mime('mime_faraway_charset')
body     __HIGHBITS              /(?:[\x80-\xff].?){4}/
meta     MIME_CHARSET_FARAWAY    (__MIME_CHARSET_FARAWAY && __HIGHBITS)
describe MIME_CHARSET_FARAWAY    MIME character set indicates foreign language
tflags   MIME_CHARSET_FARAWAY    userconf

# This rule uses a simple algorithm to determine if the text and html
# parts of a multipart/alternative message are different.
body     MPART_ALT_DIFF        eval:multipart_alternative_difference('99', '100')
describe MPART_ALT_DIFF        HTML and text parts are different

body     MPART_ALT_DIFF_COUNT  eval:multipart_alternative_difference_count('3', '1')
describe MPART_ALT_DIFF_COUNT  HTML and text parts are different

body     MIME_BAD_ISO_CHARSET  eval:check_for_mime('mime_bad_iso_charset')
describe MIME_BAD_ISO_CHARSET  MIME character set is an unknown ISO charset

###########################################################################

body     CHARSET_FARAWAY       eval:check_for_faraway_charset()
describe CHARSET_FARAWAY       Character set indicates a foreign language
tflags   CHARSET_FARAWAY       userconf

# duncf
# The pattern expects '^' where a plain address has '@' and '(' where it
# has '.', as the "ok" samples below show; it is case-sensitive
# (lower-case letters only). The "test" lines are self-test samples:
# "ok" text must hit, "fail" text must not.
body     EMAIL_ROT13           /\b[a-z(\]-]+\^[a-z-]+\([a-z]{2,3}\b/
describe EMAIL_ROT13           Body contains a ROT13-encoded email address
test     EMAIL_ROT13           ok qhabs^ebtref(pbz
test     EMAIL_ROT13           ok zxrggyre^riv-vap(pbz
test     EMAIL_ROT13           fail duncf-nospam@rogers.com

# Three adjacent bands of blank-line percentage; all three pass the same
# third argument ('4') to check_blank_line_ratio.
body     BLANK_LINES_70_80     eval:check_blank_line_ratio('70','80','4')
body     BLANK_LINES_80_90     eval:check_blank_line_ratio('80','90','4')
body     BLANK_LINES_90_100    eval:check_blank_line_ratio('90','100','4')
describe BLANK_LINES_70_80     Message body has 70-80% blank lines
describe BLANK_LINES_80_90     Message body has 80-90% blank lines
describe BLANK_LINES_90_100    Message body has 90-100% blank lines

body     UNIQUE_WORDS          eval:check_unique_words('0.946', '3.1')
describe UNIQUE_WORDS          Message body has many words used only once

body     DOMAIN_RATIO          eval:check_domain_ratio('0.022')
describe DOMAIN_RATIO          Message body mentions many internet domains

# this could use more work
# Each sub-rule looks for a run of lower-case words, separated by
# whitespace and/or dots: 6 words of 8+ letters, 9 words of 6+ letters,
# or 10 words of 5+ letters. LONGWORDS fires when the sum of the three
# sub-rule results is greater than 1.
body     __LONGWORDS_A         /\b(?:[a-z]{8,}[\s\.]+){6}/
body     __LONGWORDS_B         /\b(?:[a-z]{6,}[\s\.]+){9}/
body     __LONGWORDS_C         /\b(?:[a-z]{5,}[\s\.]+){10}/
meta     LONGWORDS             (__LONGWORDS_A + __LONGWORDS_B + __LONGWORDS_C > 1)
describe LONGWORDS             Long string of long words

body     HTTPS_IP_MISMATCH     eval:check_https_ip_mismatch()
describe HTTPS_IP_MISMATCH     IP to HTTPS link found in HTML

# Three repetitions of: an alphanumeric directly followed by '<', up to two
# '/' or space characters, a tag that does not start with one of the
# excluded prefixes, at most 64 non-'>' characters, '>', another
# alphanumeric and up to 64 further characters. Case-insensitive.
# The excluded prefixes are plain look-aheads with no word boundary, so
# (?!p), (?!b) and (?!i) exclude every tag whose name merely begins with
# that letter, not just <p>, <b> and <i>.
rawbody  INTERRUPTUS           /(?:[a-zA-Z0-9]<[\/ ]{0,2}?(?!br)(?!p)(?!sup)(?!li)(?!b)(?!i)(?!option)(?!a (?:href|name))(?:\b|!--)[^>]{0,64}?>[a-zA-Z0-9].{0,64}){3}/i
describe INTERRUPTUS           Message looks to contain HTML-interrupted text