# SpamAssassin rules file: HTML tests
#
# Please don't modify this file as your changes will be overwritten with
# the next update. Use @@LOCAL_RULES_DIR@@/local.cf instead.
# See 'perldoc Mail::SpamAssassin::Conf' for details.
#
# <@LICENSE>
# Licensed to the Apache Software Foundation (ASF) under one or more
# contributor license agreements. See the NOTICE file distributed with
# this work for additional information regarding copyright ownership.
# The ASF licenses this file to you under the Apache License, Version 2.0
# (the "License"); you may not use this file except in compliance with
# the License. You may obtain a copy of the License at:
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# </@LICENSE>
#
###########################################################################

require_version @@VERSION@@

# HTML parser tests
#
# please sort these by eval type then name

# Short HTML bodies that also contain a linked image, split into three
# length bands. The __HTML_LENGTH_* and __HTML_LINK_IMAGE sub-rules are
# defined outside this section.
meta HTML_SHORT_LINK_IMG_1 __HTML_LENGTH_0000_1024 && __HTML_LINK_IMAGE
meta HTML_SHORT_LINK_IMG_2 __HTML_LENGTH_1024_1536 && __HTML_LINK_IMAGE
meta HTML_SHORT_LINK_IMG_3 __HTML_LENGTH_1536_2048 && __HTML_LINK_IMAGE
describe HTML_SHORT_LINK_IMG_1 HTML is very short with a linked image
describe HTML_SHORT_LINK_IMG_2 HTML is very short with a linked image
describe HTML_SHORT_LINK_IMG_3 HTML is very short with a linked image

meta HTML_SHORT_CENTER (__HTML_LENGTH_384 && __TAG_EXISTS_CENTER)
describe HTML_SHORT_CENTER HTML is very short with CENTER tag

# Fires only when no MIME attachment is present.
meta HTML_TITLE_SUBJ_DIFF __HTML_TITLE_SUBJ_DIFF && !__MIME_ATTACHMENT

# Requires both the faraway-charset sub-rule and the __HIGHBITS body rule
# defined further down in this file.
meta HTML_CHARSET_FARAWAY (__HTML_CHARSET_FARAWAY && __HIGHBITS)
describe HTML_CHARSET_FARAWAY A foreign language charset used in HTML markup
tflags HTML_CHARSET_FARAWAY userconf

meta HTML_MIME_NO_HTML_TAG MIME_HTML_ONLY && !__TAG_EXISTS_HTML
describe HTML_MIME_NO_HTML_TAG HTML-only message, but there is no HTML tag

meta HTML_MISSING_CTYPE (!__MIME_HTML && HTML_MESSAGE)
describe HTML_MISSING_CTYPE Message is HTML without HTML Content-Type

###########################################################################

# rawbody HTML tests

rawbody HIDE_WIN_STATUS /<[^>]+onMouseOver=[^>]+window\.status=/i
describe HIDE_WIN_STATUS Javascript to hide URLs in browser

# One or more "<!...>" tags wedged directly between two text characters.
# The "<![^>" part of both patterns had been stripped, leaving "(?:]*>)",
# which matched a bare ">" between two characters instead of a comment.
# NOTE(review): restored from the upstream rule text; confirm against the
# distributed 25_html.cf before release.
rawbody __OBFUSCATING_COMMENT_A /\w(?:<![^>]*>)+\w/
rawbody __OBFUSCATING_COMMENT_B /[^\s>](?:<![^>]*>)+[^\s<]/
ifplugin Mail::SpamAssassin::Plugin::HTMLEval
ifplugin Mail::SpamAssassin::Plugin::MIMEEval
# The stricter A pattern is accepted on any HTML message; the looser B
# pattern only on HTML-only mail. ISO-2022-JP delimiters suppress the rule.
meta OBFUSCATING_COMMENT ((__OBFUSCATING_COMMENT_A && HTML_MESSAGE) || (__OBFUSCATING_COMMENT_B && MIME_HTML_ONLY)) && !__ISO_2022_JP_DELIM
describe OBFUSCATING_COMMENT HTML comments which obfuscate text
endif
endif

# spams that are assembled from a Javascript array
# look for the XOR op
rawbody __JS_FROMCHARCODE /String\.fromCharCode\s*\(\s*\S+\s*\[\s*\S+\s*\]\s*\^/
rawbody __JS_DOCWRITE /document\.write/
meta JS_FROMCHARCODE (__JS_FROMCHARCODE && __JS_DOCWRITE)
describe JS_FROMCHARCODE Document is built from a Javascript charcode array

# a good possible rule that may resurface
# ! $ % ' ( ) , - . / : ; = ? @ _
#rawbody ENTITY_DEC_OTHER /\&\#0*(?:3[3679]|4[014567]|5[89]|6[134]|95)\;/
#describe ENTITY_DEC_OTHER HTML contains needlessly encoded punctuation

# Four high-bit bytes, each optionally followed by one arbitrary character.
body __HIGHBITS /(?:[\x80-\xff].?){4}/
# note: __HIGHBITS is used by HTML_CHARSET_FARAWAY

###########################################################################

ifplugin Mail::SpamAssassin::Plugin::HTMLEval

# HTML control test, HTML spam rules should all have better S/O than this
body HTML_MESSAGE eval:html_test('html')
describe HTML_MESSAGE HTML included in message

# HTML comment tests
# NOTE(review): the pattern argument below is empty, which looks like the
# same "<!...>" stripping that damaged the OBFUSCATING_COMMENT patterns.
# Left unchanged here; restore the intended pattern from upstream.
body HTML_COMMENT_SHORT eval:html_text_match('comment', '')
describe HTML_COMMENT_SHORT HTML comment is very short

# NOTE(review): the next rule is cut off in the middle of its pattern
# argument; it is reproduced exactly as found and must be completed.
body HTML_COMMENT_SAVED_URL eval:html_text_match('comment', '