# SpamAssassin rules file: URI tests
#
# Please don't modify this file as your changes will be overwritten with
# the next update. Use @@LOCAL_RULES_DIR@@/local.cf instead.
# See 'perldoc Mail::SpamAssassin::Conf' for details.
#
# This program is free software; you can redistribute it and/or modify
# it under the terms of either the Artistic License or the GNU General
# Public License as published by the Free Software Foundation; either
# version 1 of the License, or (at your option) any later version.
#
# See the file "License" in the top level of the SpamAssassin source
# distribution for more details.
#
###########################################################################

require_version @@VERSION@@

# http(s) URL whose host part starts with a run of seven or more digits.
uri NUMERIC_HTTP_ADDR /^https?\:\/\/\d{7,}/is
describe NUMERIC_HTTP_ADDR Uses a numeric IP address in URL

# http(s) URL pointing at a dotted quad; an optional "...@" prefix before
# the address is allowed for.
uri NORMAL_HTTP_TO_IP /^https?\:\/\/(?:\S*\@)?\d+\.\d+\.\d+\.\d+/i
describe NORMAL_HTTP_TO_IP Uses a dotted-decimal IP address in URL

# "=user@domain.tld" inside an http(s) URL, followed by "&" or whitespace.
# The pattern has no /i flag, so only lower-case addresses match, and the
# final label is limited to two or three characters.
uri HTTP_WITH_EMAIL_IN_URL /^https?\:\/\/\S+=[-_\+a-z0-9\.]+\@[-_\+a-z0-9\.]+\.[-_\+a-z0-9]{2,3}(?:\&|\s)/
describe HTTP_WITH_EMAIL_IN_URL 'remove' URL contains an email address

# Theo sez:
# Have gotten FPs off this, and whitespace can't be in the host, so...
# % Visit my homepage: http://i.like.foo.com %
# Hence the host portion is matched as [^\/\s]* (no slash, no whitespace)
# before the %XX escape.
uri HTTP_ESCAPED_HOST /^https?\:\/\/[^\/\s]*%[0-9a-fA-F][0-9a-fA-F]/
describe HTTP_ESCAPED_HOST Uses %-escapes inside a URL's hostname

# Control characters \x00-\x1f in the host portion, except tab (\x09),
# LF (\x0a) and CR (\x0d).
# note: do not match \r or \n
uri HTTP_CTRL_CHARS_HOST /^https?\:\/\/[^\/\s]*[\x00-\x08\x0b\x0c\x0e-\x1f]/
describe HTTP_CTRL_CHARS_HOST Uses control sequences inside a URL hostname

# look for URI with escaped 0-9, A-Z, or a-z characters (all other safe
# characters have been well-tested, but are sometimes unnecessarily escaped
# in nonspam; requiring "http" or "https" also reduces false positives).
# Escapes matched (hex digits are case-insensitive because of /i):
#   %30-%39            -> 0-9
#   %41-%4f, %61-%6f   -> A-O, a-o
#   %50-%5a, %70-%7a   -> P-Z, p-z
uri HTTP_EXCESSIVE_ESCAPES /^https?:\/\/\S*%(?:3\d|[46][1-9a-f]|[57][\da])/i
describe HTTP_EXCESSIVE_ESCAPES Completely unnecessary %-escapes inside a URL

# thx to vince.delvecchio@analog.com for the legwork on the negative
# lookbehinds here; saved a lot of work for us (bug 1035), also see bug 1835
#
# NOTE(review): the PORN_4 pattern is truncated in this copy of the file.
# Everything from the first lookbehind "(?<" up to the ">" inside the next
# rule's character class is missing (it looks like the text was stripped as
# if it were markup), which also removed PORN_4's describe line.  The
# surviving fragment is not a valid regex, so it is kept below commented
# out.  TODO: restore the full pattern and its describe line from the
# upstream rules file; do not guess the missing alternatives.
#uri PORN_4 /^https?:\/\/[\w\.-]*(?:xxx|(?

# NOTE(review): the head of this rule was lost in the same truncation; only
# the tail "/]*\&\#[\da-f]+}i" survived.  The prefix "m{https?://[^\s\">"
# is a reconstruction -- TODO confirm against the upstream rules file.
# Intent per the description: a numeric character entity ("&#...") appearing
# before the first "/" that follows the scheme.
uri HTTP_ENTITIES_HOST m{https?://[^\s\">/]*\&\#[\da-f]+}i
describe HTTP_ENTITIES_HOST URI obscured with character entities

# "dollar" and "machine" with at most one arbitrary character between them.
uri URI_DOLLARMACHINE /dollar.?machine/i
describe URI_DOLLARMACHINE Message has URI for dollarmachine

# "hitbox.com" anywhere in the URI (not anchored).
uri URI_HITBOX /hitbox\.com/i
describe URI_HITBOX Message has URI for hitbox.com

# rd.yahoo.com path starting with four or more digits, "partner" or "dir".
uri YAHOO_REDIR /^https?\:\/\/rd\.yahoo\.com\/(?:[0-9]{4,}|partner\b|dir\b)/i
describe YAHOO_REDIR Has Yahoo Redirect URI

# "lowmortgage" or "aboutmortgage" within 20 characters of the scheme
# (http, https or mailto).
uri MORTGAGE_LINKS /(?:^https?\:\/\/|^mailto\:).{0,20}(?:low|about)mortgage/i
describe MORTGAGE_LINKS Message has link to mortgage URI

# "offer", optionally followed by s/z or "-something", then .com/.biz/.bz.
uri URI_OFFERS m/offer([sz]|-\S+)?\.(?:com|bi?z)/i
describe URI_OFFERS Message has link to company offers

# The next three rules look for a keyword before the first "/" that follows
# the http(s):// or mailto: prefix.
uri URI_BANNEDCD m@^(?:https?://|mailto:)[^\/]*bannedcd@i
describe URI_BANNEDCD Message has URI for bannedcd

uri URI_FREEHT m@^(?:https?://|mailto:)[^\/]*freeht@i
describe URI_FREEHT Message has URI for freeht

uri URI_4YOU m@^(?:https?://|mailto:)[^\/]*4you@i
describe URI_4YOU Message has URI 4you