# SpamAssassin rules file: URI tests
#
# Please don't modify this file as your changes will be overwritten with
# the next update. Use @@LOCAL_RULES_DIR@@/local.cf instead.
# See 'perldoc Mail::SpamAssassin::Conf' for details.
#
# <@LICENSE>
# Licensed to the Apache Software Foundation (ASF) under one or more
# contributor license agreements. See the NOTICE file distributed with
# this work for additional information regarding copyright ownership.
# The ASF licenses this file to you under the Apache License, Version 2.0
# (the "License"); you may not use this file except in compliance with
# the License. You may obtain a copy of the License at:
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# </@LICENSE>
#
###########################################################################

require_version @@VERSION@@

# Redirector URI patterns
#
# Each pattern below contains exactly one capturing group, positioned where
# the redirector link carries its destination (a trailing path component or
# a url= / link= / goto= / r= parameter).
# NOTE(review): presumably the captured text is what SpamAssassin treats as
# the redirect target; confirm against 'perldoc Mail::SpamAssassin::Conf'.
#
# '#' is written as '\#' inside the patterns because an unescaped '#' would
# start a comment in this file format.

# Site-specific redirectors whose destination is the tail of the path.
redirector_pattern      /^http:\/\/chkpt\.zdnet\.com\/chkpt\/\w+\/(.*)$/i
redirector_pattern      /^http:\/\/www(?:\d+)?\.nate\.com\/r\/\w+\/(.*)$/i

# Any .gov host: destination is the url= parameter of externalLink.jhtml.
redirector_pattern      /^http:\/\/.+\.gov\/(?:.*\/)?externalLink\.jhtml\?.*url=(.*?)(?:&.*)?$/i

# Destination follows two arbitrary path components.
redirector_pattern      /^http:\/\/redir\.internet\.com\/.+?\/.+?\/(.*)$/i

# Destination is a link= value introduced by ';' or '|' and ended by ';' or
# end of string.
redirector_pattern      /^http:\/\/(?:.*?\.)?adtech\.de\/.*(?:;|\|)link=(.*?)(?:;|$)/i

# Generic redirect.php on any host; the look-behind requires that "goto" is a
# complete parameter name (directly preceded by '?' or '&').
redirector_pattern      m'^http.*?/redirect\.php\?.*(?<=[?&])goto=(.*?)(?:$|[&\#])'i

# emfN.com (single digit N), optionally under a subdomain; destination is
# everything after "&r=".
redirector_pattern      m'^https?:/*(?:[^/]+\.)?emf\d\.com/r\.cfm.*?&r=(.*)'i

# ExpressionEngine redirector
# see http://www.pmachine.com/forums/viewthread/29561/
# e.g. http://www.someEEBasedSite.com/index.php?URL=http://www.NastyPR0nSite.com
# e.g.
#      http://www.pmachine.com/ee/knowledgeblog/?URL=http://www.google.com
#
# Matches "/" followed by an optional "index.php" and a query string that
# contains a URL= parameter; the capture is the parameter value up to the
# next '&', '#' or end of string.  The pattern is not anchored, so it applies
# to any host.
# NOTE(review): the '.' in "index.php" is unescaped and therefore matches any
# single character; left as-is here.
redirector_pattern      m'/(?:index.php)?\?.*(?<=[?&])URL=(.*?)(?:$|[&\#])'i

# All Google patterns below accept an optional single-label subdomain and one
# or two 2-3 character suffix labels after "google" (google.com, google.co.uk,
# ...).  They match the "http:" scheme only.

# Google redirector.
# Common form:
#   http://www.google.com/url?sa=U&start=4&q=http://urlofspammer
#     -> http://urlofspammer
# Unhandled form:
#   http://www.google.com/url?q=http://urlofspammer/space&q=here
#     -> http://urlofspammer/space%20here
#   Redirector gets http://urlofspammer/space
#   http://www.google.com/url?q=http://urlof&q=spammer does not work
redirector_pattern      m'^http:/*(?:\w+\.)?google(?:\.\w{2,3}){1,2}/url\?.*?(?<=[?&])q=(.*?)(?:$|[&\#])'i

# Google site search
#   http://www.google.com/search?q=site:bluevallet.com
#     -> links to http://www.bluevallet.com/
# The second look-behind requires "site:" to start a search term: it must be
# preceded by "%20", or by '=', '+' or whitespace.
redirector_pattern      m'^http:/*(?:\w+\.)?google(?:\.\w{2,3}){1,2}/search\?.*?(?<=[?&])q=[^&]*?(?<=%20|..[=+\s])site:(.*?)(?:$|%20|[\s+&\#])'i

# Google search for pages that contain the site name
#   http://www.google.com/search?q="bluevallet.com"
#   http://www.google.com/search?q=%22bluevallet.com%22
#     -> links to search page that probably has http://bluevallet.com
#        at the top
# Captures the text between the opening quote (literal '"' or %22) and the
# closing quote, whitespace, '+', '&', '#' or end of string.
redirector_pattern      m'^http:/*(?:\w+\.)?google(?:\.\w{2,3}){1,2}/search\?.*?(?<=[?&])q=[^&]*?(?<=%20|..[=+\s])(?:"|%22)(.*?)(?:$|%22|["\s+&\#])'i

# Google translate
#   http://translate.google.com/translate?u=www.domain.tld&langpair=en%7Cen&hl=en
#     -> http://www.domain.tld inside a frame
redirector_pattern      m'^http:/*(?:\w+\.)?google(?:\.\w{2,3}){1,2}/translate\?.*?(?<=[?&])u=(.*?)(?:$|[&\#])'i

# URI rules: each "uri NAME <regex>" line is paired with a "describe NAME"
# line giving the rule's human-readable text.

# Host part begins with at least seven consecutive digits.
uri NUMERIC_HTTP_ADDR           /^https?\:\/\/\d{7}/is
describe NUMERIC_HTTP_ADDR      Uses a numeric IP address in URL

# Host part begins with four dot-separated digit groups.
uri NORMAL_HTTP_TO_IP           m{^https?://\d+\.\d+\.\d+\.\d+}i
describe NORMAL_HTTP_TO_IP      Uses a dotted-decimal IP address in URL

# Theo sez:
# Have gotten FPs off this, and whitespace can't be in the host, so...
# % Visit my homepage: http://i.like.foo.com %
#
# [^\/\s]* keeps the match inside the host part: it stops at the first '/' or
# whitespace, so a '%' that merely follows the URL in running text (as in the
# example above) does not hit.
uri HTTP_ESCAPED_HOST           /^https?\:\/\/[^\/\s]*%[0-9a-fA-F][0-9a-fA-F]/
describe HTTP_ESCAPED_HOST      Uses %-escapes inside a URL's hostname

# note: do not match \r or \n
# (the class also leaves out \x09, i.e. tab)
uri HTTP_CTRL_CHARS_HOST        /^https?\:\/\/[^\/\s]*[\x00-\x08\x0b\x0c\x0e-\x1f]/
describe HTTP_CTRL_CHARS_HOST   Uses control sequences inside a URL hostname

# look for URI with escaped 0-9, A-Z, or a-z characters (all other safe
# characters have been well-tested, but are sometimes unnecessarily escaped
# in nonspam; requiring "http" or "https" also reduces false positives).
#   %3N              -> 0-9
#   %41-%4f, %61-%6f -> A-O, a-o
#   %50-%5a, %70-%7a -> P-Z, p-z
uri HTTP_EXCESSIVE_ESCAPES      /^https?:\/\/\S*%(?:3\d|[46][1-9a-f]|[57][\da])/i
describe HTTP_EXCESSIVE_ESCAPES Completely unnecessary %-escapes inside a URL

# bug 1801
# Dotted-decimal host with one of the keywords within the next 20 characters.
uri IP_LINK_PLUS                m{^https?://\d+\.\d+\.\d+\.\d+.{0,20}(?:cgi|click|ads|id=)}i
describe IP_LINK_PLUS           Dotted-decimal IP address followed by CGI

# "remove" anywhere after the first '/' that follows the host.
# Case-sensitive (no /i flag).
uri REMOVE_PAGE                 /^https?:\/\/[^\/]+\/.*?remove/
describe REMOVE_PAGE            URL of page called "remove"

# Local part made of letters followed by two or more digits, then '@'.
uri MAILTO_TO_SPAM_ADDR         /^mailto:[a-z]+\d{2,}\@/is
describe MAILTO_TO_SPAM_ADDR    Includes a link to a likely spammer email

uri MAILTO_TO_REMOVE            /^mailto:.*?remove/is
describe MAILTO_TO_REMOVE       Includes a 'remove' email address

# allow ports 80 and 443 which are http and https, respectively
# we don't want to hit http://www.cnn.com:USArticle1840@www.liquidshirts.com/
# though, which actually doesn't have a weird port in it.
#
# NOTE(review): in the copy under review, everything from the first negative
# look-behind of WEIRD_PORT up to the character class of the (commented-out)
# HTTP_ENTITIES_HOST pattern was missing -- it reads as if text between '<'
# and '>' had been stripped -- leaving an invalid "(?" group and no
# "describe WEIRD_PORT" line.  Reconstruction used here:
#   * the look-behinds for :80 and :443 follow the comment above;
#   * the terminating (?:/|\s|$) is required so that \d+ cannot backtrack to
#     a shorter number (e.g. accept the "8" of ":80") and slip past the
#     look-behinds;
#   * the describe text and the leading part of the HTTP_ENTITIES_HOST
#     pattern are NOT recoverable from this copy.
# Confirm all of it, including whether further ports (e.g. 8080) should be
# exempt, against the upstream rules file before relying on it.
uri WEIRD_PORT                  m{https?://[^/\s]+?:\d+(?<!:80)(?<!:443)(?:/|\s|$)}
describe WEIRD_PORT             Uses non-standard port number for HTTP

# Disabled rule (its describe line is commented out as well).
#uri HTTP_ENTITIES_HOST         m{https?://[^\s\">/]*\&\#[\da-f]+}i
#describe HTTP_ENTITIES_HOST    URI obscured with character entities

# rd.yahoo.com followed by four digits, "partner" or "dir".
uri YAHOO_RD_REDIR              m{^https?\://rd\.yahoo\.com/(?:[0-9]{4}|partner\b|dir\b)}i
describe YAHOO_RD_REDIR         Has Yahoo Redirect URI

uri YAHOO_DRS_REDIR             m{^https?://drs\.yahoo\.com/}i
describe YAHOO_DRS_REDIR        Has Yahoo Redirect URI

# "offer", "offers", "offerz" or "offer-<something>" directly before
# .com, .biz or .bz.
uri URI_OFFERS                  m/offer([sz]|-\S+)?\.(?:com|bi?z)/i
describe URI_OFFERS             Message has link to company offers

# "4you" before the first '/', for http(s) and mailto URIs.
uri URI_4YOU                    m@^(?:https?://|mailto:)[^\/]*4you@i
describe URI_4YOU               Message has URI 4you

# 0 nonspam hits, hundreds of spam hits.
# Serious problems there
uri TERRA_ES                    /terra\.es\//i
describe TERRA_ES               Contains URI to a document hosted at 'terra.es'

# "www" hidden as "%77%77%77", "ww%77", etc.
# note: *not* anchored to start of string, to catch use of redirectors
# (.{0,2} allows up to two unescaped characters before the first %77;
# the pattern is case-sensitive and matches "http://" only)
uri HTTP_77                     /http:\/\/.{0,2}\%77/
describe HTTP_77                Contains an URL-encoded hostname (HTTP77)

# affiliateid, aff_id, aff_sub_id etc.
uri URI_AFFILIATE               /aff\w+id=/i
describe URI_AFFILIATE          Contains a URI with an affiliate ID code

# really a URI rule
# (declared as a "header" rule because it is implemented by an eval function
# rather than by a regex)
header URI_REDIRECTOR           eval:check_for_http_redirector()
describe URI_REDIRECTOR         Message has HTTP redirector URI

# The three SPOOF_* rules below look for a well-known TLD label in the middle
# of the hostname.  Labels are matched with \w+, so labels containing '-' do
# not take part in a match.

# a.com.b.c
uri SPOOF_COM2OTH               m{^https?://(?:\w+\.)+?com\.(?:\w+\.){2}}i
describe SPOOF_COM2OTH          URI contains ".com" in middle

# a.com.b.com
uri SPOOF_COM2COM               m{^https?://(?:\w+\.)+?com\.(?:\w+\.)+?com}i
describe SPOOF_COM2COM          URI contains ".com" in middle and end

# a.net.b.com
uri SPOOF_NET2COM               m{^https?://(?:\w+\.)+?(?:net|org)\.(?:\w+\.)+?com}i
describe SPOOF_NET2COM          URI contains ".net" or ".org", then ".com"

# CDNs (Akamai (edgesuite), Speedera, and NYUD, so far) do this, so skip them
# (the negative look-ahead after the TLD-like label is what exempts
# *.edgesuite.net, *.nyud.net and *.speedera.net; an optional ":port" or
# "%3aport" may follow the host)
uri SPOOF_OURI                  m{^https?://(?:[a-z0-9_-]+?\.){2,}(?:com|net|org|biz|info|edu|www)(?!\.(?:\w+\.)?(?:edgesuite|nyud|speedera)\.net)(?:\.[a-z0-9_%-]+?){2,}(?:(?::|%3a)\d+)?}i
describe SPOOF_OURI             URI has items in odd places

# The next three rules inspect only the part before the first '/' or '?'.

# Six or more digits delimited by word boundaries.
uri URI_DIGITS                  m%^https?://[^/?]*\b\d{6,}\b%i
describe URI_DIGITS             URI hostname has long digit sequence

# Six or more hex digits delimited by word boundaries.
uri URI_HEX                     m%^https?://[^/?]*\b[0-9a-f]{6,}\b%i
describe URI_HEX                URI hostname has long hexadecimal sequence

# Seven consecutive consonants ('y' is not in the class).
uri URI_NOVOWEL                 m%^https?://[^/?]*[bcdfghjklmnpqrstvwxz]{7}%i
describe URI_NOVOWEL            URI hostname has long non-vowel sequence

# gone.php, opened.php or out.php anywhere in the URI.
uri URI_UNSUBSCRIBE             /\b(?:gone|opened|out)\.php/i
describe URI_UNSUBSCRIBE        URI contains suspicious unsubscribe link

# Deliberately case-sensitive: only the scheme is matched case-insensitively
# (via the inline (?i:...) group), so that two or more leading labels of the
# form "Abcde." can be recognised.
uri URI_UPPER_LOWER             m{(?i:https?)://([A-Z][a-z]+\.){2,}[A-Za-z\d-]+\.[a-z]{2,4}(?:[?/]|$)}
describe URI_UPPER_LOWER        URI contains capitalized hostname parts ("Abcde")

# bug 3896: URIs in various TLDs, other than 3rd level www
# NOTE(review): the rule introduced by this comment starts with the "uri"
# keyword below and continues on the following line of the file; the keyword
# and the rest of the rule must end up on a single line.
uri
URI_NO_WWW_INFO_CGI /^(?:https?:\/\/)?[^\/]+(?