# URI tests
#
# Each "uri" line below defines a named test as a Perl regular expression;
# the "describe" line that follows it supplies the human-readable text
# associated with that test name.

require_version 2.40

# Host written as one long run of digits (7 or more) instead of a name.
uri NUMERIC_HTTP_ADDR		/^https?\:\/\/\d{7,}/is
describe NUMERIC_HTTP_ADDR	Uses a numeric IP address in URL

# Host written as a dotted quad, e.g. http://10.1.2.3/
uri NORMAL_HTTP_TO_IP		/^https?\:\/\/\d+\.\d+\.\d+\.\d+/is
describe NORMAL_HTTP_TO_IP	Uses a dotted-decimal IP address in URL

# An "@" before the first "/" means the authority part carries userinfo.
uri HTTP_USERNAME_USED		/^https?\:\/\/[^\s\/]+\@/is
describe HTTP_USERNAME_USED	Uses a username in a URL

# "=address@domain.tld" followed by "&" or whitespace, i.e. an email address
# passed as a parameter value.  The final label is limited to 2-3 characters.
uri HTTP_WITH_EMAIL_IN_URL	/^https?\:\/\/\S+=[-_\+a-z0-9\.]+\@[-_\+a-z0-9\.]+\.[-_\+a-z0-9]{2,3}(?:\&|\s)/
describe HTTP_WITH_EMAIL_IN_URL	'remove' URL contains an email address

# Theo sez:
# Have gotten FPs off this, and whitespace can't be in the host, so...
# % Visit my homepage: http://i.like.foo.com %
# (hence the host part is matched with [^\/\s]* rather than [^\/]*)
uri HTTP_ESCAPED_HOST		/^https?\:\/\/[^\/\s]*%/
describe HTTP_ESCAPED_HOST	Uses %-escapes inside a URL's hostname

# note: do not match \r or \n (tab, \x09, is skipped by the class as well)
uri HTTP_CTRL_CHARS_HOST	/^https?\:\/\/[^\/]*[\x00-\x08\x0b\x0c\x0e-\x1f]/
describe HTTP_CTRL_CHARS_HOST	Uses control sequences inside a URL's hostname

# One of the listed words must occur before a "." with only word characters,
# dots and hyphens between it and the scheme.  Case-sensitive as written.
uri PORN_4			/^https?:\/\/[\w\.-]*(?:xxx|sex|anal|slut|pussy|cum|nympho|suck|porn|hard-?core|taboo|whore|voyeur|lesbian|gurlpages|naughty|lolita|teen|schoolgirl|kooloffer|erotic|lust|panty|panties)[\w-]*\./
describe PORN_4			URL uses words and phrases which indicate porn (4)

# some frequently-advertised URLs
uri WWW_CLIK4YOU_COM		/clik4you\.com/i
describe WWW_CLIK4YOU_COM	Frequent SPAM content

# "cgi" somewhere in the URL, followed later by "unsubscribe" or "remove".
uri UNSUB_SCRIPT		/^https?:\/\/.*?cgi.*?(?:unsubscribe|remove)/i
describe UNSUB_SCRIPT		URL of CGI script called "unsubscribe" or "remove"

# "unsubscribe" with no "cgi" anywhere before it.  The previous pattern,
# /^https?:\/\/.*?(?!cgi).*?unsubscribe/i, had a lookahead that never
# excluded anything: the surrounding .*? could always settle on a position
# where (?!cgi) succeeds.  The tempered dot below tests every character
# position ahead of "unsubscribe" instead.
uri UNSUB_PAGE			/^https?:\/\/(?:(?!cgi).)*?unsubscribe/i
describe UNSUB_PAGE		URL of page called "unsubscribe"

# "remove" anywhere after the first "/" that follows the host.
# Case-sensitive as written.
uri REMOVE_PAGE			/^https?:\/\/[^\/]+\/.*?remove/
describe REMOVE_PAGE		URL of page called "remove"

# The class [3D=\s"'] skips any run of "3", "D", "=", whitespace and quote
# characters before "remove" -- presumably to tolerate a leftover
# quoted-printable "=3D" and quoting around the subject value.
uri MAILTO_WITH_SUBJ_REMOVE	/^mailto:\S+\?subject=[3D=\s"']*remove/is
describe MAILTO_WITH_SUBJ_REMOVE	Includes a URL link to send an email with the subject 'remove'

# Any mailto: link that presets a subject.
uri MAILTO_WITH_SUBJ		/^mailto:\S+\?subject=/is
describe MAILTO_WITH_SUBJ	Includes a link to send a mail with a subject

# Local part made of letters followed by two or more digits, e.g. bob1234@
uri MAILTO_TO_SPAM_ADDR		/^mailto:[a-z]+\d{2,}\@/is
describe MAILTO_TO_SPAM_ADDR	Includes a link to a likely spammer email address

# "remove" anywhere in a mailto: link.
uri MAILTO_TO_REMOVE		/^mailto:.*?remove/is
describe MAILTO_TO_REMOVE	Includes a 'remove' email address

# one spamhaus uses servers numbered like this:
# (first host label is an English number word, "zero" through "twenty")
uri HTTP_NUMBER_WORD		/^https?:\/\/(?:zero|one|two|three|four|five|six|seven|eight|nine|ten|eleven|twelve|thirteen|fourteen|fifteen|sixteen|seventeen|eighteen|nineteen|twenty)\./i
describe HTTP_NUMBER_WORD	URL contains spamhaus signature: numbered servers

uri JAVASCRIPT_URI		/^javascript:/i
describe JAVASCRIPT_URI		Javascript protocol in a URI

# Explicit port other than the HTTP/HTTPS defaults.  The lookbehinds reject
# ":80" and ":443", which the previous pattern also flagged.  The trailing
# group requires the digits to end the authority part ("/", "?", whitespace
# or end of string), so "user:12345@host" userinfo is not taken for a port.
uri WEIRD_PORT			m{https?:\/\/[^/:\s]+:\d+(?<!:80)(?<!:443)(?:[/?\s]|$)}
describe WEIRD_PORT		Uses non-standard port number for HTTP

# freqs: 0.003 0.008 0.000 1.00 1.00 WWW_AUTOREMOVE_COM
uri WWW_AUTOREMOVE_COM		/autoremove\.com/i
describe WWW_AUTOREMOVE_COM	Frequent SPAM content

uri WWW_TRAFFICWOW_NET		/trafficwow\.net/i
describe WWW_TRAFFICWOW_NET	Frequent SPAM content