# SpamAssassin rules file: body tests # # Please don't modify this file as your changes will be overwritten with # the next update. Use @@LOCAL_RULES_DIR@@/local.cf instead. # See 'perldoc Mail::SpamAssassin::Conf' for details. # # Note: body tests are run with long lines, so be sure to limit the # size of searches; use /.{0,30}/ instead of /.*/ to avoid huge # search times. # ########################################################################### require_version @@VERSION@@ ########################################################################### # GTUBE test - the generic test for UBE. body GTUBE /XJS\*C4JDBQADN1\.NSBN3\*2IDNEN\*GTUBE-STANDARD-ANTI-UBE-TEST-EMAIL\*C\.34X/ describe GTUBE Generic Test for Unsolicited Bulk Email ########################################################################### # Message digest tests full RAZOR_CHECK eval:check_razor1() describe RAZOR_CHECK Listed in Razor1, see http://razor.sf.net/ tflags RAZOR_CHECK net full RAZOR2_CHECK eval:check_razor2() describe RAZOR2_CHECK Listed in Razor2, see http://razor.sf.net/ tflags RAZOR2_CHECK net full DCC_CHECK eval:check_dcc() describe DCC_CHECK Listed in DCC, see http://rhyolite.com/anti-spam/dcc/ tflags DCC_CHECK net full PYZOR_CHECK eval:check_pyzor() describe PYZOR_CHECK Listed in Pyzor, see http://pyzor.sf.net/ tflags PYZOR_CHECK net ########################################################################### # Spam phrase scores are absolute numbers, not percentages. # # The Fibonacci sequence was used to reduce the dependence on the # distribution and magnitude of scores and to avoid any sharp cut-offs. 
#body SPAM_PHRASE_00_01 eval:check_for_spam_phrases('00', '01') #describe SPAM_PHRASE_00_01 Spam phrases score is 00 to 01 (low) #body SPAM_PHRASE_01_02 eval:check_for_spam_phrases('01', '02') #describe SPAM_PHRASE_01_02 Spam phrases score is 01 to 02 (low) #body SPAM_PHRASE_02_03 eval:check_for_spam_phrases('02', '03') #describe SPAM_PHRASE_02_03 Spam phrases score is 02 to 03 (medium) #body SPAM_PHRASE_03_05 eval:check_for_spam_phrases('03', '05') #describe SPAM_PHRASE_03_05 Spam phrases score is 03 to 05 (medium) #body SPAM_PHRASE_05_08 eval:check_for_spam_phrases('05', '08') #describe SPAM_PHRASE_05_08 Spam phrases score is 05 to 08 (medium) body SPAM_PHRASE_08_13 eval:check_for_spam_phrases('08', '13') describe SPAM_PHRASE_08_13 Spam phrases score is 08 to 13 (medium) body SPAM_PHRASE_13_21 eval:check_for_spam_phrases('13', '21') describe SPAM_PHRASE_13_21 Spam phrases score is 13 to 21 (high) body SPAM_PHRASE_21_34 eval:check_for_spam_phrases('21', '34') describe SPAM_PHRASE_21_34 Spam phrases score is 21 to 34 (high) body SPAM_PHRASE_34_55 eval:check_for_spam_phrases('34', '55') describe SPAM_PHRASE_34_55 Spam phrases score is 34 to 55 (high) body SPAM_PHRASE_55_XX eval:check_for_spam_phrases('55', 'undef') describe SPAM_PHRASE_55_XX Spam phrases score 55 or higher (high) ########################################################################### body REMOVE_SUBJ /remove.{1,15}subject/i describe REMOVE_SUBJ List removal information body SUBJ_REMOVE /\w.{0,40}subject.{1,15}remove/i describe SUBJ_REMOVE List removal information body REPLY_REMOVE_SUBJECT /reply.{1,15}remove.{1,15}subject/i describe REPLY_REMOVE_SUBJECT List removal information body REMOVE_IN_QUOTES /\"remove\"/i describe REMOVE_IN_QUOTES List removal information body DISCONTINUE /\"discontinue\".{1,15}no further notices/i describe DISCONTINUE List removal information body REMOVE_CLICK /click here to be (?:permanently )?(?:removed|deleted)/i describe REMOVE_CLICK Click to be removed body 
REMOVE_FROM_LIST /to be removed from (?:the|my|our) (?:mailing|e.?mail|opt[ -]?in)? ?list/i describe REMOVE_FROM_LIST To be removed from list body REMOVE_RESPECT /(?:respect|honou?r) all removal requests/i describe REMOVE_RESPECT We respect all removal requests # bug 1022: MGM_POSTCARD #2 original body REMOVE_POSTAL /(?:Send(?:ing|) a (?:postal mail|postcard) to (?:Unsubscribe|Customer ?Service|remove)|unsubscribe by (?:postal mail|postcard))/i describe REMOVE_POSTAL Send real mail to be unsubscribed ########################################################################### # HTML parser tests # # please sort these by eval type then name body HTML_TITLE_UNTITLED eval:html_eval('title_text', '=~ /Untitled/i') describe HTML_TITLE_UNTITLED HTML title contains "Untitled" body HTML_TITLE_EMPTY eval:html_eval('title_text', '!~ /\S/s') describe HTML_TITLE_EMPTY HTML title contains no text body HTML_50_70 eval:html_percentage('50','70') describe HTML_50_70 Message is 50-70% HTML tags body HTML_70_90 eval:html_percentage('70','90') describe HTML_70_90 Message is 70-90% HTML tags body HTML_90_100 eval:html_percentage('90','100') describe HTML_90_100 Message is 90-100% HTML tags body HTML_TABLE_THICK_BORDER eval:html_test('thick_border') describe HTML_TABLE_THICK_BORDER HTML table has thick border body HTML_COMMENT_EMAIL eval:html_test('comment_email') describe HTML_COMMENT_EMAIL HTML comment contains email address body HTML_COMMENT_SKY eval:html_test('comment_sky') describe HTML_COMMENT_SKY HTML comment contains SKY database codes body HTML_COMMENT_UNIQUE_ID eval:html_test('comment_unique_id') describe HTML_COMMENT_UNIQUE_ID Contains a comment with nothing but unique ID body HTML_COMMENT_8BITS eval:html_test('comment_8bit') describe HTML_COMMENT_8BITS HTML comment has 3 consecutive 8-bit characters body HTML_COMMENT_SAVED_URL eval:html_test('comment_saved_url') describe HTML_COMMENT_SAVED_URL HTML message is a saved web page body HTML_EMBEDS eval:html_test('embeds') 
describe HTML_EMBEDS HTML with embedded plugin object body HTML_FONT_BIG eval:html_test('big_font') describe HTML_FONT_BIG FONT Size +2 and up or 3 and up body HTML_FONT_COLOR_NOHASH eval:html_test('font_color_nohash') describe HTML_FONT_COLOR_NOHASH HTML font color is missing hash (#) character body HTML_FONT_COLOR_UNSAFE eval:html_test('font_color_unsafe') describe HTML_FONT_COLOR_UNSAFE HTML font color not within safe 6x6x6 palette body HTML_FONT_COLOR_NAME eval:html_test('font_color_name') describe HTML_FONT_COLOR_NAME HTML font color has unusual name body HTML_FONT_INVISIBLE eval:html_test('font_invisible') describe HTML_FONT_INVISIBLE HTML font color is same as background body HTML_FONT_COLOR_GRAY eval:html_test('font_gray') describe HTML_FONT_COLOR_GRAY HTML font color is gray body HTML_FONT_COLOR_RED eval:html_test('font_red') describe HTML_FONT_COLOR_RED HTML font color is red body HTML_FONT_COLOR_YELLOW eval:html_test('font_yellow') describe HTML_FONT_COLOR_YELLOW HTML font color is yellow body HTML_FONT_COLOR_GREEN eval:html_test('font_green') describe HTML_FONT_COLOR_GREEN HTML font color is green body HTML_FONT_COLOR_CYAN eval:html_test('font_cyan') describe HTML_FONT_COLOR_CYAN HTML font color is cyan body HTML_FONT_COLOR_BLUE eval:html_test('font_blue') describe HTML_FONT_COLOR_BLUE HTML font color is blue body HTML_FONT_COLOR_MAGENTA eval:html_test('font_magenta') describe HTML_FONT_COLOR_MAGENTA HTML font color is magenta body HTML_FONT_COLOR_UNKNOWN eval:html_test('font_color_unknown') describe HTML_FONT_COLOR_UNKNOWN HTML font color is unknown to us body HTML_FONT_FACE_BAD eval:html_test('font_face_bad') describe HTML_FONT_FACE_BAD HTML font face is not a word body HTML_FONT_FACE_ODD eval:html_test('font_face_odd') describe HTML_FONT_FACE_ODD HTML font face is not a commonly used face body HTML_FONT_FACE_CAPS eval:html_test('font_face_caps') describe HTML_FONT_FACE_CAPS HTML font face has excess capital characters body HTML_FORM_ACTION_MAILTO 
eval:html_test('form_action_mailto') describe HTML_FORM_ACTION_MAILTO HTML includes a form which sends mail body HTML_JAVASCRIPT eval:html_test('javascript') describe HTML_JAVASCRIPT JavaScript code body HTML_JAVASCRIPT_VERY_UNSAFE eval:html_test('javascript_very_unsafe') describe HTML_JAVASCRIPT_VERY_UNSAFE Auto-executing JavaScript code # many spammers seem to do this nowadays (and probably track # their customers with it). (contrib: WW) body HTML_RELAYING_FRAME eval:html_test('relaying_frame') describe HTML_RELAYING_FRAME Frame wanted to load outside URL body HTML_WEB_BUGS eval:html_test('web_bugs') describe HTML_WEB_BUGS Image tag with an ID code to identify you body HTML_WIN_BLUR eval:html_test('window_blur') describe HTML_WIN_BLUR Javascript to move windows around body HTML_WIN_OPEN eval:html_test('window_open') describe HTML_WIN_OPEN Javascript to open a new window body HTML_WITH_BGCOLOR eval:html_test('bgcolor_nonwhite') describe HTML_WITH_BGCOLOR HTML mail with non-white background body HTML_TAG_BALANCE_A eval:html_tag_balance('a', '< 0') describe HTML_TAG_BALANCE_A HTML has excess "a" close tags body HTML_TAG_BALANCE_FONT eval:html_tag_balance('font', '< 0') describe HTML_TAG_BALANCE_FONT HTML has excess "font" close tags body HTML_TAG_BALANCE_HTML eval:html_tag_balance('html', '!= 0') describe HTML_TAG_BALANCE_HTML HTML has unbalanced "html" tags body HTML_TAG_BALANCE_BODY eval:html_tag_balance('body', '!= 0') describe HTML_TAG_BALANCE_BODY HTML has unbalanced "body" tags body HTML_TAG_BALANCE_HEAD eval:html_tag_balance('head', '!= 0') describe HTML_TAG_BALANCE_HEAD HTML has unbalanced "head" tags body HTML_TAG_BALANCE_TABLE eval:html_tag_balance('table', '> 0') describe HTML_TAG_BALANCE_TABLE HTML is missing "table" close tags body HTML_TAG_EXISTS_BASE eval:html_tag_exists('base') describe HTML_TAG_EXISTS_BASE HTML has "base" tags body HTML_TAG_EXISTS_PARAM eval:html_tag_exists('param') describe HTML_TAG_EXISTS_PARAM HTML has "param" tag body 
HTML_TAG_EXISTS_TBODY eval:html_tag_exists('tbody') describe HTML_TAG_EXISTS_TBODY HTML has "tbody" tag ########################################################################### # rawbody HTML tests rawbody JAVASCRIPT_UNSAFE /\bon(?:Blur|Change|Focus|Error|Key(?:Press|Down|Up)|Mouse(?:Down|Up|Over|Move|Out)|Resize|Move|Scroll|Stop|Click)[\s=3d\"\']*\S+[\"\']?/i describe JAVASCRIPT_UNSAFE Easily-executed JavaScript code # 0.001 0.000 0.002 0.00 1.00 JAVASCRIPT_OBFUSCATING (low matches) #rawbody JAVASCRIPT_OBFUSCATING /charCodeAt|fromCharCode/i #describe JAVASCRIPT_OBFUSCATING An attempt to hide spam inside obfuscating Javascript code rawbody SPAM_FORM /CHANGE EMAIL ADDRESS IN ACTION OF FORM/ describe SPAM_FORM Form for changing email address rawbody SPAM_FORM_RETURN /return validate_form/ describe SPAM_FORM_RETURN Form for checking email address rawbody SPAM_FORM_ACTION /action="\&\#\d+;\&\#\d+;\&\#\d+;\&\#\d+;/i describe SPAM_FORM_ACTION Obfuscated action attribute in HTML form rawbody SPAM_FORM_INPUT /][^\s<]/ describe OBFUSCATING_COMMENT HTML comments which obfuscate text rawbody HIDE_WIN_STATUS /<[^>]+onMouseOver=[^>]+window\.status=/i describe HIDE_WIN_STATUS Javascript to hide URLs in browser rawbody LINK_TO_NO_SCHEME /\s+href=['"]?www\./i describe LINK_TO_NO_SCHEME Contains link without http:// prefix ########################################################################### body BUGGY_CGI /Below is the result of your feedback form/ describe BUGGY_CGI Broken CGI script message # possible replacement for SENT_IN_COMPLIANCE body SENT_IN_COMPLIANCE /(?:e.?mail|message) .{0,10}sen[dt] (?:to you )?in (?:\w{1,10} )?compliance (?:of|with)/i describe SENT_IN_COMPLIANCE Claims compliance with spam regulations body PARA_A_2_C_OF_1618 /Paragraph *.a.{0,10}2.{0,10}C\. of S\. 
1618/i describe PARA_A_2_C_OF_1618 Claims compliance with Senate Bill 1618 body BILL_1618 /Bill.{0,10}1618.{0,10}TITLE.{0,10}(?:III|\#3)/i describe BILL_1618 Claims compliance with Senate Bill 1618 body S_1618 /S\..{0,10}1618.{0,10}-.{0,10}SECTION.{0,10}301/i describe S_1618 Claims compliance with Senate Bill 1618 body UNDER_BILL_1618 /Under Bill s.?1618/i describe UNDER_BILL_1618 Claims compliance with Senate Bill 1618 body SECTION_301 /SECTION.{0,10}301/i describe SECTION_301 Claims compliance with spam regulations body HR_4176 /H\.?R\.? *4176.{0,10}SECTION.{0,10}101/i describe HR_4176 Claims compliance with House Bill 4176 body CHECK_OR_MONEY_ORDER /check or money order/i describe CHECK_OR_MONEY_ORDER Talk about a check or money order rawbody CARRIAGE_RETURNS eval:check_carriage_returns() describe CARRIAGE_RETURNS Message contains a lot of ^M characters # fixed by ms to not allow entry field onto next line rawbody ASCII_FORM_ENTRY /[^<][A-Za-z][A-Za-z]+.{1,15}?[\x09\x20]*_{30,}/ describe ASCII_FORM_ENTRY Contains an ASCII-formatted form body AMAZING /\bAMAZING\b/ describe AMAZING Contains word 'amazing' in all-caps body GUARANTEE /\bGUARANTEE\b/ describe GUARANTEE Contains word 'guarantee' in all-caps body PROFITS /\bPROFITS\b/ describe PROFITS Contains word 'profits' in all-caps body NO_QS_ASKED /\bNO QUESTIONS ASKED\b/i describe NO_QS_ASKED Doesn't ask any questions body FULL_REFUND /full refund|refunds? 
your money in full/i describe FULL_REFUND Offers a full refund body FOR_FREE /\bfor (?-i:FREE)\b/i describe FOR_FREE No such thing as a free lunch (1) body COMPLETELY_FREE /\b(?:100%|completely|totally|absolutely) (?-i:F)ree/i describe COMPLETELY_FREE No such thing as a free lunch (2) body NO_COST /\bno (?:cost|charge)\b/i describe NO_COST No such thing as a free lunch (3) body GUARANTEED_100_PERCENT /100% GUARANTEED/i describe GUARANTEED_100_PERCENT One hundred percent guaranteed body MONEY_MAKING /\bmoney mak(?:ing|er)/i describe MONEY_MAKING Discusses money making body BULK_EMAIL /bulk e-*mail/i describe BULK_EMAIL Talks about bulk email body DEAR_EMAIL /\bDear [A-Za-z0-9_-]+\@/ describe DEAR_EMAIL Dear you@you.com? body DEAR_FRIEND /^\s*Dear Friend\b/i describe DEAR_FRIEND How dear can you be if you don't know my name? body DEAR_SOMETHING /\bDear (?:IT\W|Internet|candidate|sirs?|madam|investor|travell?er|car shopper|web)\b/i describe DEAR_SOMETHING Contains 'Dear (something)' body DEAR_SOMEBODY /Dear [A-Z][a-z]+/ describe DEAR_SOMEBODY Contains 'Dear Somebody' body CALL_NOW /\bCALL NOW/i describe CALL_NOW Urges you to call now body CALL_FREE /\b(?:call|dial|toll free|order).{1,15}8(?:00|88|77|66|55|44|33|22)[\)\s-]*[\dA-Z]+[\s-]?[\dA-Z]+/i describe CALL_FREE Contains a tollfree number body ONLINE_BIZ_OPS /online business opportunities/i describe ONLINE_BIZ_OPS Wants you to do business online body BILLION_DOLLARS /[BM]ILLION DOLLAR/ describe BILLION_DOLLARS Talks about lots of money body OPT_IN /\bopt-in\b/ describe OPT_IN Talks about opting in (lowercase version) body OPT_IN_CAPS /(?-i:O)pt.?(?-i:I)n/i describe OPT_IN_CAPS Talks about opting in (capitalized version) body OPT_OUT /\bopt-out\b/ describe OPT_OUT Talks about opting out (lowercase version) body OPT_OUT_CAPS /(?-i:O)pt.?(?-i:O)ut/i describe OPT_OUT_CAPS Talks about opting out (capitalized version) body DIRECT_EMAIL /direct e-*mail\b/i describe DIRECT_EMAIL Talks about direct email body MASS_EMAIL 
/mass e-*mail/i describe MASS_EMAIL Talks about mass email body EMAIL_MARKETING /e-*mail marketing/i describe EMAIL_MARKETING Talks about email marketing body PRODUCED_AND_SENT_OUT /This a.?d is produced and sent out by/i describe PRODUCED_AND_SENT_OUT Tells you it's an ad body INCREASE_SOMETHING /\b(?:boost|increase|grow|larger|bigger|higher) (?:traffic|sales)\b/i describe INCREASE_SOMETHING Instructions on how to increase something body NEVER_ANOTHER /never receive another mailing/i describe NEVER_ANOTHER "never receive another mailing" # contrib: Duncan body ONE_TIME_MAILING /this\b.{0,20}\b(?:one|1).time\b.{0,20}\b(?:(?:e-?)?mail|offer)/i describe ONE_TIME_MAILING "one time mailing" doesn't mean it isn't spam # this one gets a few false positives body SOCIAL_SEC_NUMBER /social security (?:number|record)/i describe SOCIAL_SEC_NUMBER Talks about social security numbers body MILLION_EMAIL /million (?:\w+ )?(?:e-?mail )?addresses/i describe MILLION_EMAIL Get a million email addresses body ADDRESSES_ON_CD /addresses on cd/i describe ADDRESSES_ON_CD Only thing addresses on CD are useful for is spam body EXCUSE_1 /\b(?:You (?:were sent|have received|are receiving)|You're receiving).{0,15}(?:message|e-?mail)s? because/i describe EXCUSE_1 Gives a lame excuse about why you were sent this spam body EXCUSE_2 /If you did not opt.in/i describe EXCUSE_2 Claims you actually asked for this spam body EXCUSE_3 /to (?:be removed|be deleted|no longer receive th(?:is|ese) messages?) (?:from|send|reply|[e-]*mail)/i describe EXCUSE_3 Claims you can be removed from the list body EXCUSE_4 /To Be Removed,? Please/i describe EXCUSE_4 Claims you can be removed from the list body EXCUSE_5 /that your email address is removed/i describe EXCUSE_5 Claims you can be removed from the list # strange pattern because otherwise it matches the std. majordomo line # pls note the comment above. DO NOT just put "to" in the first group! 
body EXCUSE_6 /\b(?:wish to|click to) remove yourself/i describe EXCUSE_6 Claims you can be removed from the list body EXCUSE_7 /you (?:wish|want|would like|desire) to be removed/i describe EXCUSE_7 Claims you can be removed from the list body EXCUSE_10 /if you (?:(?:want|wish|care|prefer) not to |do ?n[o']t (?:want|wish|care) to )(?:be contacted again|receive (?:any ?)?(?:more|future|further)\b.{1,10}\b(?:e?-?mail|message|offer|solicitation)s?)/i describe EXCUSE_10 "if you do not wish to receive any more" body EXCUSE_11 /you.{0,15}(?:name|mail).{0,15}(?:was|were).{0,15}list/i describe EXCUSE_11 Claims you were on a list body EXCUSE_12 /this (?:e?-?mail|message) (?:(?:has )?reached|was sent to) you in error/i describe EXCUSE_12 Nobody's perfect body EXCUSE_13 /mail was sent to you because\b/i describe EXCUSE_13 Gives an excuse for why message was sent body EXCUSE_14 /you (?:do not|no longer) wish to receive/i describe EXCUSE_14 Tells you how to stop further spam body EXCUSE_15 /this\s*(?:e?-?mail|message)? (?:is|was) (?:not|never) (?:spam|(?:sent )?unsolicited)/i describe EXCUSE_15 Claims to be legitimate email body EXCUSE_16 /received this.{1,10}in error/i describe EXCUSE_16 I wonder how many emails they sent in error... body EXCUSE_18 /we do not (?:spam|send unsolicited)/i describe EXCUSE_18 Claims not to be spam body EXCUSE_19 /because (?:you're|you (?:are )?)(?:registered|.{0,20}\bopt.{0,3}in)/i describe EXCUSE_19 Claims you opted-in or registered body EXCUSE_20 /you registered at one of our/i describe EXCUSE_20 Claims you registered at their site body EXCUSE_21 /your e.?mail address was obtained/i describe EXCUSE_21 Claims your address was obtained legitimately body EXCUSE_22 /you are receiving this special offer/i describe EXCUSE_22 Claims you're receiving this offer for a reason body EXCUSE_23 /you have provided permission/i describe EXCUSE_23 Claims you have provided permission body EXCUSE_24 /you(?:'ve|'re| have| are)? 
receiv(?:e|ed|ing) this (?:advertisement|offer|special|recurring|paid).{0,16}\b(?:by either|because)/i describe EXCUSE_24 Claims you received an ad because you wanted it body EXCUSE_REMOVE /to be removed from.{0,20}(?:mailings|offers)/i describe EXCUSE_REMOVE Talks about how to be removed from mailings # jm: keep this case-sensitive, otherwise it FP's body VIAGRA /VIAGRA/ describe VIAGRA Plugs Viagra body NATURAL_VIAGRA /\bnatural viagra\b/i describe NATURAL_VIAGRA Plugs "Natural Viagra" # NB: Made it also match "Herbal V" - a viagra alternative body HERBAL_VIAGRA /\bherbal v(?:iagra)?\b/i describe HERBAL_VIAGRA Plugs "Herbal Viagra" # NB: Also seems to be a diabetes drug, so don't let it score high body TARGETED /\btargeted (?:traffic|e-?mail|internet|leads?)\b/i describe TARGETED Targeted Traffic / Email Addresses body LIMITED_TIME_ONLY /LIMITED TIME (?:ONLY|offer)/i describe LIMITED_TIME_ONLY Offers a limited time offer body STRONG_BUY /strong buy/i describe STRONG_BUY Tells you about a strong buy body WE_HONOR_ALL /we (?:honou?r|respect)(?: all)? remov(?:e|al) requests/i describe WE_HONOR_ALL Claims to honor removal requests # Jul 2 2002 jm: note that CommuniGate is *not* spam software, it's totally legit. # but beginning spammers often download a trial version and spam with it. body COMMUNIGATE /transferred with a trial version of CommuniGate/ describe COMMUNIGATE Sent using a trial version of CommuniGate # this seems to be the new fashion (as of Jul 5 2002). base64-encoded parts need to # be stripped before this match body TRACKER_ID /^[a-z0-9]{6,}[-_a-z0-9]{12,}[a-z0-9]{6,}\s*\z/is describe TRACKER_ID Incorporates a tracking ID number # heh heh heh... 
tags used to (presumably) generate those anti-Razor random IDs body MARKUP_RAND /{%RAND%}/ describe MARKUP_RAND RAND found, spammer forgot to run the random-ID generator body MARKUP_SSPL /SSPLTM/ describe MARKUP_SSPL SSPL found, spammer forgot to run the random-ID generator body OPPORTUNITY /OPPORTUNITY/ describe OPPORTUNITY Gives information about an opportunity body PURE_PROFIT /PURE PROFIT/i # this used to say 'Profit is dirty, not pure', but that's a bit extreme ;) describe PURE_PROFIT Offers pure profit body STOCK_PICK /STOCK PICK/i describe STOCK_PICK Offers a stock pick body STOCK_ALERT /stock alert/i describe STOCK_ALERT Offers a stock alert body MICRO_CAP_WARNING /Investing in micro-cap securities is highly speculative/i describe MICRO_CAP_WARNING SEC-mandated penny-stock warning -- thanks SEC body INVESTOR_SPEC_SHEET /Investor Spec Sheet/i describe INVESTOR_SPEC_SHEET Standard investment opportunity spam body NOT_ADVISOR /not a registered investment advisor/i describe NOT_ADVISOR Not a registered investment advisor body FREE_CONSULTATION /FREE CONSULTATION/i describe FREE_CONSULTATION Offers a free consultation body SOME_BREAKTHROUGH /(?:science|medical|major|scientific|fundamental|technology|revolutionary)\s+breakthrough/i describe SOME_BREAKTHROUGH Describes some sort of breakthrough body SELECTED_YOU /(?:you (?:have been|were) selected|we (?:have )?selected you)/i describe SELECTED_YOU Says you have been selected # (contrib: Matt Sergeant) body LARGE_HEX /[0-9a-fA-F]{70,}/ describe LARGE_HEX Contains a large block of hexadecimal code # somehow "/name.*\bcredit.?card\b/is" won't match, even if # it's there. 
*boggle* (contrib: WW) body WANTS_CREDIT_CARD /\bcredit.?card\s+order/i describe WANTS_CREDIT_CARD Asks for credit card details # (contrib: WW) body ASKS_BILLING_ADDRESS /\bbilling address\b/i describe ASKS_BILLING_ADDRESS Asks for a billing address # converted to use eval method by Matthew Cline body LINES_OF_YELLING eval:check_for_yelling() describe LINES_OF_YELLING A WHOLE LINE OF YELLING DETECTED body LINES_OF_YELLING_2 eval:check_for_num_yelling_lines("2") describe LINES_OF_YELLING_2 2 WHOLE LINES OF YELLING DETECTED body LINES_OF_YELLING_3 eval:check_for_num_yelling_lines("3") describe LINES_OF_YELLING_3 3 WHOLE LINES OF YELLING DETECTED ########################################################################### # these tests don't actually use rawbody since rawbody isn't raw enough; # they must be written very carefully to avoid modifying the original content rawbody BASE64_ENC_TEXT eval:check_for_mime_base64_encoded_text() describe BASE64_ENC_TEXT Message text disguised using base-64 encoding rawbody MIME_EXCESSIVE_QP eval:check_for_mime_excessive_qp() describe MIME_EXCESSIVE_QP Excessive quoted-printable encoding in body rawbody MIME_HTML_NO_CHARSET eval:check_for_mime_html_no_charset() describe MIME_HTML_NO_CHARSET Message text in HTML without specified charset rawbody MIME_LONG_LINE_QP eval:check_for_mime_long_line_qp() describe MIME_LONG_LINE_QP Quoted-printable line longer than 76 characters rawbody MIME_MISSING_BOUNDARY eval:check_for_mime_missing_boundary() describe MIME_MISSING_BOUNDARY MIME section missing boundary ########################################################################### # Actually indicates viruses, typically; just used here to clean corpora. 
rawbody MICROSOFT_EXECUTABLE eval:check_for_microsoft_executable() describe MICROSOFT_EXECUTABLE Message includes Microsoft executable program # todo: better tflags category for these tests tflags MICROSOFT_EXECUTABLE userconf rawbody MIME_SUSPECT_NAME eval:check_for_mime_suspect_name() describe MIME_SUSPECT_NAME MIME filename does not match content tflags MIME_SUSPECT_NAME userconf ########################################################################### # some full-text matches; note the [3D=\s"']* bits of the patterns, which # match some gibberish produced by quoted-printable encoding of HTML, often # in the middle of a HTML "attribute=value" pair. rawbody CLICK_HERE_LINK /click here.{0,100}<\/a>/is describe CLICK_HERE_LINK Tells you to click on a URL rawbody CLICK_HERE_CAPS_LINK /CLICK HERE.{0,100}<\/[aA]>/s describe CLICK_HERE_CAPS_LINK Tells you to click on a URL (in caps) body CLICK_BELOW /click .{0,30}(?:here|below)/is describe CLICK_BELOW Asks you to click below body CLICK_BELOW_CAPS /CLICK .{0,30}(?:HERE|BELOW)/s describe CLICK_BELOW_CAPS Asks you to click below (in caps) # (contrib: skod) body PRINT_FORM_SIGNATURE /Sign(?:ature)?\s*(?:here|please)?:.{0,30}___*/i describe PRINT_FORM_SIGNATURE Asks you for your signature on a form # (contrib: skod) body MAIL_IN_ORDER_FORM /\b(?:Mail-in|secure|ez|credit.card|fax this) Order Form\b/i describe MAIL_IN_ORDER_FORM Contains mail-in order form # (contrib: skod) body FOR_INSTANT_ACCESS /INSTANT ACCESS/i describe FOR_INSTANT_ACCESS Instant Access button # (contrib: skod) body UNIVERSITY_DIPLOMAS /\b(?:college|university)\s+diplomas/i describe UNIVERSITY_DIPLOMAS University Diplomas body PREST_NON_ACCREDITED /prestigi?ous\b.{0,20}\bnon-accredited\b.{0,20}\buniversities/i describe PREST_NON_ACCREDITED 'Prestigious Non-Accredited Universities' # (contrib: skod) body NEW_DOMAIN_EXTENSIONS /\bnew .{0,15}\bextension/i describe NEW_DOMAIN_EXTENSIONS Possible registry spammer body DOMAIN_BODY 
/(?:\s|^)(?:\.|dot\s+)(?:info|biz|name)\b|(?:\s|^)\.\w+ domain/mi describe DOMAIN_BODY Domain registration spam body body CLICK_TO_REMOVE_2 /mailto:.{0,50}click.{0,50}remove/is describe CLICK_TO_REMOVE_2 Click-to-remove with mailto: found beforehand # (contrib: climent) # (contrib: WW, mod by David Hull) # offline, massive FP rate body REMOVAL_INSTRUCTIONS /\b(?:REMOVAL|UNSUBSCRIBE) INSTRUCTIONS/i describe REMOVAL_INSTRUCTIONS Gives instructions for removal from list body CANNOT_BE_SPAM /cannot be considered spam/i describe CANNOT_BE_SPAM Claims "cannot be considered spam" # (contrib: WW) body THIS_AINT_SPAM /This.{0,30}is not (?:a )?spam/is describe THIS_AINT_SPAM Claims "This is not spam" # (contrib: WW) body WE_HATE_SPAM /We .{0,30}oppose the use of SPAM/is describe WE_HATE_SPAM Says "We strongly oppose the use of spam email" body THIS_IS_AN_AD /this message is an advertisement/i describe THIS_IS_AN_AD Says "this is an advertisement" (thanks!) # no hits #body IN_ACCORDANCE_WITH_LAWS /has been sent in accordance with/ #describe IN_ACCORDANCE_WITH_LAWS Claims to be in accordance with some Spam law body HR_3113 /H\.\s*R\.\s*3113/is describe HR_3113 Mentions Spam law "H.R. 3113" body UCE_MAIL_ACT /Unsolicited Commercial Electronic Mail Act/ describe UCE_MAIL_ACT Mentions Spam Law "UCE-Mail Act" # Jul 3 2002 jm: modified PENIS_ENLARGE patterns: removed "add", replaced with "inches", # because that seems to be another typical word in the pattern. 
body PENIS_ENLARGE /\b(?:enlarge|increase|grow|lengthen|larger\b|bigger\b|longer\b|thicker\b|\binches\b).{0,50}\b(?:penis|male organ|P[ -]?P\b|pee[ -]?pee|dick|sc?hlong|wh?anger|breast)/i describe PENIS_ENLARGE Information on getting a larger penis or breasts body PENIS_ENLARGE2 /\b(?:penis|male organ|P[ -]?P\b|pee[ -]?pee|dick|sc?hlong|wh?anger|breast).{0,50}\b(?:enlarge|increase|grow|lengthen|larger\b|bigger\b|longer\b|thicker\b|\binches\b)/i describe PENIS_ENLARGE2 Information on getting a larger penis or breasts (2) # 0.001 0.000 0.002 0.00 3.60 EJACULATION (low matches) #body EJACULATION /\b(?:increase|improve|greater).{0,10}ejaculation/i #describe EJACULATION Increase your ejaculation! body IMPOTENCE /\b(?:impotence (?:problem|cure|solution)|Premature Ejaculation|erectile dysfunction)/i describe IMPOTENCE Impotence cure body WORK_AT_HOME /\b(?:WORK|(?:MAKE|EARN).{1,10}(?:MONEY|\$+|BUCKS|CASH)).{1,10}(?:AT|FROM) (?:YOUR )?HOME/i describe WORK_AT_HOME Information on how to work at home (1) body HOME_EMPLOYMENT /HOME.{0,10}(?: EMPLOYMENT|WORKER|BUSINESS)/i describe HOME_EMPLOYMENT Information on how to work at home (2) body NO_EXPERIENCE /\bNo EXPERIENCE/i describe NO_EXPERIENCE No experience needed! body MORTGAGE_BEST /(?:low(?:est|er)?|free|second|rate|best|refinanc(?:e|ing)|online|instant) mortgage/i describe MORTGAGE_BEST Information on mortgages body MORTGAGE_PITCH /mortgage (?:rates?|quotes?|approv(?:al|ed)|payment|interest|loans?|app(?:\b|lication))/i describe MORTGAGE_PITCH Looks like mortgage pitch body MORTGAGE_RATES /Mortgage rates/i describe MORTGAGE_RATES Information on mortgage rates # detect "mort$age", but not "mortgage" body MORTGAGE_OBFU /mor[tga\$]*\$[ga\$]*e/i describe MORTGAGE_OBFU Attempt at obfuscating the word "mortgage" body TAKE_ACTION_NOW /take action now!/i describe TAKE_ACTION_NOW Tells you to 'take action now!' 
# ---------------------------------------------------------------------------
# Form / mailto / charset / advance-fee and common sales-phrase body rules.
#
# Layout: one directive per line.  A rule is introduced by `body`, `rawbody`
# or `full` followed by the rule name and either a /regex/ or an eval: test;
# the matching `describe` line carries the rule's description text.  `#`
# starts a comment that runs to the end of the line, so a directive must
# never share a line with a preceding comment.
# ---------------------------------------------------------------------------

body THE_FOLLOWING_FORM /the following form\b/i
describe THE_FOLLOWING_FORM Asks you to fill out a form

# this works best as rawbody
# do not add "subscribe", "unsubscribe", or "help"
# NOTE(review): this rule has no `describe` line -- confirm whether one is wanted.
rawbody MAILTO_WITH_SUBJ_REMOVE /mailto:.{0,64}\@.{0,64}\?subject=(?:\"|3D)*(?:remove?|delete|please.?(?:delete|remove|unsubscribe)|abuse|off\b|stop|take.?me.?off)/i

# include the \n\n so we don't match mailto's in the headers! Some list software
# uses these to handle auto-subscribe/unsubscribe features
# NOTE(review): the pattern below contains no \n\n; the comment above looks
# stale -- confirm before relying on it.
rawbody MAILTO_LINK /=[3D=\s"']*mailto:/is
describe MAILTO_LINK Includes a URL link to send an email

body AOL_USERS_LINK /AOL\s+Users\s+Click/is
describe AOL_USERS_LINK Includes a link for AOL users to click

# Cyber FirePower! rants about 'Internet terrorists' aka spam fighters
# (contrib: WW) (jm: tweaked to match again)
body INTERNET_TERROR_RANT /At the time of this mailing.{9,50}legitimate return email address.{100,299}internet terrorists/i
describe INTERNET_TERROR_RANT Cyber FirePower! rant about losing dropboxes

# Character-set / language tests (eval tests, flagged userconf)
body CHARSET_FARAWAY eval:check_for_faraway_charset()
describe CHARSET_FARAWAY Character set indicates a foreign language
tflags CHARSET_FARAWAY userconf

full CHARSET_FARAWAY_BODY eval:check_for_faraway_charset_in_body()
describe CHARSET_FARAWAY_BODY Character set indicates foreign language body
tflags CHARSET_FARAWAY_BODY userconf

body UNDESIRED_LANGUAGE_BODY eval:check_language()
describe UNDESIRED_LANGUAGE_BODY Written in an undesired language
tflags UNDESIRED_LANGUAGE_BODY userconf

# "Nigerian" advance-fee scam phrases
body NIGERIAN_TRANSACTION_1 /\b(?:financial|confiden(?:tial|ce)|safe(?:ty)?|mutual|secret|success(?:ful)?|risk-?free|details).{1,30}\btransaction\b/i
describe NIGERIAN_TRANSACTION_1 illegal Nigerian transactions (1)

body NIGERIAN_TRANSACTION_2 /\btransaction\b.{1,30}\b(?:magnitude|diplomatic|strict|absolute|secret|confiden(?:tial|ce)|guarantee)/i
describe NIGERIAN_TRANSACTION_2 illegal Nigerian transactions (2)

body US_DOLLARS /\b(?:\d{1,3})?Million\b.{0,40}\b(?:(?:United States?|Canadian) Dollar?s?|US(?:D|\$)|U\.? ?S\.? Dollar)/i
describe US_DOLLARS Nigerian scam key phrase (million dollars)

# jm: use {2,3} to avoid matching iso-2022-jp charset items. split into US_DOLLARS_4
# to still match all test cases
body US_DOLLARS_2 /\s(?:\$|US\$|usd?).?\d{2,3}(?:\.\d)?.?(?:m|millions?)\b/i
describe US_DOLLARS_2 Nigerian scam key phrase ($NNN.N m/USDNNN.N m/US$NN.N m)

body US_DOLLARS_3 /(?:\$|usd).?\d{1,3}[,.]\d{3}[,.]\d{3}(?:[,.]\d\d)?/i
describe US_DOLLARS_3 Nigerian scam key phrase ($NN,NNN,NNN.NN)

body US_DOLLARS_4 /\s(?:\$|US\$|usd?).?\d{1,3}\.\d+.?(?:m|millions?)\b/i
describe US_DOLLARS_4 Nigerian scam key phrase ($NNN.N m/USDNNN.N m/US$NN.N m)

rawbody FRONTPAGE /FrontPage.Editor/
describe FRONTPAGE Frontpage used to create the message

# some very frequent spam subjects, based on statistical analysis
body KIFF /temple kiff/i
describe KIFF Contains "Temple Kiff"

body CBYI /CBYI/
describe CBYI Contains "CBYI"

body JODY /\b(?:My wife|Mi esposa), Jody/
describe JODY Contains "My wife, Jody" testimonial

body GENTLE_FEROCITY /Gentle Ferocity/i
describe GENTLE_FEROCITY Contains "Gentle Ferocity"

body VJESTIKA /Vjestika Aphrodisia/i
describe VJESTIKA Contains "Vjestika Aphrodisia"

body TONER /\b(?:toner|ink(?:[-\s]*jet)?|fax|copier)[-\s]+cartridge/i
describe TONER Contains "Toner Cartridge"

# 0.01 0.01 0.01 0.64 1.80 ONCE_IN_LIFETIME (low matches)
#body ONCE_IN_LIFETIME /once in a lifetime opportunity/i
#describe ONCE_IN_LIFETIME Once in a lifetime, apparently

body YOUR_INCOME /\byour income\b/i
describe YOUR_INCOME Doing something with my income

body BE_AMAZED /\bbe amazed\b/i
describe BE_AMAZED Apparently, you'll be amazed

# NOTE(review): the final `.` is unescaped, so it matches any character after
# "you", not only a full stop.  Left as-is because tightening it would stop
# matching variants such as "you!" -- confirm which is intended.
body RESISTANCE_IS_FUTILE /Replying to this email will not unsubscribe you./i
describe RESISTANCE_IS_FUTILE Resistance to this spam is futile

# contrib: thelton /at/ donet.com
body GREAT_OFFER /\b(?:offer expires|see full offer for details|great offer)/i
describe GREAT_OFFER Trying to offer you something

body SUBJ_2_CREDIT /subject to credit approval/i
describe SUBJ_2_CREDIT Contains 'subject to credit approval'

body URGENT_BIZ /urgent.{0,16}(?:assistance|business|buy|confidential|notice|proposal)/i
describe URGENT_BIZ Contains urgent matter

body EARN_PER_WEEK /\b(?:earn|make).{1,20}\d\d\d+.{1,30}(?:per week|per month|weekly|monthly)/i
describe EARN_PER_WEEK Contains 'earn $something per week'

# contrib: Wayne A Tucker
body PENNIES_A_DAY /for (?:just|only) pennies a day/i
describe PENNIES_A_DAY Contains 'for only pennies a day'

body FOR_JUST_SOME_AMT /for (?:just|only) \$?\d+\.?\d*[^\.]*!/i
describe FOR_JUST_SOME_AMT Contains 'for only' some amount of cash

# suggestion following Jason Haar
body READ_TO_END /read this (?:e-?mail )?to the end/i
describe READ_TO_END You'd better read all of this spam!

body ALL_NATURAL /\b(?:100%|completely|totally|all) natural/i
describe ALL_NATURAL Spam is 100% natural?!

body MONEY_BACK /money back guarantee/i
describe MONEY_BACK Money back guarantee.

body NO_CATCH /there is no catch/i
describe NO_CATCH There is no catch.

body NO_OBLIGATION /no obligation/i
describe NO_OBLIGATION There is no obligation.

# The pattern deliberately accepts several misspellings of "disappointed".
body NO_DISSAPOINTMENT /You won'?t be diss?app?ointed/i
describe NO_DISSAPOINTMENT You won't be dissapointed.

body SERIOUS_ONLY /Serious [IE]nquiries Only/i
describe SERIOUS_ONLY Serious Enquiries Only.

body RISK_FREE /\b(?:risk[ -]free|no[ -]risk)/i
describe RISK_FREE Risk free. Suuurreeee....

# "seen on TV", "seen on ABC/NBC/etc", "seen on XYZ TV", or
# "seen on:"
body AS_SEEN_ON /seen on\b\s*(?:TV|ABC|NBC|CBS|CNN|Oprah|USA Today|48 Hours|New York Times|\w+\s+TV|:)/i
describe AS_SEEN_ON As seen on national TV!

body NOT_INTENDED /not intended for residents (?:of|in)\b/i
describe NOT_INTENDED Not intended for residents of XYZ.
# ---------------------------------------------------------------------------
# Pyramid-scheme, "free ...", credit/debt and search-engine promotion phrases.
#
# Layout: one directive per line (`body NAME /regex/flags`, then
# `describe NAME text`).  `#` comments run to the end of the line, so a
# directive must never share a line with a preceding comment.
# ---------------------------------------------------------------------------

# This phrase appears in many pyramid scheme mails in which
# "My Wife Jody" testimonials are absent
body COPY_ACCURATELY /copy.{1,10}name.{1,10}address.{1,10}ACCURATELY\b/i
describe COPY_ACCURATELY Common pyramid scheme phrase (1)

body SEE_FOR_YOURSELF /See (?:for|it) yourself\b/i
describe SEE_FOR_YOURSELF See for yourself

body ORDER_NOW /\border (?:now|soon|fast|quickly|while)\b/i
describe ORDER_NOW Encourages you to waste no time in ordering

## Contrib: Marc Perkel
body OFFSHORE_SCAM /\boffshore\b.{0,20}(?:credit card|companies|account|financ|websites?)/i
describe OFFSHORE_SCAM Off Shore Scams

body VACATION_SCAM /\b(?:free|mini-?|dream|special).{0,10}vacation|vacation (?:offer|promotion|package|for two|getaway)/i
describe VACATION_SCAM Vacation Offers

body WHY_PAY_MORE /\bwhy pay more\b/i
describe WHY_PAY_MORE Why Pay More?

body CONGRATULATIONS /\bcongratulations! you/i
describe CONGRATULATIONS Congratulations - you've been scammed?

body SIGN_UP /\b(?:free sign up|sign up today)\b/i
describe SIGN_UP Sign up Free Today

body FREE_CELL_PHONE /\bfree .{0,12}(?:cell(?:ular)?|mobile) phone|cell(?:ular)? phone for free/i
describe FREE_CELL_PHONE Free Cell Phone

# NOTE(review): FREE_PREVIEW, FREE_ACCESS, FREE_INSTALL and FREE_SAMPLE are
# defined a second time further down this file with capital-sensitive
# patterns -- confirm which definition is meant to take effect.
body FREE_PREVIEW /\bfree preview\b/i
describe FREE_PREVIEW Free Preview

body FREE_ACCESS /\bfree access\b/i
describe FREE_ACCESS Free Access

body FREE_LEADS /\bfree leads\b/i
describe FREE_LEADS Free Leads

body RECEIVE_EMAIL /receive third party email/i
describe RECEIVE_EMAIL Receive third party email

# similar to OFFER, but fewer FPs
body RECEIVE_OFFER /receive special offer/i
describe RECEIVE_OFFER Receive a special offer

body OFFER /\b(?:free|special|trial) offer/i
describe OFFER Free Offer

body OFFER_EXPIRE /\boffer expires\b/i
describe OFFER_EXPIRE Offer Expires

body FREE_QUOTE /\bfree quote/i
describe FREE_QUOTE Free Quote

body FREE_QUOTE_INSTANT /free.{0,12}(?:(?:instant|express|online|no.?obligation).{0,4})+.{0,32}\bquote/i
describe FREE_QUOTE_INSTANT Free express or no-obligation quote

body FREE_INSTALL /\bfree installation\b/i
describe FREE_INSTALL Free Installation

body FREE_SAMPLE /\bfree sample/i
describe FREE_SAMPLE Free Sample

body FREE_DVD /\bfree dvd/i
describe FREE_DVD Free DVD

body FREE_INVESTMENT /\bfree investment/i
describe FREE_INVESTMENT Free Investment

body INVESTMENT /\binvestment decision/i
describe INVESTMENT Investment Decision

body FREE_TRIAL /\bfree trial\b/i
describe FREE_TRIAL Free Trial

body FREE_MEMBERSHIP /\bfree membership/i
describe FREE_MEMBERSHIP Free Membership

body FREE_WEBSITE /\bfree website/i
describe FREE_WEBSITE Free Website

# Credit and debt offers
body CREDIT_CARD /\bcredit card.{1,10}(?:offer|debt|decision)/i
describe CREDIT_CARD Credit Card Offers

body NO_CREDIT_CHECK /\bno credit check\b/i
describe NO_CREDIT_CHECK No Credit Check

body BANKRUPTCY /\b(?:avoid|past) bankruptcy\b/i
describe BANKRUPTCY Avoid Bankruptcy

body CREDIT_BUREAU /\ball .{0,9}credit bureaus?\b/i
describe CREDIT_BUREAU Credit Bureaus

body ACCEPT_CREDIT_CARDS /\b(?:accept\b|are accepting).{1,15}credit cards?\b/i
describe ACCEPT_CREDIT_CARDS Accept Credit Cards

body BAD_CREDIT /\b(?:bad|poor|no\b|eliminate|repair|(?:re)?establish|damag).{0,10} (?:credit|debt)\b/i
describe BAD_CREDIT Eliminate Bad Credit

body UNSECURED_CREDIT /\bunsecured.{0,10}(?:master ?card|visa|credit|loans|debt)\b/i
describe UNSECURED_CREDIT Unsecured Credit/Debt

body LOW_INTEREST /\blow.{0,20} interest rates?\b/i
describe LOW_INTEREST Lower Interest Rates

body COMPARE_RATES /\bcompare .{0,9}rates?\b/i
describe COMPARE_RATES Compare Rates

# (?-i:S) keeps the capital S case-sensitive inside an otherwise /i pattern.
body SAVE_UP_TO /\b(?-i:S)ave up to\b/i
describe SAVE_UP_TO Save Up To

body LOW_PAYMENT /\b(?:reduce|low).{0,12} payment/i
describe LOW_PAYMENT Lower Monthly Payment

body CONSOLIDATE_DEBT /(?:consolidate .{0,9} (?:debt|credit|bills)|debt[ -]?(?:consolidation|elimination))/i
describe CONSOLIDATE_DEBT Consolidate debt, credit, or bills

body CREDITORS_CALLING /\bcreditors calling\b/i
describe CREDITORS_CALLING Calling Creditors

body REFINANCE /\brefinance.{0,6} home\b|\bhome loans?\b/i
describe REFINANCE Refinance Home

# Disabled rule.  The quantifier is written {0,10}; the earlier spelling
# {0.10} is not a quantifier and would have matched those characters literally.
#body SEARCH_ENGINE /\b(?:(?:submit|traffic|top).{0,10} search engines?|site ranking)\b/i
#describe SEARCH_ENGINE Search Engine Site Ranking

body SEARCH_ENGINE_PROMO /\b(?:(?:submitt?|list)(?:ed|ing|s)?|place(?:d|ment))\b.{0,15}\b(?:in|to).{0,15}\b(?:search(?:ing)?\s*(?:engine|site)|director(?:y|ies))s?\b/is
describe SEARCH_ENGINE_PROMO Discusses search engine listings

body OPPORTUNITY_2 /\b(?:opportunity (?:pass|of a lifetime|is knocking|for you)|(?:investment|unique) opportunity)\b/i
describe OPPORTUNITY_2 Opportunity - What a deal!
# ---------------------------------------------------------------------------
# "No ..." reassurance phrases and call-to-action phrases.
#
# Layout: one directive per line (`body NAME /regex/flags`, then
# `describe NAME text`).  `#` comments run to the end of the line, so the
# disabled rules below must each sit on their own lines or they would hide
# the live rules that follow them.
# ---------------------------------------------------------------------------

# 0.00 0.00 0.00 0.00 1.00 MORE_TRAFFIC (no matches)
#body MORE_TRAFFIC /\bmore (?:internet|web) traffic\b/i
#describe MORE_TRAFFIC More Internet Traffic

body NO_PURCHASE /\bno purchase\b/i
describe NO_PURCHASE No Purchase Necessary

# 0.00 0.00 0.00 0.00 1.00 NO_MIDDLEMAN (no matches)
#body NO_MIDDLEMAN /\bno middleman\b/i
#describe NO_MIDDLEMAN No Middleman

body NO_STRINGS /\bno strings attached\b/i
describe NO_STRINGS No Strings Attached

body NO_FEE /\bno .{0,12}fees?\b/i
describe NO_FEE No Fees

body NO_MEDICAL /\bno medical exam/i
describe NO_MEDICAL No Medical Exams

body NO_AGE /\bno age (?:restriction|limit)/i
describe NO_AGE No Age Restrictions

body NO_FORMS /\bno .{0,9}forms\b/i
describe NO_FORMS No Claim Forms

body NO_GIMMICK /\bno gimmick\b/i
describe NO_GIMMICK No Gimmick

body NO_INVESTMENT /\bno investment/i
describe NO_INVESTMENT No Investment

body INITIAL_INVEST /\binitial investment\b/i
describe INITIAL_INVEST Requires Initial Investment

body NO_INVENTORY /\bno inventory\b/i
describe NO_INVENTORY No Inventory

body BUY_DIRECT /\bbuy direct\b/i
describe BUY_DIRECT Buy Direct

body DRASTIC_REDUCED /\bdrastic.{0,4} reduc/i
describe DRASTIC_REDUCED Drastically Reduced

# (?-i:T) keeps the capital T case-sensitive inside an otherwise /i pattern.
body DO_IT_TODAY /\b(?:join|register|order|apply) .{0,10}(?-i:T)oday\b/i
describe DO_IT_TODAY Do it Today

# `do.{0,5} hesitate` allows up to five characters between "do" and
# " hesitate" (e.g. "don't", "do not").  The quantifier must use a comma:
# `{0.5}` is not a quantifier and only matches that text literally.
body ACT_NOW /\b(?:act.{0,4} now|do.{0,5} hesitate|start now)\b/i
describe ACT_NOW Act Now! Don't Hesitate!
# ---------------------------------------------------------------------------
# Urgency, snooping, marketing and income-promise phrases.
#
# Layout: one directive per line (`body NAME /regex/flags`, then
# `describe NAME text`); directives must not be run together on one line.
# ---------------------------------------------------------------------------

body WHY_WAIT /\b(?:why wait|what are you waiting for)\b/i
describe WHY_WAIT What are you waiting for

body SUPPLIES_LIMITED /\bsupplies are limited\b/i
describe SUPPLIES_LIMITED Supplies are Limited

body SECRET_RECORD /\bsecretly record/i
describe SECRET_RECORD Secretly Recorded

body USE_IDENTITY /someone using your identity/i
describe USE_IDENTITY Someone using your identity

body YOU_CAN_SEARCH /you can search for anyone/i
describe YOU_CAN_SEARCH You can search for anyone

body FIND_ANYTHING /\bfind out anything\b/i
describe FIND_ANYTHING Find out anything

body SEDUCTION /\b(?:seduc|attract).{0,8} (?:ebook|opposite sex|women)\b/i
describe SEDUCTION Score with babes!

body INVALUABLE_MARKETING /invaluable marketing information/i
describe INVALUABLE_MARKETING Invaluable marketing information

body MARKET_SOLUTION /\bmarket.{0,9} solution/i
describe MARKET_SOLUTION Marketing Solutions

body MARKETING /\bdirect marketing\b/i
describe MARKETING Direct Marketing

body SAVE_MONEY /\bsave .{0,9}(?:money|thousands|millions|up to)\b/i
describe SAVE_MONEY Save big money

body GUARANTEED_STUFF /\bguarantee.{0,15}(?:income|money|monthly)\b/i
describe GUARANTEED_STUFF Guaranteed Stuff

body INCOME /\badditional income\b/i
describe INCOME Additional Income

body EARNINGS /\b(?:potential (?:earnings|income)|income potential)\b/i
describe EARNINGS Potential Earnings

body THE_BEST_RATE /\bthe best rate/i
describe THE_BEST_RATE The best Rates

body WE_PROMISE_YOU /\bwe promise .{0,9}you/i
describe WE_PROMISE_YOU Promise you ...!
# ---------------------------------------------------------------------------
# Bargain, diet, health and consumer-product sales phrases.
#
# Layout: one directive per line (`body NAME /regex/flags`, then
# `describe NAME text`).  `#` comments run to the end of the line, so the
# disabled HARD_CASH rule must sit on its own lines.
# ---------------------------------------------------------------------------

body AMAZING_STUFF /\bamazing (?:product|rates)/i
describe AMAZING_STUFF Amazing Stuff

body CASH_BONUS /\bcash bonus\b/i
describe CASH_BONUS Cash Bonus

body SHOPPING_SPREE /\bshopping spree\b/i
describe SHOPPING_SPREE Shopping Spree

# NOTE(review): ` {0,9}` quantifies the space (zero to nine spaces).  Nearby
# rules use `.{0,9}`, so this may have been meant as `fantastic .{0,9}` --
# left unchanged because widening it would alter what the rule hits; confirm.
body FANTASTIC /\bfantastic {0,9}(?:price|deal|saving)/i
describe FANTASTIC Fantastic Deal

body CENTS_ON_DOLLAR /\bcents on the dollar\b/i
describe CENTS_ON_DOLLAR Cents on the Dollar

body COUPON /\boff coupon/i
describe COUPON Offers Coupon

body CANT_LIVE_WITHOUT /\bcan.{0,4} live without\b/i
describe CANT_LIVE_WITHOUT Can't live without?

body NAME_BRAND /\b(?:famous name|major) brand/i
describe NAME_BRAND Name Brand

# seems like we vastly reduce FPs on this one with a small change or two
body DIET /\b(?:(?:without|no) (?:exercis(?:e|ing)|dieting)|weight.?loss|(?:extra|lose|lost|losing).{0,10}(?:pounds|weight|inches|lbs)|burn.{1,10}fat)\b/i
describe DIET Lose Weight Spam

body LONG_DISTANCE /\b(?:Unlimited|per minute|free).{1,9}Long Distance/i
describe LONG_DISTANCE Long Distance Phone Offer

body REVERSE_AGING /\breverses? aging\b/i
describe REVERSE_AGING Reverses Aging

body HAIR_LOSS /\b(?:thinn?ing|restore|grow|new) hair|hair loss/i
describe HAIR_LOSS Cures Baldness

body CABLE_CONVERTER /\bcable (?:converter|descrambler)/i
describe CABLE_CONVERTER Cable Converter

body LUXURY_CAR /\bluxury car\b/i
describe LUXURY_CAR Luxury Car

body WRINKLES /\bwrinkle reduction\b/i
describe WRINKLES Removes Wrinkles

# 0.01 0.01 0.01 0.29 1.00 HARD_CASH (low matches)
#body HARD_CASH /\bhard cash\b/i
#describe HARD_CASH Is cash really hard?

body BUY_JUDGEMENTS /\b(?:purchase|collect|divorce)\b.{0,30}\bjudgements\b/i
describe BUY_JUDGEMENTS Buying judgements

body LYING_EYES /\bbelieve your eyes\b/i
describe LYING_EYES Will not Believe your Eyes!
# ---------------------------------------------------------------------------
# Get-rich, prize/winner, pricing and disclaimer phrases.
#
# Layout: one directive per line (`body NAME /regex/flags`, then
# `describe NAME text`); directives must not be run together on one line.
# Rules without the /i flag (WINNING_CAP, WINNER_CAP) are case-sensitive.
# ---------------------------------------------------------------------------

body WHILE_YOU_SLEEP /\bwhile you sleep\b/i
describe WHILE_YOU_SLEEP While you Sleep

body WHILE_SUPPLIES /\bwhile supplies last\b/i
describe WHILE_SUPPLIES While Supplies Last

body RICH /\b(?:make you rich|get rich quick)\b/i
describe RICH If only it were that easy

body WINNING_CAP /WINNING/
describe WINNING_CAP Winning in Caps

body WINNER_CAP /WINNER/
describe WINNER_CAP Winner in Caps

body WINNER /\byou.{0,4} a winner|register to win/i
describe WINNER Claims you are a winner

body YOU_WON /\byou(?:\'ve| have)? won[^\'\w]/i
describe YOU_WON Who really wins?

body PROMOTION /\bspecial promotion\b/i
describe PROMOTION Contains 'Special Promotion'

body FREE_PASSWORD /\bfree .{0,9}passwords?\b/i
describe FREE_PASSWORD Offers Free (often stolen) Passwords

body HIDDEN_CHARGES /\bhidden charges\b/i
describe HIDDEN_CHARGES Talks about Hidden Charges

# (?-i:N) keeps the capital N case-sensitive inside an otherwise /i pattern.
body GET_STARTED_NOW /\bget started (?-i:N)ow\b/i
describe GET_STARTED_NOW Get Started Now

# NOTE(review): GET_IT_NOW is defined a second time further down this file
# with a capital-sensitive pattern -- confirm which definition should apply.
body GET_IT_NOW /\bget it now\b/i
describe GET_IT_NOW Get it now!

body ONLY_COST /\bonly .{0,9}\$/i
describe ONLY_COST Only $$$

body SAVE_BUCKS /\bsave \$/i
describe SAVE_BUCKS Save $$$

body FINANCIAL /\bfinancial(?:ly)? free/i
describe FINANCIAL Financial Freedom

body FORWARD_LOOKING /\bcontains forward-looking statements\b/i
describe FORWARD_LOOKING Stock Disclaimer Statement

body DISCLAIMER /\bdisclaimer\b/i
describe DISCLAIMER Message contains disclaimer

# g(?:ua|au)ranteed also accepts the "gauranteed" misspelling.
body SATISFACTION /\bsatisfaction .{0,9}g(?:ua|au)ranteed|not .{0,9}satisfied\b/i
describe SATISFACTION Satisfaction Guaranteed

body PRIZE /\bclaim.{0,9} prize/i
describe PRIZE Talks about prizes

body GETAWAY /\bweekend getaway/i
describe GETAWAY Weekend Getaway

# The dots in H.G.H are unescaped, so any single character is accepted
# between the letters.
body HGH /\b(?:human growth hormone|(?-i:HGH)|H.G.H)\b/i
describe HGH Human Growth Hormone

body GIVING_AWAY /\bgiving away\b/i
describe GIVING_AWAY They're just giving it away!
# ---------------------------------------------------------------------------
# Offer terms, MLM, porn-phrase rules, pharmacy/finance phrases and the
# rules added from bugzilla reports.
#
# Layout: one directive per line (`body`/`rawbody NAME /regex/flags`, then
# `describe NAME text`).  `#` comments run to the end of the line, so the
# disabled rules below must sit on their own lines or they would hide the
# live rules that follow them.
# ---------------------------------------------------------------------------

body EASY_TERMS /\beasy terms\b/i
describe EASY_TERMS Easy Terms

body EXTRA_CASH /\bextra cash\b/i
describe EXTRA_CASH Offers Extra Cash

# (?-i:P) keeps the capital P case-sensitive inside an otherwise /i pattern.
body GET_PAID /\bget (?-i:P)aid\b/i
describe GET_PAID Get Paid

body BEEN_TURNED_DOWN /\bbeen turned down\b/i
describe BEEN_TURNED_DOWN Have you been turned down?

body ONE_TIME /\bone\W+time (?:charge|investment|offer|promotion)/i
describe ONE_TIME One Time Rip Off

body COMPETE /\bcompete for your business\b/i
describe COMPETE Compete for your business

body CANCEL /\bcancel at any time\b/i
describe CANCEL Cancel at any time!

body NO_COMBINE /\bwith any other offer/i
describe NO_COMBINE Can not be combined with any other offer

body NEW_CUSTOMER /\bnew customers only\b/i
describe NEW_CUSTOMER New Customers Only

body MEET_SINGLES /\bmeet .{0,12}singles|thousands of personal/i
describe MEET_SINGLES Meet Singles

body JOIN_MILLIONS /\bjoin (?:millions|thousands)\b/i
describe JOIN_MILLIONS Join Millions of Americans

body BE_BOSS /\byour own boss\b/i
describe BE_BOSS Be your own boss

body DIG_UP_INFO /\bdig up information\b/i
describe DIG_UP_INFO Dig up Dirt on Friends

body MLM /\b(?:MLM|multi.level.marketing)\b/i
describe MLM Multi Level Marketing mentioned

body NOT_MLM /\bnot (?:MLM|multi.level.marketing)\b/i
describe NOT_MLM Apparently, NOT Multi Level Marketing

body UNCLAIMED_MONEY /\bunclaimed (?:funds|money|prizes?|rewards?)\b/i
describe UNCLAIMED_MONEY People just leave money laying around

body SERIOUS_CASH /\bserious cash\b/i
describe SERIOUS_CASH Serious cash

###########################################
#               PORN RULES                #
###########################################

body LARGE_COLLECTION /\b(?:[0-9,]{5,9}|hundreds|thousands|millions|tons)\b.{0,15}(?:movies|videos?|xxx|streaming|pics?|photos?|of live|of images)\b/i
describe LARGE_COLLECTION Possible porn - Large Number of movies, pics

body FREE_PORN /\bfree (?:porn|xxx|adult)/i
describe FREE_PORN Possible porn - Free Porn

body BARELY_LEGAL /\b(?:barely|just) legal\b/i
describe BARELY_LEGAL Possible porn - Barely Legal

# 0.04 0.04 0.03 0.58 1.00 LOLITA (low matches)
#body LOLITA /\blolita/i
#describe LOLITA Possible porn - Lolita

body MEGA_SITE /\bmega[ -]?sites?\b/i
describe MEGA_SITE Possible porn - Mega Porn

body CUM_SHOT /\bcum[ -]?shots?\b/i
describe CUM_SHOT Possible porn - Cum Shot

#body BIG_BOOBS /\bbig boobs\b/i
#describe BIG_BOOBS Possible porn - Big Boobs

# No /i flag: only the all-capitals spellings match.
body ALL_CAP_PORN /\b(?:ORGY|FUCKING|FETISH|WEBCAM|VOYEUR|ANAL|CUM|SNATCH|COCK|CUNT|PORN)\b/
describe ALL_CAP_PORN Possible porn - in ALL CAPS

body PAY_SITE /\bpay[ -]?sites?\b/i
describe PAY_SITE Possible porn - Pay Site

#body FARM_PORN /\b(?:farm|animal)\b.{0,9}\b(?:sex\b|fuck|action\b|slut|porn)/i
#describe FARM_PORN Possible porn - Sex with Animals

body SEX_FEST /\b(?:sex|gay|slut|whore|cum|f[\.\*u][\.\*c]k|suck|adult|xxx|teen)[ -]?fest\b/i
describe SEX_FEST Possible porn - Porn Fest

# "live cam" is a very common nonspam phrase, removed
body LIVE_PORN /\blive .{0,9}(?:fuck(?:ing)?|sex|naked|girls?|virgins?|teens?|porno?)\b/i
describe LIVE_PORN Possible porn - Live Porn

body HARDCORE_PORN /\bh[a\@]rd[ -]?core .{0,9}(?:teen|virgin|cheerleader|amat(?:eu|ue)r)|\bextreme h[a\@]rdcore/i
describe HARDCORE_PORN Possible porn - Hardcore Porn

body HOT_NASTY /\b(?:horny|nasty|hot|wild|young|horniest|nastiest|hottest|wildest|youngest|naughty|dirtiest|slutty|kinky|lusty|extreme|xxx+)\b.{0,9}\b(?:virgin|asian|cheerleader|sex|selection|fuck|fucking|anal\b|lesb(?:ian|o)|incest|chicks?|pics|movies|video|gay\b|porn|h[a\@]rdcore|schoolgirls|amateur|slut|adult|cum|xxx|sites?|hotties|shit)/i
describe HOT_NASTY Possible porn - Hot, Nasty, Wild, Young

body BEST_PORN /\b(?:best|biggest|largest|most|free|ultimate)\b.{0,9}\b(?:virgins?|anal\b|lesbians?|incest|porno?|h[a\@]rdcore|sluts?|xxx+)/i
describe BEST_PORN Possible porn - Best, Largest Porn Collections

body NASTY_GIRLS /\b(?:horniest|nasty|nastiest|hottest|wildest|slutty|xxx+)\b.{0,9}\b(?:girl|women|teen|babe)/i
describe NASTY_GIRLS Possible porn - Nasty Girls

#body CAM_PORN /cam (?:girls|sex)\b|\b(?:hidden|voyeur|bathroom|shower|naughty) .{0,9}cam\b/i
#describe CAM_PORN Possible porn - Sex on Camera

body AMATEUR_PORN /\bamateur .{0,9}(?:sex|porn|star|sites?|college|babes|action|pics|trash|gang|rape)|(?:real|best) amateur/i
describe AMATEUR_PORN Possible porn - Amateur Porn

body CELEBRITY_PORN /\b(?:celebrity|celebrities|celebs).{0,15}(?:sex|porn|pics|caught|nude|exposed|content)|(?:steamy|hot|nude|shocking|free|h[a\@]rdcore) (?:celebrity|celebrities|celebs)\b/i
describe CELEBRITY_PORN Possible porn - Celebrity Porn

# 0.00 0.00 0.00 0.00 1.00 PORN_GALLERIES (no matches)
#body PORN_GALLERIES /\bhuge galleries\b/i
#describe PORN_GALLERIES Possible porn - Galleries of Pictures

#body UP_SKIRT /\bup[ -]?skirt\b/i
#describe UP_SKIRT Possible porn - Up Skirt

body ADULT_SITE /\badult.{0,9}(?:entertainment|sites?|industry|only|business|membership)/i
describe ADULT_SITE Possible porn - Adult Web Sites

#body UNCENSORED /\buncensored (?:pics|photo)/i
#describe UNCENSORED Possible porn - Uncensored Photos

body MUST_BE_18 /\bmust be (?:at least|over) 18\b/i
describe MUST_BE_18 Possible porn - Must be 18

body INSTANT_ACCESS /\binstant access\b/i
describe INSTANT_ACCESS Possible porn - Offers Instant Access

body RAPE /\b(?:virgin|gang|teen|amateur) rape|rape (?:sites?|sex)\b/i
describe RAPE Possible porn - Rape

body LESBIAN /\b(?:xxx|horny|hot|young) lesbian|lesbian (?:sites?|action|bitch)/i
describe LESBIAN Possible porn - Lesbian Site

body PORN_PASSWORD /\bporn password/i
describe PORN_PASSWORD Possible porn - Porn Password

# Jul 26 2002 jm: offline until we can figure out performance & effectiveness
# problems
# (contrib: skod)
# NOTE(review): PORN_6 directly below is live, not offline -- the comment
# above may refer to rules that are no longer present; confirm.
body PORN_6 /\b(?:\d+\+? xxx pictures|xxx photos?)\b/i
describe PORN_6 Possible Porn - XXX Photos

body PORN_MEMBERSHIP /\b(?:vip|adult|porn|x.rated) membership/i
describe PORN_MEMBERSHIP Possible Porn - Porn membership

##################
# End Porn Rules #
##################

body CHILD_SUPPORT /\buncollected child support\b/i
describe CHILD_SUPPORT "Collect Child Support" Scam

body ITS_LEGAL /\b(?:perfectly|absolutely|100%|totally) legal\b/i
describe ITS_LEGAL Claims to be Legal

body FREE_GRANT /\b(?:free|government) (?-i:G)rants?\b/i
describe FREE_GRANT Free Grant Money

body VIAGRA_ONLINE /\bviagra .{0,25}(?:express|online|overnight)/i
describe VIAGRA_ONLINE Fast Viagra Delivery

body VIAGRA_COMBO /\bviagra .{0,15}(?:phentermine|xenical|tenuate|zyban|propecia)\b/i
describe VIAGRA_COMBO Viagra and other drugs

body ONLINE_PHARMACY /online pharmacy|(?:drugs|medications) online/i
describe ONLINE_PHARMACY Online Pharmacy

body CONFIDENTIAL_ORDER /confidential.{0,9} order/i
describe CONFIDENTIAL_ORDER Confidentially on all orders

body HIDDEN_ASSETS /hidden assets/i
describe HIDDEN_ASSETS 'Hidden' assets

body SAVE_THOUSANDS /\bsave (?:thousands|millions)\b/i
describe SAVE_THOUSANDS Save big money

body REFINANCE_YOUR_HOME /\brefinance your(?: current)? (?:home|house)\b/i
describe REFINANCE_YOUR_HOME "Refinance your home"

body MARKETING_PARTNERS /marketing partner|partner site/i
describe MARKETING_PARTNERS Claims you registered with some kind of partner

# note the tense used, hopefully it won't hit FPs this way
body PRIORITY_MAIL /\b[Aa]ll\b.{0,20}\b[Oo]rders\b.{1,20}\bPriority Mail\b/
describe PRIORITY_MAIL Orders shipped by priority mail

# Selling insurance online
body SAVE_ON_INSURANCE /\bsave .{0,20}\bon (?:your\s+)?(?:auto|car|life|health|medical)? ?insurance\b/i
describe SAVE_ON_INSURANCE Trying to sell insurance online

body STOP_SNORING /\bstop\s+snoring\b/i
describe STOP_SNORING Contains "Stop Snoring"

body APPLY_FREE /(?-i:F)ree (?-i:A)pplication|free application.{0,32}(?:today|minute|less than)/i
describe APPLY_FREE Free Application

body APPLY_ONLINE /\bapply (?-i:O)nline/i
describe APPLY_ONLINE Apply online (with capital O)

# NOTE(review): FREE_INSTALL, FREE_PREVIEW, FREE_ACCESS, FREE_SAMPLE and
# GET_IT_NOW below reuse names that are already defined earlier in this file
# with case-insensitive patterns -- confirm which definition should apply.
body FREE_INSTALL /(?-i:F)ree installation/i
describe FREE_INSTALL Contains 'free installation' with capitals

body FREE_PREVIEW /(?-i:F)ree preview/i
describe FREE_PREVIEW Contains 'free preview' with capitals

# http://www.hughes-family.org/bugzilla/show_bug.cgi?id=678
body USER_4U2 /\b\S{0,20}(?:[^0-9][42](?:yo)?u|for-*you)(?:[.-]\S{1,20})?\@\S{1,20}\.(?:net|com|org|info)\b/
describe USER_4U2 Local part containing a "4u" variant

body DOMAIN_4U2 /[\@\.]\S{0,20}(?:[^0-9][42](?:yo)?u|for-*you)(?:[.-]\S{1,20})?\.(?:net|com|org|info)\b/
describe DOMAIN_4U2 Domain name containing a "4u" variant

body FREE_ACCESS /(?-i:F)ree access/i
describe FREE_ACCESS Contains 'free access' with capitals

body FREE_SAMPLE /(?-i:F)ree sample/i
describe FREE_SAMPLE Contains 'free sample' with capitals

# http://www.hughes-family.org/bugzilla/show_bug.cgi?id=629
body MONTH_TRIAL /(?:month|day) .{0,9}trial/i
describe MONTH_TRIAL Month Trial Offer

# http://www.hughes-family.org/bugzilla/show_bug.cgi?id=630
body UNLIMITED /UNLIMITED/
describe UNLIMITED Unlimited in caps

# bug 680
body MEMBER_2 /\b(?:free|special|paid|dear|gold|opt.in|valued|because you are an?|be a|becoming a|sent to) .{0,9}members?\b/i
describe MEMBER_2 Being a Member

body GET_IT_NOW /get it (?-i:N)ow/i
describe GET_IT_NOW Contains 'Get it now' with capitals

# http://www.hughes-family.org/bugzilla/show_bug.cgi?id=632
body LOW_PRICE /\blow.{0,4} (?-i:P)rice/i
describe LOW_PRICE Lowest Price

# http://www.hughes-family.org/bugzilla/show_bug.cgi?id=610
body SALE /\bsales? (?:price|system|department|technology|ends|today)|\b(?:on|summer|movie|clearance|for|your|increase|super|losing|return|business|airfare) (?-i:S)ales?/i
describe SALE Stuff on Sale

# http://www.hughes-family.org/bugzilla/show_bug.cgi?id=679
body MEMBER /\bmember (?:number|reward|value|benefit|rate|report|card|information)s?\b/i
describe MEMBER Member Stuff

# desc: 0.004 0.009 0.001 0.94 3.60 EJACULATION
body EJACULATION /\b(?:increase|improve|greater).{0,10}ejaculation/i
describe EJACULATION Increase your ejaculation!

body TRACE_BY_SSN /Trace anyone by social security number/i
describe TRACE_BY_SSN Talks about tracing by SSN

body AUTO_EMAIL_REMOVAL /Auto Email Removal/
describe AUTO_EMAIL_REMOVAL Claims auto-email removal

body CYBER_FIRE_POWER /\b(?:by|for) Cyber FirePower\!/
describe CYBER_FIRE_POWER mentions Cyber FirePower!, a spam-tool

# NOTE(review): ONCE_IN_LIFETIME, MORE_TRAFFIC, NO_MIDDLEMAN and
# PORN_GALLERIES are live here although the same rules appear commented out
# (marked low/no matches) earlier in this file -- confirm they should be on.
body ONCE_IN_LIFETIME /once in a lifetime opportunity/i
describe ONCE_IN_LIFETIME Once in a lifetime, apparently

rawbody SAFEGUARD_NOTICE /This safeguard is not inserted when using the registered version/s
describe SAFEGUARD_NOTICE Contains signature of unregistered spam tool

body MORE_TRAFFIC /\bmore (?:internet|web) traffic\b/i
describe MORE_TRAFFIC More Internet Traffic

body NO_MIDDLEMAN /\bno middleman\b/i
describe NO_MIDDLEMAN No Middleman

body PHONE_CANCER /\bphone.{0,15}cancer\b/i
describe PHONE_CANCER Cell Phone Cancer Scam

body PORN_GALLERIES /\bhuge galleries\b/i
describe PORN_GALLERIES Possible porn - Galleries of Pictures

# NOTE(review): identical to the UNCLAIMED_MONEY definition near the top of
# this section; the repeat looks redundant -- confirm before removing.
body UNCLAIMED_MONEY /\bunclaimed (?:funds|money|prizes?|rewards?)\b/i
describe UNCLAIMED_MONEY People just leave money laying around

rawbody SPAM_FORM_INPUT /