# SpamAssassin rules file: CVS rules under test
#
# This file is a placeholder for rules "under probation", i.e. checked into
# CVS for testing.  It should not be distributed; if the rules have good
# stats after a mass-check or two, then fold them into the distributed
# rules files.
#
# I suggest adding a prefix to rules in this file, "T_" -- this
# helps identify probationary rules in test output.
#
# Layout: one directive per line.  Rules whose names start with "__" are
# sub-rules that are only referenced from "meta" rules in this file.
#
###########################################################################

# improvement on NO_REAL_NAME
# __NO_NAME:    From is nothing but a bare address, optionally in <...>.
# __EMPTY_NAME: same, but preceded by an empty quoted display name ("").
# (The "<?\S+>" part of both patterns had been dropped, leaving "\s*?\s*",
# which could only match a blank From header.)
header   __NO_NAME       From =~ /^\s*<?\S+>?\s*$/
header   __EMPTY_NAME    From =~ /^\s*\"\"\s*<?\S+>?\s*$/
meta     T_NO_REAL_NAME  (__NO_NAME || __EMPTY_NAME)
describe T_NO_REAL_NAME  From: does not include a real name

# should be fixed now
# Fires when Message-Id does NOT match "<local@domain-or-[ip]>" optionally
# followed by a parenthesised comment.
# NOTE(review): the [if-unset: ...] default was empty in this file.  The
# placeholder below is a stand-in that satisfies the pattern, so a missing
# Message-Id is not reported as an invalid one -- confirm the intended value.
header   T_INVALID_MSGID Message-Id !~ /^<(?:[a-zA-Z0-9.,!\#\$%&'*\+\/=?\^_{}|~-]+|\".+\")\@(?:[a-zA-Z0-9_.-]+|\[\d{1,3}(?:\.\d{1,3}){3}\])>(?:\s*\(.*\))?\s*$/ [if-unset: <UNSET@UNSET>]
describe T_INVALID_MSGID Message-Id is not valid, according to RFC 2822

# results for quinlan
# 0.149 0.458 0.000 1.00 0.72 0.50 T_FROM_TOPICA
header   T_FROM_TOPICA   From:addr =~ /\@(?:\w+\.)?email-publisher\.com$/
describe T_FROM_TOPICA   From an address at email-publisher.com
score    T_FROM_TOPICA   0.5

# results for quinlan
# 0.245 0.727 0.013 0.98 0.57 0.50 T_FROM_ALL_NUMS
# All-digit local part, unless it has the shape of a 10-digit phone number.
header   __FROM_JUST_NUMBER From:addr =~ /^\d+\@/
header   __FROM_PHONE       From:addr =~ /^\d{3}(?:[-.]?\d{3}[-.]?\d{4}|\d{7})\@/
meta     T_FROM_ALL_NUMS    (__FROM_JUST_NUMBER && !__FROM_PHONE)
describe T_FROM_ALL_NUMS    From an address that is all numbers (non-phone)
score    T_FROM_ALL_NUMS    0.5

# seems like the hit rate is a bit too low on this one
uri      T_ADCLICK_URL   m,\Qhttp://www.adclick.ws/p\E,
describe T_ADCLICK_URL   Contains adclick URL

# Needs testing
body     T_QUOTE_TWICE_1 /^> >\s/
describe T_QUOTE_TWICE_1 Contains twice quoted reply
tflags   T_QUOTE_TWICE_1 nice
score    T_QUOTE_TWICE_1 -1.0

# "Original Message" separator line combined with an X-Mailer that names
# one of the listed mail agents.
header   __ORIGINAL_MESSAGE_AGENT X-Mailer =~ /\b(?:Microsoft Outlook|Internet Mail Service|Mozilla|AOL)\b/
rawbody  __ORIGINAL_MESSAGE_LINE  /^-{5,8} ?Original Message ?-{5,8}$/
meta     T_ORIGINAL_MESSAGE       (__ORIGINAL_MESSAGE_AGENT && __ORIGINAL_MESSAGE_LINE)
describe T_ORIGINAL_MESSAGE       Looks like a reply to a message
tflags   T_ORIGINAL_MESSAGE       nice
score    T_ORIGINAL_MESSAGE       -1.0

# avoid an FP; good ones have a hostname after the @
header   T_MSGID_GOOD_EXCHANGE Message-Id =~ /^<[A-Z]{28}\.\S+\@\S+>$/
tflags   T_MSGID_GOOD_EXCHANGE nice
score    T_MSGID_GOOD_EXCHANGE -1.0

# one or both of these may need to factor in ok_languages or ok_locales
body T_UNPRINTABLE /[\x00-\x08\x0b\x0c\x0e-\x1f\x7f]{3,}/
body T_HIGHBIT     /[\x80-\xff]{8,}/

# HTML control -- HTML spam rules should all have better S/O than this
body  T_HTML_MESSAGE eval:html_message()
score T_HTML_MESSAGE 0.0000001

# HTML input form tests
body T_HTML_TAG_EXISTS_FORM    eval:html_tag_exists('form')
body T_HTML_FORM_HIDDEN_EMAIL1 eval:html_test('t_form_hidden_email1')
body T_HTML_FORM_HIDDEN_EMAIL2 eval:html_test('t_form_hidden_email2')

# HTML web bugs replacements
body T_HTML_WEB_BUGS_BACKGROUND eval:html_test('web_bugs_background')
body T_HTML_WEB_BUGS_SRC        eval:html_test('web_bugs_src')

# possible improvements for HTML_JAVASCRIPT
body T_HTML_JAVASCRIPT eval:html_test('t_javascript')

# possible replacement/supplement for FROM_BTAMAIL
header T_BTAMAIL_HEADER ALL =~ /btamail\.net\.cn/i

########################################################################
# bug 1106: forged MUAs
#
# Each group pairs an X-Mailer match (__*_MUA) with the Message-Id shape
# expected from that agent (__*_MSGID); the T_FORGED_MUA_* meta fires when
# the X-Mailer matches but the Message-Id does not.

# AOL
# (the dot in "aol\.com" is escaped so it only matches a literal dot)
header   __AOL_MUA        X-Mailer =~ /\bAOL\b/
header   __AOL_MSGID      Message-Id =~ /^<[0-9a-f]{1,3}\.[0-9a-f]{7,8}\.[0-9a-f]{8}\@aol\.com>$/
meta     T_FORGED_MUA_AOL (__AOL_MUA && !__AOL_MSGID)
describe T_FORGED_MUA_AOL Forged mail pretending to be from AOL
score    T_FORGED_MUA_AOL 1.0

# Internet Mail Service
header   __IMS_MUA        X-Mailer =~ /Internet Mail Service/
header   __IMS_MSGID      Message-Id =~ /^<[A-F\d]{38,40}\@\S+>$/
meta     T_FORGED_MUA_IMS (__IMS_MUA && !__IMS_MSGID)
describe T_FORGED_MUA_IMS Forged mail pretending to be from IMS
score    T_FORGED_MUA_IMS 1.0

# Mozilla
header   __MOZILLA_MUA        X-Mailer =~ /\bMozilla\b/
header   __MOZILLA_MSGID      Message-Id =~ /^<[A-F\d]{8}\.[A-F\d]{6,8}\@\S+>$/
meta     T_FORGED_MUA_MOZILLA (__MOZILLA_MUA && !__MOZILLA_MSGID)
describe T_FORGED_MUA_MOZILLA Forged mail pretending to be from Mozilla
score    T_FORGED_MUA_MOZILLA 1.0

# Outlook
# (the negative look-ahead keeps "Outlook IMO" out of this group)
header   __OUTLOOK_MUA        X-Mailer =~ /Outlook(?! IMO)/
header   __OUTLOOK_MSGID      Message-Id =~ /^<[0-9a-f]{12,12}\$[0-9a-f]{8,8}\$[0-9a-f]{8,8}\@.{1,50}>$/
meta     T_FORGED_MUA_OUTLOOK (__OUTLOOK_MUA && !__OUTLOOK_MSGID)
describe T_FORGED_MUA_OUTLOOK Forged mail pretending to be from Outlook
score    T_FORGED_MUA_OUTLOOK 1.0

# Outlook IMO (Internet Mail Only)
header   __OIMO_MUA        X-Mailer =~ /Outlook IMO/
header   __OIMO_MSGID      Message-Id =~ /^<[A-P]{28}\.[a-z.]+\@\S+>$/
meta     T_FORGED_MUA_OIMO (__OIMO_MUA && !__OIMO_MSGID)
describe T_FORGED_MUA_OIMO Forged mail pretending to be from Outlook IMO
score    T_FORGED_MUA_OIMO 1.0

# QUALCOMM Eudora
header   __EUDORA_MUA        X-Mailer =~ /\b(?:QUALCOMM|Eudora)\b/
header   __EUDORA_MSGID      Message-Id =~ /^<(?:\d\d?\.){4,5}\d{14}\.[a-f0-9]{8}\@\S+>$/
meta     T_FORGED_MUA_EUDORA (__EUDORA_MUA && !__EUDORA_MSGID)
describe T_FORGED_MUA_EUDORA Forged mail pretending to be from Eudora
score    T_FORGED_MUA_EUDORA 1.0

########################################################################
# possible replacements for SUBJ_HAS_SPACES

# original version from a postfix filter (Craig Sanders)
# adds some hits, but loses some too
header T_SUBJ_ENDS_IN_SPACES Subject =~ /\s{6,}([^0 \t]\S{3,})?$/

# distilled new rules (merge of T_SUBJ_ENDS_IN_SPACES and SUBJ_HAS_SPACES)
header T_SUBJ_HAS_SPACES1 Subject =~ /(?:\s{6,}|\t)/
header T_SUBJ_HAS_SPACES2 Subject =~ /(?:\s{6,}|\t{2,})/

# bug 1108, From: header
# _1 fires on "no lower-case letter" alone; _2 additionally requires that
# the From header exists.
header __FROM_NO_LOWER   From !~ /[a-z]/
header __FROM_EXISTS     exists:From
meta   T_NO_LOWER_FROM_1 (__FROM_NO_LOWER)
meta   T_NO_LOWER_FROM_2 (__FROM_NO_LOWER && __FROM_EXISTS)

# bug 1108, To: and Cc: headers
# Suffixes: _NLA = no lower-case letter anywhere, _NLU = none before an "@",
# _NLH = none after an "@", _E = the header exists.
header __TO_NLA   To !~ /[a-z]/
header __TO_NLU   To !~ /[a-z][^\s,]*\@/
header __TO_NLH   To !~ /\@[^\s,]*[a-z]/
header __CC_NLA   Cc !~ /[a-z]/
header __CC_NLU   Cc !~ /[a-z][^\s,]*\@/
header __CC_NLH   Cc !~ /\@[^\s,]*[a-z]/
header __TOCC_NLA ToCc !~ /[a-z]/
header __TOCC_NLU ToCc !~ /[a-z][^\s,]*\@/
header __TOCC_NLH ToCc !~ /\@[^\s,]*[a-z]/
header __CC_E     exists:Cc
header __TO_E     exists:To
header __TOCC_E   exists:ToCc
#
meta T_NO_LOWER_TO_ALL    (__TO_NLA)
meta T_NO_LOWER_TO_USER   (__TO_NLU)
meta T_NO_LOWER_TO_HOST   (__TO_NLH)
meta T_NO_LOWER_TO_EITHER (__TO_NLU || __TO_NLH)
#
meta T_NO_LOWER_TOCC_ALL    (__TOCC_NLA)
meta T_NO_LOWER_TOCC_USER   (__TOCC_NLU)
meta T_NO_LOWER_TOCC_HOST   (__TOCC_NLH)
meta T_NO_LOWER_TOCC_EITHER (__TOCC_NLU || __TOCC_NLH)
#
meta T_NO_LOWER_TO_ALL_E    (__TO_NLA && __TO_E)
meta T_NO_LOWER_TO_USER_E   (__TO_NLU && __TO_E)
meta T_NO_LOWER_TO_HOST_E   (__TO_NLH && __TO_E)
meta T_NO_LOWER_TO_EITHER_E ((__TO_NLU || __TO_NLH) && __TO_E)
#
meta T_NO_LOWER_TOCC_ALL_E    (__TOCC_NLA && __TOCC_E)
meta T_NO_LOWER_TOCC_USER_E   (__TOCC_NLU && __TOCC_E)
meta T_NO_LOWER_TOCC_HOST_E   (__TOCC_NLH && __TOCC_E)
meta T_NO_LOWER_TOCC_EITHER_E ((__TOCC_NLU || __TOCC_NLH) && __TOCC_E)
#
meta T_NO_LOWER_TO_CC_ALL_E    (__CC_NLA && __CC_E) || (__TO_NLA && __TO_E)
meta T_NO_LOWER_TO_CC_USER_E   (__CC_NLU && __CC_E) || (__TO_NLU && __TO_E)
meta T_NO_LOWER_TO_CC_HOST_E   (__CC_NLH && __CC_E) || (__TO_NLH && __TO_E)
meta T_NO_LOWER_TO_CC_EITHER_E (((__CC_NLU || __CC_NLH) && __CC_E) || ((__TO_NLU || __TO_NLH) && __TO_E))

###########################################################################
# Words immediately followed by "!".  (?-i:X) keeps the first letter
# case-sensitive inside an otherwise case-insensitive pattern.

body T_BANG_BOSS          /\bboss!/i
body T_BANG_CARTOONS      /\bcartoons!/i
body T_BANG_CYBERANALYSTS /\bcyberanalysts!/i
body T_BANG_EXERCISE      /\bexercis(?:e|er|es)!/i
body T_BANG_GUARANTEE     /\bguaranteed?\!/i
body T_BANG_MONEY         /\bmoney!/i
body T_BANG_MORE          /\b(?-i:M)ore!/i
body T_BANG_OFFER         /\boffer(?:ed|s)!/i
body T_BANG_OPRAH         /\boprah!/i
body T_BANG_QUOTE         /\b(?-i:Q)uotes?\!/i

###########################################################################

body     T_OBSCURED_EMAIL /^\w+\^\S+\(\w{2,3}\b/m
describe T_OBSCURED_EMAIL Message seems to contain obscured email address (rot13)

# mailman list reminder mails are getting tagged in 2.41, adding a rule to
# check for these
# ("@" is escaped as "\@", matching every other rule in this file)
header   __FROM_MAILMAN_OWNER     From:addr =~ /^mailman-owner\@/
header   __SUBJECT_MAILMAN_REMIND Subject =~ /\bmailing list memberships reminder\b/
meta     T_MAILMAN_REMINDER       (__FROM_MAILMAN_OWNER && __SUBJECT_MAILMAN_REMIND)
describe T_MAILMAN_REMINDER       Mail headers indicate a mailman membership reminder
score    T_MAILMAN_REMINDER       -3
tflags   T_MAILMAN_REMINDER       nice

########################################################

# Runs of 5 (rule 1) or 6+ (rule 2) characters that are word characters but
# not digits, vowels/y, "_" or "-", somewhere before the "@".
header   T_NONSENSE_FROM_1 From:addr =~ /[^\W\daeiouy_-]{5}\S*\@/i
describe T_NONSENSE_FROM_1 From user name appears to be nonsense (1)
header   T_NONSENSE_FROM_2 From:addr =~ /[^\W\daeiouy_-]{6,}\S*\@/i
describe T_NONSENSE_FROM_2 From user name appears to be nonsense (2)

# nonsense_from_percent(lo, hi) buckets: deciles up to 90, then one-point
# steps from 90 to 100 (which replace the commented-out 90-100 rule).
header   T_NONSENSE_FROM_00_10 eval:nonsense_from_percent('00', '10')
describe T_NONSENSE_FROM_00_10 0%-10% weird trigraphs in From
header   T_NONSENSE_FROM_10_20 eval:nonsense_from_percent('10', '20')
describe T_NONSENSE_FROM_10_20 10%-20% weird trigraphs in From
header   T_NONSENSE_FROM_20_30 eval:nonsense_from_percent('20', '30')
describe T_NONSENSE_FROM_20_30 20%-30% weird trigraphs in From
header   T_NONSENSE_FROM_30_40 eval:nonsense_from_percent('30', '40')
describe T_NONSENSE_FROM_30_40 30%-40% weird trigraphs in From
header   T_NONSENSE_FROM_40_50 eval:nonsense_from_percent('40', '50')
describe T_NONSENSE_FROM_40_50 40%-50% weird trigraphs in From
header   T_NONSENSE_FROM_50_60 eval:nonsense_from_percent('50', '60')
describe T_NONSENSE_FROM_50_60 50%-60% weird trigraphs in From
header   T_NONSENSE_FROM_60_70 eval:nonsense_from_percent('60', '70')
describe T_NONSENSE_FROM_60_70 60%-70% weird trigraphs in From
header   T_NONSENSE_FROM_70_80 eval:nonsense_from_percent('70', '80')
describe T_NONSENSE_FROM_70_80 70%-80% weird trigraphs in From
header   T_NONSENSE_FROM_80_90 eval:nonsense_from_percent('80', '90')
describe T_NONSENSE_FROM_80_90 80%-90% weird trigraphs in From
#header T_NONSENSE_FROM_90_100 eval:nonsense_from_percent('90', '100')
#describe T_NONSENSE_FROM_90_100 90%-100% weird trigraphs in From
header T_NONSENSE_FROM_90_91  eval:nonsense_from_percent('90', '91')
header T_NONSENSE_FROM_91_92  eval:nonsense_from_percent('91', '92')
header T_NONSENSE_FROM_92_93  eval:nonsense_from_percent('92', '93')
header T_NONSENSE_FROM_93_94  eval:nonsense_from_percent('93', '94')
header T_NONSENSE_FROM_94_95  eval:nonsense_from_percent('94', '95')
header T_NONSENSE_FROM_95_96  eval:nonsense_from_percent('95', '96')
header T_NONSENSE_FROM_96_97  eval:nonsense_from_percent('96', '97')
header T_NONSENSE_FROM_97_98  eval:nonsense_from_percent('97', '98')
header T_NONSENSE_FROM_98_99  eval:nonsense_from_percent('98', '99')
header T_NONSENSE_FROM_99_100 eval:nonsense_from_percent('99', '100')

score T_NONSENSE_FROM_00_10 0.01
score T_NONSENSE_FROM_10_20 0.01
score T_NONSENSE_FROM_20_30 0.01
score T_NONSENSE_FROM_30_40 0.01
score T_NONSENSE_FROM_40_50 0.01
score T_NONSENSE_FROM_50_60 0.01
score T_NONSENSE_FROM_60_70 0.01
score T_NONSENSE_FROM_70_80 0.01
score T_NONSENSE_FROM_80_90 0.01
#score T_NONSENSE_FROM_90_100 0.01
score T_NONSENSE_FROM_90_91 0.01
score T_NONSENSE_FROM_91_92 0.01
score T_NONSENSE_FROM_92_93 0.01
score T_NONSENSE_FROM_93_94 0.01
score T_NONSENSE_FROM_94_95 0.01
score T_NONSENSE_FROM_95_96 0.01
score T_NONSENSE_FROM_96_97 0.01
score T_NONSENSE_FROM_97_98 0.01
score T_NONSENSE_FROM_98_99 0.01
score T_NONSENSE_FROM_99_100 0.01

############################################################
# HTML image statistics, bucketed with html_range(property, low[, high]).
# NOTE(review): the total_image_area buckets skip 5000-10000, 90000-100000
# and 900000-1000000 -- confirm whether those gaps are intentional.

body T_HTML_IMAGE_AREA01 eval:html_range('total_image_area','0','1000')
body T_HTML_IMAGE_AREA02 eval:html_range('total_image_area','1000','5000')
body T_HTML_IMAGE_AREA03 eval:html_range('total_image_area','10000','20000')
body T_HTML_IMAGE_AREA04 eval:html_range('total_image_area','20000','30000')
body T_HTML_IMAGE_AREA05 eval:html_range('total_image_area','30000','40000')
body T_HTML_IMAGE_AREA06 eval:html_range('total_image_area','40000','50000')
body T_HTML_IMAGE_AREA07 eval:html_range('total_image_area','50000','60000')
body T_HTML_IMAGE_AREA08 eval:html_range('total_image_area','60000','70000')
body T_HTML_IMAGE_AREA09 eval:html_range('total_image_area','70000','80000')
body T_HTML_IMAGE_AREA10 eval:html_range('total_image_area','80000','90000')
body T_HTML_IMAGE_AREA11 eval:html_range('total_image_area','100000','200000')
body T_HTML_IMAGE_AREA12 eval:html_range('total_image_area','200000','300000')
body T_HTML_IMAGE_AREA13 eval:html_range('total_image_area','300000','400000')
body T_HTML_IMAGE_AREA14 eval:html_range('total_image_area','400000','500000')
body T_HTML_IMAGE_AREA15 eval:html_range('total_image_area','500000','600000')
body T_HTML_IMAGE_AREA16 eval:html_range('total_image_area','600000','700000')
body T_HTML_IMAGE_AREA17 eval:html_range('total_image_area','700000','800000')
body T_HTML_IMAGE_AREA18 eval:html_range('total_image_area','800000','900000')
body T_HTML_IMAGE_AREA19 eval:html_range('total_image_area','1000000')

body T_HTML_NUM_IMGS01 eval:html_range('num_imgs','0','1')
body T_HTML_NUM_IMGS02 eval:html_range('num_imgs','1','2')
body T_HTML_NUM_IMGS03 eval:html_range('num_imgs','2','3')
body T_HTML_NUM_IMGS04 eval:html_range('num_imgs','3','4')
body T_HTML_NUM_IMGS05 eval:html_range('num_imgs','4','5')
body T_HTML_NUM_IMGS06 eval:html_range('num_imgs','5','6')
body T_HTML_NUM_IMGS07 eval:html_range('num_imgs','6','7')
body T_HTML_NUM_IMGS08 eval:html_range('num_imgs','7','8')
body T_HTML_NUM_IMGS09 eval:html_range('num_imgs','8','9')
body T_HTML_NUM_IMGS10 eval:html_range('num_imgs','9','10')
body T_HTML_NUM_IMGS11 eval:html_range('num_imgs','10','20')
body T_HTML_NUM_IMGS12 eval:html_range('num_imgs','20','30')
body T_HTML_NUM_IMGS13 eval:html_range('num_imgs','30','40')
body T_HTML_NUM_IMGS14 eval:html_range('num_imgs','40','50')
body T_HTML_NUM_IMGS15 eval:html_range('num_imgs','50','60')
body T_HTML_NUM_IMGS16 eval:html_range('num_imgs','60','70')
body T_HTML_NUM_IMGS17 eval:html_range('num_imgs','70','80')
body T_HTML_NUM_IMGS18 eval:html_range('num_imgs','80','90')
body T_HTML_NUM_IMGS19 eval:html_range('num_imgs','90','100')
body T_HTML_NUM_IMGS20 eval:html_range('num_imgs','100')

body T_HTML_CONSEC_IMGS01 eval:html_range('max_consec_imgs','1','2')
body T_HTML_CONSEC_IMGS02 eval:html_range('max_consec_imgs','2','3')
body T_HTML_CONSEC_IMGS03 eval:html_range('max_consec_imgs','3','4')
body T_HTML_CONSEC_IMGS04 eval:html_range('max_consec_imgs','4','5')
body T_HTML_CONSEC_IMGS05 eval:html_range('max_consec_imgs','5','6')
body T_HTML_CONSEC_IMGS06 eval:html_range('max_consec_imgs','6','7')
body T_HTML_CONSEC_IMGS07 eval:html_range('max_consec_imgs','7','8')
body T_HTML_CONSEC_IMGS08 eval:html_range('max_consec_imgs','8','9')
body T_HTML_CONSEC_IMGS09 eval:html_range('max_consec_imgs','9','10')
body T_HTML_CONSEC_IMGS10 eval:html_range('max_consec_imgs','10','20')
body T_HTML_CONSEC_IMGS11 eval:html_range('max_consec_imgs','20','30')
body T_HTML_CONSEC_IMGS12 eval:html_range('max_consec_imgs','30','40')
body T_HTML_CONSEC_IMGS13 eval:html_range('max_consec_imgs','40','50')
body T_HTML_CONSEC_IMGS14 eval:html_range('max_consec_imgs','50','60')
body T_HTML_CONSEC_IMGS15 eval:html_range('max_consec_imgs','60','70')
body T_HTML_CONSEC_IMGS16 eval:html_range('max_consec_imgs','70','80')
body T_HTML_CONSEC_IMGS17 eval:html_range('max_consec_imgs','80','90')
body T_HTML_CONSEC_IMGS18 eval:html_range('max_consec_imgs','90','100')
body T_HTML_CONSEC_IMGS19 eval:html_range('max_consec_imgs','100')

# NOTE(review): max_img_ratio has no bucket between 0.999 and 5 -- confirm.
body T_HTML_MAX_IMG_RATIO01  eval:html_range('max_img_ratio','0.0','0.5')
body T_HTML_MAX_IMG_RATIO02  eval:html_range('max_img_ratio','0.5','0.999')
meta T_HTML_MAX_IMG_RATIO02B (T_HTML_MAX_IMG_RATIO02 && !FROM_EGROUPS)
body T_HTML_MAX_IMG_RATIO03  eval:html_range('max_img_ratio','5','10')
meta T_HTML_MAX_IMG_RATIO03B (T_HTML_MAX_IMG_RATIO03 && !FROM_EGROUPS)
body T_HTML_MAX_IMG_RATIO04  eval:html_range('max_img_ratio','10','20')
body T_HTML_MAX_IMG_RATIO05  eval:html_range('max_img_ratio','20','30')
body T_HTML_MAX_IMG_RATIO06  eval:html_range('max_img_ratio','30','40')
body T_HTML_MAX_IMG_RATIO07  eval:html_range('max_img_ratio','40','50')
body T_HTML_MAX_IMG_RATIO08  eval:html_range('max_img_ratio','50','75')
body T_HTML_MAX_IMG_RATIO09  eval:html_range('max_img_ratio','75','100')
body T_HTML_MAX_IMG_RATIO10  eval:html_range('max_img_ratio','100')

body T_HTML_MIN_IMG_RATIO1 eval:html_range('min_img_ratio','0.0','0.01')
body T_HTML_MIN_IMG_RATIO2 eval:html_range('min_img_ratio','0.01','0.1')
body T_HTML_MIN_IMG_RATIO3 eval:html_range('min_img_ratio','0.1','0.25')
body T_HTML_MIN_IMG_RATIO4 eval:html_range('min_img_ratio','0.25','0.50')
body T_HTML_MIN_IMG_RATIO5 eval:html_range('min_img_ratio','0.50','0.75')
body T_HTML_MIN_IMG_RATIO6 eval:html_range('min_img_ratio','0.75','0.999')

#---------------------------------------------------------
# "B" variants: the same bucket, but only when FROM_EGROUPS did not hit.

meta T_HTML_NUM_IMGS01B    (T_HTML_NUM_IMGS01 && !FROM_EGROUPS)
meta T_HTML_NUM_IMGS02B    (T_HTML_NUM_IMGS02 && !FROM_EGROUPS)
meta T_HTML_CONSEC_IMGS01B (T_HTML_CONSEC_IMGS01 && !FROM_EGROUPS)
meta T_HTML_IMAGE_AREA03B  (T_HTML_IMAGE_AREA03 && !FROM_EGROUPS)
meta T_HTML_IMAGE_AREA04B  (T_HTML_IMAGE_AREA04 && !FROM_EGROUPS)
meta T_HTML_IMAGE_AREA05B  (T_HTML_IMAGE_AREA05 && !FROM_EGROUPS)
meta T_HTML_IMAGE_AREA06B  (T_HTML_IMAGE_AREA06 && !FROM_EGROUPS)

# HTML_50_70 with at least 3 IMG tags
body __T_HTML_NUM_IMGS_3 eval:html_range('num_imgs','3')
meta T_HTML_50_70_IMGS3  (HTML_50_70 && __T_HTML_NUM_IMGS_3)

#---------------------------------------------------------

score T_HTML_IMAGE_AREA01 0.01
score T_HTML_IMAGE_AREA02 0.01
score T_HTML_IMAGE_AREA03 0.01
score T_HTML_IMAGE_AREA04 0.01
score T_HTML_IMAGE_AREA05 0.01
score T_HTML_IMAGE_AREA06 0.01
score T_HTML_IMAGE_AREA07 0.01
score T_HTML_IMAGE_AREA08 0.01
score T_HTML_IMAGE_AREA09 0.01
score T_HTML_IMAGE_AREA10 0.01
score T_HTML_IMAGE_AREA11 0.01
score T_HTML_IMAGE_AREA12 0.01
score T_HTML_IMAGE_AREA13 0.01
score T_HTML_IMAGE_AREA14 0.01
score T_HTML_IMAGE_AREA15 0.01
score T_HTML_IMAGE_AREA16 0.01
score T_HTML_IMAGE_AREA17 0.01
score T_HTML_IMAGE_AREA18 0.01
score T_HTML_IMAGE_AREA19 0.01

score T_HTML_NUM_IMGS01 0.01
score T_HTML_NUM_IMGS02 0.01
score T_HTML_NUM_IMGS03 0.01
score T_HTML_NUM_IMGS04 0.01
score T_HTML_NUM_IMGS05 0.01
score T_HTML_NUM_IMGS06 0.01
score T_HTML_NUM_IMGS07 0.01
score T_HTML_NUM_IMGS08 0.01
score T_HTML_NUM_IMGS09 0.01
score T_HTML_NUM_IMGS10 0.01
score T_HTML_NUM_IMGS11 0.01
score T_HTML_NUM_IMGS12 0.01
score T_HTML_NUM_IMGS13 0.01
score T_HTML_NUM_IMGS14 0.01
score T_HTML_NUM_IMGS15 0.01
score T_HTML_NUM_IMGS16 0.01
score T_HTML_NUM_IMGS17 0.01
score T_HTML_NUM_IMGS18 0.01
score T_HTML_NUM_IMGS19 0.01
score T_HTML_NUM_IMGS20 0.01

score T_HTML_CONSEC_IMGS01 0.01
score T_HTML_CONSEC_IMGS02 0.01
score T_HTML_CONSEC_IMGS03 0.01
score T_HTML_CONSEC_IMGS04 0.01
score T_HTML_CONSEC_IMGS05 0.01
score T_HTML_CONSEC_IMGS06 0.01
score T_HTML_CONSEC_IMGS07 0.01
score T_HTML_CONSEC_IMGS08 0.01
score T_HTML_CONSEC_IMGS09 0.01
score T_HTML_CONSEC_IMGS10 0.01
score T_HTML_CONSEC_IMGS11 0.01
score T_HTML_CONSEC_IMGS12 0.01
score T_HTML_CONSEC_IMGS13 0.01
score T_HTML_CONSEC_IMGS14 0.01
score T_HTML_CONSEC_IMGS15 0.01
score T_HTML_CONSEC_IMGS16 0.01
score T_HTML_CONSEC_IMGS17 0.01
score T_HTML_CONSEC_IMGS18 0.01
score T_HTML_CONSEC_IMGS19 0.01

score T_HTML_MAX_IMG_RATIO01 0.01
score T_HTML_MAX_IMG_RATIO02 0.01
score T_HTML_MAX_IMG_RATIO02B 0.01
score T_HTML_MAX_IMG_RATIO03 0.01
score T_HTML_MAX_IMG_RATIO03B 0.01
score T_HTML_MAX_IMG_RATIO04 0.01
score T_HTML_MAX_IMG_RATIO05 0.01
score T_HTML_MAX_IMG_RATIO06 0.01
score T_HTML_MAX_IMG_RATIO07 0.01
score T_HTML_MAX_IMG_RATIO08 0.01
score T_HTML_MAX_IMG_RATIO09 0.01
score T_HTML_MAX_IMG_RATIO10 0.01

score T_HTML_MIN_IMG_RATIO1 0.01
score T_HTML_MIN_IMG_RATIO2 0.01
score T_HTML_MIN_IMG_RATIO3 0.01
score T_HTML_MIN_IMG_RATIO4 0.01
score T_HTML_MIN_IMG_RATIO5 0.01
score T_HTML_MIN_IMG_RATIO6 0.01

score T_HTML_NUM_IMGS01B 0.01
score T_HTML_NUM_IMGS02B 0.01
score T_HTML_CONSEC_IMGS01B 0.01
score T_HTML_IMAGE_AREA03B 0.01
score T_HTML_IMAGE_AREA04B 0.01
score T_HTML_IMAGE_AREA05B 0.01
score T_HTML_IMAGE_AREA06B 0.01
score T_HTML_50_70_IMGS3 0.01

####################################################################
# Rules 1-5 match exactly that many digits before the "@" (preceded by a
# non-digit); rule 6 matches six digits with no such guard.

header T_FROM_ENDS_IN_NUMS1 From =~ /\D\d\@/
header T_FROM_ENDS_IN_NUMS2 From =~ /\D\d\d\@/
header T_FROM_ENDS_IN_NUMS3 From =~ /\D\d\d\d\@/
header T_FROM_ENDS_IN_NUMS4 From =~ /\D\d\d\d\d\@/
header T_FROM_ENDS_IN_NUMS5 From =~ /\D\d\d\d\d\d\@/
header T_FROM_ENDS_IN_NUMS6 From =~ /\d\d\d\d\d\d\@/

score T_FROM_ENDS_IN_NUMS1 0.01
score T_FROM_ENDS_IN_NUMS2 0.01
score T_FROM_ENDS_IN_NUMS3 0.01
score T_FROM_ENDS_IN_NUMS4 0.01
score T_FROM_ENDS_IN_NUMS5 0.01
score T_FROM_ENDS_IN_NUMS6 0.01

############################################################
# NOTE(review): T_MULTI_FORGED sums the same rules that T_CONFIRMED_FORGED
# ORs together, except FORGED_MX_HOTMAIL -- confirm the omission is intended.

meta T_CONFIRMED_FORGED (FORGED_RCVD_TRAIL && (FORGED_AOL_RCVD || FORGED_HOTMAIL_RCVD || FORGED_EUDORAMAIL_RCVD || FORGED_YAHOO_RCVD || FORGED_JUNO_RCVD || FORGED_GW05_RCVD || FORGED_MX_HOTMAIL))
meta T_MULTI_FORGED     ((FORGED_AOL_RCVD + FORGED_HOTMAIL_RCVD + FORGED_EUDORAMAIL_RCVD + FORGED_YAHOO_RCVD + FORGED_JUNO_RCVD + FORGED_GW05_RCVD) > 1)

###########################################################

body T_HTML_TAG_EXISTS_CENTER eval:html_tag_exists('center')
body T_HTML_TAG_EXISTS_HX     eval:html_test('header_tag')
body T_HTML_BIG_FONT_B        eval:html_test('big_font_B')
body T_HTML_FONT_SIZE_PX      eval:html_test('font_px')
body T_HTML_STYLE_LINE_HEIGHT eval:html_test('line_height')

body T_HTML_SHOUTING1 eval:html_range('max_shouting','0','1')
body T_HTML_SHOUTING2 eval:html_range('max_shouting','1','2')
body T_HTML_SHOUTING3 eval:html_range('max_shouting','2','3')
body T_HTML_SHOUTING4 eval:html_range('max_shouting','3','4')
body T_HTML_SHOUTING5 eval:html_range('max_shouting','4','5')
body T_HTML_SHOUTING6 eval:html_range('max_shouting','5','6')
body T_HTML_SHOUTING7 eval:html_range('max_shouting','6','7')
body T_HTML_SHOUTING8 eval:html_range('max_shouting','7','8')
body T_HTML_SHOUTING9 eval:html_range('max_shouting','8')

score T_HTML_SHOUTING1 0.01
score T_HTML_SHOUTING2 0.01
score T_HTML_SHOUTING3 0.01
score T_HTML_SHOUTING4 0.01
score T_HTML_SHOUTING5 0.01
score T_HTML_SHOUTING6 0.01
score T_HTML_SHOUTING7 0.01
score T_HTML_SHOUTING8 0.01
score T_HTML_SHOUTING9 0.01

##################################################################
# testing the HTML percentage range
# this will be necessary if the HTML heuristics test is changed
# (probably best to go with deciles, replacing HTML_XX_YY rules)
# P1 = five 20-point buckets, P2 = ten 10-point buckets.

body T_HTML_P1_00_20  eval:html_percentage('00','20')
body T_HTML_P1_20_40  eval:html_percentage('20','40')
body T_HTML_P1_40_60  eval:html_percentage('40','60')
body T_HTML_P1_60_80  eval:html_percentage('60','80')
body T_HTML_P1_80_100 eval:html_percentage('80','100')

body T_HTML_P2_00_10  eval:html_percentage('00','10')
body T_HTML_P2_10_20  eval:html_percentage('10','20')
body T_HTML_P2_20_30  eval:html_percentage('20','30')
body T_HTML_P2_30_40  eval:html_percentage('30','40')
body T_HTML_P2_40_50  eval:html_percentage('40','50')
body T_HTML_P2_50_60  eval:html_percentage('50','60')
body T_HTML_P2_60_70  eval:html_percentage('60','70')
body T_HTML_P2_70_80  eval:html_percentage('70','80')
body T_HTML_P2_80_90  eval:html_percentage('80','90')
body T_HTML_P2_90_100 eval:html_percentage('90','100')

##################################################################

body T_HTML_MAIL_ONLY /HTML only email, sorry/i

# catches stuff like "http://www.yahoo.com@0xD5.0xEF.0x8F.0x9D"
uri      T_HEX_HOST /^https?:\/\/[^\/]*0x[0-9a-fA-F]{2}/
describe T_HEX_HOST Looks like an URL with obfuscated hex IP address
score    T_HEX_HOST 0.01

# If we see % followed by 21-7e except 2[2356f], 3[a-f], and 40, they don't
# need escaping, suspicious...
# This is pretty ugly, but I couldn't think of a better way to do it. :|
uri      T_HTTP_ESCAPED_URI_SUSPICIOUS m@%(?:2[147-9a-e]|3[0-9]|4[1-9a-f]|[56][0-9a-f]|7[0-9a-e])@i
describe T_HTTP_ESCAPED_URI_SUSPICIOUS Uses unnecessary %-escapes inside a URL's path
score    T_HTTP_ESCAPED_URI_SUSPICIOUS 0.01

# conservative version of above rule, just look for 0-9, A-Z, a-z
uri      T_EXCESSIVE_URI_ESCAPES_V1 m/%(?:3[0-9]|[46][1-9a-f]|[57][\da])/i
describe T_EXCESSIVE_URI_ESCAPES_V1 Unnecessary %-escapes inside a URL
score    T_EXCESSIVE_URI_ESCAPES_V1 0.01

# somewhat conservative version, look for 0-9, A-Z, a-z plus the most common
# non-letters that are 100% spam and no ham: '|', '-', ';', '(', and ')'
uri      T_EXCESSIVE_URI_ESCAPES_V2 m/%(?:2[89d]|3[0-9b]|[46][1-9a-f]|[57][\dac])/i
describe T_EXCESSIVE_URI_ESCAPES_V2 Unnecessary %-escapes inside a URL
score    T_EXCESSIVE_URI_ESCAPES_V2 0.01

# Most/all of these require that From addresses do not start with numbers.
# let's see how this does... (jm)
header   T_FROM_NUM_AT_WEBMAIL From:addr =~ /^\d\S+\@(?:msn\.com|flashmail\.com|mailexcite\.com|prodigy\.net|yahoo\.\S+|hotmail\.com|eudoramail\.com|aol\.com|excite\.com|email\.com|earthlink\.net|geocities\.com|hknetmail\.com|angelfire\.com)/i
describe T_FROM_NUM_AT_WEBMAIL From address is webmail, but starts with a number
score    T_FROM_NUM_AT_WEBMAIL 0.01

# actually, this looks really good!
header   T_FROM_OFFERS From:addr =~ /offers\S*\@/
describe T_FROM_OFFERS From address is "offers at something"
score    T_FROM_OFFERS 0.01

# These three have had their ' ' (space) replaced with \s+ for slightly
# better results on my corpus.  If it looks good for everyone, these can
# replace the non-T_ version.  Ignore the scores, they're just there to
# avoid confusion.
rawbody T_CLICK_HERE_LINK      /click\s+here.{0,100}<\/a>/is
rawbody T_CLICK_HERE_CAPS_LINK /CLICK\s+HERE.{0,100}<\/[aA]>/s
rawbody T_QUOTED_EMAIL_TEXT    /^>+\s+.{60,72}$/
score   T_CLICK_HERE_CAPS_LINK 0.634
score   T_QUOTED_EMAIL_TEXT    -0.2
score   T_CLICK_HERE_LINK      0.317

# Potentially FP-prone, especially if you talk about optics, but it
# matches a few messages that don't get properly marked otherwise.
# Fires when the "opt..." pattern appears somewhere in the headers but not
# in Subject or Resent-Subject.
header __T_OPT_HEADER_SUBJ    Subject =~ /opt\W?.{2,3}\b/i
header __T_OPT_HEADER_RESSUBJ Resent-Subject =~ /opt\W?.{2,3}\b/i
header __T_OPT_HEADER_ALL     ALL =~ /opt\W?.{2,3}\b/i
meta   T_OPT_HEADER           (__T_OPT_HEADER_ALL && !__T_OPT_HEADER_SUBJ && !__T_OPT_HEADER_RESSUBJ)

body  T_HTML_LINK_CLICK_HERE eval:html_eval('anchor_text', '=~ /click\s+here/i')
body  T_HTML_LINK_MONEY      eval:html_eval('anchor_text', '=~/\s\$/')
body  T_HTML_LINK_UNSUB      eval:html_eval('anchor_text', '=~/unsubscribe/i')
score T_HTML_LINK_CLICK_HERE 0.01
score T_HTML_LINK_MONEY      0.01
score T_HTML_LINK_UNSUB      0.01

# Some spammers try to get multiple negative scores by adding in different
# user-agent looking headers.  TOO_MANY_UA adds up all of our USER_AGENT
# tests and matches if more than 1 hits.  UA_AND_XM actually matches a
# good amount of non-spam at the moment, the above problem can trigger
# this rule as well.
# T_TOO_MANY_UA uses "regexp and operator": all of the tests matching
# regexp /USER_AGENT.*/ will be strung together with "+" between them.
meta T_TOO_MANY_UA ( (USER_AGENT.+ +) > 1)
meta T_UA_AND_XM   ( USER_AGENT && __HAS_X_MAILER )

body     T_EARN_MONEY /\b(?:earn|make|making|made|received?|discounted)\s+(?:up\s+to|as\s+much\s+as|over|at\s+least|a\s+full)?\s*\$\s*[0-9,]{2}/i
describe T_EARN_MONEY Message talks about earning money

# thanks to Jason Staples for this suggestion.  Sendmail and other MTAs
# will query the Ident server on a relay; Squid proxies should not be
# sending mail.
header   T_IDENT_SQUID Received =~ /ident[:=]squid\b/i
describe T_IDENT_SQUID Message was sent by a Squid HTTP proxy
score    T_IDENT_SQUID 1.0

body T_EXCUSE_10  /if you (?:(?:want|wish|care|prefer) not to |do ?n[o']t (?:want|wish|care) to )(?:be contacted again|receive (?:any ?)?(?:more|future|further)\b.{1,10}\b(?:e?-?mail|message|offer|solicitation)s?|be included)/i
body T_EXCUSE_25  /you have been included (?:mistakenly|by mistake)/i
body T_LEAVE_ADDR /leave your address/i