# These are oddities seen in Other People's Spam, i.e. I have no hits in my test corpora

# ---------------------------------------------------------------------------
# Mixed-case HTML tags on non-quoted lines.
#
# Each rawbody rule below counts matching lines (tflags multiple).  The metas
# subtract the "consistent case" count (all-lower or all-upper) from the
# case-insensitive count; a positive remainder means at least one tag was
# written in mixed case (e.g. "<iMg SrC=").
#
# NOTE(review): the definitions of __HAS_IMG_SRC, __HAS_HREF and
# __HAS_IMG_SRC_ONECASE had been fused into one corrupt pattern
# (/^[^>].*?].*?].*?<(?:img src|IMG SRC)=/m).  The case-insensitive patterns
# and the two missing describe lines are reconstructed from the surviving
# fragments and from the metas that reference them -- confirm against
# version control.
# ---------------------------------------------------------------------------
describe   __HAS_IMG_SRC            Has an img tag on a non-quoted line
rawbody    __HAS_IMG_SRC            /^[^>].*?<img src=/mi
tflags     __HAS_IMG_SRC            multiple maxhits=100

describe   __HAS_HREF               Has an anchor tag with a href attribute in non-quoted line
rawbody    __HAS_HREF               /^[^>].*?<a href=/mi
tflags     __HAS_HREF               multiple maxhits=100

describe   __HAS_IMG_SRC_ONECASE    Has an img tag on a non-quoted line with consistent case
rawbody    __HAS_IMG_SRC_ONECASE    /^[^>].*?<(?:img src|IMG SRC)=/m
tflags     __HAS_IMG_SRC_ONECASE    multiple maxhits=100

describe   __HAS_HREF_ONECASE       Has an anchor tag with a href attribute in non-quoted line with consistent case
rawbody    __HAS_HREF_ONECASE       /^[^>].*?<(?:a href|A HREF)=/m
tflags     __HAS_HREF_ONECASE       multiple maxhits=100

describe   __MIXED_IMG_CASE         Has img tags with mixed-up cases in non-quoted lines
meta       __MIXED_IMG_CASE         __HAS_IMG_SRC - __HAS_IMG_SRC_ONECASE > 0

describe   __MIXED_HREF_CASE        Has anchor tags with mixed-up cases in non-quoted lines
meta       __MIXED_HREF_CASE        __HAS_HREF - __HAS_HREF_ONECASE > 0

describe   __MIXED_TAG_CASE         Has multiple mixed-case tags in non-quoted lines.
meta       __MIXED_TAG_CASE         __MIXED_IMG_CASE && __MIXED_HREF_CASE

# Display name of three words that reappear, in order and separated by any
# single character, at the start of the address ("Ann Bee Cee <Ann.Bee.Cee...").
describe   SCC_THREE_WORD_MONTY     Are you POTUS or a mass murderer?
header     SCC_THREE_WORD_MONTY     From =~ /(\w{2,}) (\w{2,}) (\w{2,}) <\1.\2.\3/

# Fingerprint Majordomo lists
# FIXME: the pattern of __SCC_MD_UNSUB and the definition of SCC_MAJORDOMO
# were truncated; only the fragment below survives.  As written the regex is
# unterminated and cannot be compiled, so it is kept commented out until the
# original is restored from version control.
#header    __SCC_MD_UNSUB           List-Unsubscribe =~ /: 1
tflags     SCC_MAJORDOMO            nice

# NOTE(review): the pattern begins with the literal text "X-Mailer: ".  If the
# header test is applied to the header value only, this matches just when the
# value itself repeats the header name -- confirm that this is intended.
describe   SCC_ODD_MUA              Unlikely MUA for a modern human
header     SCC_ODD_MUA              X-Mailer =~ /^X-Mailer: Microsoft Outlook 14.0$/

# A line holding only a GUID-like token whose 3rd and 4th groups are the same
# three hex digits (8-4-3-3-12 rather than the usual 8-4-4-4-12).
describe   SCC_SPECIAL_GUID         Unique in a similar way
rawbody    SCC_SPECIAL_GUID         /^[[:xdigit:]]{8}-[[:xdigit:]]{4}-([[:xdigit:]]{3})-\1-[[:xdigit:]]{12}$/m
tflags     SCC_SPECIAL_GUID         publish multiple maxhits=15

describe   __NO_EXTERNALS           No external relays
header     __NO_EXTERNALS           X-Spam-Relays-External =~ /^$/

describe   ALL_INTERNAL             Has only internal relays
meta       ALL_INTERNAL             __NO_EXTERNALS && !NO_RELAYS
tflags     ALL_INTERNAL             nice

describe   SCC_NEWBIE_HASBEENS      Abused gTLDs seen in spam from Google Apps.
header     SCC_NEWBIE_HASBEENS      X-Beenthere =~ /\.(?:today|online|monster)/

describe   __SCC_HTML_OBJOBJ        Contains an object
rawbody    __SCC_HTML_OBJOBJ        /< *object +\w{65,80} *>/

# X-Mailer-LID fingerprints, from most specific to most generic.
describe   SCC_ISEMM_LID_1          Fingerprint of a particular spammer using an old spamware
header     SCC_ISEMM_LID_1          X-Mailer-LID =~ /54,55,56,58,53/
tflags     SCC_ISEMM_LID_1          publish
score      SCC_ISEMM_LID_1          3.5

describe   SCC_ISEMM_LID_1A         Fingerprint of a particular spammer using an old spamware
header     SCC_ISEMM_LID_1A         X-Mailer-LID =~ /54,55,56,/
tflags     SCC_ISEMM_LID_1A         publish
score      SCC_ISEMM_LID_1A         3.5

describe   SCC_ISEMM_LID_1B         Genericized spammer fingerprint
header     SCC_ISEMM_LID_1B         X-Mailer-LID =~ /(?:[56][0-9],)+/
tflags     SCC_ISEMM_LID_1B         publish
score      SCC_ISEMM_LID_1B         1.5

describe   SCC_SPAMMER_ADDR_1       Fingerprint of a particular spammer
body       SCC_SPAMMER_ADDR_1       /34 N Franklin/

describe   SCC_SPAMMER_ADDR_2       Fingerprint of a particular spammer
body       SCC_SPAMMER_ADDR_2       /6130 W Flamingo Rd/

describe   SCC_CANSPAM_1            Interesting compliance language
body       SCC_CANSPAM_1            /The advertiser does not manage your subscription/

describe   SCC_CANSPAM_2            Interesting compliance language
body       SCC_CANSPAM_2            /you may unsubscribe by clicking here or by writing to/

# 1-3 are antique and stale. This sha256.sha256@*.co pattern is fresh in 2022
describe   SCC_SPAMMERMID4          Distinctive Message-Id
header     SCC_SPAMMERMID4          Message-ID =~ /<[-_a-zA-Z0-9.]{43}\.[-_a-zA-Z0-9.]{43}\@[-a-z0-9]{4,30}\.co>/
score      SCC_SPAMMERMID4          1

describe   __SCC_ESP_RESY           From "Resy" which is known to emit spam
body       __SCC_ESP_RESY           /This marketing message was sent to you by Resy/

# TESTING a few domains that are in our "suspicious" list but are known to have some good senders
describe   T_SCC_TLD_ONLINE         From domain in *.online
header     T_SCC_TLD_ONLINE         From =~ /<[^ @]+@[^ @>]+\.online>/

describe   T_SCC_TLD_XYZ            From domain in *.xyz
header     T_SCC_TLD_XYZ            From =~ /<[^ @]+@[^ @>]+\.xyz>/

# The dots in the host names are escaped so that they match a literal "."
# only (previously "notion.site" also matched e.g. "notionXsite").
describe   T_SCC_URL_NOTIONSITE     has a notion.site URL
uri        T_SCC_URL_NOTIONSITE     /https?:\/\/[^ ]*\.notion\.site/
tflags     T_SCC_URL_NOTIONSITE     multiple maxhits=5

describe   T_SCC_URL_CLOUDINARY     Has Cloudinary backend(?) URL
uri        T_SCC_URL_CLOUDINARY     /https?:\/\/res\.cloudinary\.com/
tflags     T_SCC_URL_CLOUDINARY     multiple maxhits=5

#Very bad Feb 2024. We'll see if it lasts
describe   T_SCC_FROM_TLD_BEST      From domain in *.best
header     T_SCC_FROM_TLD_BEST      From =~ /<[^ @]+@[^ @>]+\.best>/

# Anchored to the host part: ".best" must be the final label, followed by a
# port, path, query, fragment or the end of the URI.  The earlier unanchored
# pattern also hit hosts such as www.bestbuy.com and any path containing
# ".best".
describe   T_SCC_URL_BEST           has a .best URL
uri        T_SCC_URL_BEST           /^https?:\/\/[^\/?#\s]+\.best(?:[:\/?#]|$)/i
tflags     T_SCC_URL_BEST           multiple maxhits=5

# Lots of spam getting through at GMail with these...
# Patterns 1 and 2 spell the accented characters as their UTF-8 byte sequences.
describe   __SCC_HASHBUST_1         Innocent text seen a lot in 'hash-buster' text.
rawbody    __SCC_HASHBUST_1         /FACULT\x{C3}\x{89} D\x{E2}\x{80}\x{99}\x{C3}\x{89}DUCATION pour l\x{E2}\x{80}\x{99}ann\x{C3}\x{A9}e universitaire 2019 \/ 2020.$/im

describe   __SCC_HASHBUST_2         Innocent text seen a lot in 'hash-buster' text.
rawbody    __SCC_HASHBUST_2         /AUTORISATION D\x{E2}\x{80}\x{99}INSCRIPTION$/im

describe   __SCC_HASHBUST_3         Innocent text seen a lot in 'hash-buster' text.
rawbody    __SCC_HASHBUST_3         /Find out more about the application process, researching your destination/i

describe   __SCC_HASHBUST_4         Innocent text seen a lot in 'hash-buster' text.
rawbody    __SCC_HASHBUST_4         /Subscription Expired Today/i

describe   __SCC_HASHBUST_5         Pattern seen a lot in 'hash-buster' text.
rawbody    __SCC_HASHBUST_5         /(.(([a-z]{5,8}) ([a-z])){3,}\3\n){1,}/m

describe   __SCC_HASHBUST_6         Pattern seen a lot in 'hash-buster' text.
rawbody    __SCC_HASHBUST_6         /----[a-z0-9]{4,6};[a-z0-9]{4,6};[a-z0-9]{4,6}/
tflags     __SCC_HASHBUST_6         multiple maxhits=20

# Fires when __SCC_HASHBUST_6 hit five or more times.
describe   SCC_5FAKE_BOUNDARYS      More than 4 fake boundary-like strings
meta       SCC_5FAKE_BOUNDARYS      __SCC_HASHBUST_6 > 4

# SCC_HB_ANY fires on any of hash-busters 1-5; SCC_HB2..SCC_HB5 fire when
# exactly that many distinct ones hit (__SCC_HASHBUST_6 is not counted here).
describe   SCC_HB_ANY               At least 1 hash-buster
meta       SCC_HB_ANY               __SCC_HASHBUST_1 || __SCC_HASHBUST_2 || __SCC_HASHBUST_3 || __SCC_HASHBUST_4 || __SCC_HASHBUST_5

describe   SCC_HB2                  2 different hash-busters
meta       SCC_HB2                  ( __SCC_HASHBUST_1 + __SCC_HASHBUST_2 + __SCC_HASHBUST_3 + __SCC_HASHBUST_4 + __SCC_HASHBUST_5 ) == 2

describe   SCC_HB3                  3 different hash-busters
meta       SCC_HB3                  ( __SCC_HASHBUST_1 + __SCC_HASHBUST_2 + __SCC_HASHBUST_3 + __SCC_HASHBUST_4 + __SCC_HASHBUST_5 ) == 3

describe   SCC_HB4                  4 different hash-busters
meta       SCC_HB4                  ( __SCC_HASHBUST_1 + __SCC_HASHBUST_2 + __SCC_HASHBUST_3 + __SCC_HASHBUST_4 + __SCC_HASHBUST_5 ) == 4

describe   SCC_HB5                  5 different hash-busters
meta       SCC_HB5                  ( __SCC_HASHBUST_1 + __SCC_HASHBUST_2 + __SCC_HASHBUST_3 + __SCC_HASHBUST_4 + __SCC_HASHBUST_5 ) == 5

# MIME part header checks; only defined when the MIMEHeader plugin is loaded.
ifplugin Mail::SpamAssassin::Plugin::MIMEHeader
  mimeheader __SCC_BOGUS_CTE_1      Content-Transfer-Encoding =~ /^Hexa/i
  meta       SCC_BOGUS_CTE_1        __SCC_BOGUS_CTE_1
  describe   SCC_BOGUS_CTE_1        Bogus Content-Transfer-Encoding header
  tflags     SCC_BOGUS_CTE_1        publish
endif

ifplugin Mail::SpamAssassin::Plugin::MIMEHeader
  mimeheader __SCC_CTMPP            Content-Type =~ /multipart\/parallel/
  describe   SCC_CTMPP              Uncommon Content-Type
  meta       SCC_CTMPP              __SCC_CTMPP
  tflags     SCC_CTMPP              publish
endif

# maxhits=7 caps the count; the meta needs six or more matching lines.
describe   __SCC_MIME_BOUNDARY2     Idiosyncratic boundary-like string
rawbody    __SCC_MIME_BOUNDARY2     /^ *(?:----[[:alnum:]]{8};[[:alnum:]]{6}){2}$/m
tflags     __SCC_MIME_BOUNDARY2     multiple maxhits=7

describe   SCC_MIME_BOUNDARY2       Fake boundary-like string >5 times
meta       SCC_MIME_BOUNDARY2       __SCC_MIME_BOUNDARY2 > 5

# A "/" immediately before a src/href attribute name inside a tag.
describe   HTML_BADATTR             Illegal char in HTML attribute name
rawbody    HTML_BADATTR             /<[a-z]{1,10}\s[^>]{1,80}\/(?:src|href)\s*\=/
score      HTML_BADATTR             1
tflags     HTML_BADATTR             publish

describe   T_LINKED_SCRAPE          Claims to have scraped LinkedIN
body       T_LINKED_SCRAPE          /I came across your profile on LinkedIn/
tflags     T_LINKED_SCRAPE          publish