#!/usr/bin/perl -w -i.bak # # uniq-mailbox mbox [...] # # removes duplicate mails found in any of the mailboxes, rewriting # them as it goes. backups saved as mbox.bak if any modifications # are made. # # support for .tar and .gz mboxes: none. unpack them first ;) ########################################################################### use English; my $lastargv = ''; my $found_dups = 0; my $num_mails = 0; my $skipmail = 0; my %seen = (); while (<>) { if ($lastargv ne $ARGV) { report(); $found_dups = 0; $num_mails = 0; $lastargv = $ARGV; } if (/^From /) { $in_header = 1; $skipmail = 0; @lines = (); } if ($skipmail) { next; } if ($in_header) { push (@lines, $_); if (/^Message-[iI][dD]: (.*)$/) { if (exists $seen{$1}) { $found_dups++; @lines = (); $skipmail = 1; } $seen{$1} = 1; $num_mails++; } if (/^$/) { if (!$skipmail) { print @lines; @lines = (); } $in_header = 0; } } else { print; } } report(); exit; sub report { return unless ($lastargv =~ /\S/); if ($found_dups) { warn "found $found_dups duplicate mails: $lastargv ($num_mails messages)\n"; } else { warn "no duplicate mails: $lastargv ($num_mails messages)\n"; unlink ("$lastargv.bak"); # not needed } }