#!/usr/bin/perl
#
# uniq-maildirs [-m grepfile] mdir1 mdir2 ...
#
# Walk every maildir named on the command line (recursively, including
# subdirectories) and read each file's header section looking for a
# Message-Id header.  Whenever a Message-Id has already been seen in a
# different file, the later file is listed on stdout in sh command format,
# "rm -f -- 'FILENAME'", so the output can be reviewed and run as a script.
#
# If the -m arg is used, the named file is read first for already-known
# Message-IDs in UNIX "grep" output format, e.g.
# "filename:Message-Id: <blah>".  Files found during the scan that carry
# one of those ids are reported as duplicates of the listed file.
#
# Progress marks and the final summaries are written to stderr.

use strict;
use warnings;

use File::Find;

# Autoflush the currently selected output handle (stdout) so the emitted
# commands are not held back in a buffer while progress goes to stderr.
$| = 1;

# Message-Id (surrounding angle brackets removed) => first file seen with it.
my %file;
my $count = 0;    # ids loaded (during -m parsing) / mails scanned (during walk)
my $dups  = 0;    # number of duplicate files reported

# Run only when executed as a script, so the helper subs can be loaded and
# exercised without triggering a scan.
main(@ARGV) unless caller;

# main(@args): command-line entry point.  Handles the optional
# "-m grepfile" prefix, then scans each remaining argument as a directory
# tree and prints a summary to stderr.
sub main {
    my @args = @_;

    if (@args && $args[0] eq '-m') {
        shift @args;
        my $msgidsfile = shift @args;
        defined $msgidsfile
            or die "usage: uniq-maildirs [-m grepfile] mdir1 mdir2 ...\n";
        load_known_msgids($msgidsfile);
    }

    $count = 0;
    $dups  = 0;
    for my $dir (@args) {
        File::Find::find({ wanted => \&wanted, no_chdir => 1 }, $dir);
    }
    warn "\nscanned $count mails, $dups dups.\n";
    return;
}

# load_known_msgids($path): seed %file from a file of grep-style
# "filename:Message-Id: <id>" lines.  Lines that do not match are skipped.
# Dies if the file cannot be opened.
sub load_known_msgids {
    my ($msgidsfile) = @_;

    %file  = ();
    $count = 0;

    open(my $in, '<', $msgidsfile)
        or die "cannot read msgids from $msgidsfile: $!";
    while (my $line = <$in>) {
        my ($f, $m) = parse_grep_line($line);
        next unless defined $m;
        $file{$m} = $f;
        $count++;
        progress($count, "m");
    }
    close $in;

    warn "\nfound $count message-ids.\n";
    return;
}

# parse_grep_line($line): split one "filename:Message-Id: <id>" line.
# Returns (filename, normalized id), or an empty list if the line does not
# match.  The filename part is matched non-greedily up to the first
# ":Message-ID:" so that filenames which themselves contain ':' (as maildir
# names such as "...:2,S" do) are still parsed correctly.
sub parse_grep_line {
    my ($line) = @_;

    $line =~ /^(.+?):Message-ID:[ \t]*(\S+)/i or return;
    my ($f, $raw) = ($1, $2);
    return ($f, normalize_msgid($raw));
}

# normalize_msgid($id): strip one leading '<' and one trailing '>' so ids
# compare equal whether or not they were written with angle brackets.
sub normalize_msgid {
    my ($id) = @_;

    $id =~ s/^<//;
    $id =~ s/>$//;
    return $id;
}

# extract_message_id($fh): read header lines from $fh up to the first blank
# line and return the normalized value of the first Message-Id header, or
# nothing if the header section has none.  The header name is matched
# case-insensitively.
sub extract_message_id {
    my ($fh) = @_;

    while (my $line = <$fh>) {
        last if $line =~ /^\r?$/;    # blank line (LF or CRLF) ends the headers
        if ($line =~ /^Message-ID:[ \t]*(\S+)/i) {
            my $raw = $1;
            return normalize_msgid($raw);
        }
    }
    return;
}

# shell_quote($s): wrap $s in single quotes for sh, escaping embedded
# single quotes, so the result is always taken as one literal word.
sub shell_quote {
    my ($s) = @_;

    $s =~ s/'/'\\''/g;
    return "'$s'";
}

# comment_safe($s): replace control characters (including newlines) with
# '?' so the text cannot break out of a one-line "#" comment in the
# generated shell script.
sub comment_safe {
    my ($s) = @_;

    $s =~ s/[\x00-\x1f\x7f]/?/g;
    return $s;
}

# wanted(): File::Find callback, invoked once per directory entry with the
# path in $File::Find::name (no_chdir is set, so the path is usable as-is).
# Counts each regular file, records the first file seen for every
# Message-Id, and prints an "rm -f" command for each later file that
# repeats a known id.
sub wanted {
    my $path = $File::Find::name;

    # File::Find also reports directories; only regular files are mails.
    return unless -f $path;

    $count++;

    my $fh;
    unless (open($fh, '<', $path)) {
        warn "cannot read $path: $!";
        return;
    }
    my $m = extract_message_id($fh);
    close $fh;
    return unless defined $m;

    if (exists $file{$m}) {
        # Never report a file as a duplicate of itself (e.g. the same
        # directory given twice, or a -m list covering the scanned tree):
        # running the output would delete the only copy.
        return if $file{$m} eq $path;

        print "\n# DUP: ", comment_safe($path),
              " dup of ", comment_safe($file{$m}), "\n";
        print "rm -f -- ", shell_quote($path), "\n";
        $dups++;
        progress($count, "*");
    }
    else {
        $file{$m} = $path;
        progress($count, ".");
    }
    return;
}

# progress($c, $sym): emit $sym on stderr whenever $c is a multiple of 500,
# and a newline after every 70 such marks, to keep progress lines short.
sub progress {
    my ($c, $sym) = @_;

    if (($c % 500) == 0) {
        print STDERR $sym;
    }
    if (($c % (500 * 70)) == 0) {
        print STDERR "\n";
    }
    return;
}

1;