#!/usr/bin/perl -w
# <@LICENSE>
# Licensed to the Apache Software Foundation (ASF) under one or more
# contributor license agreements.  See the NOTICE file distributed with
# this work for additional information regarding copyright ownership.
# The ASF licenses this file to you under the Apache License, Version 2.0
# (the "License"); you may not use this file except in compliance with
# the License.  You may obtain a copy of the License at:
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# </@LICENSE>

use strict;
use warnings;

# Forward declaration so mywarn can be called before its definition.
sub mywarn;

use Getopt::Long qw(:config bundling auto_help);
use Pod::Usage;

# Command-line switches, filled in by GetOptions below.
my ($opt_noannotate, $opt_h, $opt_ids);

GetOptions(
    "noannotate" => \$opt_noannotate,
    "ids"        => \$opt_ids,
    "h|headers"  => \$opt_h,
);

=head1 NAME

mboxget - Extract message(s) from an mbox (or other mail file)

=head1 SYNOPSIS

mboxget [options] path[.offset] ...

 Options:
   -h, --headers    Display only message headers
   --ids            List only the IDs, do not generate an mbox
   --noannotate     Do not add X-Mass-Check-Id header with message path

=head1 DESCRIPTION

B<mboxget> extracts and displays the messages specified on the command
line and on STDIN.  Messages are expected to be listed as
I<mbox-path>.I<offset> or simply I<path>.  Multiple messages can be
specified, in which case each message will be output.

This script is very useful in combination with mass-check logs and
mboxes.  Targets given on STDIN may be in B<mass-check> format, and may
even be preceded by a filename and a colon (like the output of B<grep>
would provide).

STDIN is not read when targets were given on the command line and STDIN
is a terminal, so that interactive use does not wait for input.

Use the B<-h> option to just output headers.

By default an X-Mass-Check-Id header is added to make it easier to find
a message.  Use B<--noannotate> to disable this.

=head1 EXAMPLES

To show spam messages that hit the rule BAYES_99

    grep BAYES_99 masses.log | mboxget

To show the message indicated by "/path/to/my/mbox.1234"

    mboxget /path/to/my/mbox.1234

To find the Subject lines of all spam messages that hit SUBJ_AS_SEEN

    grep SUBJ_AS_SEEN spam.log | mboxget -h | grep ^Subject:

To get the ids or paths of messages that hit SUBJ_AS_SEEN

    grep SUBJ_AS_SEEN spam.log | mboxget --ids

=cut

# Program name without its directory part, used in diagnostics.
my $prog = $0;
$prog =~ s@.*/@@;

my $annotate = ($opt_noannotate ? 0 : 1);

# Targets named on the command line are handled first.
foreach my $target (@ARGV) {
    handle_input($target);
}

# Then targets listed on STDIN.  Read STDIN explicitly: the magic "<>"
# operator would re-open every command-line target as a file of further
# targets.  Skip STDIN only when targets were already given and STDIN is
# a terminal, so an interactive invocation does not block.
unless (@ARGV && -t STDIN) {
    while (my $line = <STDIN>) {
        $line =~ s/^[^\s:]+://;    # filenames, from "grep foo *"
        if ($line =~ /^[Y\.]\s+-?\d+\s+(\S+)\s+\S+/) {    # mass-check format
            handle_input($1);
        }
        else {
            next if $line =~ /^#/;
            chomp $line;
            handle_input($line);
        }
    }
}
exit;

# handle_input($where)
#
# Output the message identified by $where, which is either the path of an
# existing file (the whole file is one message) or "path.offset", where
# offset is the byte position of the message inside the file at path.
#
# With --ids only $where itself is printed.  With -h only the lines up to
# the first empty line are kept.  Unless --noannotate was given, an
# X-Mass-Check-Id header carrying $where is inserted.
#
# Failures to open or seek are reported through mywarn and the target is
# skipped.  Returns nothing useful.
sub handle_input {
    my $where = shift;

    if ($opt_ids) {
        print "$where\n";
        return;
    }

    my ($file, $offset);
    if (-f $where) {
        $file = $where;
    }
    else {
        ($file, $offset) = ($where =~ m/(.*?)(?:\.(\d+))?$/);
    }

    # Nothing sensible can be read from a handle that failed to open.
    my $fh = _open_target($file) or return;

    if ($offset) {
        # TODO: steal open-file caching code from old revisions of
        # mass-check-results-to-mbox
        if (!seek($fh, $offset, 0)) {
            mywarn "$prog: seek $offset failed: $!\n";
            close $fh;
            return;
        }
    }

    # read the message into @msg
    my @msg;
    my $first = 1;
    while (my $line = <$fh>) {
        if ($first) {
            # The first line may itself be the "From " separator.
            $first = 0;
        }
        elsif (defined $offset) {
            # only do this for mboxes: the next separator ends the message
            last if substr($line, 0, 5) eq "From ";
        }
        last if $opt_h && $line =~ /^$/;
        push @msg, $line;
    }
    close $fh;

    # now chop off the leading headers that may have come from a previous
    # run, or will interfere with insertion of the X-Mass-Check-Id hdr
    my $fromline = "From nobody\@nowhere Wed Jan 1 00:00:00 2000\n";
    while (@msg && $msg[0] =~ /^(?:From|X-Mass-Check-Id:) /) {
        $fromline = $msg[0] if $msg[0] =~ /^From /;
        shift @msg;
    }

    # and output
    unshift @msg, "X-Mass-Check-Id: $where\n" if $annotate;
    print $fromline, @msg, "\n";
    return;
}

# _open_target($file)
#
# Open $file for reading, piping it through gunzip or bzip2 when the name
# ends in .gz or .bz2.  The decompressors are started with a list-form
# pipe open so the file name never passes through a shell.
#
# Returns the filehandle, or nothing after reporting the failure through
# mywarn.
sub _open_target {
    my ($file) = @_;

    my ($fh, $ok, $what);
    if ($file =~ /\.gz$/) {
        $what = 'gunzip';
        $ok   = open($fh, '-|', 'gunzip', '-cd', '--', $file);
    }
    elsif ($file =~ /\.bz2$/) {
        $what = 'bunzip2';
        $ok   = open($fh, '-|', 'bzip2', '-cd', '--', $file);
    }
    else {
        $what = 'open';
        $ok   = open($fh, '<', $file);
    }
    return $fh if $ok;

    mywarn "$what $file failed: $!";
    return;
}

# mywarn(@message)
#
# Emit @message with warn and, when annotation is enabled, also print it
# on standard output as an X-Mass-Check-Warning line.
sub mywarn {
    warn @_;
    if ($annotate) {
        print "X-Mass-Check-Warning: " . join('', @_) . "\n";
    }
}