#!/usr/bin/perl -w

# mboxget - get a message from a mailbox or maildir, from mass-check output
#
# usage: mboxget [-noannotate] [mass-check-mbox-or-file-id ...]
#
# example:
#
#   grep SUBJECT_FREQ spam.log | ./mboxget | grep Subject:
#
# <@LICENSE>
# Licensed to the Apache Software Foundation (ASF) under one or more
# contributor license agreements.  See the NOTICE file distributed with
# this work for additional information regarding copyright ownership.
# The ASF licenses this file to you under the Apache License, Version 2.0
# (the "License"); you may not use this file except in compliance with
# the License.  You may obtain a copy of the License at:
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
#

use strict;

# Basename of this script, used as a prefix in some diagnostics.
my $prog = $0;
$prog =~ s@.*/@@;

# Forward declaration so that mywarn can be called without parentheses.
sub mywarn;

use Getopt::Long;

# -noannotate suppresses the X-Mass-Check-Id / X-Mass-Check-Warning output.
my $opt_noannotate;
GetOptions("noannotate" => \$opt_noannotate);
my $annotate = ($opt_noannotate ? 0 : 1);

# Main loop: every input line (from the files left in @ARGV, or STDIN)
# names one message.  A line is either a mass-check log line, from which the
# message id is taken out of the third column, or a bare message id.
while (my $line = <>) {
  $line =~ s/^[^\s:]+://;       # filenames, from "grep foo *"

  if ($line =~ /^[Y\.]\s+-?\d+\s+(\S+)\s+\S+/) {
    # mass-check format
    handle_input($1);
  }
  else {
    next if $line =~ /^#/;      # comment line
    chomp $line;
    handle_input($line);
  }
}
exit;

# handle_input($where)
#
# Print the message identified by $where to STDOUT.
#
# $where is either the path of an existing file, which is printed whole, or
# a string of the form "path.offset", in which case "path" is read as an
# mbox and the message starting at byte "offset" is printed, up to but not
# including the next "From " line.  Paths ending in .gz or .bz2 are read
# through gunzip / bzip2.
#
# The output starts with a "From " line (the message's own one if it has
# one, else a placeholder), followed by an X-Mass-Check-Id header unless
# annotation is disabled, then the message and a blank line.
#
# On open or seek failure a warning is issued via mywarn and nothing else is
# printed for this message.
sub handle_input {
  my $where = shift;

  my ($file, $offset);
  if (-f $where) {
    $file = $where;
  }
  else {
    # split a trailing ".<digits>" off as the mbox byte offset
    ($file, $offset) = ($where =~ m/(.*?)(?:\.(\d+))?$/);
  }

  # An explicit offset, including 0 for the first message, means mbox.
  my $is_mbox = defined $offset;

  # List-form pipe opens and 3-arg open keep shell metacharacters and open
  # mode characters in $file from being interpreted; $file comes straight
  # from log lines.
  my $fh;
  if ($file =~ /\.gz$/) {
    unless (open($fh, '-|', 'gunzip', '-cd', '--', $file)) {
      mywarn "gunzip $file failed: $!";
      return;
    }
  }
  elsif ($file =~ /\.bz2$/) {
    unless (open($fh, '-|', 'bzip2', '-cd', '--', $file)) {
      mywarn "bunzip2 $file failed: $!";
      return;
    }
  }
  else {
    unless (open($fh, '<', $file)) {
      mywarn "open $file failed: $!";
      return;
    }
  }

  # Offset 0 needs no seek; the handle is already positioned there.
  if ($offset) {
    # TODO: steal open-file caching code from old revisions of
    # mass-check-results-to-mbox
    if (!seek($fh, $offset, 0)) {
      mywarn "$prog: seek $offset failed: $!\n";
      close $fh;
      return;
    }
  }

  # read the message into @msg
  my @msg = ();
  while (my $msgline = <$fh>) {
    # only for mboxes: stop at the next message's "From " separator, but
    # never on the first line, which is this message's own separator
    last if $is_mbox && @msg && substr($msgline, 0, 5) eq "From ";
    push(@msg, $msgline);
  }
  close $fh;

  # now chop off the leading headers that may have come from a previous
  # run, or will interfere with insertion of the X-Mass-Check-Id hdr
  my $fromline = "From nobody\@nowhere  Wed Jan  1 00:00:00 2000\n";
  while (scalar @msg > 0 &&
        $msg[0] =~ /^(?:From|X-Mass-Check-Id:) /)
  {
    if ($msg[0] =~ /^From /) { $fromline = $msg[0]; }
    shift @msg;
  }

  # and output
  unshift(@msg, "X-Mass-Check-Id: $where\n") if $annotate;
  print $fromline, @msg, "\n";
}

# mywarn(@message)
#
# Emit @message as a warning on STDERR and, unless annotation is disabled,
# also print it to STDOUT as an X-Mass-Check-Warning line.
sub mywarn {
  warn @_;
  if ($annotate) {
    print "X-Mass-Check-Warning: ".join('', @_)."\n";
  }
}