#!/usr/bin/perl
#
# split-log-into-buckets [n]
#
# Split a mass-check log, read from standard input, into n bucket files
# named split-1.log .. split-n.log in the current directory.
#
# Every input line (comment lines included) is copied unchanged into one
# bucket chosen uniformly at random, so the buckets end up approximately,
# not exactly, the same size.  n defaults to 10.

use strict;
use warnings;

# Bucket count: first command-line argument, truncated to an integer.
# A missing, zero, or non-numeric argument falls back to the default of 10.
my $numbuckets = 0;
if (defined $ARGV[0]) {
    # Non-numeric text numifies to 0 here; silence the warning because
    # that case is handled by the default below.
    no warnings 'numeric';
    $numbuckets = int($ARGV[0] + 0);
}
$numbuckets ||= 10;

# A negative count would leave no buckets open for the writer loop.
die "split-log-into-buckets: bucket count must be at least 1\n"
    if $numbuckets < 1;

# Map of bucket number (1 .. n) to its open output filehandle.
my %buckets = ();
foreach my $i (1 .. $numbuckets) {
    print "Creating split-$i.log\n";
    open($buckets{$i}, '>', "split-$i.log")
        or die "split-log-into-buckets: cannot create split-$i.log: $!\n";
}

# Read STDIN explicitly: @ARGV still holds the bucket count, so the
# null filehandle <> would try to open that number as an input file.
while (my $line = <STDIN>) {
    my $pick = 1 + int(rand() * $numbuckets);
    print { $buckets{$pick} } $line
        or die "split-log-into-buckets: write to split-$pick.log failed: $!\n";
}

# Buffered write errors only surface at close, so check it.
foreach my $i (1 .. $numbuckets) {
    close $buckets{$i}
        or die "split-log-into-buckets: cannot close split-$i.log: $!\n";
}