#! /usr/bin/perl

    #	Break a monolithic mail folder into a directory
    #	with one message per file.
        
    #	by John Walker  http://www.fourmilab.ch/
    #	    	   September 2002

    #   Main program.
    #
    #   Usage: perl fragmail.pl mail_folder_file target_directory
    #
    #   The following package globals are shared with the
    #   subroutines below and must keep these names:
    #       $l              current input line
    #       @message        lines of the message being collected
    #       $nlines         number of lines stored in @message
    #       $totalMessages  counter maintained by dispose_of_message

    if ($#ARGV < 1) {
        print("Usage: perl fragmail.pl mail_folder_file target_directory\n");
        exit(2);
    }

    if (!(-d $ARGV[1])) {
        die "Target directory $ARGV[1] does not exist";
    }

    $totalMessages = 0;

    #   Open the mail folder.  A name ending in ".gz" is read
    #   through zcat.  The list form of the pipe open hands the
    #   file name to zcat directly, without a shell, so names
    #   containing spaces or shell metacharacters are safe.  The
    #   three-argument form does the same for plain files.

    $inf = $ARGV[0];
    my $in;
    if ($inf =~ m/\.gz$/) {
        open($in, '-|', 'zcat', $inf) ||
            die "Cannot open gzipped input file $inf: $!";
    } else {
        open($in, '<', $inf) ||
            die "Cannot open input file $inf: $!";
    }

    #   Skip anything which precedes the first "From " line.

    while (defined($l = <$in>)) {
        last if $l =~ m/^From /;
    }

    if (!defined($l)) {
        print("No messages in mail folder!\n");
        exit(0);
    }

    $eof = 0;

    while (!$eof) {

        #   Read next message from mail folder.  At
        #   this point $l contains the first ("From ")
        #   line of the message.
        #
        #   $msize accumulates line lengths (the first line is
        #   measured before trimming, the others after); nothing
        #   in the code visible here reads it.

        $nlines = 0;
        undef @message;
        $msize = length($l);
        trim_end_of_line();             # Operates on the global $l
        $message[$nlines++] = $l;

        #   Read the balance of the message into the
        #   @message array.  Quit when the "From " line
        #   of the next message is encountered or the
        #   end of the folder is encountered.

        while (defined($l = <$in>)) {
            last if $l =~ m/^From /;
            trim_end_of_line();
            $message[$nlines++] = $l;
            $msize += length($l);
        }
        if ($nlines > 0) {
            dispose_of_message();
        }

        #   $l is undefined only when the read above hit end of
        #   file; otherwise it holds the next "From " line.

        $eof = !defined($l);
# if ($bail++ > 20) { $eof = 1; }  # Quick bail-out for testing
    }

    #   Closing a pipe also reaps zcat, so a failed decompression
    #   shows up here as a false return with $? set.

    if (!close($in)) {
        warn "Problem closing input $inf: " .
             ($! ? "$!" : "exit status $?") . "\n";
    }

    #   Compute and display aggregate statistics

    print("Total messages: $totalMessages\n");

#   Dispose of the message in the @message array by writing
#   it, one line per array element, to the next unused
#   "message_N" file in the target directory and printing a
#   one-line progress report naming the file and the sender.
#
#   Takes no arguments; communicates through package globals.
#       Reads:    @message, $nlines, $ARGV[1]
#       Updates:  $totalMessages      incremented exactly once
#                                     per message written
#                 $messageFileNumber  number of the last output
#                                     file created by this sub
#                 $date               last "Date:" header seen
#
#   Dies if the output file cannot be created, written or closed.

sub dispose_of_message {
    my ($from, $afrom);

    #   Parse message header for "interesting" items.  The
    #   header ends at the first empty or all-whitespace line.

    for my $i (0 .. $nlines - 1) {
        my $line = $message[$i];

        last if $line =~ m/^\s*$/;

        if ($line =~ m/^From\s/) {
            #   An envelope line of the form "From - ..." carries
            #   no sender, so it is kept only as a fallback.
            if ($line =~ m/^From\s+-\s+/) {
                $afrom = $line;
            } else {
                $from = $line;
            }
        }
        elsif ((!defined($from)) && ($line =~ m/^From:\s/)) {
            $from = $line;
        }
        elsif ($line =~ m/^Date:\s/) {
            #   NOTE(review): $date is a package global which this
            #   sub sets but never reads; it is retained in case
            #   code outside this sub depends on it -- confirm.
            $date = $line;
        }
    }
    if (!defined($from)) {
        $from = $afrom;
    }

    $totalMessages++;

    #   If a file with the proposed message number already
    #   exists, advance the message number until we find
    #   one which avoids collision.  This allows dumping
    #   messages from multiple folders into the same target
    #   directory with multiple runs of this program.
    #
    #   The file number is tracked separately from
    #   $totalMessages so that skipping over existing files
    #   does not inflate the count of messages processed.

    $messageFileNumber++;
    while (-e "$ARGV[1]/message_$messageFileNumber") {
        $messageFileNumber++;
    }

    my $oname = "$ARGV[1]/message_$messageFileNumber";
    open(my $of, '>', $oname)
        or die "Cannot create $oname: $!";

    for my $i (0 .. $nlines - 1) {
        print {$of} "$message[$i]\n"
            or die "Cannot write to $oname: $!";
    }

    #   Buffered write errors (e.g. disc full) surface at close.
    close($of)
        or die "Cannot close $oname: $!";

    print("$oname: $from\n");
}

#   Trim end of line sequences from line.
#
#   Takes no arguments: removes every trailing carriage return
#   and line feed from the package global $l, in place.  A line
#   with no line terminator is left unchanged.
#
#   The pattern deliberately has no leading "(.*)": "." matches
#   a carriage return, so a greedy prefix would swallow the CR
#   of a CR/LF pair and leave it on the line.

sub trim_end_of_line {
    $l =~ s/[\r\n]+\z//;
}
