#!/usr/bin/perl
#
# Replacement for the elm frm(1) program which is increasingly
# disappearing from computers as elm is superceded by other, more
# recent and less useful, programs.
#
# Bruce Murphy, rattus.net software
# November 2003

# pod documentation for this program.

use strict;

# debugging levels.
my ($DEBUG) = 0;		# general debugging output.

# general variables.
my (@tokens);

# We should make rfrm behave just like frm if it's called with that name.
my ($compat) = 0;

print "called as '$0'\n" if $DEBUG;
#@tokens = split /\//, $0;
#if ($tokens[$#tokens] eq "frm") {
if ($0 =~ /(^|\/)frm$/) {
    print "Using compatibility mode.\n" if $DEBUG;
    $compat = 1;
}

# Things we'll be looking for within the headers to create our summary
# listing. The header token in question and the element name to store
# it under in the summary table.
my (%headerelements) = ( "Subject:" => "subject",
			 "From:"    => "sender1",
			 "From "    => "sender2",
			 "Date: "   => "date" );

# Formatting options for the various summary format we have available.
my (%formats) = ( "standard" => { sender => 20, subject => 58 } );


# input file selection routine. For the time being we'll either take
# one or more files from the command-line, or if we aren't given any,
# try to deduce something from the MAIL environment variable.
#
# printheader, firstheader are variables we'll use to print some
# optional information about what file we're currently looking at.
if (@ARGV > 0) {
    # scan each of the items from the command line.
    my ($printheader) = 1 if @ARGV > 1;
    my ($firstheader) = 1;
    my ($mailhandle);

    foreach my $mailfile (@ARGV) {
	undef $mailhandle;

	if ($mailfile eq "-") {
	    # traditional UNIX special case.
	    $mailhandle = "STDIN";
	} elsif (! open ($mailhandle, "$mailfile")) {
	    printf STDERR "Unable to open file %s: %s\n", $mailfile, $!;
	    next;
	}
	if ($printheader) {
	    # hack to leave extra blank dividing lines between files
	    # without causing any extra blank space.
	    if ($firstheader) {
		$firstheader = 0;
	    } else {
		print "\n";
	    }
	    printf "  Mail File: %s\n", $mailfile;
	}
	
	scanfile_mailbox($mailhandle);
    }
} else {
    # no mail files were specified on the command line. Try the
    # environment variable and then give up.
    my $mailhandle;

    if (exists $ENV{'MAIL'} and -r $ENV{'MAIL'}) {
	undef $mailhandle;
	if (open($mailhandle, $ENV{'MAIL'})) {
	    scanfile_mailbox($mailhandle);
	} else {
	    printf "Unable to open %s: %s\n", $ENV{'MAIL'}, $!;
	}
    } else {
	printf STDERR "No mail files specified and \$MAIL not " .
	    "defined or accessible\n";
	exit 1;
    }
    
}

# This is our central subroutine which is responsible for grovelling
# through a mailbox file in some format or other, pulling out the
# appropriate headers and then feeding them to our summary function.
#
# This one recognises the generic Berkeley mailbox format used
# wherever sensible mailreaders are found.
sub scanfile_mailbox {
    my ($file) = @_;

    my ($inheader, $elements, $lastelement);
    my ($token, $data);

    $inheader = 0;
    $elements = { };
    while (<$file>) {
	chomp;

	if (/^From /) {
	    if ($inheader) {
		# we think we're already in a header. Print what we've
		# got and start a new one.
		printsummary($elements);
		$elements = {};
		$lastelement = "";
	    } else {
		$inheader = 1;
	    }

	    # this wouldn't ordinarily make it through to the general
	    # header processing steps in the next stanza.
	    ($data) = $_ =~ /^From (.*)/;
	    $elements->{$headerelements{"From "}} = $data;

	    # we can't skip this line, we probably want it for our
	    # header collection as a backup sender....
	}

	if ($inheader) {
	    if (/^$/) {
		# end of header reached.
		printsummary($elements);
		$elements = { };
		$inheader = 0;
		$lastelement = "";
		next;
	    } elsif (($token, $data) = $_ =~ /^([-a-zA-Z0-9]+[: ])\s(.*)/ ) {
		# start of a normal header line.
		# Let's check to see whether it's one of the headers we're
		# storing for later.
		if (exists $headerelements{$token}) {
		    $lastelement = $headerelements{$token};
		    $elements->{$lastelement} = $data;
		} else {
		    $lastelement = "";
		}
		
	    } elsif (/^\s/) {
		# handle continuations.
		if ($lastelement) {
		    s/^\s+//; # strip whitespace.
		    $elements->{$lastelement} .= $_;
		}
	    }
	}
	
    }
    if ($inheader) {
	printsummary($elements);
	print "Truncated mail file.\n" if $DEBUG;
    }
}

# This function takes a hash which may or may not have the elements
# defined in the headerelements hash above and prints out a summary.
sub printsummary {
    my ($elements) = @_;

    my ($subject, $sender);
    my ($formatstr);

    # build subject element.
    if (exists $elements->{'subject'}) {
	$subject = $elements->{'subject'};
	$subject = cleanline($subject, {whitespace => 1,
				       compresswhite => 1,
				       trimends => 1,
				       stripcontrol => 1,
				       squashhigh => 1 });

	$subject = substr($subject, 0, $formats{'standard'}->{'subject'});
    } else {
	$subject = "";
    }

    # build sender element. It would be nice to have a better-designed
    # fall-through here so that a From: header which doesn't yield
    # anything useful won't stop us looking at the envelope.
    if (exists $elements->{'sender1'}) {
	# grab from the From: header line.
	$sender = getsender($elements->{'sender1'});
#	$sender =~ y/\t/ /;                 # whitespace
#	$sender =~ y/\000-\039//d;          # bad characters.
	$sender = substr($sender, 0, $formats{'standard'}->{'sender'});
    } elsif (exists $elements->{'sender2'}) {
	# last-ditch at reading the envelope. (Bearing in mind that
	# many mail systems throw this away)
	$sender = $elements->{'sender2'};
	($sender) = $sender =~ /(\S+@\S+)/;
	$sender = cleanline($sender, {stripcontrol => 1,
				     squashhigh => 1});
	if (!$sender) {
	    $sender = "(no sender)";
	}
    } else {
	$sender = "(no sender)";
    }
    
    # This should probably be done more globally.
    $formatstr = sprintf ("%%-%ds %%-%ds\n",
			  $formats{'standard'}->{'sender'},
			  $formats{'standard'}->{'subject'});

    printf $formatstr, $sender, $subject;
}

############
# small utility functions that have probably been written many times
# before, but which I'm forced to write again becuase I can't find the
# others right now.

# getsender($fromheader, $fromenv)
#
# extracts a best guess sender from the two froms, with particular
# preference to the real name parts. Understands various real-name
# encoding formats as found off in the wider internet.
sub getsender {
    my ($fromheader, $fromenv) = @_;

    my ($sender);

#    print STDERR "getsender called with '$fromheader'\n" if $DEBUG;
    
    if ($fromheader =~ /<[^>]+@[^>]+>/ ) {
	# Real Name <email@address.com>
	# <email@address.com>
	$sender = $fromheader;
	$sender =~ s/<[^>]+@[^>]+>//;
	$sender =~ y/"//d;
    } elsif ($fromheader =~ /@.*\(.*\)/ or $fromheader =~ /\(.*\).*@/ ) {
	# email@address.com (Real Name)
	# or reversed variants.
	
	($sender) = $fromheader =~ /\((.*)\)/;
    } else {
	$sender = $fromheader;
    }

    $sender = cleanline($sender, {whitespace => 1,
				  compresswhite => 1,
				  trimends => 1,
				  stripcontrol => 1});

    if (! $sender or $sender =~ /^\s+$/) {
	$sender = "(no sender)";
    }
    return $sender;
}

# cleanline($string, $opts)
#
# Our general string cleaning function. We want to be sure that no
# nasty characters manage to get through to the general string
# handling stuff. Options is a hash ref with significant keys and can
# include:
#
# whitespace:     converts all whitespace \r \n \t to spaces.
# compresswhite:  removes duplicate adjacent whitespace
# trimends:       removes leading and trailing whitespace.
# stripcontrol:   strips all control (mostly \000-\039) characters.
# striphigh:      removes all \200 and above characters.
# squashhigh:     replaces all \200 and above chars with '_'.
#
# multiple options are executed in the order specified above.
#
# example cleanline($string, {whitespace => 1, trimends => 1})
sub cleanline {
    my ($instring, $opts) = @_;
    my ($outstring) = $instring;

    if (not defined $opts) {
	warn "cleanline called without options on '$instring'";
	return $outstring;
    }

    if (exists $opts->{'whitespace'}) {
	$outstring =~ y/\r\t\n/   /;
    }
    if (exists $opts->{'compresswhite'}) {
	$outstring =~ y/\r\t\n /\r\t\n /s;
    }
    if (exists $opts->{'trimends'}) {
	$outstring =~ s/^\s+//;
	$outstring =~ s/\s+$//;
    }
    if (exists $opts->{'stripcontrol'}) {
	$outstring =~ y/\000-\037/ /;
    }
    if (exists $opts->{'striphigh'}) {
	$outstring =~ y/\200-\377//d;
    }
    if (exists $opts->{'squashhigh'}) {
	$outstring =~ y/\200-\377/_/;
    }

    return $outstring;
}
