#!/usr/bin/perl -w

#
# $Id: eliminate-dups.pl,v 1.2 1999/01/06 01:05:59 psamuel Exp $
#
# qmail duplicate eliminator
#
# Add the following to your ~/.qmail file
#
#    | eliminate-dups hashfile
#
# Original by Russ Nelson. This version by Peter Samuel.
#
# This version uses a dbm file instead of a text file. The original
# used flock to ensure the text file was locked. This version does no
# locking, in the belief that Perl's dbm file handling will be fast
# enough. If it isn't, you might end up with an occasional duplicate
# message.
#
# Thanks to Fred Lindberg for user feedback and Stuart Cooper for some
# Perl tips.

###########################################################################

require 5;
use strict;

###########################################################################
#
# If the MD5 module is installed, use it. Otherwise, use a simple
# checksum via unpack(). MD5 is preferred as it gives a better chance
# of generating unique checksums.
#
# "use" is a compile time directive: it is executed as soon as it is
# parsed, regardless of any if/foreach/BEGIN wrapped around it, so a
# conditional "use MD5" still aborts compilation when MD5.pm is missing.
# Instead the module is loaded with "require" inside an eval, which turns
# a failed load into a false value rather than a fatal error. The BEGIN
# block merely makes the attempt happen at compile time, before the rest
# of the script is parsed.

my($checksum);			# 1 = use the simple checksum, 0 = use MD5

BEGIN {
    # eval yields 1 only if require succeeded; undef if it died.
    $checksum = (eval { require MD5; 1 }) ? 0 : 1;
}

###########################################################################
#
# Process arguments. At most one is expected: the name of the user's
# hash file. A missing (or false) argument selects ".maildups".

my $hashname = shift(@ARGV) || ".maildups";

###########################################################################
#
# Read in the headers

# Collect into $headers the header lines that take part in duplicate
# detection, reading up to (and consuming) the blank line that ends the
# header section.
#
#   - "Received:" headers and their continuation lines are always
#     dropped (presumably because they differ between otherwise
#     identical copies of a message).
#   - In strict mode ($loose false) every other header line is kept.
#   - In loose mode only From, Message-ID and Date are kept, together
#     with their continuation lines.

my($loose) = 1;			# loose matching if set.

# Start with an empty string rather than undef, so that a message with no
# retained headers does not cause "uninitialized value" warnings (the
# script runs under -w) when $headers is hashed later on.
my($headers) = "";
my($ignore_continue) = 0;	# set while skipping a Received: header
my($keep_continue) = 0;		# set while keeping a header (loose mode)

while(<>)
{
    last if /^$/;		# blank line: end of headers
    next if $ignore_continue && /^\s/;

    $ignore_continue = 0;

    if (/^received:/i)
    {
	$ignore_continue = 1;
	next;
    }

    if (! $loose)
    {
	# Strict matching: every non-Received line is significant.
	$headers .= $_;
	next;
    }

    # Loose matching from here on.

    if ($keep_continue && /^\s/)
    {
	# Continuation line of a header we decided to keep.
	$headers .= $_;
	next;
    }

    # A new header starts here; keep it only if it is one of interest.
    $keep_continue = (m/^(from|message-id|date):/i) ? 1 : 0;
    $headers .= $_ if $keep_continue;
}

###########################################################################
#
# Calculate the checksum for the headers and the message body
#
# $checksum is true when MD5 is not available; in that case the rest of
# the message is slurped from STDIN and the simple checksum() routine is
# used. Otherwise the retained headers and the remainder of STDIN are fed
# to an MD5 object and its hex digest becomes the hash.

my($hash);

if ($checksum)
{
    # Slurp mode for this block only: "local" restores the input record
    # separator on exit instead of clobbering it for the whole program.
    local $/;
    $hash = checksum($headers, <STDIN>);
}
else
{
    # Direct method call rather than the ambiguous indirect object
    # syntax ("new MD5").
    my($md5) = MD5->new;
    $md5->add($headers);
    $md5->addfile(\*STDIN);
    $hash = $md5->hexdigest;
}

###########################################################################
#
# Look the hash up in the dbm file of recently seen messages and tell
# qmail what to do through the exit status:
#
#    99 - the message is a duplicate; qmail stops processing the
#         .qmail file.
#     0 - the message is new; qmail continues processing the .qmail
#         file.
#
# (Failure to open or close the dbm file is reported through fatal().)

my(%HASH);
my($max_age) = 7 * 24 * 3600;	# time to live - 7 days in seconds

&fatal("could not open dbm file $hashname")
    unless dbmopen(%HASH, $hashname, 0644);

my($seen_before) = defined $HASH{$hash};

unless ($seen_before)
{
    # Message is new, add its checksum to the list of seen messages

    my($stamp) = time;
    $HASH{$hash} = $stamp;

    # Remove old entries. keys() returns the complete key list before
    # the loop body runs, so deleting inside the loop is safe.

    foreach my $key (keys %HASH)
    {
	my($age) = $stamp - $HASH{$key};
	delete $HASH{$key} if $age > $max_age;
    }
}

dbmclose(%HASH) || &fatal("could not close dbm file $hashname");

###########################################################################
#
# Duplicate - stop processing; otherwise success - continue processing.

exit($seen_before ? 99 : 0);

###########################################################################
###########################################################################

sub checksum
{
    # Fallback used when MD5 is not available: concatenate all arguments
    # and return the 32 bit sum of the resulting string's byte values.
    # Different messages can easily share a sum, so this is much weaker
    # than MD5.

    my($data) = join("", @_);
    my($sum) = unpack("%32C*", $data);

    return $sum;
}

sub fatal
{
    # Report a transient error and give up. The message goes to STDERR
    # so that qmail will log the problem; the script then exits with
    # status 111.

    my $reason = shift;

    print STDERR "eliminate_dups: fatal: $reason\n";
    exit(111);
}
