#!/usr/bin/env perl
## refdbrtf - combines a source RTF document and a bibliography file
## citations must be written as [[cit-Z]], multi-head citations as
## [[cit1-Z][cit2-Z]...], where cit, cit1, cit2 are citation keys from
## a RefDB database, and -Z is one of -X, -Q, -A, -S, and -Y, denoting
## the type of the citation
## usage: refdbrtf -b bibfile.rtf < source.rtf > combined.rtf

## markus@mhoenicka.de 2008-02-01

##   This program is free software; you can redistribute it and/or modify
##   it under the terms of the GNU General Public License as published by
##   the Free Software Foundation; either version 2 of the License, or
##   (at your option) any later version.
##   
##   This program is distributed in the hope that it will be useful,
##   but WITHOUT ANY WARRANTY; without even the implied warranty of
##   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
##   GNU General Public License for more details.
   
##   You should have received a copy of the GNU General Public License
##   along with this program; if not, see <http://www.gnu.org/licenses/>

use warnings;

## use this module to read command line options
use Getopt::Std;

## path of the RTF bibliography file, taken from the -b option
my $bibfile;

## closing brace held back from the previous input line; it is emitted
## before the next line is processed
my $terminator = "";

## receives the parsed command line switches
my %opts;

## the switches are:
## -b bibfile: RTF bibliography to merge into the document (required)
## -h: prints help
## getopts() returns false on unknown switches or a missing argument;
## stop instead of processing the document with a broken command line
unless (getopts('b:h', \%opts)) {
    print STDERR ("usage: refdbrtf -b bibfile.rtf < source.rtf > combined.rtf\n");
    exit(1);
}

## -h wins over everything else: print the help text and leave
if (exists($opts{"h"})) {
    ## the example citations carry the -Z type suffix which the citation
    ## parser requires
    print "refdbrtf reads a RTF document from stdin and scans it for RefDB citations (such as \"[[Miller1999-X]]\" or \"[[Miller1999-X][Doe2001-X]]\"). These citations are replaced with crosslinks to appropriate entries in a bibliographic listing, which must be provided in RTF format using the -b option. The bibliography will be inserted in the document, and the combined document is written to stdout.\n";
    exit(0);
}

## stays undefined if -b was not given
$bibfile = $opts{"b"};

if (!defined($bibfile)) {
    print STDERR ("error: no bibliography file provided\n");
    exit(1);
}

## read the bib file and create a hash that keeps a display text handy
## for each linkend
## three-argument open: characters in the file name are never taken as
## an open mode
open(my $bib_in, '<', $bibfile)
    or die "error: could not open bibliography file '$bibfile': $!\n";

## maps a bookmark name (linkend) to the in-text citation string
my %displaytexts;

while (my $line = <$bib_in>) {
    chomp($line);

    ## all in-text citation strings are hidden paragraphs. Extract a hash
    ## which associates the display texts with their linkends. Display
    ## texts may be undefined, e.g. in references w/o authors
    next unless $line =~ /^\\v/;

    ## the bookmark name ends at the closing brace, so the key part must
    ## not run past it. Otherwise a dash in the display text would be
    ## mistaken for the dash of a "key-Z" name and IMn entries were lost.
    ## The leading .* selects the last \bkmkstart on the line
    next unless $line =~ /.*\\bkmkstart ([^-\}]+-.|IM\d+)/;
    my $linkend = $1;

    ## the display text sits between the bookmark start and end groups.
    ## Lines without that structure get no entry, which the lookup code
    ## treats like an undefined display text
    next unless $line =~ /.*\\bkmkstart[^\}]+\}(.*)\{\\\*\\bkmkend/;
    $displaytexts{$linkend} = $1;
    #print STDERR ("displaytext of linkend $linkend is $1\n");
}

close($bib_in);

## brief linkends ("ID" + citation key, type suffix removed) of all single
## references seen so far. Used to tell first from subsequent citations.
## Multi-head citations do not add entries
my %refs;

## counter for multiple references. RefDB uses a 1-based index
my $multicounter = 1;

## returns the RTF field which renders $text as a hyperlink to the
## bookmark named $target
sub rtf_hyperlink {
    my ($target, $text) = @_;

    return "\{\\field\{\\\*\\fldinst\{ HYPERLINK \\\\l \"$target\" \}\}\{\\fldrslt\{\\cf3\\ul $text\}\}\}";
}

## read data from stdin
while (<>) {
    # retain terminating }, if any, and emit it before the next loop
    # to allow inserting bib in front of the } which terminates the
    # document. The last bracket is discarded as the bib
    # file adds one anyway
    print $terminator;
    if (s/\}$//) {
        $terminator = "}";
    }
    else {
        $terminator = "";
    }

    ## split each line into tokens separated by citations. As the regular
    ## expression used in split() contains parentheses, the citations
    ## are returned too as members of the result array
    my @citations = split(/(\[\[.+?\]\])/);

    ## loop over all tokens and use only the citations for further processing
    foreach my $citation (@citations) {
        ## pass through any text outside of a citation
        if ($citation !~ /\[\[.+\]\]/) {
            print $citation;
            next;
        }

        ## remove outer pair of square brackets
        $citation =~ s/^\[(.+)\]$/$1/;
        #print STDERR "citation went to >>" . $citation . "<<\n";

        ## split into individual references, each still carrying its
        ## trailing square bracket
        my @xrefs = split(/\[/, $citation);

        ## remove leading empty item
        shift(@xrefs);

        if (scalar @xrefs > 1) {
            ## multi-head reference. The display text is stored in the
            ## bib file under the bookmark name IMn, with n counting the
            ## multi-head citations in document order
            my $im = $multicounter++;
            my $displaytext = $displaytexts{"IM$im"};
            if (!defined($displaytext)) {
                $displaytext = "";
            }
            #print STDERR ("displaytext of linkend IM$im is $displaytext\n");

            ## the link points at the first reference of the citation:
            ## remove the trailing square bracket and prepend "ID"
            ## NOTE(review): this target keeps the -Z suffix (IDkey-Z)
            ## while single references link to IDkey-b - confirm that
            ## the bookmark exists in the combined document
            my $linkend = $xrefs[0];
            $linkend =~ s/(.+)\]$/ID$1/;

            print rtf_hyperlink($linkend, $displaytext);
        }
        else {
            ## single reference
            ## isolate the linkend and find the matching display text
            my $xref = $xrefs[0];
            my $linkend = $xref;
            $linkend =~ s/(.+)\]$/ID$1/;

            ## check for first or subsequent reference by removing the
            ## -Z specifier. All suffix patterns are anchored at the end
            ## so that a "-X" or similar inside a key is left alone
            my $brief = $linkend;
            $brief =~ s/(.+)-.$/$1/;

            if (exists($refs{$brief})) {
                ## fix any "out of order" references. Should be subsequent
                $linkend =~ s/(.+-)X$/${1}S/;
                $linkend =~ s/(.+-)A$/${1}Q/;
            }
            else {
                ## fix any "out of order" references. Should be first
                ## add the brief linkend to the hash
                $refs{$brief} = 1;
                $linkend =~ s/(.+-)Q$/${1}A/;
                $linkend =~ s/(.+-)S$/${1}X/;
            }

            my $displaytext = $displaytexts{$linkend};
            if (!defined($displaytext)) {
                $displaytext = "";
            }
            #print STDERR ("displaytext of linkend $linkend is $displaytext\n");

            ## strip the -Z specifier and the trailing square bracket and
            ## link to the bookmark IDkey-b. A reference without -Z
            ## specifier is passed through unchanged
            if ($xref =~ /^(.+)-.\]$/) {
                print rtf_hyperlink("ID$1-b", $displaytext);
            }
            else {
                print $xref;
            }
        }
    }
}

# read and insert bibliography file
# open the file again to get back to the start. The three-argument form
# keeps characters in the file name from being taken as an open mode
open(my $bib_out, '<', $bibfile)
    or die "error: could not open bibliography file '$bibfile': $!\n";

# number of bibliography lines read so far
my $counter = 0;

while (my $line = <$bib_out>) {
    ## skip RTF header (first two lines in refdbib output)
    if ($counter++ > 1) {
        ## skip the hidden entries, they only carry the display texts
        print $line unless $line =~ /^\\v /;
    }
}

close($bib_out);

## the end
exit 0;


