#!/usr/bin/perl
eval 'exec /usr/bin/perl -S $0 ${1+"$@"}'
    if $running_under_some_shell;
##
##  htmlfix -- Fixup HTML markup code
##  Copyright (c) 1997-2000 Ralf S. Engelschall, All Rights Reserved.
##  Copyright (c) 2000 Denis Barbier
##

use strict;
use warnings;

use lib "/usr/lib/wml";
use lib "/usr/lib/wml";

use Getopt::Long 2.13;
use Image::Size;
use IO::File 1.06;

#
#   process command line
#
sub usage {
    print STDERR "Usage: htmlfix [options] [file]\n";
    print STDERR "\n";
    print STDERR "Options:\n";
    print STDERR "  -o, --outputfile=<file>  set output file instead of stdout\n";
    print STDERR "  -F, --fix=<fixes>        select which fix to apply\n";
    print STDERR "  -S, --skip=<fixes>       skip specified fixes\n";
    print STDERR "  -v, --verbose            verbose mode\n\n";
    print STDERR "Fixes are a comma separated list of (default is to process them all)\n";
    print STDERR "  imgalt : add ALT attributes to IMG tags\n";
    print STDERR "  imgsize: add WIDTH/HEIGHT attributes to IMG tags\n";
    print STDERR "  summary: add SUMMARY attribute to TABLE tags\n";
    print STDERR "  center : change proprietary CENTER tag to standard DIV tag\n";
    print STDERR "  space  : fix trailing spaces in tags\n";
    print STDERR "  quotes : add missing quotes for attributes and missing '#' character\n           to color attributes\n";
    print STDERR "  indent : indent paragraphs\n";
    print STDERR "  comment: out-comment tags\n";
    print STDERR "  tagcase: perform tag case-conversion\n";
    exit(1);
}

use vars qw($opt_v);
$opt_v = 0;
my $opt_o = '-';
my $opt_F = 'imgalt,imgsize,summary,center,space,quotes,indent,comment,tagcase';
my $opt_S = '';
my $opt_q;
$Getopt::Long::bundling = 1;
$Getopt::Long::getopt_compat = 0;
if (not Getopt::Long::GetOptions(
    "v|verbose" => \$opt_v,
    "q" => \$opt_q,
    "F|fix=s" => \$opt_F,
    "S|skip=s" => \$opt_S,
    "o|outputfile=s" => \$opt_o)) {
    usage();
}

sub verbose {
    my ($str) = @_;
    if ($opt_v) {
        print STDERR "** HTMLfix:Verbose: $str\n";
    }
}

sub error {
    my ($str) = @_;
    print STDERR "** HTMLfix:Error: $str\n";
    exit(1);
}

sub warning {
    my ($str) = @_;
    if (not $opt_q) {
        print STDERR "** HTMLfix:Warning: $str\n";
    }

    return;
}

#
#   read input file
#
use vars qw( $buffer );

BEGIN { $buffer = ''; }

use WML_Backends;

$buffer = WML_Backends->input(\@ARGV, \&error, \&usage);

if (!defined($buffer)) { die "Egloo Buffer is undef." }

#
#   processing loop
#
use vars qw($bytes);

BEGIN { $bytes = 0; }

#
#   Definitions of fixups
#   Some attention has been paid for efficiency in regular expressions,
#   this is why they appear more complicated than needed.
#

#
#   FIXUP 1: add WIDTH/HEIGHT/ALT attributes to <img>-tags
#
sub ProcessImgTag {
    my ($attr) = @_;
    my ($image, $width, $height, $scale);
    my ($Nwidth, $Nheight, $Pwidth, $Pheight);

    if (   $attr =~ m|SRC\s*=\s*"([^"]*)"|is
            or $attr =~ m|SRC\s*=\s*(\S+)|is    ) {
        $image = $1;

        if (my ($dev,$ino,$mode,$nlink,$uid,$gid,$rdev,$size,
                $atime,$mtime,$ctime,$blksize,$blocks) = stat($image))
        {
            $bytes += $size;

            #   add WIDTH and HEIGHT to speed up display
            $width  = -1;
            $height = -1;
            $scale  =  1;
            if (   $attr =~ m/WIDTH\s*=\s*([0-9%]+|\*)/is
                    or $attr =~ m/WIDTH\s*=\s*"([0-9%]+|\*)"/is) {
                $width = $1;
            }
            if (   $attr =~ m/HEIGHT\s*=\s*([0-9%]+|\*)/is
                    or $attr =~ m/HEIGHT\s*=\s*"([0-9%]+|\*)"/is) {
                $height = $1;
            }
            if (   $attr =~ s/SCALE\s*=\s*([0-9]+)%//is
                    or $attr =~ s/SCALE\s*=\s*"([0-9]+)%"//is) {
                $scale = $1 / 100;
            }
            if (   $attr =~ s/SCALE\s*=\s*([0-9.]+)//is
                    or $attr =~ s/SCALE\s*=\s*"([0-9.]+)"//is) {
                $scale = $1;
            }
            if ($width  eq '*' or $width  == -1 or
                $height eq '*' or $height == -1   ) {
                if (-f $image) {
                    ($Pwidth, $Pheight) = Image::Size::imgsize($image);

                    #    width given, height needs completed
                    if ((not ($width  eq '*' or $width  == -1)) and
                        ($height eq '*' or $height == -1)     ) {
                        if ($width == $Pwidth) {
                            $Nheight = $Pheight;
                        }
                        else {
                            $Nheight = int(($Pheight/$Pwidth)*$width);
                        }
                    }
                    #   height given, width needs completed
                    elsif ((not ($height eq '*' or $height == -1)) and
                        ($width  eq '*' or $width  == -1)     ) {
                        if ($height == $Pheight) {
                            $Nwidth = $Pwidth;
                        }
                        else {
                            $Nwidth = int(($Pwidth/$Pheight)*$height);
                        }
                    }
                    #   both width and height needs completed
                    elsif (($height eq '*' or $height == -1) and
                        ($width  eq '*' or $width  == -1)    ) {
                        $Nwidth  = $Pwidth;
                        $Nheight = $Pheight;
                    }

                    #   optionally scale the dimensions
                    if ($scale != 1) {
                        $Nwidth  = int($Nwidth  * $scale);
                        $Nheight = int($Nheight * $scale);
                    }

                    #   now set the new values
                    if ($width eq '*') {
                        $attr =~ s|(WIDTH\s*=\s*)\S+|$1$Nwidth|is;
                        verbose("substituting width for $image: ``width=$Nwidth''");
                    }
                    elsif ($width == -1) {
                        $attr .= " width=$Nwidth";
                        verbose("adding width for $image: ``width=$Nwidth''");
                    }
                    if ($height eq '*') {
                        $attr =~ s|(HEIGHT\s*=\s*)\S+|$1$Nheight|is;
                        verbose("substituting height for $image: ``height=$Nheight''");
                    }
                    elsif ($height == -1) {
                        $attr .= " height=$Nheight";
                        verbose("adding height for $image: ``height=$Nheight''");
                    }
                }
                else {
                    #   complain
                    verbose("cannot complete size of $image: file not found");
                    #   and make sure the =* placeholder constructs are removed
                    $attr =~ s|WIDTH\s*=\s*\*||is;
                    $attr =~ s|HEIGHT\s*=\s*\*||is;
                }
            }
        }
    }

    return $attr;
}
sub Fixup_imgalt {
    my $bufferN = '';

    if (!defined($buffer)) { die "CLamm oo Buffer is undef." }

    while ($buffer =~ s|^(.*?)(<[iI][mM][gG]\s+)([^>]+?)(/?>)||s) {
        my ($pre, $tag, $attr, $end) = ($1, $2, $3, $4);
        if (    $attr !~ m|ALT\s*=\s*"[^"]*"|is
            and $attr !~ m|ALT\s*=\s*\S+|is) {
            verbose("adding ALT for image");
            $attr .= ' alt=""';
        }
        $bufferN .= $pre . $tag . $attr . $end;
    }
    $buffer = $bufferN . $buffer;

    return;
}
sub Fixup_imgsize {
    my $bufferN = '';
    while ($buffer =~ s|^(.*?)(<[iI][mM][gG]\s+)([^>]+?)(/?>)||s) {
        my ($pre, $tag, $attr, $end) = ($1, $2, $3, $4);
        $bufferN .= $pre . $tag . ProcessImgTag($attr) . $end;
    }
    $buffer = $bufferN . $buffer;

    return;
}

#
#   FIXUP 2: add summary attribute to <table>-tags
#
sub Fixup_summary {
    verbose("adding summary attribute to <table>");

    my $last = 0;
    my $bufferN = '';
    while ($buffer =~ m|\G(.*?)(<[tT][aA][bB][lL][eE])([^>]*?)(/?>)|gs) {
        $last = pos($buffer);
        my ($pre, $begin, $attr, $end) = ($1, $2, $3, $4);

        #   add a SUMMARY="" tag to make HTML lints happy
        if ($attr !~ m|SUMMARY\s*=|i) {
            $attr .= ' summary=""';
        }
        $bufferN .= $pre . $begin . $attr . $end;
    }
    $buffer = $bufferN . substr($buffer, $last);

    return;
}

#
#   FIXUP 3: change <center>..</center> to <div align=center>..</div>
#
sub Fixup_center {
    verbose("replacing <center>..</center> by <div align=center>..</div>");

    $buffer =~ s|<[cC][eE][nN][tT][eE][rR](\s[^>]*)?>|<div align="center"$1>|g;
    $buffer =~ s|</[cC][eE][nN][tT][eE][rR]>|</div>|g;

    return;
}

#
#   FIXUP 4: fix trailing space in tags
#
sub Fixup_space {
    verbose("trailing space in tags");

    #   Only space characters are removed, neither tabs nor newlines
    $buffer =~ s| +>|>|g;
    $buffer =~ s|([^\s])/>|$1 />|g;

    return;
}

#
#   FIXUP 5: add quotations to attribute values and
#            add missing '#' char to color attributes
#
sub Fixup_quotes {
    verbose("add quotes to attributes");

    my $last = 0;
    my $bufferN = '';
    while ($buffer =~ m|\G(.*?)(<[a-zA-Z_][^>]*>)|sg) {
        $last = pos($buffer);
        my ($prolog, $tag) = ($1, $2);
        $tag =~ s@([A-Za-z_-]+=)([^\s\"\'><\[]+)(\s|/?>)@$1"$2"$3@sg;
        $tag =~ s|([A-Za-z_-]+=")([0-9A-Fa-f]{6}"[\s/>])|$1#$2|sg;
        $bufferN .= $prolog.$tag;
    }
    $buffer = $bufferN . substr($buffer, $last);

    return;
}


#
#   FIXUP 6: paragraph indentation
#
sub ProcessIndentContainer {
    my ($attr, $data) = @_;
    my ($num, $size, $pad, $prefix);

    #   determine amount of padding
    $num  = 0;
    $size = 4;
    $attr =~ s/num\s*=\s*"?(\d+)"?/$num = $1, ''/ige;
    $attr =~ s/size\s*=\s*"?(\d+)"?/$size = $1, ''/ige;

    #   pad the data
    if ($num > 0) {
        $pad = ' ' x ($num * $size);
        $data =~ s/^/$pad/mg;
    }
    elsif ($num == 0) {
        ($prefix) = ($data =~ m|^\n*([ \t]+)|s);
        if (length($prefix) > 0) {
            $data =~ s/^$prefix//mg;
        }
    }
    return $data;
}
sub Fixup_indent {
    verbose("paragraph indentation");

    if ($buffer =~ m|<[iI][nN][dD][eE][nN][tT][\s>]|) {
        my $bufferN = '';
        while ($buffer =~ s|^(.*?)<indent([^>]*)>(.*?)</indent>||is) {
            my ($pre, $attr, $data) = ($1, $2, $3);
            $bufferN .= $pre . ProcessIndentContainer($attr, $data);
        }
        $buffer = $bufferN . $buffer;
    }

    return;
}

#
#   FIXUP 7: out-commenting tags
#
sub Fixup_comment {
    verbose("remove commenting tags");
    $buffer =~ s|<[a-zA-Z_][a-zA-Z0-9-]*#.*?>||sg;
    $buffer =~ s|</[a-zA-Z_][a-zA-Z0-9-]*#>||sg;

    return;
}

#
#   FIXUP 8: tag case translation
#
sub doit_upper {
    my ($prefix, $body) = @_;
    $prefix =~ s/^(.+)$/\U$1\E/;
    $body =~ s/(\s+[a-zA-Z][a-zA-Z0-9_-]*)(\s*=\s*[^"\s]+|\s*=\s*"[^"]*"|\/?>|\s)/\U$1\E$2/sg;
    return $prefix.$body;
}

sub doit_lower {
    my ($prefix, $body) = @_;
    $prefix =~ s/^(.+)$/\L$1\E/;
    $body =~ s/(\s+[a-zA-Z][a-zA-Z0-9_-]*)(\s*=\s*[^"\s]+|\s*=\s*"[^"]*"|\/?>|\s)/\L$1\E$2/sg;
    return $prefix.$body;
}
sub ProcessTagConv {
    my ($attr, $data) = @_;
    my ($case);

    #   determine case translation type
    $case = 'upper';
    $attr =~ s/case\s*=\s*"?(upper|lower)"?/$case = lc($1), ''/ige;

    #   and then translate the data
    if ($case eq 'upper') {
        $data =~ s|(<[a-zA-Z][a-zA-Z0-9_-]*\s*/?>)|\U$1\E|sg;
        $data =~ s|(<[a-zA-Z][a-zA-Z0-9_-]*)(\s+.*?/?>)|doit_upper($1,$2)|sge;
        $data =~ s|(<\/[a-zA-Z][a-zA-Z0-9_-]*\s*/?>)|\U$1\E|sg;
    }
    else {
        $data =~ s|(<[a-zA-Z][a-zA-Z0-9_-]*\s*/?>)|\L$1\E|sg;
        $data =~ s|(<[a-zA-Z][a-zA-Z0-9_-]*)(\s+.*?>)|doit_lower($1,$2)|sge;
        $data =~ s|(<\/[a-zA-Z][a-zA-Z0-9_-]*\s*/?>)|\L$1\E|sg;
    }
    return $data;
}
sub Fixup_tagcase {
    verbose("tag case translation");

    if ($buffer =~ m|<[tT][aA][gG][cC][oO][nN][vV][\s>]|) {
        my $bufferN = '';
        while ($buffer =~ s|^(.*?)<tagconv([^>]*)>(.*?)</tagconv>||is) {
            my ($pre, $attr, $data) = ($1, $2, $3);
            $bufferN .= $pre . ProcessTagConv($attr, $data);
        }
        $buffer = $bufferN . $buffer;
    }
}

#
#   process all required fixups
#
foreach my $S (split(',', $opt_S)) {
    $opt_F =~ s/\b$S\b//;
}

foreach my $m (split(',', $opt_F)) {
    my $fixup = 'Fixup_' . $m;
    my $ref = __PACKAGE__->can($fixup);

    if ($ref)
    {
        $ref->();
    }
}

#
#   statistic
#
verbose("Total amount of images: $bytes bytes");

WML_Backends->out($opt_o, \&error, [$buffer]);


exit(0);

##EOF##
