#!/usr/bin/perl
eval 'exec /usr/bin/perl -S $0 ${1+"$@"}'
    if $running_under_some_shell;

# htmlclean
# Copyright (C) 1998 by ITU

use lib "/usr/lib/wml";
use lib "/usr/lib/wml";

use strict;

sub usage {
  print STDERR <<END;
 usage: $0 file1 [file2 file3 ...]
END
  exit 1;
}

sub print_version {
  my($version) = $HTML::Clean::VERSION;
  print "$0\n\tHTML::Clean::VERSION: $version\n\n";
  exit 0;
}


usage() if ($#ARGV == -1);
usage() if ($#ARGV >=0 && $ARGV[0] eq '-?');


use HTML::Clean;
use Getopt::Long;
my (%opts);

$Getopt::Long::getopt_compat = 1;   # avoid parsing +'s as options (doesn't work!)
&Getopt::Long::config(qw(no_ignore_case no_getopt_compat));
&GetOptions(\%opts, qw(v V t=s 1 2 3 4 5 6 7 8 9));

&print_version if ($opts{'V'});
&usage if ($#ARGV == -1); # we MUST have at least one file

my($verbose) = $opts{'v'};
my $level = 9;
foreach my $i (1, 2, 3, 4, 5, 6, 7, 8, 9) {
  $level = $i if ($opts{$i});
}

main($level, \@ARGV);
exit 0;

sub main {
  my($level, $files) = @_;

  my $h = new HTML::Clean(); # Just a empty holder..
  print_error('initializing...') if (!$h);
  $h->level($level);

  foreach my $f (@$files) {
    my $result = $h->initialize($f);
    print_error($f) if ($result == 0);

    my $d = $h->data();
    my $origlen = length($$d);

    # add options to control these...
    $h->compat();
    $h->strip();

    my $newlen = length($$d);
    my $pct = 0;
    if ($origlen > 0) {
      $pct = (100 * ($origlen - $newlen)) / $origlen;
    }
    printf "%6d %6d %2d%% %s\n", $origlen, $newlen, $pct, $f if ($verbose);

    # Okay, now move the files around..
    rename($f, "$f.bak") || die "Cannot rename '$f': $!\n";
    open(output, ">$f") || die "Cannot overwrite '$f': $!\n";
    print output $$d;
    close(output);
  }
}


sub print_error {
  my($msg) = @_;
  print STDERR <<END;
$0: $msg ($!)
END
exit(1);
}


