#!/usr/bin/perl
eval 'exec /usr/bin/perl -S $0 ${1+"$@"}'
    if $running_under_some_shell;
##
##  htmlfix -- Fixup HTML markup code
##  Copyright (c) 1997 Ralf S. Engelschall, All Rights Reserved. 
##

require 5.003;

use lib "/var/tmp/perl-root/usr//lib/wml/perl/lib";
use lib "/var/tmp/perl-root/usr//lib/wml/perl/lib/i386-linux/5.00401";
use lib "/var/tmp/perl-root/usr//lib/wml/perl/lib/site_perl";
use lib "/var/tmp/perl-root/usr//lib/wml/perl/lib/site_perl/i386-linux";

use Getopt::Long 2.12;
use Image::Size;
use IO::File 1.06;

#
#   process command line
#
#   Print the command line synopsis to STDERR and terminate
#   the program with exit status 1. This sub never returns.
sub usage {
    print STDERR
        "Usage: htmlfix [options] [file]\n",
        "   where options are\n",
        "   -o file  set output file instead of stdout\n",
        "   -v       verbose mode\n";
    exit(1);
}
#   option defaults: write to stdout, verbose messages off
$opt_o = '-';
$opt_v = 0;

#   Getopt::Long setup: single-letter options may be bundled, and the
#   getopt_compat mode is switched off (presumably so that a leading
#   "+" is not taken as an option starter -- see Getopt::Long docs)
$Getopt::Long::getopt_compat = 0;
$Getopt::Long::bundling      = 1;

#   parse @ARGV; a parsing error prints the synopsis and ends the run
Getopt::Long::GetOptions("v|verbose", "o|outputfile=s")
    or &usage;
#   Emit a progress message on STDERR, but only when the
#   verbose flag $opt_v is set.
#     $str -- message text, without trailing newline
sub verbose {
    #   lexical copy; the package global $str is left alone
    my ($str) = @_;
    print STDERR "** HTMLfix:Verbose: $str\n" if $opt_v;
}
#   Emit a warning message on STDERR unless $opt_q is set.
#   Nothing in this file ever sets $opt_q (there is no matching
#   command line option), so as it stands warnings are always shown.
#     $str -- message text, without trailing newline
sub warning {
    #   lexical copy; the package global $str is left alone
    my ($str) = @_;
    print STDERR "** HTMLfix:Warning: $str\n" if not $opt_q;
}

#
#   read input file
#
#   The input is the file named by the single command line argument,
#   or STDIN when there is no argument or the argument is "-". More
#   than one argument is a usage error. The complete input is slurped
#   into $buffer.
if ($#ARGV > 0) {
    &usage;
}
if ($#ARGV == -1 or $ARGV[0] eq '-') {
    $in = IO::Handle->new;
    $in->fdopen(fileno(STDIN), "r")
        or die "** HTMLfix:Error: cannot read from stdin: $!\n";
}
else {
    #   explicit read mode, so that characters like ">" or "|" in the
    #   file name are taken literally; a failed open aborts the run
    #   instead of silently yielding an empty document
    $in = IO::File->new;
    $in->open($ARGV[0], "r")
        or die "** HTMLfix:Error: cannot open input file $ARGV[0]: $!\n";
}
{
    #   undefined record separator: a single read returns everything
    local ($/) = undef;
    $buffer = <$in>;
}
$in->close;

#   keep $buffer a defined string even if nothing could be read
$buffer = '' if not defined $buffer;

#
#   processing loop
#
#   running total of the on-disk sizes of all referenced images
$bytes = 0;

#
#   ProcessImgTag -- complete the attributes of one <img ...> tag
#
#   Argument: the complete tag text, from "<img" up to the closing ">".
#   Returns:  the tag with  alt=""  appended when it has no ALT
#             attribute, and  width=W height=H  appended when it has
#             neither WIDTH nor HEIGHT and the size of the image file
#             could be determined. A tag without SRC attribute, and
#             text which is no <img ...> tag at all, is returned
#             unchanged.
#   Side effect: the size in bytes of every image file that can be
#             stat()ed is added to the global $bytes.
#
sub ProcessImgTag {
    my ($tag) = @_;
    my ($begin, $attr, $end, $image, $width, $height, $size);

    ($begin, $attr, $end) = ($tag =~ m|^(<IMG\s+)(.+)(\s*>)$|is);

    #   without a match all three parts are undefined and the
    #   concatenation at the end would return an empty string
    return $tag if not defined $attr;

    if (   $attr =~ m|SRC\s*=\s*"([^"]*)"|is
        or $attr =~ m|SRC\s*=\s*(\S+)|is    ) {
        $image = $1;

        #   element 7 of stat() is the size in bytes; it is undefined
        #   when the file cannot be stat()ed
        $size = (stat($image))[7];
        $bytes += $size if defined $size;

        #   add an ALT="" tag to make HTML lints happy
        if (    $attr !~ m|ALT\s*=\s*"[^"]*"|is
            and $attr !~ m|ALT\s*=\s*\S+|is) {
            &verbose("adding ALT for $image");
            $attr .= ' alt=""';
        }

        #   add WIDTH and HEIGHT to speed up display;
        #   -1 marks an attribute which is not present in the tag
        $width  = -1;
        $height = -1;
        if (   $attr =~ m|WIDTH\s*=\s*([0-9%]+)|is
            or $attr =~ m|WIDTH\s*=\s*"([0-9%]+)"|is) {
            $width = $1;
        }
        if (   $attr =~ m|HEIGHT\s*=\s*([0-9%]+)|is
            or $attr =~ m|HEIGHT\s*=\s*"([0-9%]+)"|is) {
            $height = $1;
        }
        if ($width == -1 and $height == -1) {
            if (-f $image) {
                ($width, $height) = Image::Size::imgsize($image);
                #   imgsize() yields undefined dimensions for files it
                #   cannot interpret; never emit empty attribute values
                if (defined $width and defined $height) {
                    $attr .= " width=$width height=$height";
                    &verbose("adding size for $image: ``width=$width height=$height''");
                }
                else {
                    &verbose("cannot add width/height to $image: size not determinable");
                }
            }
            else {
                &verbose("cannot add width/height to $image: file not found");
            }
        }
    }

    return $begin . $attr . $end;
}

#   FIXUP1: add WIDTH/HEIGHT/ALT to img-tags
#
#   Every <img ...> tag is replaced by the result of ProcessImgTag();
#   all other text stays as it is. One global substitution handles the
#   complete document, so no part of it is ever tested for truth: a
#   remainder consisting of just "0" is kept like any other text.
$buffer = '' if not defined $buffer;
$buffer =~ s{(<img\s+[^>]+>)}{&ProcessImgTag("$1")}isge;

#   The next three fixups are plain global substitutions over the
#   whole document; $buffer is aliased to $_ while they run. FIXUP 3
#   has to run before FIXUP 4, which only looks at quoted values.
for ($buffer) {
    #   FIXUP 2: change <center>..</center> to <div align=center>..</div>
    s|<center>(.*?)</center>|<div align=center>$1</div>|isg;

    #   FIXUP 3: change attributes of form ...=123 to ...="123"
    s|([A-Za-z]+=)([+-]?\d+)([\s\n>])|$1"$2"$3|isg;

    #   FIXUP 4: add missing '#' char to color attributes; every
    #   double-quoted value of exactly six hex digits is treated as one
    s|([A-Za-z]+=")([0-9A-Fa-f]{6}"[\s\n>])|$1#$2|sg;
}

#   FIXUP 5: paragraph indentation 
#   Re-indent the contents of one <indent ...>..</indent> container.
#     $attr -- attribute text of the start tag; "num" (default 0) is
#              the number of indentation levels, "size" (default 4)
#              the number of blanks per level
#     $data -- text between the start and the end tag
#   With num > 0 every line is prefixed with num*size blanks. With
#   num = 0 the leading blanks/tabs of the first non-empty line are
#   taken as the common prefix and stripped from each line which
#   starts with it. Returns the resulting text.
sub ProcessIndentContainer {
    my ($attr, $data) = @_;

    #   pick the settings out of the attribute text
    my $levels = 0;
    my $width  = 4;
    $attr =~ s/num\s*=\s*"?(\d+)"?/$levels = $1, ''/ige;
    $attr =~ s/size\s*=\s*"?(\d+)"?/$width = $1, ''/ige;

    #   indent: put the padding in front of every line
    if ($levels > 0) {
        my $padding = ' ' x ($levels * $width);
        $data =~ s/^/$padding/mg;
        return $data;
    }

    return $data unless $levels == 0;

    #   outdent: remove the indentation of the first line everywhere
    my ($lead) = ($data =~ m|^\n*([ \t]+)|s);
    $data =~ s/^$lead//mg if length($lead) > 0;

    return $data;
}
#   Replace every <indent ...>..</indent> container by its re-indented
#   contents; the tags themselves are dropped. One global substitution
#   handles the complete document, so no part of it is ever tested for
#   truth: a remainder consisting of just "0" is kept like other text.
$buffer = '' if not defined $buffer;
$buffer =~ s{<indent(.*?)>(.*?)</indent>}{&ProcessIndentContainer("$1", "$2")}isge;

#   FIXUP 6: out-commenting tags
#   A '#' directly behind the tag name disables a tag: start tags of
#   this form are deleted together with everything up to the next '>',
#   and so are end tags of the form </name#>.
for ($buffer) {
    s|<[a-z_][a-z0-9-]*#.*?>||isg;
    s|</[a-z_][a-z0-9-]*#>||isg;
}

#   FIXUP 7: tag case translation
#   Helper for ProcessTagConv: upper-case one start tag with attributes.
#     $prefix -- "<" plus the tag name
#     $body   -- rest of the tag, from the whitespace behind the name
#                through the closing ">"
#   Returns the tag with its name and its attribute names in upper
#   case; attribute values are not touched.
sub doit_upper {
    #   lexicals, so the package globals $prefix/$body stay untouched
    my ($prefix, $body) = @_;
    $prefix =~ s/^(.+)$/\U$1\E/;
    #   $1 is an attribute name with its leading whitespace, $2 is
    #   whatever follows it: an unquoted value, a quoted value, the
    #   closing ">" or plain whitespace
    $body =~ s/(\s+[a-zA-Z][a-zA-Z0-9_-]*)(\s*=\s*[^"\s]+|\s*=\s*"[^"]*"|>|\s)/\U$1\E$2/sg;
    return $prefix.$body;
}
#   Helper for ProcessTagConv: lower-case one start tag with attributes.
#     $prefix -- "<" plus the tag name
#     $body   -- rest of the tag, from the whitespace behind the name
#                through the closing ">"
#   Returns the tag with its name and its attribute names in lower
#   case; attribute values are not touched.
sub doit_lower {
    #   lexicals, so the package globals $prefix/$body stay untouched
    my ($prefix, $body) = @_;
    $prefix =~ s/^(.+)$/\L$1\E/;
    #   $1 is an attribute name with its leading whitespace, $2 is
    #   whatever follows it: an unquoted value, a quoted value, the
    #   closing ">" or plain whitespace
    $body =~ s/(\s+[a-zA-Z][a-zA-Z0-9_-]*)(\s*=\s*[^"\s]+|\s*=\s*"[^"]*"|>|\s)/\L$1\E$2/sg;
    return $prefix.$body;
}
#   Translate the case of all tags inside one <tagconv ...>..</tagconv>
#   container.
#     $attr -- attribute text of the start tag; case=upper (the
#              default) or case=lower selects the direction
#     $data -- text between the start and the end tag
#   Tag names and attribute names are converted; text outside of tags
#   is not touched. Returns the converted text.
sub ProcessTagConv {
    my ($attr, $data) = @_;

    #   which direction? upper case unless told otherwise
    my $mode = 'upper';
    $attr =~ s/case\s*=\s*"?(upper|lower)"?/$mode = lc($1), ''/ige;
    my $to_upper = ($mode eq 'upper');

    #   start tags without attributes
    $data =~ s/(<[a-zA-Z][a-zA-Z0-9_-]*\s*>)/$to_upper ? uc($1) : lc($1)/sge;

    #   start tags with attributes: the helpers convert the names only
    $data =~ s/(<[a-zA-Z][a-zA-Z0-9_-]*)(\s+.*?>)/$to_upper ? &doit_upper($1,$2) : &doit_lower($1,$2)/sge;

    #   end tags
    $data =~ s/(<\/[a-zA-Z][a-zA-Z0-9_-]*\s*>)/$to_upper ? uc($1) : lc($1)/sge;

    return $data;
}
#   Replace every <tagconv ...>..</tagconv> container by its case
#   translated contents; the tags themselves are dropped. One global
#   substitution handles the complete document, so no part of it is
#   ever tested for truth: a remainder consisting of just "0" is kept
#   like any other text.
$buffer = '' if not defined $buffer;
$buffer =~ s{<tagconv(.*?)>(.*?)</tagconv>}{&ProcessTagConv("$1", "$2")}isge;

&verbose("Total amount of images: $bytes bytes");

#
#   write to output file
#
#   "-" (the default) selects STDOUT, anything else names a file which
#   is opened for writing. Every step is checked, so that a failure
#   ends the run with a message and a non-zero exit status instead of
#   a silent exit(0) without output.
if ($opt_o eq '-') {
    $out = IO::Handle->new;
    $out->fdopen(fileno(STDOUT), "w")
        or die "** HTMLfix:Error: cannot write to stdout: $!\n";
}
else {
    #   the mode is passed separately, so that special characters in
    #   the file name are not interpreted by open
    $out = IO::File->new;
    $out->open($opt_o, "w")
        or die "** HTMLfix:Error: cannot open output file $opt_o: $!\n";
}
$out->print($buffer)
    or die "** HTMLfix:Error: cannot write output: $!\n";
#   buffered write errors only show up when the handle is closed
$out->close
    or die "** HTMLfix:Error: cannot close output: $!\n";

exit(0);

##EOF##
