#!/usr/bin/perl
eval 'exec /usr/bin/perl -S $0 ${1+"$@"}'
    if $running_under_some_shell;
##
##  htmlstrip -- Strip HTML markup code
##  Copyright (c) 1997 Ralf S. Engelschall, All Rights Reserved. 
##

require 5.003;

use lib "/var/tmp/perl-root/usr//lib/wml/perl/lib";
use lib "/var/tmp/perl-root/usr//lib/wml/perl/lib/i386-linux/5.00401";
use lib "/var/tmp/perl-root/usr//lib/wml/perl/lib/site_perl";
use lib "/var/tmp/perl-root/usr//lib/wml/perl/lib/site_perl/i386-linux";

use Getopt::Long 2.12;
use IO::Handle 1.15;
use IO::File 1.06;

#
#   process command line
#
#   Print the command line synopsis to STDERR and terminate the
#   program with exit status 1.  Never returns.
sub usage {
    my @synopsis = (
        "Usage: htmlstrip [options] [file]\n",
        "   where options are\n",
        "   -o file  set output file instead of stdout\n",
        "   -O level set optimization/crunch level\n",
        "   -v       verbose mode\n",
    );
    foreach my $text (@synopsis) {
        print STDERR $text;
    }
    exit(1);
}
#   option defaults: not verbose, output to stdout ('-'), level 2
($opt_v, $opt_o, $opt_O) = (0, '-', 2);

#   enable option bundling and switch off getopt_compat
$Getopt::Long::bundling      = 1;
$Getopt::Long::getopt_compat = 0;

#   No destinations are passed, so the parsed values end up in the
#   global variables $opt_v, $opt_O and $opt_o.  A parse failure prints
#   the usage text and exits.
unless (Getopt::Long::GetOptions("v|verbose", "O|optimize=i", "o|outputfile=s")) {
    usage();
}

#   Write a diagnostic message to STDERR, but only when verbose mode
#   ($opt_v) is switched on.  The message is printed as given, so the
#   caller has to supply the trailing newline.
sub verbose {
    my $str = $_[0];
    print STDERR "** HTMLstrip:Verbose: $str" if $opt_v;
}

#
#   read input file
#
#   Slurp the complete input into $INPUT.  With no argument, or with
#   the single argument "-", the input is read from stdin; with exactly
#   one other argument it is read from that file.  More than one
#   argument is a usage error.
if ($#ARGV > 0) {
    usage();
}
if ($#ARGV == -1 or $ARGV[0] eq '-') {
    $in = IO::Handle->new;
    $in->fdopen(fileno(STDIN), "r")
        or die "htmlstrip: cannot read from stdin: $!\n";
}
else {
    #   An explicit "r" mode keeps characters such as ">" or "|" in the
    #   file name from being interpreted as an open mode, and the result
    #   is checked so a missing file no longer yields empty output with
    #   exit status 0.
    $in = IO::File->new;
    $in->open($ARGV[0], "r")
        or die "htmlstrip: cannot open input file $ARGV[0]: $!\n";
}
{
    #   undefined record separator: read everything in one go
    local ($/) = undef;
    $INPUT = <$in>;
}
$in->close;

#   a failed read leaves $INPUT undefined; treat that as empty input
$INPUT = '' unless defined $INPUT;

#
#   processing loop
#

#   Protected regions, keyed by tag name.  BEGIN and END are
#   interpolated into case-insensitive patterns by the processing loop
#   to locate a region; the text between them is not passed through
#   StripPlain.  When REMOVE is true the begin and end tags themselves
#   are dropped from the output, otherwise they are copied through.
%TAGS = (
    "nostrip" => {
        BEGIN  => "<nostrip>",
        END    => "</nostrip>",
        REMOVE => 1,
    },
    "pre" => {
        BEGIN  => "<pre>",
        END    => "</pre>",
        REMOVE => 0,
    },
    "xmp" => {
        BEGIN  => "<xmp>",
        END    => "</xmp>",
        REMOVE => 0,
    },
);

#
#   nexttoken -- split the next token off the front of a buffer
#
#   A token is either the text in front of the next "<", or one
#   complete "<...>" tag, or (when neither applies) the whole buffer.
#   A token longer than 80 characters is cut at a space: preferably the
#   last space within its first 80 characters, otherwise the first
#   space after them.  The cut-off part is pushed back onto the
#   remaining buffer.
#
#   Returns the list (token, remaining buffer).  For a non-empty buffer
#   the token is never empty, so a caller looping until the buffer is
#   used up always terminates.
#
sub nexttoken {
    my ($buf) = @_;
    my ($token, $bufN);

    if ($buf =~ m|^([^<]+?)(<.+)$|s) {
        #   plain text followed by markup
        $token = $1;
        $bufN  = $2;
    }
    elsif ($buf =~ m|^(<[^>]+>)(.*)$|s) {
        #   a complete tag at the front
        $token = $1;
        $bufN  = $2;
    }
    else {
        $token = $buf;
        $bufN  = '';
    }

    if (length($token) > 80) {
        my $i = rindex(substr($token, 0, 80), " ");
        #   A cut at index 0 would hand back an empty token and leave
        #   the buffer unchanged (endless loop in the caller); with no
        #   space at all rindex yields -1.  In both cases fall back to
        #   the first space beyond column 80, and leave the token
        #   unbroken when there is none.
        if ($i <= 0) {
            $i = index($token, " ", 80);
        }
        if ($i > 0) {
            $bufN  = substr($token, $i) . $bufN;
            $token = substr($token, 0, $i);
        }
    }
    return ($token, $bufN);
}

#
#   StripPlain -- strip a chunk of ordinary (not preformatted) markup
#
#   Takes the chunk as its only argument and returns the stripped
#   text.  The global $opt_O selects how much is removed; every level
#   includes all lower ones:
#     0  blank out lines whose first non-blank character is "#"
#     1  blank out whitespace-only lines, squeeze doubled newlines
#     2  squeeze runs of blanks, tidy whitespace inside tags
#     3  remove leading whitespace of every line
#     4  remove <!-- ... --> comments
#     5  join everything and re-wrap at token boundaries so that lines
#        stay below 80 characters where a break is possible
#
sub StripPlain {
    my ($buf) = @_;

    #   Level 0
    if ($opt_O >= 0) {
        #$buf =~ s|^\s*#[#!\s\n].*$||mg; 
        $buf =~ s|^\s*#.*$||mg;
    }
    #   Level 1
    if ($opt_O >= 1) {
        $buf =~ s|^\s*$||mg;
        $buf =~ s|\n\n|\n|sg;
    }
    #   Level 2
    if ($opt_O >= 2) {
        $buf =~ s|(\S+)[ \t]{2,}|$1 |mg;
        $buf =~ s|\s+([a-zA-Z]+)\s*=\s*"| $1="|isg;
        $buf =~ s|<\s+([a-zA-Z]+)|<$1|isg;
        #   the pattern has no capture group, so the replacement is
        #   just the closing bracket
        $buf =~ s|\s+>|>|sg;
        $buf =~ s|\s+\n|\n|sg;
    }
    #   Level 3
    if ($opt_O >= 3) {
        $buf =~ s|^\s+||mg;
    }
    #   Level 4
    if ($opt_O >= 4) {
        $buf =~ s|<!--.+?-->||sg;
        $buf =~ s|^\s*$||mg;
        $buf =~ s|\n\n|\n|sg;
    }
    #   Level 5
    if ($opt_O >= 5) {
        $buf =~ s|\n| |sg;
        my $from = $buf;
        my $line = '';
        my $token;
        $buf = '';
        while (length($from) > 0) {
            ($token, $from) = nexttoken($from);
            if ((length($line) + length($token)) < 80)  {
                $line .= $token;
            }
            else {
                $buf .= $line . "\n";
                $line = $token;
            }
        }
        #   flush the line still being assembled; without this the
        #   tail of the chunk would be lost
        $buf .= $line;
        $buf =~ s|^\s+||mg;
        $buf =~ s|\s+$||mg;
    }

    return $buf;
}

#   Strip the contents of a protected region.  Nothing is stripped at
#   present: the text is handed back exactly as it was passed in.
sub StripPreformatted {
    my $preformatted = $_[0];

    #   currently no stripping
    return $preformatted;
}

$OUTPUT = '';

#   Append an already stripped chunk to $OUTPUT.  When $OUTPUT ends in
#   a newline and the chunk begins with one, the chunk's leading
#   newline is dropped first.
my $append_plain = sub {
    my ($chunk) = @_;
    my $newline_on_both_sides =
        (substr($OUTPUT, length($OUTPUT)-1, 1) eq "\n"
         and substr($chunk, 0, 1) eq "\n");
    if ($newline_on_both_sides) {
        $chunk = substr($chunk, 1, length($chunk)-1);
    }
    $OUTPUT .= $chunk;
};

#   Consume $INPUT from the front: plain text goes through StripPlain,
#   the body of each region listed in %TAGS goes through
#   StripPreformatted.
while (1) {
    #   find the begin tag that occurs earliest in the remaining input
    my $input_len = length($INPUT);
    my $first_at  = $input_len;
    my ($head, $open_tag, $tail, $hit);
    foreach my $name (keys(%TAGS)) {
        next unless $INPUT =~ m|^(.*?)($TAGS{$name}->{BEGIN})(.*)$|is;
        next unless length($1) < $first_at;
        ($first_at, $head, $open_tag, $tail, $hit) =
            (length($1), $1, $2, $3, $name);
    }

    #   no begin tag left: the remainder is plain text and we are done
    if ($first_at >= $input_len) {
        my $rest = StripPlain($INPUT);
        $append_plain->($rest);
        last;
    }

    #   plain text in front of the region
    my $stripped = StripPlain($head);
    $append_plain->($stripped);

    #   the region's tags are copied only when REMOVE is not set
    my $keep_tags = !$TAGS{$hit}->{REMOVE};
    $OUTPUT .= $open_tag if $keep_tags;
    $INPUT = $tail;

    #   body of the region up to its end tag; when the end tag is
    #   missing the remaining input is simply processed further by the
    #   next iteration
    if ($INPUT =~ m|^(.*?)($TAGS{$hit}->{END})(.*)$|is) {
        my ($body, $close_tag, $after) = ($1, $2, $3);
        $OUTPUT .= StripPreformatted($body);
        $OUTPUT .= $close_tag if $keep_tags;
        $INPUT = $after;
    }
}

#
#   global stripping
#
#   remove a single newline at the very start of the result
$OUTPUT =~ s|^\n||s;

#
#   write to output file
#
#   Write $OUTPUT to stdout when the output file is "-", otherwise to
#   the file named by $opt_o.  Every step is checked, so a failure
#   ends the program with an error message and a non-zero exit status
#   instead of silently exiting with 0.
if ($opt_o eq '-') {
    $out = IO::Handle->new;
    $out->fdopen(fileno(STDOUT), "w")
        or die "htmlstrip: cannot write to stdout: $!\n";
}
else {
    #   An explicit "w" mode keeps characters in the file name (for
    #   example a leading ">") from being taken as part of the mode.
    $out = IO::File->new;
    $out->open($opt_o, "w")
        or die "htmlstrip: cannot open output file $opt_o: $!\n";
}
$out->print($OUTPUT)
    or die "htmlstrip: cannot write output: $!\n";
#   buffered write errors only show up when the handle is closed
$out->close()
    or die "htmlstrip: cannot close output: $!\n";

exit(0);

##EOF##
