#!/usr/bin/perl -w
#
# BW whois
#
# Copyright (c) 1999-2000 William E. Weinman
# http://bw.org/whois/
#
# Designed to work with the new-mangled whois system introduced 1 Dec 1999.
#
# Under the new domain-name regime the whois system is now distributed 
# amongst the various domain-police^H^H^H^H^H^H^H^H^H^H registrars, thereby 
# requiring that we make at least two separate requests (to two separate 
# servers) for each whois record. 
#
# This program will first go to the "root" whois server and ask for a record. 
# If found, the root server will tell us where to go get the actual record, and 
# then we go get it. 
#
# This program is free software. You may modify and distribute it under 
# the same terms as perl itself. 
#
# See HISTORY file. 
# Documentation in (man format) whois.1 and (plaintext format) whois.txt
#

use strict;
use IO::Socket;
use Getopt::Long;

my $VERSION = "2.7";

# the location (full path) of your html file for CGI mode
my $htmlfile = "/var/web/bearnet/luna/whois.html";
my $tld_conf = "/etc/tld.conf";

### no need to modify anything below here ### 

my $cgi = $ENV{SCRIPT_NAME} || '';

use constant TRUE      => 1;
use constant FALSE     => '';

use subs qw{ _print error };

my $host = '';
my $quiet = '';
my $help = '';
my $html = '';
my $jpokay = '';
my $_version = '';
my $stripheader = '';
my $makehtml = '';

my $version = $VERSION;
my $_c = $cgi ? '&copy;' : 'Copyright';
my $copyright = "$_c 1999-2000 William E. Weinman";
my $progname = $cgi ? '<a href="http://bw.org/whois/">BW whois</a>' : 'BW whois' ;
my $byline = $cgi ? '<a href="http://bw.org/">Bill Weinman</a>' : 'Bill Weinman <http://bw.org/>';
my $banner = $cgi ? "$progname $version by $byline\n$copyright\n\n" : "$progname $version by $byline\n$copyright\n";

my $RWHOIS_PORT = 4321;
my $WHOIS_PORT = 43;
my $ctype_sent = FALSE;

my $gtlds        = '(com|net|org)';
my $internic     = 'whois.internic.net';
my $default_host = $internic;               # starting point
my $netblk_host  = 'whois.arin.net';        # default host for netblocks
my $portname = FALSE;
my $protoname = 'tcp';
my $link_host = '';

# the text to test against for the end of a header with -s
my $headerstop = q{you agree to abide};

my $outstr = '';
my $q = '';
my $env = $ENV{BW_WHOIS};

++ $|;

if($env) {
  $env =~ /stripheader/ and $stripheader = TRUE;
  $env =~ /quiet/ and $quiet = TRUE;
  $env =~ /jpokay/ and $jpokay = TRUE;
  $env =~ /tld(:|=)([^\s,]*)/ and $tld_conf = $2;
  }

if($cgi) { 
  $q = getquery();
  do_cgi();
  exit 0;
  }

else {
  GetOptions(
   "host=s" => \$host, 
   "h=s" => \$host, 
   "port=s" => \$portname, 
   "tld=s" => \$tld_conf,
   "stripheader!" => \$stripheader, 
   "makehtml!" => \$makehtml, 
   "quiet!" => \$quiet,
   "html!" => \$html,
   "help!" => \$help,
   "jpokay!" => \$jpokay,
   "version!" => \$_version
   ) or usage();

  do_commandline(@ARGV);
  exit 0;
  }

sub do_cgi
{
my $domain = $q->{domain} || '';
my $h = '';
my $_ct = 'text/html';

++$stripheader if $q->{stripheader};
++$quiet if $q->{quiet};
++$jpokay if $q->{jpokay};

if($domain) { whois($domain) };

$outstr = $banner .= $outstr; 

if($htmlfile and -f $htmlfile) {
  open(HF, "<$htmlfile") or error "cannot open $htmlfile: $!\n";
  while(<HF>) { $h .= $_ }
  close HF;
  }
else { $h = defaulthtml(); }

$h =~ s/\$SELF\$/$cgi/gs;
$h =~ s/\$DOMAIN\$/$domain/gs;
$h =~ s/\$RESULT\$/$outstr/gs;

ctype($_ct);
print $h;
}

sub do_commandline
{
usage() if $help;
version() if $_version;

if($makehtml) { 
  print defaulthtml();
  exit;
  }

usage() unless @_;

# signon
_print $banner unless $quiet;

while(my $domain = shift) { whois($domain) }
}

sub ctype
{
my $ct = shift;
my $nl = "\x0d\x0a";
print "Content-type: $ct$nl$nl" unless $ctype_sent;
$ctype_sent = TRUE;
}

sub whois
{
my $domain = shift;
my $tld = '';
my $r_host = $host;
my $r_port = $portname;
my $netblock = FALSE;
my $r_default_host = $default_host;

$r_port = ($r_host =~ /rwhois/) ? $RWHOIS_PORT : $WHOIS_PORT unless $r_port;

_print "Request: $domain\n";

# support for the <request>@<domain> syntax ...
unless ($r_host) { ($domain, $r_host) = split /\@/, $domain; }
if($r_host) { 
  $r_host =~ /:(.*)$/ and $r_port = $1;
  }

# is it a netnum or NETBLK? try ARIN first
if($domain =~ /^(\d{1,3}\.?){1,4}$/ or $domain =~ /^net(blk)?-[a-z0-9\-]+$/i) {
  $r_default_host = $netblk_host;
  _print "using netblock server $netblk_host\n";
  $netblock = TRUE;
  }

my @rc = ();
my $subrec = '';

# do we need a different default server?
if(!$r_host and $r_default_host ne $netblk_host and (lc $domain) =~ /\.([a-z0-9\-]+)$/) {
  $tld = $1;
  if($tld !~ /$gtlds/) {
    my $tld_host = find_tld($domain);
    $r_default_host = $tld_host if $tld_host;
    } 
  }

# Go Fishin' at the default host ... 
unless($r_host) {
  $r_host = FALSE;

  @rc = whois_fetch($r_default_host, $domain, $r_port);

  if($netblock) {    # is the netblk delegated ?
    foreach (@rc) {
      next if /(nic\.mil|internic.net)/;
      if(/(r?whois\.[\-\.a-z0-9]+)/i and !$r_host) {
        $r_host = $1;
        $r_port = $RWHOIS_PORT if /rwhois/;   # default to the correct port # for rwhois
        } 
      if(/\bport\s+(\d+)/i) { $r_port = $1; }
      }
    }

  else {  # we are at the root whois server ... find the delegation
    grep { /Whois Server:\s*(.*)/i and $r_host = $1 } @rc;   # look for the referral
    }
  }

# now we know where to look -- let's go get it
if($r_host) {
  $r_port = $portname unless $r_port;
  @rc = whois_fetch($r_host, $domain, $r_port);
  grep {/\((.*-DOM)\).*$domain$/i and $subrec = $1 } @rc;
  }

# do we have a sub rec? If so, "Fetch!"
if($subrec) {
  _print "found a reference to $subrec ... requesting full record ...\n" unless $quiet;
  @rc = whois_fetch($r_host, $subrec, $r_port);
  }

# tell 'em what we found ...
_print "Registrar: $r_host\n" if (@rc && $r_host && !$quiet);
my $headerflag = ($stripheader && $r_host && grep(/$headerstop/, @rc));
while(@rc) {
  my $l = shift @rc;
  _print $l unless $stripheader && $headerflag;
  if($stripheader) {
    $headerflag = FALSE if($l =~ /$headerstop/i);
    }
  }
}

sub whois_fetch
{
my $host = shift;
my $domain = shift;
my $port = shift;
my ($uri, $handle);
my @rc;

my $rs = IO::Socket::INET->new(
    PeerAddr  => $host,
    PeerPort  => $port,
    Proto     => $protoname
  );
unless($rs) {
  use Errno;
  my $errno = 0 + $!;
  error "host $host not found\n" if $!{EINVAL};
  error "unable to connect to $host ($errno: $!)\n" unless $rs;
  }
my $IP = $rs->peerhost; 
my $PORTNUM = $rs->peerport;
_print "connecting to $host [$IP:$PORTNUM] ... \n" unless $quiet;
$rs->autoflush(1);

if(!$jpokay and $host =~ /nic\.ad\.jp$/) { 
  _print qq(japanese whois ... adding '/e' to request\n) unless $quiet;
  $domain .= '/e' 
  }

# if it's a valid 2nd-level domain name, treat it as one. 
if($domain =~ /^[a-z\d\-]+\.[a-z\d\-]+$/ and $host eq $internic) { 
  $rs->print("domain $domain\r\n"); 
  }
else { $rs->print("$domain\r\n"); }

while(<$rs>) { 
  push @rc, $_;
  }

$link_host = $host;
return @rc;
}

sub version { print $banner, "\n"; exit }

# getquery
#
# returns hash of CGI query strings
# works with GET, POST, and multipart methods
#
sub getquery
{
my $method = $ENV{'REQUEST_METHOD'} || 'none';
my ($query_string, $pair);
my %query_hash;
my $ct = $ENV{CONTENT_TYPE} || '';

my ($count, $x, $boundary, $chunk, $i, $filect, $_qsname, $_qsvalue);
my @chunks;

if($ct =~ /^multipart/) {
  # process multipart query
  $count = read STDIN, $x, $ENV{CONTENT_LENGTH}; 
  ($boundary) = $ct =~ /boundary=(.*)$/;
  @chunks = split /\r?\n?--$boundary-?-?\r\n/, $x;
  for $chunk (@chunks) { 
    my ($header, $data) = split /\r\n\r\n/, $chunk;
    my @lines = split /\r\n/, $header; chomp @lines;
    if($lines[0] =~ /$boundary/) { shift @lines }  # loose any leftover boundary strings
    if($lines[0] =~ /filename=/i) {   # it's a file
      for($i = 1; $i < @lines; $i++) {
        if($lines[$i] =~ /^content-type:\s*(\S*)/i) { 
          $filect = $1;
          last;
          }
        }
      $query_hash{_datatype} = $filect;
      $query_hash{_datafile} = $data;
      next;
      }
    for($i = 0; $i < @lines; $i++) { 
      my $thisline = $lines[$i];
      if ($thisline =~ /^Content-disposition: form-data; name="?(\w+)"?/i) {
        $_qsname = $1;
        $_qsvalue = $data;
        $query_hash{$_qsname} = $_qsvalue;
        }
      }
    }
  }

else {
  $query_string = $ENV{'QUERY_STRING'} if $method eq 'GET';
  $query_string = <STDIN> if $method eq 'POST';
  $query_string = $ARGV[0] if $method eq 'none';
  return () unless $query_string;

  foreach $pair (split(/&/, $query_string)) {
    ($_qsname, $_qsvalue) = split(/=/, $pair);
    $_qsvalue =~ s/\+/ /g;
    $_qsvalue =~ s/%([\da-f]{2})/pack('c',hex($1))/ieg;
    $query_hash{$_qsname} = $_qsvalue;
    # if it's an image element, make an extra entry for just the name
    if($_qsname =~ /(.*)\.x$/) { $query_hash{$1} = "image" }
    }
  }
return \%query_hash;
}

sub find_tld
{
my $domain = lc shift;
my $tld = '';
my $server = '';
my $tld_file = "$tld_conf";

return FALSE unless $tld_conf and -f $tld_file;

open(TLD, "<$tld_file") or error "can't open $tld_file ($!)\n";
while(<TLD>) {
  next if /^#/;
  chomp;
  my @tokens = split(/\s+/);
  my $lh = shift @tokens or next;
  my $rh = shift @tokens or next;
  if(substr($domain, 0 - length($lh)) eq $lh) {
    $tld = $lh;
    $server = $rh;
    _print "whois server for *$tld is $server ...\n" unless $quiet;
    last;
    }
  }
close TLD;
return $server;
}

sub defaulthtml
{
return q{<!--  

  BW whois example HTML file
  Copyright 1999-2000 William E. Weinman  http://bw.org/  

  Placeholders are used for the various values which make this 
  work. These placeholders are represented by text enclosed in 
  '$' signs like this: 

    $PLACEHOLDER$

  The placeholders are: 

    SELF    The URI path of the program on your web server, taken 
            from the value of the SCRIPT_NAME environment variable. 

    DOMAIN  The domain that was last looked up, if any. 

    RESULT  The result of the whois query from BW whois. 

  See the example (below) for specific usage. 

  The following options are available 
    stripheader mode:      <input type=hidden name=stripheader value=1> 
    quiet mode:            <input type=hidden name=quiet value=1> 
    japanese output okay:  <input type=hidden name=jpokay value=1> 

-->

<html>
<title> BW whois &middot; Online Query </title>

<body>

<h2> <a href="http://bw.org/whois/">BW whois</a> &middot; Online Query </h2>

<p>
<form action="$SELF$" method=post>
Enter a domain name: <br>
<input type=text name=domain value="$DOMAIN$">
<input type=hidden name=stripheader value=1> 
<input type=hidden name=quiet value=1> 
<input type=submit>
</form>

<p><pre>
$RESULT$
</pre></p>
</body>
</html>

<!-- end of example HTML file for BW whois -->
}
}

sub _print
{
my ($handle, $uri);
my $options = '';
$options .= '&stripheader=1' if $stripheader;
$options .= '&quiet=1' if $quiet;

if($html or $cgi) {
  # RFC-954 whois servers (e.g. whois.networksolutions.com) require the "!" 
  # to look up handles, while other whois servers (e.g. RIPE) prohibit it. 
  # I search for the double-dash option as that is often used on those servers
  $handle = ($link_host =~ /whois.networksolutions.com/) ? '%21' : '';
  $uri = $cgi || $ENV{_SCRIPT_NAME} || 'whois';

  while (@_) { 
    my $_outstr = shift;
    $_outstr =~ s|
      \((                    # a handle is in parens ...
        [A-Z]                # ... is all UPPERCASE and starts with a letter
        [A-Z0-9-_]{3,}?)\)      # ... may contain digits, dashes, and underscores
      |(<a href="$uri?domain=$handle$1%40$link_host$options">$1</a>)|gsx
       if ($html or $cgi);
    $outstr .= $_outstr;
    print $_outstr unless $cgi;
    }
  }
else { print @_ }
}

sub error
{
if($cgi) {
  ctype('text/html');
  my $em = ''; while (@_) { $em .= shift }
  print qq{
    <body bgcolor=white><title> BW Whois Error </title>
    <h1> Error </h1> <p> <em> $em </em>
    };
  exit;
  }
else {
  die @_;
  }
}

sub usage
{
print $banner;
print <<USAGE;

usage: whois [options] (<request> | <request>@<host>) [ ... ]

options: 

  --help         Show this screen.

  --version      Show version information and exit. 

  --host <host>  Hostname of the whois server
  -h <host>      this is the same as the <request>@<host> form
                 if not specified will search $default_host
                 for a "Whois Server:" record

  --port         Specify a different port than the normal whois(43).
  -p

  --quiet        Don't print any extraneous messages. 
  -q             ... "just the facts, ma'am"

  --stripheader  Strip off that silly disclaimer from the 
  -s             whois.networksolutions.com server. You've 
                 read it a thousand times already, right?

  --tld <path>   Full path/file name for tld.conf file. Defaults 
                 to "/etc/tld.conf"

  --makehtml     Display example HTML file. Prints a small 
                 file to STDOUT with the example HTML in it. 
                 Use this to modify to your own taste for CGI 
                 mode. Change \$htmlfile variable as needed. 

Get the latest version of BW Whois here: http://bw.org/whois/

USAGE
exit;
}

