#!/usr/bin/env python

# Squij
#
# (c) 1998 Mark Nottingham <mnot@pobox.com>
# licensed under the GPL; see COPYING
#
# TODO:
# - various output formats
# - other types of reports, confs
# - other logfiles (store.log?)

import sys
# Squij version string
__version__ = '0.7'



def main():
	'''
	Run one complete analysis: read the command line, parse squid.conf,
	collect the refresh patterns, analyse the access log and print the
	report.

	Exits with status 1 when squid.conf has emulate_httpd_log turned on.
	'''

	import sys

	options = ParseGetOpt()
	config = SquidConfigParse(options['-c'])

	# httpd-format logs don't contain the information we're interested in.
	# This is a failure, so leave with a non-zero status (as the other
	# fatal configuration errors do) to let calling scripts notice it.
	if config.emulate_httpd_log:
		sys.stderr.write("Sorry, cannot use HTTP-format logs.\n")
		sys.exit(1)

	patterns = ParsePatterns(config, options)
	results, start_time, end_time = Analyse(config, options, patterns)
	Output(options, patterns, results, start_time, end_time)



def usage():
	'''
	Write the usage message to stdout and exit with status 0.
	The program name shown is taken from sys.argv[0].
	'''

	import sys

	text = '''\
USAGE: %s [options]
       -a use the archived logfile (access.log.0)
       -c [configuration file] (default: /usr/local/squid/etc/squid.conf)
       -i take logfile from STDIN
       -o [server] limit analysis to requests on a single origin server
       -h this help
       -n Squid version 1.1.x config format (default is version 2)
       -s [start time in UTC]
''' % (sys.argv[0])
	# the extra newline keeps the blank line that used to follow the message
	sys.stdout.write(text + '\n')
	sys.exit(0)



def Analyse(config, options, patterns):
	'''
	Read the access log and account every entry against the first
	pattern whose regex matches the entry's URL.

	config   -- configuration object; cache_access_log names the logfile
	options  -- command line option dictionary; -i (log on STDIN),
	            -a (archived log), -s (start time) and -o (origin
	            server) are consulted here
	patterns -- sequence of pattern entries; item 0 is the name used for
	            accounting, item 1 the compiled regex

	Returns (accounting object, utime of the first entry read, utime
	held by the log object when reading stops).

	Exits with status 1 if the logfile can't be opened, or if no entry
	at all could be read from it.
	'''

	import sys
	import Acct
	from weblog import squid, limit, url
	from string import lower

	# these lists define which tags are associated with the various
	# hit types. (refresh_tags and miss_tags are not counted at the
	# moment.)
	fresh_tags = ['TCP_HIT', 'TCP_MEM_HIT', 'TCP_IMS_HIT', 'TCP_IMS_MISS']
	stale_tags = ['TCP_REFRESH_HIT', 'TCP_REFRESH_MISS', 'TCP_REF_FAIL_HIT']
	refresh_tags = ['TCP_CLIENT_REFRESH']

	mod_tags = ['TCP_REFRESH_MISS']
	unmod_tags = ['TCP_REFRESH_HIT']

	hit_tags = ['TCP_HIT', 'TCP_MEM_HIT', 'TCP_IMS_HIT', 'TCP_IMS_MISS',
				'TCP_NEGATIVE_HIT', 'TCP_REFRESH_HIT']
	miss_tags = ['TCP_MISS', 'TCP_REFRESH_MISS']

	### determine where the logfile is, and open it
	if options.has_key('-i'):					# logfile is on STDIN
		fh = sys.stdin
	else:
		logfile = config.cache_access_log
		if options.has_key('-a'):				# -a specified
			logfile = logfile + '.0'
		try:
			fh = open(logfile, 'r')				# access log filehandle
		except IOError, (errno, error):
			sys.stderr.write("Can't open logfile %s: %s\n" % (logfile, error))
			sys.exit(1)

	# the option can't change while we read, so look it up only once
	by_origin = options.has_key('-o')

	### stack the log objects: parser, then optional time limit, then
	### optional url parsing (needed for the origin server test)
	log = squid.AccessParser(fh)		# access log object
	if options.has_key('-s'):
		log = limit.Time(log)			# time limited log object
		log.start = options['-s']
	if by_origin:
		log = url.Parser(log)			# url-parsed log object
	r_acct = Acct.Accountant()			# regex accounting object

	# setting these attributes tells the accounting object when
	# to increment its counters, and by how much; we pass it
	# a lambda function that makes the decision.
	r_acct.ttl_h = lambda a:1
	r_acct.ttl_b = lambda a:a.bytes
	r_acct.hit_h = lambda a, t=hit_tags: decide(a, t, 1)
	r_acct.hit_b = lambda a, t=hit_tags: decide(a, t, a.bytes)
	r_acct.fresh_h = lambda a, t=fresh_tags: decide(a, t, 1)
	r_acct.stale_h = lambda a, t=stale_tags: decide(a, t, 1)
#	r_acct.refresh_h = lambda a, t=refresh_tags: decide(a, t, 1)
	r_acct.mod_h = lambda a, t=mod_tags: decide(a, t, 1)
	r_acct.unmod_h = lambda a, t=unmod_tags: decide(a, t, 1)
	r_acct.elapsed = lambda a:a.elapsed

	### process the access logfile
	seen_entry = 0
	start_time = None
	while log.getlogent():
		if not seen_entry:
			# the first entry read marks the start of the report, even
			# if the origin server test below throws it away
			start_time = log.utime
			seen_entry = 1
		if by_origin:
			if lower(log.url_host) != options['-o']:
				continue
		entry_url = log.url
		for pat in patterns:
			# the compiled pattern's search() is compared against -1,
			# i.e. anything above that counts as a match; only the
			# first matching pattern gets the entry
			if pat[1].search(entry_url) > -1:
				r_acct.enter(log, pat[0])
				break

	if not seen_entry:
		# without a single entry there is no time span to report on
		sys.stderr.write("No entries found in the logfile.\n")
		sys.exit(1)
	return r_acct, start_time, log.utime



def decide(log, scope, retval):
	'''
	Counter helper, meant to be wrapped in a lambda: hands back retval
	when the log_tag of the log entry is listed in scope, and 0 for
	every other tag.
	'''
	tag = log.log_tag
	if tag not in scope:
		return 0
	return retval



def Output(options, patterns, r_acct, start_time, end_time):
	''' output '''

	from time import ctime

	print "squij output: %s to %s" % (ctime(start_time), ctime(end_time))
	if options.has_key('-o'):
		print "              limited to %s" % (options['-o'])

	print '''\
         regex   ave svc   hit/byte   fresh/   unmod/      total hits/
                  time       rate     stale   modified        bytes''' 
	print '-' * 79
	
	ave = [0L,0L,0L,0L,0L,0L,0L,0L,0L,0L,0L]
	for pat in patterns:
		n = pat[0]

		try:
			a =	 (getattr(r_acct, n + '_hit_h') / float(getattr(r_acct, n + '_ttl_h'))) * 100 
		except ZeroDivisionError:
			a = 0
		try:
			b =	 (getattr(r_acct, n + '_hit_b') / float(getattr(r_acct, n + '_ttl_b'))) * 100
		except ZeroDivisionError:
			b = 0		
		c,d =	ratio(getattr(r_acct, n + '_fresh_h'), getattr(r_acct, n + '_stale_h'))
		e,f =	ratio(getattr(r_acct, n + '_unmod_h'), getattr(r_acct, n + '_mod_h'))
		try:
			g = (getattr(r_acct, n + '_elapsed') / float(getattr(r_acct, n + '_ttl_h'))) / 1000
		except ZeroDivisionError:
			g = 0

		print "%16s  %5.1f   %3.1d%%/%3.1d%%    %2i:%-2i   %2i:%-2i   %8i/%5iM" % \
			(n[:14],
			 g, a, b, c, d, e, f,
			 getattr(r_acct, n + '_ttl_h'),
			 M(getattr(r_acct, n + '_ttl_b')),
			)
			
		### keep overall stats
		h = 0
		for i in [
			getattr(r_acct, n + '_hit_h'), getattr(r_acct, n + '_ttl_h'),
			getattr(r_acct, n + '_hit_b'), getattr(r_acct, n + '_ttl_b'),
			getattr(r_acct, n + '_fresh_h'), getattr(r_acct, n + '_stale_h'),
			getattr(r_acct, n + '_unmod_h'), getattr(r_acct, n + '_mod_h'),
			getattr(r_acct, n + '_ttl_h'), M(getattr(r_acct, n + '_ttl_b')),
			getattr(r_acct, n + '_elapsed') ]:
				ave[h] = ave[h] + i
				h = h + 1

	### output overall stats
	try:
		o = (ave[0]/float(ave[1])) * 100
	except ZeroDivisionError:
		o = 0
	try:
		p = (ave[2]/float(ave[3])) * 100
	except ZeroDivisionError:
		p = 0
	q, r = ratio(ave[4], ave[5])
	s, t = ratio(ave[6], ave[7]) 
	try:
		u = ave[10] / ave[8] / 1000
	except ZeroDivisionError:
		u = 0

	print "%16s  %5.1f   %3.1d%%/%3.1d%%    %2i:%-2i   %2i:%-2i   %8i/%5iM" % \
		('OVERALL', u, o, p, q, r, s, t, ave[8], ave[9])	



def ratio(a, b):
	'''
	Approximate the proportion a:b with two small whole numbers and
	return them as a tuple (x, y).

	The larger number divided by the smaller one is tried as it is and
	multiplied by each whole number from 2 up to (but not including)
	the fudge factor; the multiplier that leaves the smallest fractional
	part becomes the small side of the ratio. The big side is cut off at
	the accuracy limit.

	Zero arguments are answered straight away: (0, 0) if both are zero,
	(0, 1) if only a is, (1, 0) if only b is.
	'''

	fudge = 3		# multipliers tried stop short of this
	accuracy = 99	# neither side of the result gets past this

	# zeroes first, so that nothing below can divide by zero
	if a == 0:
		if b == 0:
			return 0, 0
		return 0, 1
	if b == 0:
		return 1, 0

	quotient = a / float(b)
	a_is_bigger = quotient >= 1
	if a_is_bigger:
		scaled = quotient
	else:
		scaled = 1 / quotient

	# fractional part left over at each multiplier; slot 0 belongs to
	# multiplier 1, slot 1 to multiplier 2, and so on
	leftovers = [scaled % int(scaled)]
	multiplier = 2
	while multiplier < fudge:
		multiple = multiplier * scaled
		if multiple < accuracy:
			leftovers.append(multiple % int(multiple))
		multiplier = multiplier + 1

	small = leftovers.index(min(leftovers)) + 1
	big = min(int(scaled * small), accuracy)
	if a_is_bigger:
		return big, small
	return small, big



def K(bytes):
	''' turn a byte count into Kbytes by dividing it by 1024 '''
	kbytes = bytes / 1024
	return kbytes

def M(bytes):
	''' turn a byte count into Mbytes by dividing it by 1024 twice '''
	kbytes = bytes / 1024
	return kbytes / 1024


def _refresh_entry(name, expression, casefold, fields, extra):
	'''
	Build one pattern entry:
	(name, compiled_pattern, min, percent, max, extra)

	expression is compiled with the regex module, case-insensitively
	when casefold is true. fields holds the min, percent and max
	strings in that order; the last character of the percent string
	(the '%' sign, as in the default '20%') is dropped before
	conversion. Anything raised on the way is left to the caller.
	'''
	import regex
	from string import atoi

	if casefold:
		compiled = regex.compile(expression, regex.casefold)
	else:
		compiled = regex.compile(expression)
	return (name, compiled,
			atoi(fields[0]), atoi(fields[1][:-1]), atoi(fields[2]),
			extra)


def ParsePatterns(config, options):
	'''
	Turn the refresh_pattern lines of the parsed squid.conf into a list
	of pattern entries and return it.

	config  -- configuration object; refresh_pattern (and, for version 1
	           configs, refresh_pattern_i) are lists of split lines
	options -- command line option dictionary; options['-n'] is 1 for a
	           version 1 format config, anything else means version 2

	A line that can't be parsed is reported on stderr and left out; it
	never stops the run. The lists held by config are not modified.
	'''

	import string, sys

	# patterns is a list of tuples that each have the following format:
	# (human_readable_name, compiled_pattern, min, percent, max, options)
	patterns = []

	# NOTE FOR VERSION ONE FORMAT CONFIGS:
	# there is a bit of screwy stuff with precedence, warn users.
	# (because of how Conf works, entries within each type (case
	# sensitive or insensitive) are in the same order as the file,
	# but the two types of entries are not in the same relationship;
	# case-insensitive entries are first, then case-sensitive.)

	# The bare excepts below are on purpose: whatever goes wrong with a
	# line (bad regex, missing field, bad number) only costs that line.

	if options['-n'] == 1:	# squid.conf is in version 1 format
		for line in config.refresh_pattern_i:
			if not line: continue
			try:
				patterns.append(_refresh_entry(line[0] + ' i', line[0], 1,
												line[1:4], ()))
			except:
				sys.stderr.write('WARNING: trouble parsing squid.conf refresh_pattern/i line: %s\n' % (line[0]))
		for line in config.refresh_pattern:
			if not line: continue
			try:
				patterns.append(_refresh_entry(line[0], line[0], 0,
												line[1:4], ()))
			except:
				sys.stderr.write('WARNING: trouble parsing squid.conf refresh_pattern line: %s\n' % (line[0]))

	else:							# squid.conf is in version 2 format
		for line in config.refresh_pattern:
			if not line: continue
			case_flag = ''				# visual indicator
			fields = line
			if line[0] == '-i':			# case-insensitive flag is present
				case_flag = 'i'
				fields = line[1:]		# skip the flag; line itself stays intact
			elif line[0] == '+i':		# case-sensitive flag is present
				fields = line[1:]		# skip the flag; line itself stays intact
			try:
				# compiling happens inside the try as well, so that a bad
				# regex or a lone flag is a warning here just like in the
				# version 1 branch
				patterns.append(_refresh_entry(fields[0] + ' ' + case_flag,
												fields[0], case_flag,
												fields[1:4], fields[4:]))
			except:
				sys.stderr.write('WARNING: trouble parsing squid.conf refresh_pattern line: %s\n' % (string.join(line)))
	return patterns



def SquidConfigParse(squidconf):
	''' parse the squidconf file and return an object with its attrs '''

	import Conf
	from string import split
	from os import path

	### find the squid base directory.	
	squidconfdir = path.split(squidconf)[0]
	squid_dir = path.split(squidconfdir)[0]
	
	### find the configuration file.
	try:
		config = Conf.ConfReader(squidconf)
	except IOError, (errnum, reason):
		sys.stderr.write("Can't read %s: %s\n" % (squidconf, reason))
		sys.exit(1)

	### set up parameters for the configuration lines we're interested
	### in. Note that the relationship between defaults and postprocessing
	### is important.	
	config.config_separator = None
	config.set('cache_access_log', default=path.join(squid_dir, 'logs/access.log'))
	config.set('cache_log', default=path.join(squid_dir, 'logs/cache.log'))
	config.set('cache_store_log', default=path.join(squid_dir, 'logs/store.log'))
	config.set('emulate_httpd_log', default='0', post=Conf.yesno)
	config.set('refresh_pattern', default='. 20% 4320', post=split, list=1)	
	config.set('refresh_pattern/i', default='', post=split, list=1)
	config.set('cache_stoplist', default='cgi-bin ?', post=split)
	config.set('cache_stoplist_pattern', default='',  post=split)

	### parse the configuration file (well, try to...)
	try:
		config.parse()
	except NameError, missing_one:
		sys.stderr.write("Missing Squid Config: %s\n" % (missing_one))
		sys.exit(1)
	except ValueError, convert_problem:
		sys.stderr.write("Conversion Error on line %s: %s\n" % (config.config_line_number, convert_problem))

	### return an object that has the desired configuration as attributes.			
	return config	



def ParseGetOpt():
	'''
	Read the command line and return the options as a dictionary keyed
	by flag ('-a', '-c', ...).

	On the way out '-c' always holds a configuration file name, '-n'
	is 1 (flag given) or 2 (flag absent), '-o' is lower case and '-s'
	is an integer. An unknown option, -h, or a -s value that is not a
	number ends up in usage().
	'''

	import sys, getopt, string

	try:
		parsed = getopt.getopt(sys.argv[1:], 'ac:hio:ns:')
	except getopt.error:
		usage()

	options = {}
	for flag, value in parsed[0]:
		options[flag] = value

	# help
	if options.has_key('-h'):
		usage()

	# config file: fall back to the standard location
	if not options.has_key('-c'):
		options['-c'] = '/usr/local/squid/etc/squid.conf'

	# origin server to limit to: compared in lower case later on
	if options.has_key('-o'):
		options['-o'] = string.lower(options['-o'])

	# squid version: the flag is swapped for the format number
	if options.has_key('-n'):
		version = 1
	else:
		version = 2
	options['-n'] = version

	# start time: has to be a whole number
	if options.has_key('-s'):
		try:
			options['-s'] = int(options['-s'])
		except ValueError:
			usage()

	return options




# run the analysis only when started as a script, not when imported
if __name__ == '__main__':
	try:
		main()
	except KeyboardInterrupt:
		# interrupted from the keyboard: say so on stderr rather than
		# dumping a traceback (note that the exit status is still 0)
		sys.stderr.write("INTERRUPT\n")
		sys.exit(0)
