#! /usr/bin/env php
<?php
/*
 * lire :: A partial re-implementation of Lire from the former logreport.org
 *
 * This reimplementation only provides the Web server log analysis previously
 * provided by Lire.
 *
 * Version 1.1.0  January 12, 2026
 * Copyright (c) 2025-2026, Ron Guerin <ron@vnetworx.net>
 *
 * Requires: PHP_PCRE, PHP-Zlib
 * Suggests: PHP-OpenSSL, PHP-POSIX, tput, id
 *
*/

// Report every error and display it; this script is run from the command line.
error_reporting(E_ALL);
ini_set('display_errors', 1);
define('VERSION', '1.1.0');
define('MEPATH', realpath($argv[0])); // resolved path of the script as invoked
define('MEHOST', hostname(TRUE)); // node name as returned by hostname(TRUE)
// ME is this file's base name with a trailing ".php" removed; it is used for the
// syslog ident and process title here, and for default paths further down.
$me = basename(__FILE__);
define('ME', (substr($me, -4) == '.php') ? substr($me, 0, strlen($me) - 4) : $me);
// Must run before the first date() call: this sets PHP's default timezone.
$timezone = restore_standard_timezone_policy();
define('VERSIONSTAMP', date('F j, Y H:i:s', filemtime(MEPATH))); // modification time of the script file
openlog(ME, LOG_PID, LOG_USER); // Open syslog
if (function_exists('cli_set_process_title')) cli_set_process_title(ME); // set proctitle
ini_set('precision', 15);

// Early command-line scan: --version and --help are answered immediately, --week
// widens the reporting window from 1 day to 7. Anything else is only noted in $found.
$days = 1;
$skip = FALSE;
$found = FALSE;
foreach ($argv as $index => $arg) {
	if (! $index) continue; // $argv[0] is the script itself
	if ($skip) { // an earlier iteration asked for this argument to be passed over
		$skip = FALSE;
		continue;
	}
	// Compare only the part before any "=value" suffix
	$pos = strpos($arg, '=');
	$argbase = ($pos) ? substr($arg, 0, $pos) : $arg;
	if (in_array($argbase, ['-V', '--version'], TRUE)) {
		echo VERSION."\n";
		exit;
	}
	elseif (in_array($argbase, ['?', '-?', '-h', '--help', 'help'], TRUE)) {
		help();
		exit;
	}
	elseif (in_array($argbase, ['-w', '--week'], TRUE)) $days = 7;
	else $found = TRUE;
}

// Optional site configuration. The included file may set any of: $debug, $stamp, $proclockwait,
// $lockfile, $syslog, $weblog, $datadir, $mailto, $mailtoname, $mailfrom, $mailfromname, $fqdn,
// $mailsubject, $topn. Every value not set there falls back to the default given below.
if (is_readable('/etc/'.ME.'.conf.php')) require_once '/etc/'.ME.'.conf.php';

define('DEBUG', (isset($debug) && ($debug === TRUE)) ? TRUE : FALSE);
define('STAMP', (isset($stamp) && $stamp === TRUE) ? TRUE : FALSE);
define('LOCKWAIT', (isset($proclockwait)) ? $proclockwait : 180); // seconds
define('LOCKFILE', (isset($lockfile)) ? $lockfile : '/var/run/'.ME.'.lock');
define('SYSLOG', ((! isset($syslog)) || (isset($syslog) && ($syslog !== FALSE))) ? TRUE : FALSE); // on unless explicitly FALSE
define('WEBLOG', (isset($weblog)) ? $weblog : '/var/log/apache2/access.log'); // used as a glob prefix for rotated logs
define('DATADIR', (isset($datadir)) ? $datadir : '/var/local/lib/'.ME);
define('INTERVAL', ($days == 1) ? 'daily' : 'weekly');
if (! isset($mailto)) $mailto = 'root';
if (! isset($mailtoname)) $mailtoname = ME.' Administrator';
define('MAILTO', '"'.$mailtoname.'" <'.$mailto.'>');
define('MAILFROMNAME', (isset($mailfromname)) ? $mailfromname : 'Lire');
// Sender address: default when unset, qualify with this host when it has no domain part,
// and keep a fully qualified configured address untouched.
if (! isset($mailfrom)) $mailfrom = 'lire@'.php_uname('n');
elseif (! strstr($mailfrom, '@')) $mailfrom .= '@'.php_uname('n');
define('MAILFROM', '"'.MAILFROMNAME.'" <'.$mailfrom.'>');
$hostname = (isset($fqdn) && ($fqdn == TRUE)) ? hostname(TRUE) : hostname();
define('MAILSUBJECT', (isset($mailsubject)) ? $mailsubject : $hostname.' '.INTERVAL.' Web server report');
define('TOPN', (isset($topn)) ? $topn : 10); // Number of top items to show in reports

// Empty accumulators for every statistic collected from the logs. The request-size
// buckets are pre-seeded with zero so they always exist, in ascending order.
$data = [
	'requestsbyday' => [],
	'requestsbyhour' => [],
	'bytesbyday' => [],
	'sessionsbyday' => [],
	'requestsbysize' => [
		'[0-1k>' => 0,
		'[1k-6k>' => 0,
		'[6k-31k>' => 0,
		'[31k-156k>' => 0,
		'[156k-781k>' => 0,
		'[781k+>' => 0,
	],
	'bytesbydirectory' => [],
	'requestsbyhost' => [],
	'bytesbyhost' => [],
	'requestsbyurl' => [],
	'browsers' => [],
	'os' => [],
	'httpmethods' => [],
	'httpversions' => [],
	'httpstatus' => [],
	'referers' => [],
	'referringsites' => [],
	'sessions' => [],
	'firstpages' => [],
	'lastpages' => [],
];

// Grand totals across all processed log files
$totalbytes = 0;
$totalrequests = 0;

// Obtain process lock, retrying at random 0.1-0.3 second intervals for up to LOCKWAIT seconds.
$started = time();
$unwritable = FALSE;
while (! ($LOCK = lock_pid(FALSE, $unwritable))) {
	if ($unwritable) break; // waiting cannot cure a permissions problem
	usleep(mt_rand(100000, 299999));
	if (time() >= ($started + LOCKWAIT)) break;
}
if (! $LOCK) {
	if ($unwritable) {
		// PHP-POSIX is only a suggested extension, so look the user name up defensively:
		// POSIX functions first, then the id(1) command, then a placeholder.
		$user = '';
		if (function_exists('posix_geteuid') && function_exists('posix_getpwuid')) {
			$pw = posix_getpwuid(posix_geteuid());
			if ($pw && isset($pw['name'])) $user = $pw['name'];
		}
		if ($user === '') $user = trim((string) @exec('id -un 2>/dev/null'));
		if ($user === '') $user = 'unknown';
		@fwrite(STDERR, 'Error: Lockfile '.$unwritable.' is not writeable by this user ('.$user.')'."\n");
		exit(1);
	}
	else {
		echo 'Timeout waiting for '.ME.' process lock.'."\n";
		exit(1);
	}
}

// Get log files matching pattern (WEBLOG plus any rotation suffix such as .1 or .2.gz)
$logfiles = glob(WEBLOG.'*');
if ((! $logfiles) || (count($logfiles) == 0)) {
	$subject = 'Error running '.ME;
	$message = 'No log files found for: '.WEBLOG."\n";
	mail_send(MAILFROM, MAILTO, $subject, $message, FALSE);
	exit(1);
}

// Sort files by modification time (oldest first) so that requests are read in chronological order
usort($logfiles, function($a, $b) { return filemtime($a) - filemtime($b); });

// Calculate the reporting window: the $days whole days that end just before today, in the logs' timezone
$logtimezone = timezone_from_logfiles($logfiles, $timezone); // get timezone of logs from logfiles
$today = new DateTimeImmutable('today', $logtimezone);
$start = $today->modify('-'.$days.' days'); // 00:00:00, $days days before today
$end = $today->modify('-1 second');  // end at 23:59:59 yesterday
$starttime = $start->getTimestamp();
$endtime = $end->getTimestamp();
$filename = DATADIR.'/'.$end->format('Y-m-d').'.json';

// Filter files that could contain data in our range: a file last written before the
// window opened cannot hold any request inside it.
$process = array(); // must exist even when no file qualifies, or the loop below iterates an undefined variable
foreach ($logfiles as $file) {
	$mtime = filemtime($file);
	if ($mtime >= $starttime) $process[] = $file;
}

foreach ($process as $file) {
	if (! process_log_file($file, $start, $end, $data, $totalrequests, $totalbytes)) {
		$subject = 'Error running '.ME;
		$message = 'Unable to open: '.$file."\n";
		mail_send(MAILFROM, MAILTO, $subject, $message, FALSE);
		exit(1);
	}
}

// Count sessions by day after all files processed
foreach ($data['sessions'] as $session) increment($data['sessionsbyday'], $session['day']);

// The raw data is archived for daily runs only; the report is always generated and mailed
if ($days == 1) save_json_data($filename, $data, $totalrequests, $totalbytes, $end); // save JSON data
$report = generate_report($data, $totalrequests, $totalbytes, $start, $end);
send_report($report, $end);

exit;


####################################################################################################################################
####################################################################################################################################


/**
 * Write the collected statistics as gzip-compressed, pretty-printed JSON to "$filename.gz".
 *
 * The document has the keys 'date' (Y-m-d of $reportdate), 'totalrequests', 'totalbytes' and 'data'.
 * Failures are logged to syslog. No intermediate temporary file is written: the compressed
 * output is produced directly from the in-memory JSON string.
 *
 * @param string            $filename      target path without the ".gz" suffix
 * @param array             $data          aggregated statistics
 * @param int               $totalrequests number of requests in the report
 * @param int               $totalbytes    number of bytes in the report
 * @param DateTimeInterface $reportdate    date the report covers
 * @return bool TRUE on success, FALSE when encoding or writing failed
 */
function save_json_data($filename, $data, $totalrequests, $totalbytes, $reportdate) {
	$jsondata = ['date' => $reportdate->format('Y-m-d'), 'totalrequests' => $totalrequests,
		'totalbytes' => $totalbytes, 'data' => $data];
	// URLs, referers and user agents come straight from the log and may hold invalid UTF-8;
	// substitute such bytes instead of letting the whole encode fail.
	$json = json_encode($jsondata, JSON_PRETTY_PRINT | JSON_INVALID_UTF8_SUBSTITUTE);
	if ($json === FALSE) {
		syslog(LOG_ERR, 'Failed to encode JSON data: '.json_last_error_msg());
		return FALSE;
	}

	// Compress to gzip
	$gz = gzopen($filename.'.gz', 'wb9');
	if ($gz === FALSE) {
		syslog(LOG_ERR, 'Failed to create gzip file: '.$filename);
		return FALSE;
	}
	$written = gzwrite($gz, $json);
	gzclose($gz);

	// A short write leaves a truncated archive behind; remove it rather than keep bad data
	if ($written !== strlen($json)) {
		syslog(LOG_ERR, 'Failed to write gzip file: '.$filename);
		@unlink($filename.'.gz');
		return FALSE;
	}

	return TRUE;
}

/**
 * Split one access-log line in Combined Log Format into its fields.
 *
 * Only lines with a known HTTP method and an "HTTP/x.y" protocol are accepted.
 * A byte count of "-" is reported as 0.
 *
 * @param string $line raw log line
 * @return array|null fields ip, datetime, method, url, protocol, status, bytes, referer, useragent;
 *                    NULL when the line does not match
 */
function parse_log_line($line) {
	// BEL is used as the regex delimiter so that no character of the pattern needs escaping for it
	$delimiter = chr(7);
	$regex = $delimiter.'^(\S+) \S+ \S+ \[([^\]]+)\] "(GET|POST|HEAD|PUT|DELETE'
		.'|OPTIONS|PATCH|CONNECT|TRACE) ([^"]*?) (HTTP/[0-9.]+)" (\d+) (\S+) "([^"]*)" "([^"]*)"'.$delimiter;

	if (! preg_match($regex, $line, $fields)) {
		// Rejected lines are only reported when debugging is switched on
		if (DEBUG) syslog(LOG_ERR, 'Malformed log line: '.substr($line, 0, 200));
		return NULL;
	}

	list(, $ip, $datetime, $method, $url, $protocol, $status, $size, $referer, $useragent) = $fields;
	return [
		'ip' => $ip,
		'datetime' => $datetime,
		'method' => $method,
		'url' => $url,
		'protocol' => $protocol,
		'status' => (int)$status,
		'bytes' => ($size === '-') ? 0 : (int)$size,
		'referer' => $referer,
		'useragent' => $useragent,
	];
}

/**
 * Work out which timezone the log files were written in.
 *
 * The first log line that parses supplies a UTC offset and a timestamp. The result is, in order
 * of preference: the fallback zone when its offset at that moment matches the log; otherwise the
 * first named zone whose offset matches; otherwise the fixed-offset zone taken from the log entry.
 * When no line in any file can be parsed, the fallback zone is returned.
 *
 * Every file that is opened is closed again before the function moves on or returns.
 *
 * @param array        $logfiles paths of plain or .gz log files, tried in the given order
 * @param string|false $timezone fallback timezone identifier; PHP's default timezone is used when empty
 * @return DateTimeZone
 */
function timezone_from_logfiles($logfiles, $timezone) {
	$fallback = new DateTimeZone(($timezone) ? $timezone : date_default_timezone_get());

	foreach ($logfiles as $file) {
		$isgz = (substr($file, -3) === '.gz');
		$fp = open_log_file($file);
		if (! $fp) continue;

		// Read only as far as the first entry with a usable timestamp
		$dt = FALSE;
		while (($line = read_log_line($fp, $isgz)) !== FALSE) {
			$entry = parse_log_line($line);
			if (! $entry) continue;
			$dt = parse_datetime($entry['datetime']);
			if ($dt) break;
		}
		close_log_file($fp, $isgz);
		if (! $dt) continue; // nothing parseable in this file, try the next one

		$offset = $dt->getOffset();

		// Many zones share an offset; the configured zone is the most plausible of them
		if ($fallback->getOffset($dt) == $offset) return $fallback;

		// Otherwise find a named timezone that matches this offset at this timestamp
		foreach (DateTimeZone::listIdentifiers() as $tzid) {
			$tz = new DateTimeZone($tzid);
			if ($tz->getOffset($dt) == $offset) return $tz;
		}

		// No named zone has this offset: use the log's own fixed offset
		return $dt->getTimezone();
	}

	return $fallback;
}

/**
 * Parse a log timestamp such as "06/Oct/2023:09:15:30 -0400".
 *
 * @param string            $datetime_str timestamp text taken from the log line
 * @param DateTimeZone|null $timezone     when given, the result is converted to this zone
 * @return DateTime|false the parsed moment, or FALSE when the text does not match the format
 */
function parse_datetime($datetime_str, $timezone=NULL) {
	$parsed = DateTime::createFromFormat('d/M/Y:H:i:s O', $datetime_str);
	if ($parsed === FALSE) return FALSE;

	// Same instant, expressed in the requested zone
	if ($timezone instanceof DateTimeZone) $parsed->setTimezone($timezone);

	return $parsed;
}

/**
 * Map a response size in bytes to its report bucket.
 *
 * Bucket limits are 1k, 6k, 31k, 156k and 781k (multiples of 1024 bytes); each bucket
 * includes its lower limit and excludes its upper one.
 *
 * @param int $bytes response size in bytes
 * @return string bucket label, one of the keys of $data['requestsbysize']
 */
function categorize_size($bytes) {
	if ($bytes < 1024) return '[0-1k>';
	if ($bytes < 6144) return '[1k-6k>';      //   6 * 1024
	if ($bytes < 31744) return '[6k-31k>';    //  31 * 1024
	if ($bytes < 159744) return '[31k-156k>'; // 156 * 1024
	if ($bytes < 799744) return '[156k-781k>'; // 781 * 1024 (was 798720, i.e. 780k)
	return '[781k+>';
}

/**
 * Return the directory part of a request URL's path.
 *
 * The query string is ignored. URLs without a path, the root path and
 * paths without any directory component all yield "/".
 *
 * @param string $url request target from the log line
 * @return string
 */
function extract_directory($url) {
	$parts = parse_url($url);
	if (! isset($parts['path'])) return '/';
	if ($parts['path'] === '/') return '/';

	$dir = dirname($parts['path']);
	if ($dir === '.') return '/'; // dirname() answers "." when there is no slash at all
	return $dir;
}

/**
 * Classify a User-Agent string into a browser family.
 *
 * The checks are ordered from most to least specific, because many browsers also carry the
 * tokens of others (Edge and Opera include "Chrome/", Chrome includes "Safari/", and so on).
 * Robots are recognised first, case-insensitively, so that agents such as "Wget/1.21",
 * "BingPreview" or "AhrefsBot" are not missed because of their capitalisation.
 *
 * @param string $ua User-Agent header value
 * @return string one of Bot, Edge, Opera, Chrome, Firefox, Safari, IE, Navigator, Unknown
 */
function extract_browser($ua) {
	if (preg_match(chr(7).'bot|crawl|slurp|spider|bingpreview|facebookexternalhit|wget|curl'.chr(7).'i', $ua)) return 'Bot';
	if ((stripos($ua, 'Edg/') !== FALSE) || (stripos($ua, 'Edge/') !== FALSE)) return 'Edge';
	if ((stripos($ua, 'Opr/') !== FALSE) || (strpos($ua, 'Opera') !== FALSE)) return 'Opera';
	if (stripos($ua, 'Chrome/') !== FALSE) return 'Chrome';
	if (stripos($ua, 'Firefox/') !== FALSE) return 'Firefox';
	if (stripos($ua, 'Safari/') !== FALSE) return 'Safari';
	if ((stripos($ua, 'MSIE') !== FALSE) || (stripos($ua, 'Trident/') !== FALSE)) return 'IE';
	if (stripos($ua, 'Mozilla') !== FALSE) return 'Navigator';
	return 'Unknown';
}

/**
 * Classify a User-Agent string into an operating system family.
 *
 * Mobile platforms are tested before their desktop relatives: Android agents also
 * contain "Linux", and iPhone/iPad agents also contain "like Mac OS X", so the
 * generic checks would otherwise claim them.
 *
 * @param string $ua User-Agent header value
 * @return string one of "Windows NT", Android, iOS, MacOS, Linux, Other
 */
function extract_os($ua) {
	if (stripos($ua, 'Windows') !== FALSE) return 'Windows NT';
	if (stripos($ua, 'Android') !== FALSE) return 'Android';
	if (stripos($ua, 'iPhone') !== FALSE || stripos($ua, 'iPad') !== FALSE) return 'iOS';
	if (stripos($ua, 'Mac OS') !== FALSE || stripos($ua, 'Macintosh') !== FALSE) return 'MacOS';
	if (stripos($ua, 'Linux') !== FALSE) return 'Linux';
	return 'Other';
}

/**
 * Return the host name of a Referer header value.
 *
 * @param string $referer Referer value from the log line ("-" means none was sent)
 * @return string|null host name, or NULL when there is no referer or it has no host part
 */
function extract_referring_site($referer) {
	if (empty($referer) || ($referer === '-')) return NULL;

	// Yields NULL for a missing host and FALSE for an unparseable URL; both mean "no site"
	$host = parse_url($referer, PHP_URL_HOST);
	return (is_string($host)) ? $host : NULL;
}

/**
 * Abbreviate a byte count using binary units, rounded to one decimal place.
 *
 * @param int|float $bytes
 * @return string|int e.g. "1.5k", "3M", "2.5G"; values below 1024 are returned unchanged
 */
function format_bytes($bytes) {
	// Largest unit first, so the first unit that fits is the right one
	$units = ['G' => 1073741824, 'M' => 1048576, 'k' => 1024];
	foreach ($units as $suffix => $size) {
		if ($bytes >= $size) return round($bytes / $size, 1).$suffix;
	}
	return $bytes;
}

/**
 * Add $amount to the counter stored under $key, creating the counter at zero when it is missing.
 *
 * @param array      $array  counters, modified in place
 * @param int|string $key    counter name
 * @param int|float  $amount value to add (default 1)
 */
function increment(&$array, $key, $amount=1) {
	$array[$key] = ($array[$key] ?? 0) + $amount;
}

/**
 * Return the first $n entries of an array after sorting it, keeping the keys.
 *
 * @param array $array    map to rank; the caller's copy is not modified
 * @param int   $n        number of entries to keep
 * @param bool  $by_value TRUE sorts by value, largest first; FALSE sorts by key, ascending
 * @return array
 */
function top_n($array, $n, $by_value=TRUE) {
	$by_value ? arsort($array) : ksort($array);
	return array_slice($array, 0, $n, TRUE);
}

/**
 * Express $part as a percentage of $total, rounded to one decimal place.
 *
 * @param int|float $part
 * @param int|float $total
 * @return float|int the percentage; 0 when $total is zero, so callers never divide by zero
 */
function percent($part, $total) {
	return ($total == 0) ? 0 : round(($part / $total) * 100, 1);
}

/**
 * Return the last modification time of a file, exactly as filemtime() reports it.
 *
 * @param string $filename
 * @return int|false
 */
function get_file_mtime($filename) {
	$modified = filemtime($filename);
	return $modified;
}

/**
 * Open a log file for reading, transparently handling gzip-compressed files.
 *
 * @param string $filename path; a name ending in ".gz" is opened with gzopen()
 * @return resource|false stream handle, or FALSE when the file cannot be opened
 */
function open_log_file($filename) {
	$opener = (substr($filename, -3) === '.gz') ? 'gzopen' : 'fopen';
	return $opener($filename, 'r');
}

/**
 * Read the next line from a handle returned by open_log_file().
 *
 * @param resource $fp   open handle
 * @param bool     $isgz TRUE when the handle was opened with gzopen()
 * @return string|false the line including its line ending, or FALSE at end of file
 */
function read_log_line($fp, $isgz) {
	return ($isgz) ? gzgets($fp) : fgets($fp);
}

/**
 * Close a handle returned by open_log_file() with the function matching how it was opened.
 *
 * @param resource $fp   open handle
 * @param bool     $isgz TRUE when the handle was opened with gzopen()
 */
function close_log_file($fp, $isgz) {
	if (! $isgz) {
		fclose($fp);
		return;
	}
	gzclose($fp);
}

/**
 * Read one plain or gzipped access log and add every request inside the reporting
 * window to the running statistics.
 *
 * Lines that do not parse, or whose timestamp lies outside the window, are skipped.
 * Both window bounds are inclusive: the caller passes the last second of the final day
 * as $endtime, and requests logged during that second belong to the report.
 *
 * A "session" is all requests from one client address on one calendar day. First pages
 * and last pages are counted once per session, so each of those tallies sums to the
 * number of sessions.
 *
 * @param string            $filename      log file path (".gz" files are decompressed on the fly)
 * @param DateTimeInterface $starttime     first moment of the reporting window
 * @param DateTimeInterface $endtime       last moment of the reporting window
 * @param array             $data          statistics accumulators, updated in place
 * @param int               $totalrequests running request count, updated in place
 * @param int               $totalbytes    running byte count, updated in place
 * @return bool FALSE when the file cannot be opened, TRUE otherwise
 */
function process_log_file($filename, $starttime, $endtime, &$data, &$totalrequests, &$totalbytes) {
	$isgz = (substr($filename, -3) === '.gz');
	$fp = open_log_file($filename);

	if (! $fp) {
		echo 'Warning: Cannot open log file: '.$filename."\n";
		return FALSE;
	}

	while (($line = read_log_line($fp, $isgz)) !== FALSE) {
		$entry = parse_log_line($line);
		if (! $entry) continue;

		$dt = parse_datetime($entry['datetime']);
		if (! $dt) continue;  // Skip if datetime parsing failed
		if (($dt < $starttime) || ($dt > $endtime)) continue; // outside the window
		$totalrequests++;
		$totalbytes += $entry['bytes'];

		// Aggregate by day
		$day = $dt->format('Y-m-d');
		increment($data['requestsbyday'], $day);
		increment($data['bytesbyday'], $day, $entry['bytes']);

		// Aggregate by hour
		$hour = $dt->format('H:00');
		increment($data['requestsbyhour'], $hour);

		// Request size
		$size_cat = categorize_size($entry['bytes']);
		increment($data['requestsbysize'], $size_cat);

		// Directory
		$dir = extract_directory($entry['url']);
		increment($data['bytesbydirectory'], $dir, $entry['bytes']);

		// Client hosts
		increment($data['requestsbyhost'], $entry['ip']);
		increment($data['bytesbyhost'], $entry['ip'], $entry['bytes']);

		// URLs
		increment($data['requestsbyurl'], $entry['url']);

		// Browser and OS
		$browser = extract_browser($entry['useragent']);
		increment($data['browsers'], $browser);

		$os = extract_os($entry['useragent']);
		increment($data['os'], $os);

		// HTTP method and version
		increment($data['httpmethods'], $entry['method']);
		increment($data['httpversions'], $entry['protocol']);
		increment($data['httpstatus'], $entry['status']);

		// Referers
		if (($entry['referer'] !== '-') && (! empty($entry['referer']))) {
			increment($data['referers'], $entry['referer']);
			$site = extract_referring_site($entry['referer']);
			if ($site) increment($data['referringsites'], $site);
		}

		// Sessions (simplified - group by IP and day)
		$sessionkey = $entry['ip'].'_'.$day;
		if (! isset($data['sessions'][$sessionkey])) {
			$data['sessions'][$sessionkey] = ['ip' => $entry['ip'], 'day' => $day,
				'firstpage' => $entry['url'], 'lastpage' => $entry['url'], 'count' => 0];
			// A session's first page is counted once, when the session starts
			increment($data['firstpages'], $entry['url']);
		}
		else {
			// The session continues: its previous last page is no longer the last one
			$previous = $data['sessions'][$sessionkey]['lastpage'];
			if (isset($data['lastpages'][$previous])) {
				$data['lastpages'][$previous]--;
				if ($data['lastpages'][$previous] <= 0) unset($data['lastpages'][$previous]);
			}
		}
		increment($data['lastpages'], $entry['url']);
		$data['sessions'][$sessionkey]['lastpage'] = $entry['url'];
		$data['sessions'][$sessionkey]['count']++;
	}

	close_log_file($fp, $isgz);

	return TRUE;
}

/**
 * Render a centred section heading followed by an underline of the same length.
 *
 * @param string $title  heading text
 * @param int    $indent number of spaces in front of the heading and its underline
 * @return string
 */
function report_heading($title, $indent) {
	$pad = str_repeat(' ', $indent);
	return $pad.$title."\n".$pad.str_repeat('-', strlen($title))."\n\n";
}

/**
 * Render one three-column table of the report: title, header, rule, rows, rule and total line.
 *
 * @param string    $title    table title (printed with a two-space indent)
 * @param array     $heads    the three column headings
 * @param array     $widths   the three column widths in characters
 * @param array     $rows     label => value pairs, printed in the given order
 * @param int|float $divisor  value that counts as 100% for the percentage column
 * @param int       $records  number shown in the "Total for N records" label
 * @param int|float $total    value shown in the total line
 * @param bool      $bytes    TRUE prints values through format_bytes(), FALSE as integers
 * @param bool      $truncate TRUE shortens labels wider than the first column, ending them in "..."
 * @param string    $end      line ending(s) placed after the total line
 * @return string
 */
function report_table($title, $heads, $widths, $rows, $divisor, $records, $total, $bytes=FALSE, $truncate=FALSE, $end="\n\n\n") {
	list($keywidth, $valwidth, $pctwidth) = $widths;
	$rule = '  '.str_repeat('-', $keywidth).' '.str_repeat('-', $valwidth).' '.str_repeat('-', $pctwidth)."\n";
	$rowformat = '  %-'.$keywidth.'s %'.$valwidth.(($bytes) ? 's' : 'd').' %'.$pctwidth.'.1f';

	$table = '  '.$title."\n\n";
	$table .= sprintf('  %-'.$keywidth.'s %'.$valwidth.'s %'.$pctwidth.'s'."\n", $heads[0], $heads[1], $heads[2]);
	$table .= $rule;
	foreach ($rows as $label => $value) {
		if ($truncate && (strlen($label) > $keywidth)) $label = substr($label, 0, $keywidth - 3).'...';
		$table .= sprintf($rowformat."\n", $label, ($bytes) ? format_bytes($value) : $value, percent($value, $divisor));
	}
	$table .= $rule;
	$table .= sprintf($rowformat.$end, 'Total for '.$records.' records', ($bytes) ? format_bytes($total) : $total, 100.0);
	return $table;
}

/**
 * Build the plain-text report from the aggregated statistics.
 *
 * Sections: activity (per day, per hour, by size, by directory), visitors, accessed pages,
 * sessions, browsers and platforms, referring sites, and technical (method, protocol, status).
 * "Top" tables list at most TOPN rows. The generation time is shown in the timezone of $starttime.
 *
 * @param array             $data          statistics as filled in by process_log_file()
 * @param int               $totalrequests number of requests in the period
 * @param int               $totalbytes    number of bytes served in the period
 * @param DateTimeInterface $starttime     start of the reporting period
 * @param DateTimeInterface $endtime       end of the reporting period
 * @return string the complete report text
 */
function generate_report($data, $totalrequests, $totalbytes, $starttime, $endtime) {
	$counts = [55, 8, 7]; // column widths of the request-count tables
	$requests = ['Requests', '% Total'];
	$totalsessions = array_sum($data['sessionsbyday']);
	$totalrefs = array_sum($data['referringsites']);

	$report = 'Report generated: '.(new DateTimeImmutable('now', $starttime->getTimezone()))->format('Y-m-d H:i:s T')."\n";
	$report .= 'Reporting on period: '.$starttime->format('Y-m-d H:i:s T').' - '.$endtime->format('Y-m-d H:i:s T')."\n\n";

	$report .= report_heading('Activity Reports', 29);

	// Requests by day
	$report .= report_table('Number of Requests Served by 1d Period', ['Period', $requests[0], $requests[1]], $counts,
		$data['requestsbyday'], $totalrequests, $totalrequests, $totalrequests);

	// Bytes by day
	$report .= report_table('Total Size of Requests Served By 1d Period', ['Period', 'Bytes', '% Total'], [58, 5, 7],
		$data['bytesbyday'], $totalbytes, $totalrequests, $totalbytes, TRUE);

	// Sessions by day
	$report .= report_table('User Sessions By 1d Period', ['Period', 'Sessions', '%'], [57, 8, 5],
		$data['sessionsbyday'], $totalsessions, $totalsessions, $totalsessions);

	// Requests by hour, in clock order
	ksort($data['requestsbyhour']);
	$report .= report_table('Number of Requests Served by 1h Timeslot', ['Timeslot', $requests[0], $requests[1]], $counts,
		$data['requestsbyhour'], $totalrequests, $totalrequests, $totalrequests);

	// Request size distribution; empty buckets are left out
	$usedsizes = array_filter($data['requestsbysize'], function ($count) { return $count > 0; });
	$report .= report_table('Number of Requests by Request\'s Size', ['Size', $requests[0], $requests[1]], $counts,
		$usedsizes, $totalrequests, $totalrequests, $totalrequests);

	// Top directories by bytes
	$report .= report_table('Total size of requests served by directory, Top '.TOPN, ['Directory', 'Bytes', '% Total'],
		[57, 6, 7], top_n($data['bytesbydirectory'], TOPN), $totalbytes, $totalrequests, $totalbytes, TRUE);

	$report .= report_heading('Visitors Reports', 29);

	// Top hosts by requests
	$report .= report_table('Number of Requests by Client Hosts, Top '.TOPN, ['Client Host', $requests[0], $requests[1]],
		$counts, top_n($data['requestsbyhost'], TOPN), $totalrequests, $totalrequests, $totalrequests);

	// Top hosts by bytes
	$report .= report_table('Total size of requests by Client Hosts, Top '.TOPN, ['Client Host', 'Bytes', '% Total'],
		[57, 6, 7], top_n($data['bytesbyhost'], TOPN), $totalbytes, $totalrequests, $totalbytes, TRUE);

	$report .= report_heading('Accessed Pages Reports', 26);

	// Most requested pages; long URLs are shortened to fit the column
	$report .= report_table('Most Requested Pages, Top '.TOPN, ['URL', $requests[0], $requests[1]], $counts,
		top_n($data['requestsbyurl'], TOPN), $totalrequests, $totalrequests, $totalrequests, FALSE, TRUE);

	$report .= report_heading('Session Reports', 29);

	// First pages, as a share of all sessions
	$report .= report_table('First Page In User Session, Top '.TOPN, ['First Page', $requests[0], $requests[1]], $counts,
		top_n($data['firstpages'], TOPN), $totalsessions, $totalsessions, $totalsessions, FALSE, TRUE);

	$report .= report_heading('Browsers and Platforms Reports', 22);

	// Browsers
	$report .= report_table('Top '.TOPN.' Requests By Browser', ['Browser', $requests[0], $requests[1]], $counts,
		top_n($data['browsers'], TOPN), $totalrequests, $totalrequests, $totalrequests);

	// Operating systems
	$report .= report_table('Top '.TOPN.' Requests By Operating System', ['OS', $requests[0], $requests[1]], $counts,
		top_n($data['os'], TOPN), $totalrequests, $totalrequests, $totalrequests);

	$report .= report_heading('Search Engines and "Referers"', 22);

	// Referring sites, as a share of all requests that carried a referring site
	$report .= report_table('Top '.TOPN.' Referring Sites', ['Referring Site', $requests[0], $requests[1]], $counts,
		top_n($data['referringsites'], TOPN), $totalrefs, $totalrefs, $totalrefs);

	$report .= report_heading('Technical Reports', 28);

	// HTTP methods
	$report .= report_table('Requests By HTTP Method', ['Method', $requests[0], $requests[1]], $counts,
		$data['httpmethods'], $totalrequests, $totalrequests, $totalrequests);

	// HTTP versions
	$report .= report_table('Requests By HTTP Protocol Version', ['Protocol', $requests[0], $requests[1]], $counts,
		$data['httpversions'], $totalrequests, $totalrequests, $totalrequests);

	// HTTP status codes, in numeric order; the last table ends with one blank line less
	ksort($data['httpstatus']);
	$report .= report_table('Requests By HTTP Result', ['Result Code', $requests[0], $requests[1]], $counts,
		$data['httpstatus'], $totalrequests, $totalrequests, $totalrequests, FALSE, FALSE, "\n\n");

	return $report;
}

/**
 * Mail the report to MAILTO as a multipart/mixed message.
 *
 * The report text appears twice: inline as the first part, and base64-encoded as a
 * .txt attachment named after the host, the interval and the report date.
 *
 * @param string            $report     report text
 * @param DateTimeInterface $reporttime date used in the attachment file name
 * @return string|false whatever mail_send() returns
 */
function send_report($report, $reporttime) {
	$boundary = md5(time());
	$filename = MEHOST.'_'.INTERVAL.'_report_'.$reporttime->format('Y-m-d').'.txt';

	$headers = implode("\r\n", [
		'From: '.MAILFROM,
		'MIME-Version: 1.0',
		'Content-Type: multipart/mixed; boundary="'.$boundary.'"',
	])."\r\n";

	// Each array element is one line of the body; empty elements separate part headers from content
	$message = implode("\r\n", [
		'--'.$boundary,
		'Content-Type: text/plain; charset=UTF-8',
		'Content-Transfer-Encoding: 7bit',
		'',
		$report,
		'--'.$boundary,
		'Content-Type: text/plain; name="'.$filename.'"',
		'Content-Transfer-Encoding: base64',
		'Content-Disposition: attachment; filename="'.$filename.'"',
		'',
		chunk_split(base64_encode($report), 70),
		'--'.$boundary.'--',
	]);

	return mail_send(MAILFROM, MAILTO, MAILSUBJECT, $message, FALSE, $headers);
}

/**
 * Try to take the exclusive process lock without blocking.
 *
 * On success the lock file holds this process's PID and the open handle is returned;
 * the lock lasts for as long as that handle stays open.
 *
 * @param string|false $lockpath   lock file path; LOCKFILE is used when empty
 * @param string|false $unwritable set to the lock path when the file cannot be opened for
 *                                 writing (retrying is pointless), FALSE otherwise
 * @return resource|false the locked handle, or FALSE when the lock was not obtained
 */
function lock_pid($lockpath=FALSE, &$unwritable=FALSE) {
	$unwritable = FALSE;
	if (! $lockpath) $lockpath = LOCKFILE;
	if (file_exists($lockpath) && (! is_writable($lockpath))) {
		$unwritable = $lockpath;
		return FALSE;
	}
	// Mode 'c+' never fails merely because another process holds the lock, so a failed
	// open means the file cannot be created or opened here: report it instead of
	// letting the caller retry until its timeout.
	$lock = @fopen($lockpath, 'c+');
	if (! $lock) {
		$unwritable = $lockpath;
		return FALSE;
	}
	if (! flock($lock, LOCK_EX | LOCK_NB)) {
		fclose($lock); // held by another process
		return FALSE;
	}
	ftruncate($lock, 0);
	rewind($lock);
	fwrite($lock, (string) getmypid());
	fflush($lock);
	return $lock;
}

/**
 * Send an e-mail through PHP's mail() and return the generated Message-ID.
 *
 * Meant to be self-contained. The envelope sender passed to the mailer (and, on Windows,
 * the sendmail_from setting) is the bare address taken out of $from, so a display-name
 * form such as '"Name" <user@host>' is safe to pass.
 *
 * @param string       $from    sender, either a bare address or '"Name" <address>'
 * @param string       $to      recipient(s)
 * @param string       $subject subject line
 * @param string|array $content message body; an array is joined with CRLF
 * @param string|false $file    optional path of a text file whose lines are appended to the body
 * @param string|array $headers extra headers, as a CRLF-separated string or a name => value array
 * @return string|false the Message-ID (without angle brackets), or FALSE on failure
 */
function mail_send($from, $to, $subject, $content, $file='', $headers=array()) {
	// Bare sender address for the envelope
	$sender = $from;
	if (preg_match(chr(7).'<([^<>\s]+)>'.chr(7), $from, $found)) $sender = $found[1];

	if (strtolower(substr(PHP_OS, 0, 3)) == 'win') {
		$options = '';
		$content = str_replace("\n.", "\n..", $content); // a line starting with a dot must be doubled
	}
	else $options = '-f'.$sender;
	if (! is_array($headers)) {
		// Turn "Name: value" lines into a name => value map
		$hdrs = explode("\r\n", $headers);
		$headers = array();
		foreach ($hdrs as $hdr) {
			$pos = strpos($hdr, ':');
			if ($pos === FALSE) continue;
			$key = trim(substr($hdr, 0, $pos));
			$data = trim(substr($hdr, $pos + 1));
			if (trim($key)) $headers[$key] = $data;
		}
	}
	$sf = ini_get('sendmail_from');
	ini_set('sendmail_from', $sender);
	$token = FALSE; // Token does not need to be secure
	if (function_exists('random_bytes')) { // use builtin
		// random_bytes() throws when no randomness source is available
		try { $token = random_bytes(8); }
		catch (Exception $e) { $token = FALSE; }
	}
	elseif (function_exists('openssl_random_pseudo_bytes')) $token = @openssl_random_pseudo_bytes(8); // use extension
	elseif (is_executable('/usr/bin/openssl')) { // shell out to command
		$handle = popen('/usr/bin/openssl rand 8', 'r');
		$token = stream_get_contents($handle);
		pclose($handle);
	}
	if ($token == '') // give up on 'secure' token; shuffle some characters around, it's good enough for a Message-ID
		$token = str_shuffle(substr(str_shuffle('abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ0123456789'), 0, 8));
	$messageid = sprintf('%s.%s@%s', @base_convert(microtime(), 10, 36),
		@base_convert(bin2hex($token), 16, 36), php_uname('n'));
	if (! array_key_exists('From', $headers)) $headers['From'] = $from;
	$headers['X-Mailer'] = ((isset($GLOBALS['argv']))
		? basename($GLOBALS['argv'][0]) : basename($_SERVER['SCRIPT_NAME'])).' .v'.VERSION;
	$headers['Message-ID'] = '<'.$messageid.'>';
	$message = "\r\n";
	if (is_array($content)) $content = implode("\r\n", $content);
	if ($content) $message .= $content."\r\n";
	if ($file) {
		if (! is_readable($file)) {
			@fwrite(STDERR, 'Error: File "'.$file.'" does not exist or is not readable.'."\n");
			ini_set('sendmail_from', $sf); // do not leave the temporary setting behind
			return FALSE;
		}
		if ($content) $message .= "\r\n";
		if ($handle = fopen($file, 'r')) {
			while (($line = fgets($handle)) !== FALSE) $message .= trim($line)."\r\n";
			fclose($handle);
		}
	}
	$mailed = mail($to, $subject, $message, $headers, $options);
	ini_set('sendmail_from', $sf);
	return ($mailed) ? $messageid : FALSE;
}

/**
 * Return this machine's host name.
 *
 * @param bool $fqdn TRUE returns the node name exactly as the system reports it;
 *                   FALSE returns only the part before the first dot
 * @return string never empty for a node name without dots: the whole name is returned then
 */
function hostname($fqdn=FALSE) {
	$hostname = php_uname('n');
	if ($fqdn) return $hostname;
	$dot = strpos($hostname, '.');
	return ($dot === FALSE) ? $hostname : substr($hostname, 0, $dot);
}

/**
 * Choose PHP's default timezone the conventional way and activate it.
 *
 * Being explicitly told what the timezone is, is not a "guess" to be ignored.
 * Order of preference:
 * 1. The timezone passed in by the application.
 * 2. The user's TZ environment variable, when it names a known timezone.
 * 3. The system timezone: /etc/timezone, /etc/sysconfig/clock or the /etc/localtime symlink
 *    (absolute or relative target); on Windows, tzutil translated through the intl extension.
 * 4. UTC.
 * A name found in a system file that PHP does not accept is replaced by UTC.
 *
 * @param string|false $timezone requested timezone identifier; receives the one actually chosen
 * @return string|false the active timezone identifier, or FALSE if it could not be set
 */
function restore_standard_timezone_policy(&$timezone=FALSE) {
	$known = DateTimeZone::listIdentifiers();
	if (! $timezone) {
		$notset = TRUE;
		$timezone = 'UTC';
		$TZ = getenv('TZ');
		if (($TZ !== FALSE) && ($TZ !== '')) { // an empty TZ counts as not set
			if (in_array($TZ, $known, TRUE)) {
				$notset = FALSE;
				$timezone = $TZ;
			}
			else {
				$error = 'Error: Invalid timezone: '.$TZ;
				if (function_exists('error')) error($error);
				else @fwrite(STDERR, $error."\n");
			}
		}
		if (! stristr(PHP_OS_FAMILY, 'windows')) {
			if ($notset && (file_exists('/etc/timezone'))) {
				// Debian / Ubuntu
				$data = file_get_contents('/etc/timezone');
				if ($data && (trim($data) !== '')) {
					$notset = FALSE;
					$timezone = trim($data);
				}
			}
			if ($notset && file_exists('/etc/sysconfig/clock')) {
				// RHEL / CentOS
				$data = parse_ini_file('/etc/sysconfig/clock');
				if (! empty($data['ZONE'])) {
					$notset = FALSE;
					$timezone = $data['ZONE'];
				}
			}
			if ($notset && is_link('/etc/localtime')) {
				// Mac OSX and Linux: /etc/localtime is a symlink into a zoneinfo directory such as
				// /usr/share/zoneinfo or /var/db/timezone/zoneinfo; the link target may be relative.
				$filename = readlink('/etc/localtime');
				$pos = ($filename === FALSE) ? FALSE : strpos($filename, '/zoneinfo/');
				if ($pos !== FALSE) {
					$zone = substr($filename, $pos + 10); // text after "/zoneinfo/"
					if ($zone !== '' && $zone !== FALSE) $timezone = $zone;
				}
			}
		}
		else { // Running under Windows
			$tz = exec('tzutil.exe /g', $out, $err);
			if ((! $err) && function_exists('intltz_get_id_for_windows_id')) {
				$id = intltz_get_id_for_windows_id($tz);
				if ($id) $timezone = $id;
			}
		}
		// System files may hold anything; legacy aliases are fine, unknown names are not
		try { new DateTimeZone($timezone); }
		catch (Exception $e) { $timezone = 'UTC'; }
	}
	else {
		if (! in_array($timezone, $known, TRUE)) {
			$error = 'Error: Invalid timezone: '.$timezone;
			if (function_exists('error')) error($error);
			else @fwrite(STDERR, $error."\n");
			$timezone = 'UTC';
		}
	}
	return (date_default_timezone_set($timezone)) ? $timezone : FALSE;
}

/**
 * Word-wrap text to a column width.
 *
 * @param string    $str  text to wrap
 * @param int|false $cols width to wrap at; when empty, the COLUMNS constant is used if defined
 * @return string the wrapped text, or $str unchanged when no width is known
 */
function formatstr($str, $cols=FALSE) {
	$width = $cols;
	if ((! $width) && defined('COLUMNS')) $width = COLUMNS;
	if (! $width) return $str;
	return wordwrap($str, $width);
}

/**
 * Determine the terminal size and publish it as the COLUMNS (and, when known, ROWS) constants.
 *
 * 'stty -a' is asked first (its output contains e.g. "rows 49; columns 167;"), then
 * 'tput -S' with the "lines" and "cols" capabilities. When neither yields a width,
 * 80 columns are assumed. Constants that already exist are left untouched.
 *
 * @param int|false $rows receives the number of rows, or FALSE when unknown
 * @return int the number of columns
 */
function terminal_init(&$rows=FALSE) {
	$rows = FALSE; $cols = FALSE;
	$out = array(); $return = 0;
	exec('stty -a 2>/dev/null', $out, $return);
	if ($return == 0) {
		$text = strtolower(implode("\n", $out));
		// preg_match() answers 0 when nothing matched, so test for a real match
		if (preg_match(chr(7).'rows.([0-9]+);.columns.([0-9]+);'.chr(7), $text, $matches)) {
			$rows = (int) $matches[1];
			$cols = (int) $matches[2];
		}
	}
	if (! $cols) {
		// tput prints one value per requested capability: lines first, then cols
		$tput = array();
		exec('bash -c \'echo -e "lines\ncols"|tput -S\' 2>/dev/null', $tput, $return);
		if (($return == 0) && (count($tput) >= 2) && ctype_digit(trim($tput[0])) && ctype_digit(trim($tput[1]))) {
			$rows = (int) trim($tput[0]);
			$cols = (int) trim($tput[1]);
		}
	}
	if (! $cols) $cols = 80;
	if (! defined('COLUMNS')) define('COLUMNS', $cols);
	if ($rows && (! defined('ROWS'))) define('ROWS', $rows);
	return $cols;
}

/**
 * Print the usage text, wrapped to the terminal width.
 *
 * @param bool $stderr FALSE (the default) writes to standard output, anything else to standard error
 */
function help($stderr=FALSE) {
	terminal_init(); // defines COLUMNS
	$out = ($stderr === FALSE) ? STDOUT : STDERR;
	$str = ME.' v. '.VERSION;
	$str .= ' is a partial reimplementation of Lire/logreport.org for Web analysis.  '
		.ME.' performs the Web server log analysis function of Lire.'."\n";
	@fwrite($out, formatstr($str."\n", COLUMNS));
	@fwrite($out, formatstr('Usage: '.ME.' [options]'."\n", COLUMNS));
	@fwrite($out, formatstr("\n".'Options:'."\n", COLUMNS));
	@fwrite($out, formatstr('  [-h|--help] (show this help, exit)'."\n", COLUMNS));
	// The option parser accepts an upper-case -V for the version
	@fwrite($out, formatstr('  [-V|--version] (show version number, exit)'."\n", COLUMNS));
	@fwrite($out, formatstr('  [-w|--week] (show statistics for prior week)'."\n", COLUMNS));
	@fwrite($out, formatstr("\n".'If --week not specified, gives statistics for the prior day.'."\n", COLUMNS));
}
