#!/bin/perl

# WWWstats
#   - a WWW-log analyzing CGI-script
#
# See shared.pl for version information.
#
# Lars M. Garshol - larsga@ifi.uio.no - http://www.ifi.uio.no/~larsga/

# This program is freeware. Please let me know if you modify the code,
# and not just the configuration. 

# The GIF bars were stolen from http://weber.u.washington.edu/~reflux/

# Edit Config.pl to configure the program.

require "cgi.pl";
require "shared.pl";
require $VAR{"config"};

# Socket supplies AF_INET for the reverse lookups done while reading the log.
# A `use` statement takes effect at compile time, so the module is loaded
# whether or not $DNSLookup is set.
use Socket;

# With lookups enabled and $DNSFile set, bind the address-to-name cache
# %HostHash to the wwwstats.dbm DBM file.
# NOTE(review): $DNSFile is only tested as a flag here; the DBM file name is
# fixed -- confirm that this is intended.
if ($DNSLookup && $DNSFile) {
    dbmopen(%HostHash, "wwwstats.dbm", 0644);
}

# ---------- The program itself ------------------------

# Emit the text/html content type unless $Redirect is set.
unless ($Redirect) {
    &printCType("text/html");
}

# Open the access log; hand the failure to error() when it cannot be opened.
unless (open(INN,$LogFile)) {
    error("Couldn't open logfile $LogFile: $!");
}

# Start the HTML page.
&PrintPageTop("Access statistics $ForString");

# Read the access log line by line and accumulate everything the report
# needs: total hits and bytes, first/last access time, last host, and hit
# counts per page (%sider), per hour (@time), per host (%maskiner) and per
# top-level domain (%domener).
#
# Each line is split on single spaces; the fields used below are
# 0 = host, 3 = timestamp, 5 = quoted method, 6 = URL, 8 = status code,
# 9 = byte count.
while (<INN>)
{
  if (!/\S/) { next; }                          # Just whitespace, skip

  @felter=split(/ /,$_);

  # Only successful, non-HEAD requests that pass the host/page filters count.
  if ($felter[8] != 200)            { next; }   # If HTTP-code not 200
  if ($felter[5] eq '"HEAD')        { next; }
  if (&SkipHost($felter[0]))        { next; }
  if (&SkipPage($felter[6]))        { next; }

  # Normalise the URL: decode every escaped tilde (either hex case, not just
  # the first occurrence) and replace a trailing default file name by "/" so
  # both spellings are counted as one page.
  # NOTE(review): $DefaultFile is interpolated as a regex, so a "." in it
  # matches any character -- confirm whether \Q...\E is wanted here.
  $felter[6] =~ s/%7[Ee]/~/g;
  $felter[6] =~ s/\/$DefaultFile\Z/\//;
  $sider{$felter[6]}+=1;                        # Count accesses per page

  @klokkeslett=split(/:/,$felter[3]);           # Day/MM/YY:Hour:Min:Sec
  $time[$klokkeslett[1]]+=1;                    # Count accesses per hour

  $TotalHits += 1;
  $TotalByte += $felter[9];
  $LastTime  =  $felter[3];
  if (!$FirstTime) { $FirstTime=$felter[3]; }

  # IP-address?  The pattern is anchored so that a host NAME which merely
  # contains a dotted quad is not mistaken for a raw address.
  $Host=$felter[0];
  if ($Host =~ /^\d+\.\d+\.\d+\.\d+$/) {
      # Doing DNS lookup, consulting the %HostHash cache first
      if ($DNSLookup) {
          $new=$HostHash{$Host};
          if (!$new) {
              $new=gethostbyaddr(pack("C4",split(/\./,$Host)),AF_INET);
              # A failed lookup is cached as "IP" so it is not retried.
              $HostHash{$Host} = ($new eq "") ? "IP" : $new;
          }

          $Host = ($new eq "IP") ? "" : $new;
      }

      # If we're not doing lookup or if it failed
      if (!$DNSLookup || $Host eq "") {
          $maskiner{"IP-address"}++;
          $domener{"IP"}++;
          $LastHost=$felter[0];
          next;
      }
  }
  $LastHost=$Host;
  $Host =~ tr/A-Z/a-z/;                         # downcase

  # Remove first part of domain name unless full host names are wanted
  my $name = $Host;
  if (!$FullHostName) { $name =~ s/^[\w\-]+\.//; }

  $maskiner{$name}+=1;                          # count accesses per machine

  # The top-level domain is the last dot-separated word.  Without a dot the
  # name already is the tld.  $1 is only used when the match succeeded, so a
  # stale capture can never be counted.
  my $tld = $name;
  if ($name =~ /\.(\w+)$/) { $tld = $1; }

  if (!$Domain{$tld}) { next; }                 # Unknown domain, skip
  $domener{$tld}++;                             # count accesses per top-level domain
}
close INN;

# ------ URL GROUPING -------

# Fold the hit count of every URL that has an entry in %Aliases into the
# URL it is an alias for, then drop the alias from the page table.
for $url (keys %sider) {
    my $target = $Aliases{$url};
    next unless $target;

    $sider{$target} += $sider{$url};
    delete $sider{$url};            # Removes the alias
}

#------- HEADING -------

# Print the table of overall totals gathered while reading the log.

$LastTime =~ s/\[/ /; #Cosmetics: replace the leading "[" of the timestamp
$FirstTime =~ s/\[/ /;

# The host name comes from the log (or from reverse DNS), so escape HTML
# metacharacters before writing it into the page.
my $LastHostHtml = $LastHost;
$LastHostHtml =~ s/&/&amp;/g;
$LastHostHtml =~ s/</&lt;/g;
$LastHostHtml =~ s/>/&gt;/g;
$LastHostHtml =~ s/"/&quot;/g;

print "<H3>Total statistics</H3>\n";

print "<TABLE>\n";
print "<TR><TD><STRONG>Number of accesses: </STRONG><TD> $TotalHits\n";
print "<TR><TD><STRONG>Bytes transferred:  </STRONG><TD> $TotalByte\n";
print "<TR><TD><STRONG>First access at:    </STRONG><TD> $FirstTime\n";
print "<TR><TD><STRONG>Last access at:     </STRONG><TD> $LastTime\n";
print "<TR><TD><STRONG>Last host to access:</STRONG><TD> $LastHostHtml\n";

# The generation time is only shown when $Redirect is set.
if ($Redirect) {
    print "<TR><TD><STRONG>Report generated:</STRONG><TD> $LastRun\n";
}
print "</TABLE>\n";

#------- PER PAGE -------

# Print one row per page with its hit count and a bar whose width is
# proportional to the count.  $SortDir selects the sort direction, and
# $MaxNoFiles / $MinHitFile limit which of the sorted pages are shown.

print "<H3>Accesses per page</H3>\n\n";

print "<TABLE>\n";
print "<TR><TH>Page<TH>Number of hits\n";

@sortert=sort { $SortDir*$sider{$a} <=> $SortDir*$sider{$b} }
         keys (%sider);

# The largest count sits first when sorting descending, last otherwise.
if ($SortDir<0) { $max=$sider{$sortert[0]}; }
else            { $max=$sider{$sortert[$#sortert]}; }

$count=0;
foreach $key (@sortert) {
  # Descending: show the first $MaxNoFiles entries.
  # Ascending:  show the last  $MaxNoFiles entries.
  if ((($SortDir<0 && $count<$MaxNoFiles) ||
       ($SortDir>0 && $#sortert-$MaxNoFiles<$count)) &&
        $sider{$key}>=$MinHitFile) {
     # WIDTH takes whole pixels, so truncate the scaled value.
     $bredde=int($Barwidth*$sider{$key}/$max)+1;

     # The URL comes straight from the log: escape HTML metacharacters
     # before using it as text and as an attribute value.
     my $page = $key;
     $page =~ s/&/&amp;/g;
     $page =~ s/</&lt;/g;
     $page =~ s/>/&gt;/g;
     $page =~ s/"/&quot;/g;

     print "<TR><TD>";
     if ($LinkToPages) { print "<A HREF=\"$page\">$page</A>"; }
     else              { print "$page";                       }
     print "<TD><IMG SRC=\"$BarImage\" WIDTH=$bredde HEIGHT=10 ALT=\"\">\n";
     print "$sider{$key}\n";
  }

  $count++;
}

print "</TABLE>\n\n\n";

#------- PER HOST -------

# Print one row per host (or host domain) with its hit count.  $SortDir
# selects the sort direction, and $MaxNoHosts / $MinHitHost limit which of
# the sorted hosts are shown.

print "<H3>Accesses per host</H3>\n\n";

print "<TABLE>\n";
print "<TR><TH>Host<TH>Number of hits\n";

@sortert=sort { $SortDir*$maskiner{$a} <=> $SortDir*$maskiner{$b} }
         keys (%maskiner);
$count=0;
foreach $key (@sortert)
{
  # Descending: show the first $MaxNoHosts entries.
  # Ascending:  show the last  $MaxNoHosts entries.
  if ((($SortDir<0 && $count<$MaxNoHosts) ||
       ($SortDir>0 && $#sortert-$MaxNoHosts < $count)) &&
        $maskiner{$key}>=$MinHitHost) {
    # Host names come from the log or from reverse DNS: escape HTML
    # metacharacters before writing them into the page.
    my $host = $key;
    $host =~ s/&/&amp;/g;
    $host =~ s/</&lt;/g;
    $host =~ s/>/&gt;/g;
    $host =~ s/"/&quot;/g;

    print "<TR><TD>$host<TD>$maskiner{$key}\n";
  }
  $count++;
}

print "</TABLE>\n\n\n";

#------- PER DOMAIN -------

# Print one row per top-level domain code: the description looked up in
# %Domain, the code itself and the number of hits, ordered by $SortDir.

print "<H3>Accesses per domain</H3>\n\n";

print "<TABLE>\n";
print "<TR><TH>Domain<TH>Code<TH>Number of hits\n";

@sortert = sort { $SortDir * ($domener{$a} <=> $domener{$b}) } keys(%domener);

for $key (@sortert) {
    print "<TR><TD>" . $Domain{$key} . "<TD>" . $key . "<TD>" . $domener{$key} . "\n";
}

print "</TABLE>\n\n\n";

#------- PER HOUR -------

# Print a two-row table: the hours 0..23 as headings, and below each hour
# its hit count with a vertical bar scaled against the busiest hour.

print "<H3>Accesses per hour</H3>\n\n";

print "<TABLE>\n";
print "<TR>\n";

# Heading row; find the busiest hour on the way.
$max=0;
for ($ix=0; $ix<=23; $ix++) {
  print "<TH>$ix";
  if ($time[$ix]>$max) { $max=$time[$ix]; }
}
print "\n<TR>\n";

if ($max==0) { $max=1; }                # avoid dividing by zero on an empty log
for ($ix=0; $ix<=23; $ix++) {
  # Hours without any access have no entry in @time; show them as 0.
  my $hits = $time[$ix] || 0;

  # HEIGHT takes whole pixels, so truncate the scaled value.
  $bredde=int($Barwidth*$hits/$max)+1;
  print "<TD VALIGN=BOTTOM>$hits<BR>\n";
  print "<IMG SRC=\"$BarImage\" HEIGHT=$bredde WIDTH=10 ALT=\"\">\n";
}

print "</TABLE>\n\n\n";

# Finish the HTML page.
&PrintPageBot;

# -------- CLOSING DOWN --------

# Close the DNS cache file.  The condition mirrors the one under which the
# file is opened at the top of the script ($DNSLookup and $DNSFile both set),
# so %HostHash is only closed when it was actually bound to the DBM file.
if ($DNSLookup && $DNSFile) {
    dbmclose(%HostHash);
}
