#!/bin/perl

# WWWweek
#   - a WWW-log analyzing CGI-script (weekly report)
#
# See shared.pl for version information.
#
# Lars M. Garshol - larsga@ifi.uio.no - http://www.ifi.uio.no/~larsga/

# This program is freeware. Please let me know if you modify the code,
# and not just the configuration. 

# The GIF bars were stolen from http://weber.u.washington.edu/~reflux/

require "cgi.pl";
require "shared.pl";
require $VAR{"config"};

# ---------- Initialization     ------------------------

&InitDate; # $day.$mon.$year gives current date

# Days per month, indexed by zero-based month number.
@mlen=(31,28,31,30,31,30,31,31,30,31,30,31);

# Leap year handling, full Gregorian rule (divisible by 4, except
# centuries that are not divisible by 400).
# NOTE(review): assumes InitDate sets a four-digit $year; it is compared
# numerically against the year field of the log timestamp further down.
# Confirm in shared.pl.
if ($year % 4 == 0 && ($year % 100 != 0 || $year % 400 == 0)) {
    $mlen[1]++;
}

# Creating a list of the dates we are checking.
#
# @dates gets seven [day, month, year] entries: $dates[6] is today and
# $dates[0] is six days ago. The loop walks backwards from today, stepping
# into the previous month (and year) when the day number drops below 1.
#
# $e_thismon ends up as the index in @dates of the earliest day that lies
# in the current month (0 if the whole week is within one month).
#
# Note that $day, $mon and $year are modified by this loop.

$e_thismon=0; #Earliest date this month
for ($ix=6; $ix>=0; ) {
    $dates[$ix]=[$day,$mon,$year];
    $ix--;
    $day--;

    # Only roll over if there are still entries left to fill in
    if ($ix>=0 && $day<1) {
        $mon--;
        if ($mon==-1) {   # Went past January: wrap to December last year
            $mon=11;
            $year--;
        }
        $day=$mlen[$mon];     # Last day of the previous month
        $e_thismon=$ix+1;     # The entry just stored was the 1st of this month
    }
}

# ---------- The program itself ------------------------

# The content-type header is only sent when $Redirect is false.
# NOTE(review): presumably $Redirect means the output goes to a file
# instead of straight to the browser -- confirm in the configuration.
unless ($Redirect) {
    &printCType("text/html");
}

# Open the access log; report the failure through error() if that fails.
open(INN,$LogFile)
  or error("Couldn't open logfile $LogFile: $!");

&PrintPageTop("Weekly access report $ForString");

# Search for start of week (to speed up report generation)
#
# Starting at 90% of the file size, look at the first complete line after
# the current position. If that line is not from this week, remember the
# position in $lastpos and jump halfway towards the end of the file.
# Stop as soon as a line from this week is seen.
#
# Afterwards the file is always rewound to $lastpos, the last position
# whose probed line was NOT from this week (0 if the very first probe was
# already inside the week). This also covers the case where the probing
# runs into end-of-file without ever seeing a line from this week: the
# lines between $lastpos and the end have not all been inspected, so they
# must still be read by the main loop.

seek(INN,0,2);
$size=tell(INN);
$lastpos=0;
$pos=int($size*0.9);

seek(INN,$pos,0);
<INN>; # Throwing a line away since we probably start in the middle of it
while(<INN>) {
    if (!/\S/) { next; }                 #Just whitespace: skip

    @felter=split(/ /,$_);
    @klokkeslett=split(/\//,$felter[3]); #Day/Mon/Year:Hour:Min:Sec
    $klokkeslett[0] =~ s/\[//;           #Strip the leading [ from the day

    #Have we reached this week's data?
    if ( #First acceptable day: later or equal day, same month, same year
        ($klokkeslett[0]>=$dates[0][0] &&
         $klokkeslett[1] eq $months[$dates[0][1]] &&
         $klokkeslett[2]==$dates[0][2])
        || #Last acceptable day: earlier or equal day, same month, same year
        (($klokkeslett[0]<=$dates[6][0] &&
          $klokkeslett[1] eq $months[$dates[6][1]] &&
          $klokkeslett[2]==$dates[6][2])
         #Final check: not before the earliest acceptable day this month
         && $klokkeslett[0]>=$dates[$e_thismon][0])
       ) {
        last; # Found data for this week
    }

    # Still before this week: remember where we were and probe further on
    $lastpos=$pos;
    $pos=int(($pos+$size)/2);
    seek(INN,$pos,0);
    <INN>; # Skipping a (probably partial) line
}

# Continue reading from the last position known to be before this week
seek(INN,$lastpos,0);

# Reading file in the ordinary way
#
# Each log line is split on single spaces. The fields used are:
#   $felter[0]  host          (passed to SkipHost)
#   $felter[3]  timestamp     ([Day/Mon/Year:Hour:Min:Sec)
#   $felter[5]  request method, with its leading double quote
#   $felter[6]  requested page (passed to SkipPage)
#   $felter[8]  HTTP status code
#   $felter[9]  number of bytes
#
# For every accepted line from this week the totals ($TotalHits,
# $TotalByte), the per-day counters (@HitsADay) and the per-page counters
# (%PageOnDay, %PageTotal) are updated.

while (<INN>)
{
  if (!/\S/) { next; }                  #Just whitespace: skip

  @felter=split(/ /,$_);

  #Check whether this line should be included
  if ($felter[5] eq '"HEAD')        { next; }
  if ($felter[8] != 200)            { next; }   # HTTP status is not 200
  if (SkipHost($felter[0]))         { next; }
  if (SkipPage($felter[6]))         { next; }

  @klokkeslett=split(/\//,$felter[3]);     #Day/Mon/Year:Hour:Min:Sec
  $klokkeslett[0] =~ s/\[//;               #Strip the leading [ from the day

  #Was it this week?
  if (!( #First acceptable day: later or equal day, same month, same year
      ($klokkeslett[0]>=$dates[0][0] &&
       $klokkeslett[1] eq $months[$dates[0][1]] &&
       $klokkeslett[2]==$dates[0][2])
      || #Last acceptable day: earlier or equal day, same month, same year
     (($klokkeslett[0]<=$dates[6][0] &&
       $klokkeslett[1] eq $months[$dates[6][1]] &&
       $klokkeslett[2]==$dates[6][2])
        #Final check: not before the earliest acceptable day this month
        && $klokkeslett[0]>=$dates[$e_thismon][0])
       )) {
      next; # Not this week, skipping
  }

  # Tidying up the page name: decode every URL-encoded tilde (either
  # case), then strip a trailing default file name.
  # NOTE(review): $DefaultFile is interpolated as a regular expression, so
  # a "." in it matches any character. Left as is because the
  # configuration may rely on regex syntax -- confirm, then consider \Q...\E.
  $felter[6] =~ s/%7e/~/gi;
  $felter[6] =~ s/$DefaultFile\Z//;

  $TotalHits++;
  $TotalByte += $felter[9];

  $dag[$klokkeslett[0]]++;   # Hits per day-of-month; not read in this script

  # Which day of the week was it? (0..6)
  if ($klokkeslett[1] eq $months[$dates[6][1]]) { #This month
      $ix=6+$klokkeslett[0]-$dates[6][0];
  } else { #Previous month
      $ix=$klokkeslett[0]-$dates[0][0];
  }

  $HitsADay[$ix]++;
  $PageOnDay{$felter[6]}[$ix]++;
  $PageTotal{$felter[6]}++;
}
close INN;

# ------ URL GROUPING -------
#
# Every page that has an entry in %Aliases is folded into its alias
# target: the per-day counts and the total are added to the target's
# counters, and the page's own entries are removed from both hashes.
# The key list is taken before the loop starts, so adding and deleting
# entries inside the loop is safe.

foreach $url (keys %PageOnDay) {
    next unless $Aliases{$url};

    my $target=$Aliases{$url};

    for ($ix=0; $ix<=6; $ix++) {
        $PageOnDay{$target}[$ix]+=$PageOnDay{$url}[$ix];
    }
    delete $PageOnDay{$url};   # Removes the alias

    $PageTotal{$target}+=$PageTotal{$url};
    delete $PageTotal{$url};
}

#------- HEADER -------
#
# Prints the summary table: the week covered (first and last entry of
# @dates), the number of accesses and the number of bytes transferred.
# When $Redirect is set, the value of $LastRun is printed as well.

# Show 0 rather than an empty cell when no log line matched this week
$TotalHits ||= 0;
$TotalByte ||= 0;

print "<H3>Total statistics</H3>\n";

print "<TABLE>\n";

print "<TR><TD><STRONG>Week:</STRONG><TD>$dates[0][0] $months[$dates[0][1]] $dates[0][2] ";
print "to $dates[6][0] $months[$dates[6][1]] $dates[6][2]\n";
print "<TR><TD><STRONG>Number of accesses:</STRONG><TD>$TotalHits\n";
print "<TR><TD><STRONG>Bytes transferred:</STRONG><TD>$TotalByte\n";

if ($Redirect) {
    print "<TR><TD><STRONG>Report generated:</STRONG><TD> $LastRun\n";
}

print "</TABLE>\n";

#------- PER PAGE -------
#
# Prints one table row per page with its total and its hits for each of
# the seven days, preceded by a heading row and a row of overall totals.
# Pages are ordered by total hits; at most $NoOfWeekPs rows are printed.

print "<H3>Accesses per page</H3>\n\n";

print "<TABLE>\n";

# Heading row: one "day.month" column per day (month shown 1-based)
print "<TR><TH>Page <TH>Total ";
for ($ix=0; $ix<=6; $ix++) {
    my $monthno=$dates[$ix][1]+1;
    print "<TH>$dates[$ix][0].$monthno ";
}

# Totals row; days without hits show 0, like the per-page cells below
print "<TR><TH>Totals  <TD>" . ($TotalHits || 0) . "  ";
for ($ix=0; $ix<=6; $ix++) {
    my $hits=$HitsADay[$ix] || 0;
    print "<TD>$hits\n";
}

# NOTE(review): $SortDir presumably is +1 or -1 from the configuration and
# selects ascending or descending order -- confirm.
@sortert=sort {
                $SortDir*$PageTotal{$a} <=> $SortDir*$PageTotal{$b}
         } keys(%PageTotal);

$count=0;
foreach $page (@sortert) {
  # The page name originates from the request line in the log, i.e. from
  # the client, so it must be HTML-escaped before it is printed.
  # Alias targets from %Aliases end up here too and are escaped as well.
  my $shown=$page;
  $shown =~ s/&/&amp;/g;
  $shown =~ s/</&lt;/g;
  $shown =~ s/>/&gt;/g;
  $shown =~ s/"/&quot;/g;

  print "<TR><TD>$shown <TD>$PageTotal{$page}\n";

  for ($ix=0; $ix<7; $ix++) {
      my $hits=$PageOnDay{$page}[$ix] || 0;   # No hits that day: show 0
      print "    <TD>$hits\n";
  }

  print "\n";

  $count++;
  if ($count==$NoOfWeekPs) { last; } # Don't print too many pages.
}

print "</TABLE>\n\n\n";

&PrintPageBot;
