#!/usr/bin/perl

# Copyright 2005 Scott Klein and Felix Sheng
#
# This file is part of apachegrep.
#
# apachegrep is free software; you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation; either version 2 of the License, or
# (at your option) any later version.
#
# apachegrep is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with apachegrep; if not, write to the Free Software
# Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA  02110-1301  USA

use strict;
use warnings;

use Carp;

my $VERSION = "0.3";

#-- "--version" anywhere on the command line wins over everything else
foreach my $cli_arg (@ARGV) {
   next if $cli_arg ne '--version';
   print getVersionBlurb();
   exit;
}

#-- setup() hands back the three subs that drive the scan, plus whatever
#   arguments were left over once the flags had been consumed (the inputs)
my( $checkline, $splitline, $output, $args ) = setup(@ARGV);

#-- no inputs named: fall back to the 'STDIN' marker that getFH understands
@$args = ('STDIN') unless @$args;

SOURCE:
foreach my $source (@$args) {
   my $fh = getFH($source);
   if( ! defined $fh ) { next SOURCE }

   ENTRY:
   while( my $line = <$fh> ) {
      my $fields = $splitline->($line);

      #-- a line without a host field is not processed any further
      #   (debug aid: print "BAD: $line" here to see what gets dropped)
      if( ! $fields->{'host'} ) { next ENTRY }

      if( $checkline->($fields) ) {
         $output->( \$line, $fields );
      }
   }
}

#-- getCount() is only true once the counting output sub has been called
if( my $matches = getCount() ) {
   print "$matches\n";
}
exit;


#-- getFH( $arg )
#   Returns a readable filehandle for one input, or nothing (undef in scalar
#   context) when the input has to be skipped; a note goes to STDERR then.
#
#     'STDIN'      -> the STDIN glob
#     *.bz2 / *.gz -> a pipe reading from bzcat / zcat
#     anything else is opened directly for reading
#
#   Inputs that are missing or empty (-s fails) are skipped.
#
#   The decompressors are started with the list form of a pipe open, so the
#   file name is handed over as a single argument and never passes through a
#   shell: names containing spaces or shell metacharacters are safe.
sub getFH {
   my $arg = shift;

   if( $arg eq 'STDIN' ) {
      return *STDIN;
   }

   if( ! -s $arg ) {
      print STDERR "couldn't open $arg - skipping\n";
      return;
   }

   my @opener = ($arg =~ /\.bz2$/) ? ( '-|', 'bzcat', $arg )
              : ($arg =~ /\.gz$/ ) ? ( '-|', 'zcat',  $arg )
              :                      ( '<',  $arg          )
              ;
   #-- @cmd_args is empty for a plain file, making this a 3-arg open
   my( $mode, $target, @cmd_args ) = @opener;

   my $fh;
   if( ! open( $fh, $mode, $target, @cmd_args ) ) {
      print STDERR "couldn't open $arg ($!) @opener - skipping\n";
      return;
   }

   return $fh;
}

#-- _makeFieldHandler( $field )
#   Builds the handler for a flag that names a log field.  The handler gets
#   the parser state hashref (and a second argument it does not use):
#     - croaks if a field is already waiting for its pattern;
#     - when the output mode is 'fielded', appends $field to the list of
#       fields to print;
#     - otherwise marks $field as the field the next plain argument is for.
sub _makeFieldHandler {
   my $field = shift;

   my $handler = sub {
      my( $state, $stuff ) = @_;

      croak "already in field" if $state->{'in_field'};

      if( $state->{'output'} ne 'fielded' ) {
         $state->{'in_field'} = $field;
      }
      else {
         push @{ $state->{'outputfields'} }, $field;
      }
   };

   return $handler;
}

#-- _makeGlobalHandler( $field )
#   Builds the handler for a flag that applies to the search as a whole:
#   calling it switches $field on under the 'globals' key of the parser state.
sub _makeGlobalHandler {
   my $setting = shift;

   my $handler = sub {
      my( $state, $stuff ) = @_;
      $state->{'globals'}{$setting} = 1;
   };

   return $handler;
}

#-- _makeModHandler( $field )
#   Builds the handler for a modifier flag: calling it queues the modifier
#   name on the 'mods' list of the parser state.
sub _makeModHandler {
   my $modifier = shift;

   my $handler = sub {
      my( $state, $stuff ) = @_;
      push @{ $state->{'mods'} }, $modifier;
   };

   return $handler;
}

#-- _makeOutputHandler( $type )
#   Builds the handler for an output-mode flag.  The handler records $type
#   as the 'output' entry of the parser state and croaks if an output mode
#   has already been chosen.
sub _makeOutputHandler {
   my $mode = shift;

   my $handler = sub {
      my( $state, $stuff ) = @_;

      croak "already specified output" if $state->{'output'};

      $state->{'output'} = $mode;
   };

   return $handler;
}

{
#-- tally shared by the counting output sub and getCount(); it stays
#   undefined until the counting sub has been called at least once
my $matched;

#-- getCount()
#   Returns the current tally.
sub getCount {
   return $matched;
}

#-- _makeCountOutput()
#   Returns an output sub that ignores its arguments and bumps the tally
#   by one every time it is called.
sub _makeCountOutput {
   my $tally = sub { return $matched++ };
   return $tally;
}
}

#-- _makeFieldedOutput( $showfields )
#   $showfields: arrayref of field names to print (may be undef).
#   Returns an output sub taking ( $lineref, $fields ); it prints the values
#   of the chosen fields from the $fields hashref, separated by single
#   spaces and followed by a newline.  The raw line is not used.
#
#   A missing field list is treated as an empty one, and a field with no
#   value is printed as an empty string, so neither case dies or warns.
sub _makeFieldedOutput {
   my $showfields = shift;

   #-- copy the names once; undef (output mode chosen, no fields named)
   #   simply means "nothing to show"
   my @wanted = ref $showfields ? @$showfields : ();

   return sub {
      my( $lineref, $fields ) = @_;
      #-- look the keys up one by one rather than through a hash slice so
      #   that missing keys are not created in the caller's hash
      my @values = map { defined $fields->{$_} ? $fields->{$_} : '' } @wanted;
      print join( ' ', @values ), "\n";
   };
}


#-- The flag table has to be filled in at compile time (hence BEGIN): the
#   main program calls setup() and then exits, so statements this far down
#   the file are never reached at run time.
BEGIN {
#-- one handler per single-letter flag; each handler is called with the
#   parser state hashref and a reference to the search fields so far
my %FLAG_HANDLER = (
   h => _makeFieldHandler( 'host' ),
   u => _makeFieldHandler( 'user' ),
   t => _makeFieldHandler( 'timestamp' ),
   r => _makeFieldHandler( 'request' ),
   s => _makeFieldHandler( 'status' ),
   b => _makeFieldHandler( 'bytes' ),
   R => _makeFieldHandler( 'referer' ),
   U => _makeFieldHandler( 'useragent' ),
   x => _makeFieldHandler( 'extra' ),

   v => _makeModHandler( 'not' ),
   i => _makeModHandler( 'case_insensitive' ),

   c => _makeOutputHandler( 'count' ),
   o => _makeOutputHandler( 'fielded' ),

   a => _makeGlobalHandler( 'and' ),
);

#-- setup( @argv )
#   Parses the command line and returns
#      ( $checkline, $splitline, $output, \@remaining_args )
#   where $splitline turns a log line into a hashref of fields, $checkline
#   decides whether such a hashref matches, $output reports a match, and
#   the remaining arguments are whatever followed the flags and patterns.
#
#   Arguments starting with "-" are bundles of single-letter flags.  A plain
#   argument is the pattern for the most recent field flag; the first plain
#   argument that no field flag is waiting for ends flag processing.
#
#   Croaks on an unknown flag and on a field flag that is never given a
#   pattern.
sub setup {
   my @args = @_;

   my @searchfields;
   my %current_state = ( 'in_field' => '', output => '' );
   my $SANITY = 100;   #-- upper bound on the number of arguments examined
   ARG:
   while( @args && $SANITY-- ) {
      if( my($flags) = $args[0] =~ /^-(\w+)/ ) {
         #-- "-vh" is handled as "-v" followed by "-h"
         foreach my $flag (split /(?=\w)/, $flags) {
            if( ref $FLAG_HANDLER{$flag} ) {
               $FLAG_HANDLER{$flag}->( \%current_state, \@searchfields );
            }
            else {
               croak qq{What does "-$flag" mean?};
            }
         }
      }
      else {
         my $value = $args[0];
         if( $current_state{'in_field'} ) {
            push @searchfields, { value => $value, field => $current_state{'in_field'} };
            #-- modifiers seen since the previous pattern apply to this one
            if( ref $current_state{'mods'} ) {
               foreach my $mod (@{ $current_state{'mods'} }) {
                  $searchfields[-1]{$mod} = 1;
               }
            }
            delete $current_state{'in_field'};
            delete $current_state{'mods'};
         }
         else { #-- no more flags
            last ARG;
         }
      }

      shift @args;
   }

   #-- a field flag at the very end has nothing to search for; say so
   #   instead of silently ignoring it
   if( $current_state{'in_field'} ) {
      croak qq{no pattern given for the "$current_state{'in_field'}" field};
   }

   #-- always a real list, even when the fielded output mode was chosen
   #   without naming any field
   my @outputfields = @{ $current_state{'outputfields'} || [] };

   my $output = $current_state{'output'} eq 'fielded' ?  _makeFieldedOutput(\@outputfields)
              : $current_state{'output'} eq 'count'   ?  _makeCountOutput()
              :                                          sub { print ${$_[0]} }
              ;
   my $checkline = makeCheckline(\@searchfields, $current_state{'globals'});

   #-- the splitter must deliver every field that is searched AND every
   #   field that is printed; makeSplitline only looks at the 'field' key
   my $splitline = makeSplitline(
      [ @searchfields, map { +{ field => $_ } } @outputfields ]
   );
   return( $checkline, $splitline, $output, \@args );
}
}


#-- _makePatternExpr( $data, $globals )
#   $data: hashref with 'field' and 'value' (the user's regex) and the
#          optional true flags 'not' and 'case_insensitive'.
#   $globals is accepted but not used.
#   Returns a string of Perl code that tests $fields->{FIELD} against the
#   pattern; it is meant to be eval'ed where a lexical $fields is in scope.
#
#   The pattern is embedded as m'...': with single-quote delimiters Perl
#   does no variable interpolation, so "$" and "@" in the user's pattern
#   reach the regex engine untouched (e.g. user@host, or a "$" anchor in
#   the middle of an alternation).
sub _makePatternExpr {
   my( $data, $globals ) = @_;
   my $pattern = $data->{'value'};

   #-- escape the delimiter.  Backslash pairs are stepped over as a unit so
   #   that a quote the user already escaped is left alone and a literal
   #   backslash in front of a quote is not mistaken for an escape.
   $pattern =~ s{(\\.)|'}{ defined $1 ? $1 : q{\\'} }gse;

   return "\$fields->{'$data->{'field'}'} "
          . ($data->{'not'} ? '!~ ' : '=~ ')
          . "m'$pattern'"
          . ($data->{'case_insensitive'} ? 'io' : 'o');
}

#-- The field tables are filled in at compile time (hence BEGIN): the main
#   program calls makeSplitline() and exits, so statements this far down
#   the file are never reached at run time.
BEGIN {
#-- names for the space-separated tokens of a log line, in order
my @fields = qw(host null user timestamp timezone rType request protocol status bytes referer);
#-- token index of every field a plain split on spaces can deliver
my %field_pos_for = ( host => 0,    user  => 2,  rType => 5,
                      request => 6, status => 8, bytes => 9,
                      referer => 10
);

#-- _makeSplitSplitline( $index )
#   Returns a splitter that cuts a line on single spaces and names tokens
#   0 .. $index after @fields; the rest of the line is discarded.  Tokens
#   are stored exactly as split returns them.
#   The splitter returns a reference to one hash that it refills on every
#   call, so callers must not hold on to the result across lines.
sub _makeSplitSplitline {
   my $index = shift;

   my @wanted = @fields[0..$index];
   #-- one piece more than needed, so the last wanted token is cut off
   #   from whatever follows it
   my $limit  = $index + 2;

   my %record;
   return sub {
      @record{@wanted} = split / /, $_[0], $limit;
      return \%record;
   };
}

#-- _makeRegexSplitline()
#   Returns a splitter that parses the whole line with one regex and fills
#   host, user, timestamp, rType, request, status, bytes, referer,
#   useragent and extra.  The second capture (the token between host and
#   user) is not stored.  When the line does not match, every field is
#   undef.
#   The splitter returns a reference to one hash that it refills on every
#   call, so callers must not hold on to the result across lines.
sub _makeRegexSplitline {
   my $SPLITLINE
       = qr{^(\S+) (\S+) ([^[]+) \[([^]]+)\] "(\w+) (\S+) HTTP/[^"]*" (\d+) ([0-9-]+)(?: "([^"]*)" "([^"]+)")? ?(\S*)?$};

   my @wanted = qw(host user timestamp rType request status bytes referer useragent extra);

   my %record;
   return sub {
      #-- captures are numbered from 0 here; 1 is the unnamed token
      @record{@wanted} = ( $_[0] =~ $SPLITLINE )[0,2..10];
      return \%record;
   };
}


#-- makeSplitline( $flags )
#   $flags: arrayref of hashrefs; only the 'field' key of each is read.
#   Picks the split-based splitter when every requested field has a known
#   token position (splitting only as far as the right-most one), and the
#   regex-based splitter otherwise, including for an empty list.
sub makeSplitline {
   my $flags = shift;
   my $max = -1;
   FLAG:
   foreach my $flag (@$flags) {
      my $pos = $field_pos_for{ $flag->{'field'} };
      if( ! defined $pos ) { #-- we have a non-split friendly position
         $max = -1;
         last FLAG;
      }
      $max = $pos if $pos > $max;
   }

   return $max >= 0 ? _makeSplitSplitline($max)
                    : _makeRegexSplitline();
}
}

#-- makeCheckline( $flags, $globals )
#   $flags:   arrayref of search descriptions, each turned into a test
#             expression by _makePatternExpr.
#   $globals: optional hashref; a true 'and' entry means every test has to
#             succeed, otherwise one successful test is enough.
#   Returns a sub that takes the fields hashref of one log line and returns
#   1 on a match and 0 otherwise.
#
#   The tests are assembled into Perl source and compiled once with a
#   string eval.  Croaks when there is nothing to search for, and (with
#   Perl's own error text) when the generated code does not compile, which
#   is what happens when a pattern is not a valid regex.
sub makeCheckline {
   my( $flags, $globals ) = @_;
   if( ! ref $flags || ! @$flags ) { croak "what are you looking for?" }

   my @tests = map { _makePatternExpr($_, $globals) } @$flags;

   my $body = ( ref $globals && $globals->{'and'} )
            ? 'return 1 if ( ' . join( ' and ', @tests ) . ");\n"
            : join( '', map { "return 1 if $_;\n" } @tests )
            ;

   my $checksub;
   my $substr = '$checksub = sub { my $fields = shift;' . "\n"
              . $body
              . "return 0;}";

   eval $substr;
   if( $@ || ! $checksub ) {
      croak "couldn't compile the search patterns: $@";
   }
   return $checksub;
}


#-- getVersionBlurb()
#   Returns the text shown for --version: program name and version number,
#   then the copyright and no-warranty notice, one newline after each line.
sub getVersionBlurb {
   my @blurb = (
      "apachegrep $VERSION",
      '',
      'Copyright 2005 Scott Klein and Felix Sheng',
      'This is free software; see the source for copying conditions. It is',
      'distributed WITHOUT ANY WARRANTY; without even the implied warranty of',
      'MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.',
   );

   return join( "\n", @blurb ) . "\n";
}
