#!/usr/bin/perl -w 

#$Header: /home2/cvsroot/LogTrend/Agent/HttpAgent.pm,v 1.11 2001/11/07 16:08:07 lsimonneau Exp $
##*****************************************************************************
##  HttpAgent
##  Description  : 
##
##  Project      : LogTrend 1.0.0.0 - Atrid Systemes
##  Author       : Laurent Simonneau (l.simonneau@atrid.fr)
##*****************************************************************************
#$Log: HttpAgent.pm,v $
#Revision 1.11  2001/11/07 16:08:07  lsimonneau
#*** empty log message ***
#
#Revision 1.10  2001/09/24 10:08:55  lsimonneau
#Add HTTPS support.
#
#Revision 1.9  2001/09/19 08:29:16  lsimonneau
#Modify POD doc.
#
#Revision 1.8  2001/09/14 15:22:27  lsimonneau
#Add documentation in pod.
#
#Revision 1.7  2001/09/03 15:34:35  lsimonneau
#Minr bugfixes.
#
#Revision 1.6  2001/08/29 16:11:03  lsimonneau
#Add Proxy entry in Config file.
#
#Revision 1.5  2001/08/22 08:17:56  lsimonneau
#Minor bugfixes.
#
#Revision 1.4  2001/08/21 16:34:25  lsimonneau
#Minor bugfixes
#
#Revision 1.3  2001/08/21 16:19:18  lsimonneau
#Major bugfixes : remove 'ping' because of proxy problems.
#                 'Can not reach host' alarm => Can't download URL.
#		 'HTTP Server Down' alarm no longer exists.
#		 NotFound and Forbidden alarms became optional.
#Add a lot of comments.
#
#Revision 1.2  2001/07/25 14:09:34  lsimonneau
#Documentation des agent FTP et HTTP.
#
#Revision 1.1  2001/07/19 16:40:27  fdesar
#
#Moved module files to the right directories
#Updated package names and uses to reflect those changes
#Corrected bug in SnortAgent.pm for negating first value in SID parsing
#
#Revision 1.8  2001/06/29 15:37:38  lsimonneau
#Bug fixe : ajout de l'entit ApacheSettings pour rgler le nom du binaire et desfichier de logs.
#
#Revision 1.7  2001/06/27 12:37:35  lsimonneau
#Rien qui ne vaille un commentaire.
#
#Revision 1.6  2001/06/26 15:38:22  lsimonneau
#Ajout des alarmes NotFound, Forbidden et MaxClient.
#
#Revision 1.5  2001/06/21 15:38:17  lsimonneau
#die -> Die
#
#Revision 1.4  2001/06/21 15:21:41  lsimonneau
#Minor bugfixes
#
#Revision 1.3  2001/06/15 08:44:15  lsimonneau
#Ajout du support d'xecution locale ou distante
#
#Revision 1.2  2001/06/14 12:29:15  lsimonneau
#Minor code optimization
#
#Revision 1.1  2001/06/13 09:46:12  lsimonneau
#Premiere release du HttpAgent.
#
#5 Data : download time
#	  requests per seconds
#	  bytes per seconds
#	  active connections 
#	  memory load
#
#4 Alarms : Can not reach host
#           Too Much 403 Forbidden
#           Too Much 404 NotFound
#           server reached MaxClients setting

package LogTrend::Agent::HttpAgent;

use strict;

use vars qw( @ISA );

use XML::DOM;
use LogTrend::Agent;

use POSIX qw(strftime);
use Time::Local;
use LWP;
use LWP::UserAgent;
use HTTP::Request;
use HTTP::Response;
use URI;
use Net::Domain;

# Inherit from the generic LogTrend agent class.
@ISA = qw( LogTrend::Agent );

# Agent name and version; both are handed to the parent constructor by new().
my $name = "HttpAgent";
my $version = "1.0.0";

# Three-letter month abbreviation => zero-based month index. Used when the
# access-log timestamps are converted with timelocal() in checkAlarmFromLog.
my %month_hash = ("Jan", 0, "Feb", 1, "Mar", 2, "Apr", 3, "May", 4, "Jun", 5, "Jul", 6, "Aug", 7, "Sep", 8, "Oct", 9, "Nov", 10, "Dec", 11);

##******************************************************************************
## Constructor  public > Agent
##  Description  : create a new HttpAgent. The object is built by the parent
##                 constructor, which receives this agent's name and version,
##                 and is then blessed into the requested class.
##  Parameters   : none (besides the class name supplied by the method call)
##  Return value : the new agent object
##******************************************************************************
sub new
{
    my $class = shift;

    # The parent class does the actual construction.
    my $agent = $class->SUPER::new( $name, $version );

    return bless $agent, $class;
}


##******************************************************************************
## Method ParseXMLConfigFile  public  (>Agent)
##  Description  : parses the XML config file. The parent class is called
##                 first, then the agent-specific settings are read from the
##                 "Specific" element of "Configuration" and stored in $self :
##                   URL, HOST                  from the mandatory "URL" tag
##                   APACHE_BIN_NAME,
##                   APACHE_ACCESS_LOG_FILE,
##                   APACHE_ERROR_LOG_FILE      from the optional
##                                              "ApacheSettings" tag
##                   PROXY                      from the optional "Proxy" tag
##                   NOT_FOUND_LIMIT,
##                   NOT_FOUND_TIME_INTERVAL    from the optional
##                                              "NotFoundAlarm" tag
##                   FORBIDDEN_LIMIT,
##                   FORBIDDEN_TIME_INTERVAL    from the optional
##                                              "ForbiddenAlarm" tag
##  Parameters   : the file name to parse, the agent state
##  Return value : none
##  Dies         : if a mandatory tag is missing or empty, if an alarm tag
##                 lacks its 'limit' or 'time_interval' attribute, or if no
##                 host name can be extracted from the URL
##******************************************************************************
sub ParseXMLConfigFile
{
    my ($self, $file, $agentstate) = @_;
    $self->SUPER::ParseXMLConfigFile( $file, $agentstate );

    ##===========================================================================
    ## Agent-specific configuration parameters :
    ## use $self->{FOO} to stock information ( all $self->{_FOO} are reserved )
    ##===========================================================================

    # Plain method call instead of the ambiguous indirect-object "new Class()".
    my $parser = XML::DOM::Parser->new() || die("XML::DOM::Parser: $!");
    my $doc = $parser->parsefile( $file ) || die("$file :$!");

    my $rootlist = $doc->getElementsByTagName("Configuration") ||
        die("$file :No \"Configuration\" tag.");
    my $rootnode = $rootlist->item(0) || die("$file :No \"Configuration\" tag.");

    ##
    ## Tag 'Specific'
    ##
    $rootlist = $rootnode->getElementsByTagName("Specific") ||
        die("$file :No \"Specific\" tag.");
    $rootnode = $rootlist->item(0) || die("$file :No \"Specific\" tag.");

    ##
    ## Tag 'URL' (mandatory)
    ##
    $rootlist = $rootnode->getElementsByTagName("URL") ||
        die("$file :No \"URL\" tag.");
    my $urlnode = $rootlist->item(0) || die("$file :No \"URL\" tag.");

    $urlnode = $urlnode->getFirstChild
        || die("Error in \"URL\" tag.");

    $self->{URL} = $urlnode->getNodeValue();

    # Not every URI object offers a host() method (a URL written without a
    # scheme, for instance, does not appear to). Check first so that a bad URL
    # is reported as a configuration error instead of an obscure
    # "Can't locate object method" failure.
    my $uri  = URI->new($self->{URL});
    my $host = $uri->can('host') ? $uri->host : undef;
    defined $host
        || die("Error in \"URL\" tag, can't find a host name in '$self->{URL}'.");
    $self->{HOST} = $host;

    ##
    ## Tag 'ApacheSettings' (optional)
    ## Defaults values are debian like settings (/usr/bin/apache, /var/log/apache/error.log,
    ##                                           /var/log/apache/access.log)
    ##
    my %apache_setting = (
        APACHE_BIN_NAME        => [ "binary_name", "apache" ],
        APACHE_ACCESS_LOG_FILE => [ "access_log",  "/var/log/apache/access.log" ],
        APACHE_ERROR_LOG_FILE  => [ "error_log",   "/var/log/apache/error.log" ],
    );

    my $apachelist = $rootnode->getElementsByTagName("ApacheSettings");
    my $apachenode = $apachelist ? $apachelist->item(0) : undef;

    foreach my $field (keys %apache_setting) {
        my ($attribute, $default) = @{ $apache_setting{$field} };

        # A missing tag or a missing/empty attribute falls back to the default.
        my $value = $apachenode ? $apachenode->getAttribute($attribute) : "";
        $self->{$field} = $value || $default;
    }

    ##
    ## Tag 'Proxy' (optional)
    ##
    my $proxylist = $rootnode->getElementsByTagName("Proxy");
    if (my $proxynode = $proxylist->item(0)) {
        my $cdatanode = $proxynode->getFirstChild
            || die("Error in \"Proxy\" tag.");
        $self->{PROXY} = $cdatanode->getNodeValue();
    }

    ##
    ## Tags 'NotFoundAlarm' and 'ForbiddenAlarm' (both optional).
    ## When a tag is present, both of its attributes are mandatory.
    ##
    foreach my $alarm ( [ "NotFoundAlarm",  "NOT_FOUND" ],
                        [ "ForbiddenAlarm", "FORBIDDEN" ] ) {
        my ($tag, $prefix) = @$alarm;

        my $alarmlist = $rootnode->getElementsByTagName($tag);
        my $alarmnode = $alarmlist->item(0);
        next unless $alarmnode;

        $self->{"${prefix}_LIMIT"} = $alarmnode->getAttribute("limit")
            || die("Error in \"$tag\" tag, can't find 'limit' attribute.");

        $self->{"${prefix}_TIME_INTERVAL"} = $alarmnode->getAttribute("time_interval")
            || die("Error in \"$tag\" tag, can't find 'time_interval' attribute.");
    }

    # Everything needed has been copied into $self as plain strings. Release
    # the DOM tree explicitly: XML::DOM documents dispose() as the way to
    # break the circular references between its nodes.
    $doc->dispose;

    return;
}


##******************************************************************************
## Method CreateAgentDescription  public  (>Agent)
##  Description  : declares the data and alarms this agent can produce.
##                 "download time" and the "HostDown" alarm are always
##                 declared. The other entries are declared only when the
##                 configured host equals this machine's fully qualified name
##                 or its short host name; the "NotFound" and "Forbidden"
##                 alarms additionally require their limit to be configured.
##  Parameters   : none
##  Return value : none
##******************************************************************************
sub CreateAgentDescription
{
    my $self = shift;

    # Identifiers are handed out sequentially, starting at 1, with separate
    # counters for data and for alarms.
    my $data_id  = 1;
    my $alarm_id = 1;

    ## Always available
    $self->AddADataDescription($data_id++, "Integer", "none", "download time",  "");
    $self->AddAnAlarmDescription($alarm_id++, "Error", "Can not reach host", "HostDown" );

    ## Everything below is declared only for the local machine
    my $host = $self->{HOST};
    my $is_local_host = ($host eq Net::Domain::hostfqdn()
                         or $host eq Net::Domain::hostname());
    return unless $is_local_host;

    ## Data
    my @local_data = (
        [ "Real",    "none",       "requests per second" ],
        [ "Real",    "bytes",      "bytes per second"    ],
        [ "Integer", "none",       "active connections"  ],
        [ "Real",    "percentage", "memory used"         ],
    );
    foreach my $entry (@local_data) {
        my ($type, $unit, $label) = @$entry;
        $self->AddADataDescription($data_id++, $type, $unit, $label,  "");
    }

    ## Alarms
    $self->AddAnAlarmDescription($alarm_id++, "Error", "server reached MaxClients setting", "MaxClients");

    $self->AddAnAlarmDescription($alarm_id++, "Warning", "Too much 404 Not Found", "NotFound")
        if defined $self->{NOT_FOUND_LIMIT};

    if (defined $self->{FORBIDDEN_LIMIT}) {
        $self->AddAnAlarmDescription($alarm_id++, "Warning", "Too much 403 Forbidden", "Forbidden");
    }
}


##******************************************************************************
## Method CollectData  public  (>Agent)
##  Description  : collects data and alarms.
##                 The configured URL is downloaded; a failure raises the
##                 "HostDown" alarm, a success reports "download time" (whole
##                 seconds). When the agent runs on the server itself
##                 (LOCALLY_RUNNING), it also reports the figures found in
##                 Apache's server-status page, the memory percentage used by
##                 the Apache processes, and the alarms found in the log files.
##  Parameters   : none
##  Return value : none
##  Dies         : if the request object cannot be built, or if the
##                 server-status page cannot be fetched in local mode
##******************************************************************************
sub CollectData
{
    my $self = shift;

    ##
    ## The first time, initialize variables
    ##
    $self->initVariable() unless (defined $self->{ACCESS_CUR_LINE});

    my $useragent = LWP::UserAgent->new;

    if (defined $self->{PROXY}) {
        $ENV{"http_proxy"} = $self->{PROXY};
        $ENV{"https_proxy"} = $self->{PROXY};
    }

    # Hide https_proxy from LWP while it reads the environment, then put it
    # back so that the variable is still set afterwards.
    my $https_proxy = $ENV{https_proxy};
    delete $ENV{https_proxy} if ($https_proxy);
    $useragent->env_proxy;  # Load proxy settings (but not the https proxy)
    $ENV{https_proxy} = $https_proxy if ($https_proxy);

    ##
    ## Try to reach URL
    ##   if the download works, measure download time
    ##   else, raise the HostDown alarm and stop
    ##
    my $request = HTTP::Request->new('GET', $self->{URL}) or die;

    my $start = time;
    my $response = $useragent->request($request);
    my $response_time = time() - $start;

    if ($response->is_error) {
        $self->AddAlarm ($self->{"HostDown_Error"});
        return;
    }

    $self->AddDataInteger($self->{"download time"}, $response_time);

    ##
    ## Return if the agent is not running on the server
    ##
    return unless $self->{LOCALLY_RUNNING};

    ##
    ## Retrieve server-status information
    ##
    $request = HTTP::Request->new('GET', "http://$self->{HOST}/server-status?/auto");
    $response = $useragent->request($request);

    if ($response->is_error) {
        die("Can't connect to $self->{HOST}/server-status");
    }

    my $status = $response->content;

    ##
    ## In server-status results, look for :
    ##    - Number of active connections
    ##    - Number of bytes sent per second
    ##    - Number of request per second
    ##
    ## Each value is reported only when its line is really present: using $1
    ## after a failed match would silently report the previous capture.
    ## The busy counter is accepted under both names, BusyServers and
    ## BusyWorkers (the latter being the name used by Apache 2.x).
    ##
    if ($status =~ /^Busy(?:Servers|Workers): (\S+)/m) {
        $self->AddDataInteger($self->{"active connections"}, $1);
    }
    else {
        warn("No BusyServers/BusyWorkers line in server-status of $self->{HOST}\n");
    }

    if ($status =~ /^BytesPerSec: (\S+)/m) {
        $self->AddDataReal($self->{"bytes per second"}, $1);
    }
    else {
        warn("No BytesPerSec line in server-status of $self->{HOST}\n");
    }

    if ($status =~ /^ReqPerSec: (\S+)/m) {
        $self->AddDataReal($self->{"requests per second"}, $1);
    }
    else {
        warn("No ReqPerSec line in server-status of $self->{HOST}\n");
    }

    ##
    ## Retrieve memory used by Apache with ps : sum the fourth column (%MEM
    ## in "ps aux" output) over every matching process.
    ## NOTE(review): the binary name comes from the config file and is
    ## interpolated into a shell command as-is.
    ##
    my @psresult = `ps aux | grep $self->{APACHE_BIN_NAME} | grep -v grep`;

    my $memload = 0;
    foreach my $line (@psresult) {
        my @column = split ' ', $line;
        next unless defined $column[3] and $column[3] =~ /^\d+(?:\.\d+)?$/;
        $memload += $column[3];
    }

    # Report the accumulated sum. The previous code passed $1 here, which,
    # once the loop block is left, is the ReqPerSec capture again.
    $self->AddDataReal($self->{"memory used"}, $memload);

    ##
    ## Parse new data in Log files
    ##
    $self->checkAlarmFromLog;

    return;
}


##******************************************************************************
## Method initVariable private
##  Description  : Initialize private variables :
##                   ERROR_CUR_LINE, ACCESS_CUR_LINE : number of the first line
##                       not yet analyzed in each log file (current line count
##                       plus one, or 1 when the file cannot be read)
##                   NOT_FOUND_LIST : empty array reference, set only when
##                       NOT_FOUND_LIMIT is configured
##                   FORBIDDEN_HASH : empty hash reference, set only when
##                       FORBIDDEN_LIMIT is configured
##                   LOCALLY_RUNNING : 1 when the configured host equals this
##                       machine's fully qualified or short name, else 0
##  Parameters   : none
##  Return value : none
##******************************************************************************
sub initVariable {
    my $self = shift;

    ##
    ## Don't analyze previous data.
    ## Start to the end of the log file and wait for new data
    ##
    $self->{ERROR_CUR_LINE}  = _countLogLines($self->{APACHE_ERROR_LOG_FILE}) + 1;
    $self->{ACCESS_CUR_LINE} = _countLogLines($self->{APACHE_ACCESS_LOG_FILE}) + 1;

    # An empty array *reference*: assigning the empty list () to a hash
    # element stores undef, not an empty array.
    $self->{NOT_FOUND_LIST} = [] if defined $self->{NOT_FOUND_LIMIT};
    $self->{FORBIDDEN_HASH} = {} if defined $self->{FORBIDDEN_LIMIT};

    ##
    ## Test if the agent is running on the server
    ##
    my $host = $self->{HOST};
    my $is_local = ($host eq Net::Domain::hostfqdn()
                    or $host eq Net::Domain::hostname());
    $self->{LOCALLY_RUNNING} = $is_local ? 1 : 0;

    return;
}

##******************************************************************************
## Function _countLogLines private
##  Description  : counts the newline characters of a file, which is the figure
##                 "wc -l" prints, without starting a shell (so a path holding
##                 spaces or shell metacharacters is handled correctly)
##  Parameters   : the path of the file
##  Return value : the number of newlines, 0 if the path is undefined or the
##                 file cannot be opened
##******************************************************************************
sub _countLogLines {
    my ($path) = @_;

    return 0 unless defined $path;
    open(my $handle, '<', $path) or return 0;
    binmode($handle);

    my $newlines = 0;
    my $chunk;
    while (read($handle, $chunk, 65536)) {
        $newlines += ($chunk =~ tr/\n//);
    }
    close($handle);

    return $newlines;
}


##******************************************************************************
## Method checkAlarmFromLog private
##  Description  : Look for alarms in Apache log file.
##                 New lines of the error log raise the "MaxClients" alarm when
##                 they report that the MaxClients setting was reached.
##                 If a NotFound or Forbidden limit is configured, new lines of
##                 the access log are scanned too: the time of every 404 is
##                 stored in NOT_FOUND_LIST, the time of every 403 in
##                 FORBIDDEN_HASH under the first field of the line. An alarm
##                 is raised for every run of <limit> consecutive stored times
##                 that fits in <time_interval> seconds. Times older than
##                 <time_interval> seconds are then forgotten.
##                 ERROR_CUR_LINE and ACCESS_CUR_LINE are advanced by the
##                 number of lines read.
##  Parameters   : none
##  Return value : none
##******************************************************************************
sub checkAlarmFromLog {
    my $self = shift;

    ##
    ## get new lines in error log file and look for a MaxClient error
    ##
    ## "tail" is started with an argument list, so no shell is involved and
    ## the file name needs no quoting.
    ##
    open(my $error_log, '-|', 'tail', '-n', "+$self->{ERROR_CUR_LINE}",
         $self->{APACHE_ERROR_LOG_FILE})
        || die("Can't found error log file");

    while (my $line = <$error_log>) {
        $self->{ERROR_CUR_LINE}++;

        if ($line =~ /server reached MaxClients setting, consider raising the MaxClients setting/) {
            $self->AddAlarm($self->{"MaxClients_Error"});
        }
    }
    close($error_log);

    return if ((!defined $self->{NOT_FOUND_LIMIT}) and (!defined $self->{FORBIDDEN_LIMIT}));

    ##
    ## get new lines in access log file and look for 404 Not Found and 403 Forbidden report
    ##
    open(my $access_log, '-|', 'tail', '-n', "+$self->{ACCESS_CUR_LINE}",
         $self->{APACHE_ACCESS_LOG_FILE})
        || die("Can't found access log file");

    while (my $line = <$access_log>) {
        $self->{ACCESS_CUR_LINE}++;

        # If it's a 404 Not Found report
        if ($line =~ /\[(\d+)\/(\w+)\/(\d+)\:(\d+)\:(\d+)\:(\d+) .*?\] "\w+ .*? HTTP\/1\.." 404/) {
            push @{$self->{NOT_FOUND_LIST}}, timelocal($6, $5, $4, $1, $month_hash{$2}, $3);
        }

        # If it's a 403 Forbidden report
        elsif ($line =~ /^(.*?) .*? .*? \[(\d+)\/(\w+)\/(\d+):(\d+):(\d+):(\d+) .*?\] "\w+ .*? HTTP\/1\.." 403/) {
            push @{$self->{FORBIDDEN_HASH}->{$1}}, timelocal($7, $6, $5, $2, $month_hash{$3}, $4);
        }
    }
    close($access_log);

    ##
    ## look for alarm conditions in  FORBIDDEN_HASH  and NOT_FOUND_LIST
    ##
    my $now = time;

    if (defined $self->{FORBIDDEN_LIMIT}) {
        my $interval = $self->{FORBIDDEN_TIME_INTERVAL};

        foreach my $client (keys %{$self->{FORBIDDEN_HASH}}) {
            my $times = $self->{FORBIDDEN_HASH}->{$client};

            _raiseBurstAlarms($self, $times, $self->{FORBIDDEN_LIMIT}, $interval,
                              $self->{"Forbidden_Warning"});

            # Forget every expired time. The previous loop shifted the list
            # while advancing its index, which left about every second
            # expired entry in place.
            @$times = grep { $_ + $interval >= $now } @$times;

            # if there is no more date in list, remove the entry in hash
            delete $self->{FORBIDDEN_HASH}->{$client} unless @$times;
        }
    }

    # Verify not found list
    if (defined $self->{NOT_FOUND_LIMIT}) {
        my $interval = $self->{NOT_FOUND_TIME_INTERVAL};
        my $times    = ($self->{NOT_FOUND_LIST} ||= []);

        _raiseBurstAlarms($self, $times, $self->{NOT_FOUND_LIMIT}, $interval,
                          $self->{"NotFound_Warning"});

        # remove old date from not found list
        @$times = grep { $_ + $interval >= $now } @$times;
    }

    return;
}

##******************************************************************************
## Function _raiseBurstAlarms private
##  Description  : raises the given alarm once for every run of $limit
##                 consecutive entries of the list whose first and last times
##                 are at most $interval seconds apart
##  Parameters   : the agent, a reference to the list of times, the number of
##                 entries forming a run, the time interval in seconds, the
##                 alarm to raise
##  Return value : none
##******************************************************************************
sub _raiseBurstAlarms {
    my ($self, $times, $limit, $interval, $alarm) = @_;

    # A limit below 1 would turn into negative array indexes below.
    return unless $limit > 0;

    for (my $first = 0; defined $times->[$first + $limit - 1]; $first++) {
        if ($times->[$first + $limit - 1] - $times->[$first] <= $interval) {
            $self->AddAlarm($alarm);
        }
    }

    return;
}

1;

__END__


=head1 NAME

HttpAgent.pm - Perl Extension for LogTrend : HttpAgent Agent

=head1 SYNOPSIS 

    use LogTrend::Agent::HttpAgent;

    LogTrend::Agent::HttpAgent->new();

=head1 DESCRIPTION

LogTrend::Agent::HttpAgent is a Perl extension implementing an HTTP server agent for LogTrend.

This module is not intended for direct use, but to be
called through its interface utility called HttpAgent.

As it inherits from LogTrend::Agent, the various Agent
command line switches apply to it.

The HttpAgent can work in two modes : local and remote.

In local mode (when the agent is running on the server), this agent
collects data from log files and the server-status apache module.

In remote mode (when the agent is running on a remote machine), the
agent only collects the web page download time.


=head2 Data and alarms collected in remote mode :

=over 2

=item *

Data: 

- Download time : time needed to download a web page from the server.

=item *

Alarms :

- Can not reach host : the server is not available.

=back


=head2 Data and alarms collected in local mode :

=over 2

=item *

Data :
   - Download time : time needed to download a web page from the server.
   - Requests per second : number of requests received by the server each second.
   - Bytes per second : number of bytes sent by the server each second.
   - Active connections : number of active connections.
   - Memory load : memory used by apache.

=item *

Alarms :
   - Can not reach host : the server is not available.
   - Too Much 403 Forbidden : too many forbidden accesses from a single source.
   - Too Much 404 NotFound : too many "not found" errors.
   - server reached MaxClients setting : Apache has reached its MaxClients setting.
  
=back

=head1 PREREQUISITES

The following Perl modules are definitely needed for this
agent to work:

    Time::Local
    LWP::UserAgent
    HTTP::Request
    HTTP::Response
    URI
    Net::Domain

  For Https support :
    Net::SSL

=head1 CONFIGURATION

The Http Agent configuration is done using an XML file.

See documentation:
/usr/share/doc/LogTrend/Agent/install-guide/agent-install-guide.ps

=head1 AUTHOR

Laurent Simonneau -- Atrid Systemes (l.simonneau@atrid.fr)

=head1 COPYRIGHT

Copyright 2001, Atrid Systemes http://www.atrid.fr/

Project home page: http://www.logtrend.org/

Licensed under the same terms as LogTrend project is.

=head1 WARRANTY

THIS SOFTWARE COMES WITH ABSOLUTELY NO WARRANTY OF ANY KIND.
IT IS PROVIDED "AS IS" FOR THE SOLE PURPOSE OF EVENTUALLY
BEING USEFUL FOR SOME PEOPLE, BUT ONLY AT THEIR OWN RISK.

=cut

