#!/usr/bin/perl -w

=head1 NAME

ngconfigtest - checks the arc.conf for inconsistencies, known problems
or (in a future development) just general bad taste.

=head1 SYNOPSIS

ngconfigtest --printall

=head1 DESCRIPTION

The motivation behind this little script was to have a repository for
automated tests on issues that came up on the NorduGrid developers
mailing list. As such this script indicates directories that are not
present, checks host certificates, CA certificates and CRLs, validates
the sanity of ARC configuration and tests for clock skew.

BECAUSE EVERY INSTALLATION OF ARC IS DIFFERENT THIS UTILITY ONLY SUGGESTS
WHAT COULD BE WRONG. SOMETIMES IT IS OVERRESTRICTIVE. AND SOMETIMES IT
CAN MISS SOME MISCONFIGURATION. NEVER TREAT RESULTS PRODUCED BY IT AS
ULTIMATE TRUTH.

=head1 OPTIONS

=over 4

=item --config <string>

Specifies the location of the config file, by default it is /etc/arc.conf

=item --printall

Lists all variable names of the config file together with their values.

=item --timeserver <server>

Allows the specification of a server against which to test the local
system's time.

=item --skip-warnings

Do not show warnings.

=item --help

Quick summary of options.

=item --man

Detailed man page.

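=item --openssl-path <filename>

Path to openssl binary, determined with 'which openssl' if not set.

=item --verbose

Print additional progress information while the checks are running.

=item --debug

Print diagnostic details of individual checks. Implies --verbose.

=back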

=cut


#################### P R E A M B L E  and options parsing ####################

use strict;
use warnings;
use Getopt::Long;

# Command line switches and their default values.
my $conffile      = "/etc/arc.conf";
my $printall      = 0;
my $skip_warnings = 0;
my $help          = 0;
my $man           = 0;
my $verbose       = 0;
my $debug         = 0;
my $opensslpath;

# Reference time server.
# Please make sure this reference server is not one you administer yourself.
my $timeserver = "europe.pool.ntp.org";

# Installation prefixes are taken from the environment; the ARC prefix
# falls back to /usr when ARC_LOCATION is not set.
my $globusloc = $ENV{"GLOBUS_LOCATION"};
my $arcloc    = defined $ENV{"ARC_LOCATION"} ? $ENV{"ARC_LOCATION"} : "/usr";

# Operating system name as printed by uname, without the trailing newline.
chomp(my $OS = `uname`);

# Locations that are learned while the configuration file is inspected.
my $usercert;
my $hostcert;
my $CApath;

# Parse the command line. Options that carry a value are declared with
# '=s' (mandatory value): with ':s' a switch given without its value, e.g.
# a bare "--config", would silently replace the default by an empty string.
GetOptions(
	"config=s" => \$conffile,
	"printall" => \$printall,
	"skip-warnings" => \$skip_warnings,
	"timeserver=s" => \$timeserver,
	"openssl-path=s" => \$opensslpath,
	"verbose" => \$verbose,
	"debug" => \$debug,
	"help" => \$help,
	"man" => \$man
	) or die "Could not parse options.\n";

if ( $man or $help ) {
	# Load Pod::Usage only if needed.
	require Pod::Usage;
	Pod::Usage->import;
	# --help: brief usage; --man: the complete manual page.
	pod2usage(1) if $help;
	pod2usage(-verbose => 2) if $man;
}


# key counters

# Number of warnings and errors reported so far.
my ($warnings, $errors) = (0, 0);

# --debug implies --verbose.
if ($debug) {
	$verbose = 1;
}


########## S E E D   A S S U M P T I O N S   F O R   R U L E S ############

# Some bits of the configuration are dynamic and may cross-reference other
# bits of the configuration. This hash shall keep track of these.

# Maps the name of every section that may have sub-blocks to the list of
# block names found in the configuration file that belong to it. The lists
# start out empty and are filled by check_completeness().
my %introducedSections = (
	"group" => [],
	"gridftpd" => [],
	"infosys/cluster/registration" => [],
	"infosys/index" => [],
	"infosys/se" => [],
	"janitor" => [],
	"queue" => [],
	"se" => []
);

# Sorted so that the listing is identical from run to run.
print STDERR "The following sections have configurable subgroups: "
		. join(", ", sort keys %introducedSections)."\n" if $verbose;

################ U T I L  R O U T I N E S #################################

# Without --openssl-path, use whatever 'which' finds on the PATH. The
# result is the empty string when no openssl is found.
if (!defined($opensslpath)) {
	$opensslpath = `which openssl | tr -d '\n'`;
}

# Counts a warning and prints it to STDERR.
# With --skip-warnings the message is suppressed but still counted, so that
# the final summary can report how many issues were found "(not shown)".
# Argument: the complete message, including its trailing newline.
sub w($) {
	my ($message) = @_;
	$warnings++;
	print STDERR "W: $message" unless $skip_warnings;
}

# Reports an error on STDERR (prefixed with "E: ") and adds it to the
# error counter. Argument: the complete message, including its newline.
sub e($) {
	my ($message) = @_;
	print STDERR "E: $message";
	$errors++;
}

# Prints an informational message to STDERR, but only in verbose or debug
# mode. Nothing is counted.
sub v($) {
	unless ($verbose or $debug) {
		return;
	}
	my ($message) = @_;
	print STDERR "$message";
}

####################### C H E C K S  #######################################

=head1 PERFORMED TESTS

=over 4

=item timecheck

The current time is compared with an external time server. A clock
shift higher than a maximally allowed maxtimediff results in an error.

=cut

# Compares the local clock with a time server by running 'ntpdate -q'.
#   $timeserver  - host name of the NTP server to query
#   $maxtimediff - largest tolerated offset in seconds
# An offset of at least $maxtimediff is reported as an error, any smaller
# offset as a warning. A warning is also issued when ntpdate is not
# available or its output contains no offset.
sub timecheck($$) {
	my ($timeserver, $maxtimediff) = @_;
	my $timeoffset;

	my $ntpdate = "/usr/sbin/ntpdate";
	unless ( -x $ntpdate )  {
		w("Could not find location of 'ntpdate'.\n");
		return;
	}

	# List form of the pipe open: ntpdate is executed directly, so the
	# user supplied server name is never interpreted by a shell.
	my $ntp;
	unless (open($ntp, "-|", $ntpdate, "-q", $timeserver)) {
		w("Could not properly invoke 'ntpdate'.\n");
		return;
	}
	while (my $line = <$ntp>) {
		next unless $line =~ m/^server/;
		# keep the sign so that the message shows the direction of the skew
		if ($line =~ m/offset *([-+]?[0-9]*\.[0-9]+)/) {
			$timeoffset = $1;
		}
	}
	close $ntp;

	if (defined $timeoffset) {
		if (abs($timeoffset)>=$maxtimediff) {
			e("Timecheck: Your time differs by more than " .
				"$maxtimediff seconds ($timeoffset seconds) from the " .
				"public time server '$timeserver'\n");
		} else {
			w("Timecheck: Your time differs slightly " .
				"($timeoffset seconds) from the public time " .
				"server '$timeserver'.\n");
		}
	} else {
		w("Timecheck: Can't check the time\n");
	}
}

=item check of permissions

The permissions of several files and directories are checked.
The first argument is the file to be checked. The second is the permission
that it should have. The third is a binary mask that selects the bits
that are to be inspected.

=cut

# Tests the permission bits of a file.
#   $filename - path of the file or directory to inspect
#   $p        - expected permission bits (octal, e.g. 0644)
#   $mask     - selects which bits of the mode are compared with $p
# Returns true when the masked mode equals $p. Returns false when the
# mode differs or when the file cannot be stat'ed at all.
sub permcheck($$$) {
	my ($filename, $p, $mask) = @_;
	my @st = stat($filename);
	return 0 unless @st;

	# element 2 of stat() is the mode; drop the file type bits
	my $mode = $st[2] & 07777;
	# the name is passed as an argument so that a '%' in it cannot be
	# mistaken for a format directive
	printf "%s: mode %04o to be compared with %04o at mask %04o\n",
		$filename, $mode, $p, $mask if $debug;
	return ($p == ($mode & $mask));
}


# Performs simple stateless checks on a single configuration entry.
#   $block - name of the block the entry was found in
#   $name  - name of the parameter
#   $value - its value, already stripped of surrounding quotes
# Problems are reported through e() and w(). As a side effect, the
# locations of a valid host certificate and of the CA directory are
# remembered in $hostcert and $CApath for check_certificates().
sub confchecktripel($$$) {

	my ($block, $name, $value) = @_;

	printf STDOUT "Checking in block %s, name %s, value %s\n",$block,$name,$value if ($verbose);

	# check the certificate
	if ($block eq "common" and $name eq "x509_user_cert") {
		if (! -e $value) {
			e("The host certificate '$value' does not exist or is unreadable.\n");
		} elsif (! -O $value) {
			e("The host certificate '$value' is not owned by this user.\n");
		} elsif (!permcheck($value,0644,0777) and !permcheck($value,0444,0777)) {
			e("Permission of '$value' must be 'rw-r--r--' or 'r--r--r--'\n");
		}
		else {
			$hostcert=$value;
		}
	}

	# check the key
	elsif ($block eq "common" and $name eq "x509_user_key") {
		if (! -e $value) {
			e("The host key '$value' does not exist or is unreadable.\n");
		} elsif (! -O $value) {
			e("The host key '$value' is not owned by this user.\n");
		} elsif (!permcheck($value,0400,0777)) {
			e("Permission of '$value' must be 'r--------'\n");
		}
	}

	# check the certificate directory
	elsif ($block eq "common" and $name eq "x509_cert_dir") {
		if (! -d $value) {
			e("$name: The certificate directory does not exist.\n");
		} else {
			my @r0s=glob($value."/*.r0");
			if (!@r0s) {
				w("$name: There are no certificate revocation lists.\n");
			} else {
				my $t=time();
				my $maxdiffsecs=60*60*24*2; # two days
				foreach my $r0 (@r0s) {
					# element 9 of stat() is the modification time
					my $mtime = (stat($r0))[9];
					unless (defined $mtime) {
						# e.g. a dangling symbolic link
						w("$r0: cannot be inspected: $!\n");
						next;
					}
					if ($t < $mtime ) {
						e("$r0: mtime in future\n");
					} elsif ($t > $mtime + $maxdiffsecs) {
						w("$r0: Older than $maxdiffsecs seconds. rerun grid-update-crls\n");
					}
				}
			}
			$CApath=$value;
		}
	}

	# check the cache directory
	elsif ($block eq "grid-manager" and $name eq "cachedir") {
		# If available, second dir is on workernode
		my ($d) = split(/\s+/,$value);
		$d = "" unless defined $d;   # empty value
		if (! -d $d)  {
			e("cachedir: not existing at '$d'\n");
		}
	}

	# check the controldir
	elsif ($block eq "grid-manager" and $name eq "controldir") {
		if (! -d $value)  {
			e("$value: control directory ($name) does not exist\n");
		} elsif (!permcheck($value,0755,0777)) {
			e("$value: directory ($name) should be 755\n");
		}
	}

	# check all remaining directory entries of the grid-manager block for existence
	elsif ($block eq "grid-manager" and $name =~ m/dir$/) {
		if (! -d $value)  {
			e("$value: directory ($name) does not exist\n");
		}
		else {
			v("$name exists.\n");
			# The permissions can only be inspected on an existing
			# directory. Mode and mask 01777 test rwx for everybody
			# plus the sticky bit that the message asks for.
			if ($name eq "tmpdir" and !permcheck($value,01777,01777)) {
				e("The tmpdir directory must be writable and have the sticky bit set (chmod +t \"$value\")\n");
			}
		}
	}

	# check for any mount dirs, regardless of the block they appear in
	if ($name eq "mount") {
		if (! -d "$value") {
			e("$value: directory ($name) does not exist\n");
		}
		else {
			v("$name exists.\n");
		}
	}
}

=item configuration check

General checks on the sensibility of the arc.conf

=cut

# Reads and checks the configuration file.
#   $arcconf - path of the configuration file
# Every name=value entry is passed to confchecktripel(); the collected
# blocks are then handed to check_completeness(). With --printall the
# parsed configuration is printed in the order of appearance.
# Internally each block is a hash of its parameters. The pseudo parameter
# ">]|found|[<" holds the line number of the block header, and the values
# of parameters that may occur repeatedly are joined with ">]|sep|[<".
sub confcheck($) {
	my ($arcconf) = @_;
	my $config = {};

	my $fh;
	unless (open($fh, "<", $arcconf)) {
		e("Could not open '$arcconf' for reading.\n");
		return;
	}
	# parameters which are allowed more than once
	# taken from arc.conf.reference - somehow this needs to be kept in sync
	my %multiple = map { $_ => 1 } ("source", "filter", "sessiondir", "cachedir",
		"remotecachedir", "jobreport", "share_limit", "authplugin",
		"allowsubmit", "copyurl", "linkurl", "definedshare",
		"deliveryservice", "unixmap", "unixgroup", "unixvo", "dir",
		"remotegmdirs", "allowreg", "cluster_owner", "authorizedvo",
		"clustersupport", "opsys", "benchmark", "nodeaccess",
		"localse", "cacheserver");
	my $blockname;
	my $blockcontents = 0;
	my $c = 0;
	my $vo_counter = 0;

	# reports the block that was just finished if it had no entries
	my $report_if_empty = sub {
		return unless defined $blockname and $blockcontents == 0;
		(my $shown = $blockname) =~ s/\|[0-9]+$//;   # hide the vo counter
		e("$arcconf: Block \"$shown\" is empty\n");
	};

	# true if the text starts or ends with a quote character that has
	# no counterpart at the other end
	my $badly_quoted = sub {
		my ($text) = @_;
		return 0 if $text eq "";
		my ($first, $last) = (substr($text, 0, 1), substr($text, -1));
		foreach my $q ('"', "'") {
			return 1 if (($first eq $q) xor ($last eq $q));
		}
		return 0;
	};

	while (my $line = <$fh>) {
		$c++;

		next if $line =~ m/^#/;
		next if $line =~ m/^\s*$/;

		# a new block?
		if ($line =~ m/^\s*\[(.+)\]\s*$/) {
			$report_if_empty->();

			$blockname = $1;
			$blockcontents = 0;

			# blocknames must be unique
			# but there is a special case of vo-blocks...
			if ($blockname eq "vo") {
				$blockname .= "|" . ++$vo_counter;
			}
			if (exists $config->{$blockname}) {
				w("$arcconf:$c: Block '"
				  . $blockname
				  ."' is defined multiple times\n");
			}

			$config->{$blockname}{">]|found|[<"} = $c;

			next;
		}

		# look out for crap
		unless ($line =~ m/^([^=]*)=(.*)$/) {
			e("$arcconf: $c: Line is erroneous!\n");
			next;
		}

		my $name = $1;
		my $value = $2;

		# trim whitespace and one pair of matching quotes
		foreach my $item ($name, $value) {
			$item =~ s/^\s*//;
			$item =~ s/\s*$//;
			$item =~ s/^"(.*)"$/$1/;
			$item =~ s/^'(.*)'$/$1/;
		}

		if ($badly_quoted->($name)) {
			w("$arcconf: $c: badly quoted attribute name?\n");
		}
		if ($badly_quoted->($value)) {
			w("$arcconf: $c: badly quoted value?\n");
		}

		# are we within a block?
		unless (defined $blockname) {
			e("$arcconf:$c: found name=value pair which is " .
				"not part of a block\n");
			next;
		}

		# check if we know more about this kind of tripel
		confchecktripel($blockname, $name, $value);

		#count
		$blockcontents++;

		# exists() rather than truth: "0" and "" are values, too
		if (exists $config->{$blockname}{$name}) {
			if ($multiple{$name}) {
				$config->{$blockname}{$name} .= ">]|sep|[<" . $value;
			} else {
				w("$arcconf:$c: duplicate parameter: ".$name."\n");
			}
		} else {
			$config->{$blockname}{$name} = $value;
		}
	}

	# the last block of the file has no successor that would trigger the check
	$report_if_empty->();

	close $fh;

	check_completeness($config);

	if ($printall) {
		foreach my $key (sort { $config->{$a}{">]|found|[<"} <=> $config->{$b}{">]|found|[<"} }  keys %$config) {
			printf "\n# line: %d\n", $config->{$key}{">]|found|[<"};
			if ($key =~ m/^(.*)\|[0-9]+$/) {
				printf "[%s]\n", $1;
			} else {
				printf "[%s]\n", $key;
			}
			my $x = $config->{$key};
			foreach my $item (sort keys %$x) {
				next if $item eq ">]|found|[<";
				foreach my $val (split />\]\|sep\|\[</, $config->{$key}{$item}) {
					printf "%s=\"%s\"\n", $item, $val;
				}
			}
		}
	}
}

=item check for completeness

Reports an error if the presence of one value implies the presence of another that is missing.

=cut

# Checks which blocks are present in the parsed configuration.
#   $config - reference to the hash of blocks built by confcheck()
# Warns about unknown blocks, records sub-blocks such as "queue/fork" in
# %introducedSections, and reports missing mandatory blocks as well as
# missing or suspicious entries of the [common] block.
# (No prototype: the routine takes one argument.)
sub check_completeness {
	my ($config) = @_;

	my @required=("common", "grid-manager", "infosys", "cluster");

	my @optional=("gridftpd", "data-staging", "gangliarc", "group", "vo", "registration", "nordugridmap");

	my %known = map { $_ => 1 } (@required, @optional);

	# testing for unknown
	foreach my $k (keys %$config) {
		# confcheck() stores the vo blocks as "vo|<counter>"
		(my $plain = $k) =~ s/\|[0-9]+$//;
		next if $known{$plain};

		# A sub-block is the section name followed by "/...". Names are
		# quoted so that characters of the block name are never
		# interpreted as a regular expression.
		my @secs = grep { $k =~ m{^\Q$_\E(?:/|\z)} } keys %introducedSections;
		print STDERR "$k -> " . join(":",@secs)."\n" if $debug;

		if (0 == @secs) {
			w("Unknown group identifier '$k'\n");
			next;
		}
		elsif (1 < @secs) {
			die "Programming error: found multiple sections "
			    .join(",",@secs)." to be prefixes of secion '$k'.\n";
		}

		push @{ $introducedSections{$secs[0]} }, $k;
	}

	# testing for the missing
	foreach my $k (@required) {
		unless (exists($config->{$k})) {
			e("Missing group identifier '$k'\n");
		}
	}

	if (exists($config->{common})) {
		my $hn=`hostname -f`;
		$hn = "" unless defined $hn;   # command could not be run
		chomp($hn);
		if (!exists($config->{common}{lrms})) {
			e("The entry lrms must not be missed.\n");
		}
		if (!exists($config->{common}{hostname})) {
			e("The entry hostname must not be missed.\n");
		}
		elsif ($config->{common}{hostname} ne "$hn") {
			e("The entry of the full hostname (".$config->{common}{hostname}
			                      . ") is better\n"
			            ."   equal to `hostname -f` ($hn).\n"
			            ."   Also test reverse lookup of the hostname.\n");
		}
	}
}

=item check of applications

Some applications are required for the server to be functional.

=cut

# Checks that external programs needed by the server are installed.
# Currently only /usr/bin/time is tested.
sub check_applications() {
	unless ( -x "/usr/bin/time" ) {
		# the newline keeps the following message on a line of its own
		e("Could not find GNU time utility (or a substitute) at /usr/bin/time.\n");
	}
}


=item check of libraries

Uses ldd to check whether all shared libraries needed by the ARC binaries can be resolved.

=cut

# Runs ldd on the ARC executables (and on the ARC libraries when ARC is
# not installed below /usr) and reports
#   - shared libraries that cannot be resolved, each library only once,
#   - files that are linked against two different versions of one library.
# Nothing is checked when the Globus or ARC installation directory is
# missing.
sub check_libraries() {

	my $prob=0;

	if (! defined($globusloc)) {
		v("check_libraries: interpreting undefined GLOBUS_LOCATION as /usr\n");
		$globusloc="/usr";
	}

	unless ( -e $globusloc) {
		e("Cannot find Globus at $globusloc: no such file or directory\n");
		$prob++;
	}

	if (defined($arcloc)) {
		unless (-e $arcloc) {
			e("Cannot find ARC at $arcloc: no such file or directory\n");

			if (defined $ENV{"ARC_LOCATION"}) {
				w("The location was retrieved from the environment variable 'ARC_LOCATION'. Maybe this needs an update.\n");
			}
			$prob++;
		}
	}

	return if ($prob);

	my @globmes = ( $arcloc . "/bin/arc*",
			$arcloc . "/libexec/arc/gm*",
			$arcloc . "/sbin/arc*",
			$arcloc . "/sbin/grid*",
	);

	push(@globmes, $arcloc . "/lib/*") if "/usr" ne "$arcloc"; # takes too long, little gain expected

	my @to_check = map { glob $_ } @globmes;

	if ($verbose) {
		print "Checking the following files for their dependencies:\n";
		print join("\n",@to_check);
		print "\n";
	}

	# The environment part of the ldd command is the same for every file.
	my $ldd_prefix = "LC_ALL=C LD_LIBRARY_PATH=";
	$ldd_prefix .= "$arcloc/lib:" if "/usr" ne "$arcloc" and "/usr/" ne "$arcloc";
	$ldd_prefix .= "\$LD_LIBRARY_PATH ";

	my %missing;
	foreach my $file ( @to_check ) {
		next unless -f $file;
		next if $file =~ m/\.a$/;
		next if $file =~ m/\.la$/;

		# single-quote the file name for the shell
		(my $quoted = $file) =~ s/'/'\\''/g;
		my $command = $ldd_prefix . "ldd '$quoted' 2>/dev/null";

		my $ldd;
		unless (open($ldd, "-|", $command)) {
			w("Cannot check used libraries of $file.\n");
			next;
		}

		my %libs;   # library name => version seen for this file
		while (my $line = <$ldd>) {
			chomp $line;

			if ($line =~ m/^\s*([^\s]+)\.so\.([^\s]+)\s*=>/) {
				my ($libname, $version) = ($1, $2);
				my $index = $libname;
				# NOTE(review): presumably meant to strip the Globus
				# flavour suffix; it requires a '.' after the suffix,
				# which $libname normally does not contain - confirm.
				$index =~ s/_(gcc)(16|32|64|128|256)(dbg)?(pthr)?\././;
				if (!defined $libs{$index}) {
					$libs{$index} = $version;
				} elsif ($libs{$index} ne $version) {
					# only a different version is a conflict
					e("$file: uses multiple versions of lib " .
						"$libname: ".$libs{$index}." and $version. This might not work\n");
				}
			}

			next unless $line =~ /not found/;
			# skip lines without a library name instead of using a stale $1
			next unless $line =~ m/^\s*([^\s]+)\s*=>/;
			my $lib = $1;

			unless (defined $missing{$lib}) {
				$missing{$lib} = 1;
				e("$file: needs $lib. Not found.\n");
			}
		}
		close $ldd;
	}
}

# Verifies the host certificate against the CA certificates with
# 'openssl verify'.
# Uses $CApath and $hostcert as found in the configuration file and falls
# back to /etc/grid-security/certificates and
# /etc/grid-security/hostcert.pem. The verification counts as successful
# when openssl prints a line containing 'OK'.
sub check_certificates() {

	# check if CAdir is present

	if (!defined($CApath)) {
		e("The x509_cert_dir was not set.\n");
		$CApath="/etc/grid-security/certificates";
		return unless -d $CApath;
	}
	elsif ( ! -d "$CApath") {
		e("CApath does not exist at '$CApath'\n");
		return;
	}

	unless (defined($opensslpath) and "" ne "$opensslpath") {
		w("openssl application not in path and not specified.\n");
		return;
	}

	unless ( -x "$opensslpath") {
		e("Cannot execute openssl application at '$opensslpath'\n");
		# nothing can be verified without openssl
		return;
	}

	# check of host certificate

	if (!defined($hostcert)) {
		$hostcert="/etc/grid-security/hostcert.pem";
	}
	unless ( -f $hostcert) {
		w("Not verifying host cert which is not present at $hostcert (should already be reported).\n");
		return;
	}

	# List form of the pipe open: the paths are passed to openssl as
	# they are and never interpreted by a shell.
	my $verified = 0;
	my $ssl;
	if (open($ssl, "-|", $opensslpath, "verify", "-CApath", $CApath, $hostcert)) {
		while (my $line = <$ssl>) {
			next unless $line =~ m/OK/;
			print $line;
			$verified = 1;
		}
		close $ssl;
	}
	unless ($verified) {
		e("Verification of host cert at $hostcert with $opensslpath failed.\n");
	}
}


# Run all checks. The clock may differ by up to 0.2 seconds from the
# time server before the difference is treated as an error.
timecheck($timeserver, 0.2);
confcheck($conffile);
check_applications();
check_libraries();
check_certificates();

# Summary
if (0 == $errors) {
	print "Found no apparent failures.\n";
} else {
	printf "Found %d failure%s.\n", $errors, ($errors > 1) ? "s" : "";
}
if ($warnings) {
	printf "Found %d non-critical issue%s%s.\n",
		$warnings, ($warnings > 1) ? "s" : "",
		($skip_warnings?" (not shown)":"");
}

# The exit status is the number of errors. It is capped at 255 because the
# status is truncated to 8 bits and, for instance, 256 errors would
# otherwise be reported as success.
exit($errors > 255 ? 255 : $errors);

=back

=head1 SEE ALSO

http://www.nordugrid.org and our mailing lists.

=cut


# EOF
