From: Russ Allbery Date: Thu, 25 Mar 2004 06:19:52 +0000 (+0000) Subject: Initial version. X-Git-Url: https://git.michaelhowe.org/gitweb/?a=commitdiff_plain;h=19a29854ead44999f123a2557d708649c01b0027;p=packages%2Fa%2Fafs-monitor.git Initial version. --- diff --git a/check_udebug b/check_udebug new file mode 100755 index 0000000..3ed4b07 --- /dev/null +++ b/check_udebug @@ -0,0 +1,203 @@ +#!/usr/bin/perl -w +$ID = q$Id$; +# +# check_udebug -- Check AFS database servers using udebug for Nagios. +# +# Written by Russ Allbery +# Copyright 2004 Board of Trustees, Leland Stanford Jr. University +# +# This program is free software; you may redistribute it and/or modify it +# under the same terms as Perl itself. +# +# Takes a hostname and a port number and checks the udebug output for that +# host and port. Reports an error if the recovery state is not 1f on the sync +# site (ensuring that it considers all of the other servers up-to-date) or if +# any of the servers don't believe there is a sync site. + +############################################################################## +# Site configuration +############################################################################## + +# The default timeout in seconds (implemented by alarm) for udebug. +$TIMEOUT = 10; + +# The full path to udebug. Make sure that this is on local disk so that +# monitoring doesn't have an AFS dependency. +($UDEBUG) = grep { -x $_ } qw(/usr/bin/udebug /usr/local/bin/udebug); +$UDEBUG ||= '/usr/local/bin/udebug'; + +############################################################################## +# Modules and declarations +############################################################################## + +require 5.003; + +use strict; +use vars qw($ID $TIMEOUT $UDEBUG); + +use Getopt::Long qw(GetOptions); + +############################################################################## +# Implementation +############################################################################## + +# Parse command line options. +my ($help, $host, $port, $version); +Getopt::Long::config ('bundling', 'no_ignore_case'); +GetOptions ('hostname|H=s' => \$host, + 'help|h' => \$help, + 'port|p=i' => \$port, + 'timeout|t=i' => \$TIMEOUT, + 'version|V' => \$version) or exit 3; +if ($help) { + print "Feeding myself to perldoc, please wait....\n"; + exec ('perldoc', '-t', $0) or die "Cannot fork: $!\n"; +} elsif ($version) { + my $version = join (' ', (split (' ', $ID))[1..3]); + $version =~ s/,v\b//; + $version =~ s/(\S+)$/($1)/; + $version =~ tr%/%-%; + print $version, "\n"; + exit 0; +} +if (@ARGV || !(defined ($host) && defined ($port))) { + warn "Usage: $0 [-hv] [-t ] -H -p \n"; + exit 3; +} + +# Set up the alarm. +$SIG{ALRM} = sub { + print "UBIK CRITICAL: network timeout after $TIMEOUT seconds\n"; + exit 2; +}; +alarm ($TIMEOUT); + +# Run udebug and parse the output. We're looking for three things: first, +# we're looking to see if this host claims to be the sync site. If so, check +# that recovery state is 1f. Otherwise, make sure that there's a defined sync +# host. +unless (open (UDEBUG, "$UDEBUG $host $port |")) { + warn "$0: cannot run udebug\n"; + exit 3; +} +my ($issync, $recovery, $synchost); +while () { + $issync = 1 if /^I am sync site /; + $recovery = 1 if /^Recovery state 1f/; + $synchost = 1 if /^Sync host \d+(\.\d+){3} was set /; +} +close UDEBUG; +if ($? != 0) { + print "UBIK CRITICAL: udebug failed\n"; + exit 2; +} + +# Check the results. +if ($issync && !$recovery) { + print "UBIK CRITICAL: recovery state not 1f\n"; + exit 2; +} elsif (!$issync && !$synchost) { + print "UBIK CRITICAL: no sync site\n"; + exit 2; +} else { + print "UBIK OK\n"; + exit 0; +} + +############################################################################## +# Documentation +############################################################################## + +=head1 NAME + +check_udebug - Check AFS servers for blocked connections in Nagios + +=head1 SYNOPSIS + +check_udebug [B<-hV>] [B<-t> I] B<-H> I B<-p> I + +=head1 DESCRIPTION + +B is a Nagios plugin for checking AFS database servers to make +sure the Ubik replication between the database servers is running correctly. +B is used to connect to the specified port, which should generally +be one of 7002 (ptserver), 7003 (vlserver), or 7004 (kaserver), on the +specified server. The resulting output is checked to make sure that the +recovery state is 1f if that server is the sync site, or that a sync site is +known if that server doesn't claim to be the sync site. + +B will always print out a single line of output. That line +will be C if everything is fine, or C followed by +an error message otherwise. + +=head1 OPTIONS + +=over 4 + +=item B<-H> I, B<--hostname>=I + +The AFS database server whose Ubik status B should check. +This option is required. + +=item B<-h>, B<--help> + +Print out this documentation (which is done simply by feeding the script +to C). + +=item B<-p> I, B<--port>=I + +The port to connect to on the AFS database server. This should generally be +one of 7002 (ptserver), 7003 (vlserver), or 7004 (kaserver). This option is +required. + +=item B<-t> I, B<--timeout>=I + +Change the timeout for the B command. The default timeout is 60 +seconds. + +=item B<-V>, B<--version> + +Print out the version of B and quit. + +=back + +=head1 EXIT STATUS + +B follows the standard Nagios exit status requirements. This +means that it will exit with status 0 if there are no problems or with +status 2 if there are critical problems. For other errors, such as invalid +syntax, B will exit with status 3. + +=head1 BUGS + +The standard B<-v> verbose Nagios plugin option is not supported. It should +print out the full B output. + +The usage message for invalid options and for the B<-h> option doesn't +conform to Nagios standards. + +=head1 CAVEATS + +This script does not use the Nagios util library or any of the defaults that +it provides, which makes it somewhat deficient as a Nagios plugin. This is +intentional, though, since this script can be used with other monitoring +systems as well. It's not clear what a good solution to this would be. + +=head1 SEE ALSO + +The current version of this and other AFS monitoring plugins for Nagios are +available from the AFS monitoring tools page at +L. + +=head1 AUTHORS + +Russ Allbery + +=head1 COPYRIGHT AND LICENSE + +Copyright 2004 Board of Trustees, Leland Stanford Jr. University. + +This program is free software; you may redistribute it and/or modify it +under the same terms as Perl itself. + +=cut