--- /dev/null
+#!/usr/bin/perl -w
+$ID = q$Id$;
+#
+# check_udebug -- Check AFS database servers using udebug for Nagios.
+#
+# Written by Russ Allbery <rra@stanford.edu>
+# Copyright 2004 Board of Trustees, Leland Stanford Jr. University
+#
+# This program is free software; you may redistribute it and/or modify it
+# under the same terms as Perl itself.
+#
+# Takes a hostname and a port number and checks the udebug output for that
+# host and port. Reports an error if the recovery state is not 1f on the sync
+# site (ensuring that it considers all of the other servers up-to-date) or if
+# any of the servers don't believe there is a sync site.
+
+##############################################################################
+# Site configuration
+##############################################################################
+
+# The default timeout in seconds (implemented by alarm) for udebug.
+$TIMEOUT = 10;
+
+# The full path to udebug. Make sure that this is on local disk so that
+# monitoring doesn't have an AFS dependency.
+($UDEBUG) = grep { -x $_ } qw(/usr/bin/udebug /usr/local/bin/udebug);
+$UDEBUG ||= '/usr/local/bin/udebug';
+
+##############################################################################
+# Modules and declarations
+##############################################################################
+
+require 5.003;
+
+use strict;
+use vars qw($ID $TIMEOUT $UDEBUG);
+
+use Getopt::Long qw(GetOptions);
+
+##############################################################################
+# Implementation
+##############################################################################
+
+# Parse command line options.
+my ($help, $host, $port, $version);
+Getopt::Long::config ('bundling', 'no_ignore_case');
+GetOptions ('hostname|H=s' => \$host,
+ 'help|h' => \$help,
+ 'port|p=i' => \$port,
+ 'timeout|t=i' => \$TIMEOUT,
+ 'version|V' => \$version) or exit 3;
+if ($help) {
+ print "Feeding myself to perldoc, please wait....\n";
+ exec ('perldoc', '-t', $0) or die "Cannot fork: $!\n";
+} elsif ($version) {
+ my $version = join (' ', (split (' ', $ID))[1..3]);
+ $version =~ s/,v\b//;
+ $version =~ s/(\S+)$/($1)/;
+ $version =~ tr%/%-%;
+ print $version, "\n";
+ exit 0;
+}
+if (@ARGV || !(defined ($host) && defined ($port))) {
+ warn "Usage: $0 [-hv] [-t <timeout>] -H <host> -p <port>\n";
+ exit 3;
+}
+
+# Set up the alarm.
+$SIG{ALRM} = sub {
+ print "UBIK CRITICAL: network timeout after $TIMEOUT seconds\n";
+ exit 2;
+};
+alarm ($TIMEOUT);
+
+# Run udebug and parse the output. We're looking for three things: first,
+# we're looking to see if this host claims to be the sync site. If so, check
+# that recovery state is 1f. Otherwise, make sure that there's a defined sync
+# host.
+unless (open (UDEBUG, "$UDEBUG $host $port |")) {
+ warn "$0: cannot run udebug\n";
+ exit 3;
+}
+my ($issync, $recovery, $synchost);
+while (<UDEBUG>) {
+ $issync = 1 if /^I am sync site /;
+ $recovery = 1 if /^Recovery state 1f/;
+ $synchost = 1 if /^Sync host \d+(\.\d+){3} was set /;
+}
+close UDEBUG;
+if ($? != 0) {
+ print "UBIK CRITICAL: udebug failed\n";
+ exit 2;
+}
+
+# Check the results.
+if ($issync && !$recovery) {
+ print "UBIK CRITICAL: recovery state not 1f\n";
+ exit 2;
+} elsif (!$issync && !$synchost) {
+ print "UBIK CRITICAL: no sync site\n";
+ exit 2;
+} else {
+ print "UBIK OK\n";
+ exit 0;
+}
+
+##############################################################################
+# Documentation
+##############################################################################
+
+=head1 NAME
+
+check_udebug - Check AFS servers for blocked connections in Nagios
+
+=head1 SYNOPSIS
+
+check_udebug [B<-hV>] [B<-t> I<timeout>] B<-H> I<host> B<-p> I<port>
+
+=head1 DESCRIPTION
+
+B<check_udebug> is a Nagios plugin for checking AFS database servers to make
+sure the Ubik replication between the database servers is running correctly.
+B<udebug> is used to connect to the specified port, which should generally
+be one of 7002 (ptserver), 7003 (vlserver), or 7004 (kaserver), on the
+specified server. The resulting output is checked to make sure that the
+recovery state is 1f if that server is the sync site, or that a sync site is
+known if that server doesn't claim to be the sync site.
+
+B<check_udebug> will always print out a single line of output. That line
+will be C<UBIK OK> if everything is fine, or C<UBIK CRITICAL: > followed by
+an error message otherwise.
+
+=head1 OPTIONS
+
+=over 4
+
+=item B<-H> I<host>, B<--hostname>=I<host>
+
+The AFS database server whose Ubik status B<check_udebug> should check.
+This option is required.
+
+=item B<-h>, B<--help>
+
+Print out this documentation (which is done simply by feeding the script
+to C<perldoc -t>).
+
+=item B<-p> I<port>, B<--port>=I<port>
+
+The port to connect to on the AFS database server. This should generally be
+one of 7002 (ptserver), 7003 (vlserver), or 7004 (kaserver). This option is
+required.
+
+=item B<-t> I<timeout>, B<--timeout>=I<timeout>
+
+Change the timeout for the B<udebug> command. The default timeout is 60
+seconds.
+
+=item B<-V>, B<--version>
+
+Print out the version of B<check_udebug> and quit.
+
+=back
+
+=head1 EXIT STATUS
+
+B<check_udebug> follows the standard Nagios exit status requirements. This
+means that it will exit with status 0 if there are no problems or with
+status 2 if there are critical problems. For other errors, such as invalid
+syntax, B<check_udebug> will exit with status 3.
+
+=head1 BUGS
+
+The standard B<-v> verbose Nagios plugin option is not supported. It should
+print out the full B<udebug> output.
+
+The usage message for invalid options and for the B<-h> option doesn't
+conform to Nagios standards.
+
+=head1 CAVEATS
+
+This script does not use the Nagios util library or any of the defaults that
+it provides, which makes it somewhat deficient as a Nagios plugin. This is
+intentional, though, since this script can be used with other monitoring
+systems as well. It's not clear what a good solution to this would be.
+
+=head1 SEE ALSO
+
+The current version of this and other AFS monitoring plugins for Nagios are
+available from the AFS monitoring tools page at
+L<http://www.eyrie.org/~eagle/software/afs-monitor/>.
+
+=head1 AUTHORS
+
+Russ Allbery <rra@stanford.edu>
+
+=head1 COPYRIGHT AND LICENSE
+
+Copyright 2004 Board of Trustees, Leland Stanford Jr. University.
+
+This program is free software; you may redistribute it and/or modify it
+under the same terms as Perl itself.
+
+=cut