-#!/usr/local/bin/perl
-# $Id$
+#!/usr/bin/perl -w
+$ID = q$Id$;
#
-# Monitor disk space usage. Susan Feng (sfeng@stanford.edu)
+# check_afsspace -- Monitor AFS disk space usage under Nagios.
#
-# Arguments are:
+# Written by Susan Feng <sfeng@stanford.edu>
+# Updated by Russ Allbery <rra@stanford.edu>
+# Copyright 2003, 2004 Board of Trustees, Leland Stanford Jr. University
#
-# host [host...]
-#
-# This script will exit with value 1 if the partions on "host" is above 85%
-# full.
-#
-# The first output line is a list of the hosts which failed, and
-# how much space is free, in kbytes.
+# This program is free software; you may redistribute it and/or modify it
+# under the same terms as Perl itself.
#
+# Expects a list of hosts as command-line arguments and checks the partition
+# usage with vos partinfo. Exits with status 1 if the free space is below a
+# warning percentage and with status 2 if the free space is above a critical
+# percentage (this works with the Nagios check architecture).
+
+##############################################################################
+# Site configuration
+##############################################################################
+
+# The default percentage full at which to warn and at which to send a critical
+# alert. These can be overridden with the -w and -c command-line options.
+$WARNINGS = 85;
+$CRITICAL = 90;
+
+# The default timeout in seconds (implemented by alarm) for vos partinfo.
+$TIMEOUT = 10;
+
+# The full path to vos. Make sure that this is on local disk so that
+# monitoring doesn't have an AFS dependency.
+($VOS) = grep { -x $_ } qw(/usr/bin/vos /usr/local/bin/vos);
+$VOS ||= '/usr/local/bin/vos';
+
+##############################################################################
+# Modules and declarations
+##############################################################################
-use Getopt::Std;
-use English;
+require 5.003;
-%options=();
-getopts ("c:w:H:", \%options);
+use strict;
+use vars qw($CRITICAL $ID $TIMEOUT $VOS $WARNINGS);
-$WARNINGS = $options{w} || 85;
-$CRITICAL = $options{c} || 90;
-$h = $options{H};
+use Getopt::Long qw(GetOptions);
+##############################################################################
+# Implementation
+##############################################################################
+
+# Parse command line options.
+my ($help, $host, $version);
+Getopt::Long::config ('bundling', 'no_ignore_case');
+GetOptions ('critical|c=i' => \$CRITICAL,
+ 'hostname|H=s' => \$host,
+ 'help|h' => \$help,
+ 'timeout|t=i' => \$TIMEOUT,
+ 'version|V' => \$version,
+ 'warning|w=i' => \$WARNINGS) or exit 3;
+if ($help) {
+ print "Feeding myself to perldoc, please wait....\n";
+ exec ('perldoc', '-t', $0) or die "Cannot fork: $!\n";
+} elsif ($version) {
+ my $version = join (' ', (split (' ', $ID))[1..3]);
+ $version =~ s/,v\b//;
+ $version =~ s/(\S+)$/($1)/;
+ $version =~ tr%/%-%;
+ print $version, "\n";
+ exit 0;
+}
+if (@ARGV) {
+ warn "Usage: $0 [-hv] [-c <level>] [-w <level>] -H <host>\n";
+ exit 3;
+}
if ($WARNINGS > $CRITICAL) {
- print "Warning amount cannot be greater than Critical amount.\n";
- exit 0;
+ warn "$0: warning level $WARNINGS greater than critical level $CRITICAL\n";
+ exit 3;
}
-
-my $voscmd="/usr/local/bin/vos partinfo";
-my @failures=();
-my ($part, $used, $free, $total, $percent);
-
-@infos=();
-@infos=`$voscmd $h 2> /dev/null`;
-
-foreach $l (@infos) {
- ($part,$free,$total) = (split(/\s+/,$l))[4,5,11];
- $used=$total-$free;
- $percent=int (($used/$total)*100);
- if ($percent >= $CRITICAL) {
- push (@critical, "$h: $part $percent% full, free $free ");
- }
- elsif ($percent >= $WARNINGS) {
- push (@warnings, "$h: $part $percent% full, free $free ");
- }
- push (@final, "$part$percent%");
-
+
+# Set up the alarm.
+$SIG{ALRM} = sub {
+ warn "AFS CRITICAL: network timeout after $TIMEOUT seconds\n";
+ exit 2;
+};
+alarm ($TIMEOUT);
+
+# Get the partinfo information and calculate the percentage free for each
+# partition. Accumulate critical messages in @critical and warnings in
+# @warnings. Accumulate all percentages in @all.
+my (@critical, @warnings, @all);
+my @data = `$VOS partinfo '$host' 2> /dev/null`;
+for (@data) {
+ my ($partition, $free, $total) = (split)[4,5,11];
+ my $percent = int ((($total - $free) / $total) * 100);
+ if ($percent >= $CRITICAL) {
+ push (@critical, "$partition$percent% (free $free)");
+ } elsif ($percent >= $WARNINGS) {
+ push (@warnings, "$partition$percent% (free $free)");
+ }
+ push (@all, "$partition$percent%");
}
+# Exit with the appropriate error messages.
if (@critical) {
- print @critical;
+ print "AFS CRITICAL: @critical\n";
exit 2;
+} elsif (@warnings) {
+ print "AFS WARNING: @warnings\n";
+ exit 1;
+} else {
+ print "AFS OK: @all\n";
+ exit 0;
}
-if (@warnings) {
- print @warnings;
- exit 1;
-}
-foreach $l (@final) {
- chomp($l);
- print "$l ";
-}
-print "\n";
-exit 0;
+
+##############################################################################
+# Documentation
+##############################################################################
+
+=head1 NAME
+
+check_afsspace - Monitor AFS disk space usage under Nagios
+
+=head1 SYNOPSIS
+
+check_afsspace [B<-hv>] [B<-c> I<threshold>] [B<-w> I<threshold>]
+[B<-t> I<timeout>] B<-H> I<host>
+
+=head1 DESCRIPTION
+
+B<check_afsspace> is a Nagios plugin for checking free space on AFS server
+partitions. It uses C<vos partinfo> to obtain the free space on the
+partitions on an AFS server and will return an alert if the percentage of
+used space exceeds a threshold. By default, it returns a critical error if
+the used space is over 90% and a warning if it is over 85% (changable with
+the B<-c> and B<-w> options).
+
+B<check_afsspace> will always print out a single line of output, giving the
+critical errors if any, otherwise giving the warnings if any, otherwise
+listing in an abbreviated form the percentage free space for all partitions.
+
+=head1 OPTIONS
+
+=over 4
+
+=item B<-c> I<threshold>, B<--critical>=I<threshold>
+
+Change the critical percentage threshold to I<threshold>, which should be an
+integer percentage. The default is 90.
+
+=item B<-H> I<host>, B<--hostname>=I<host>
+
+The AFS file server whose free space B<check_afsspace> should check. This
+option is required.
+
+=item B<-h>, B<--help>
+
+Print out this documentation (which is done simply by feeding the script
+to C<perldoc -t>).
+
+=item B<-t> I<timeout>, B<--timeout>=I<timeout>
+
+Change the timeout for the C<vos partinfo> command. The default timeout is
+10 seconds.
+
+=item B<-V>, B<--version>
+
+Print out the version of B<check_afsspace> and quit.
+
+=item B<-w> I<threshold>, B<--warning>=I<threshold>
+
+Change the warning percentage threshold to I<threshold>, which should be an
+integer percentage. The default is 85.
+
+=back
+
+=head1 EXIT STATUS
+
+B<check_afsspace> follows the standard Nagios exit status requirements.
+This means that it will exit with status 0 if there are no problems, with
+status 2 if there is at least one critical partition for that server, and
+with status 1 if there are no critical partitions but at least one warning
+partition. For other errors, such as invalid syntax, B<check_afsspace> will
+exit with status 3.
+
+=head1 BUGS
+
+The standard B<-v> verbose Nagios plugin option is not supported and should
+be. (For example, under B<-vv> we would want to show the actual total,
+free, and used byte counts, not just the percentages.)
+
+The usage message for invalid options and for the B<-h> option doesn't
+conform to Nagios standards.
+
+=head1 CAVEATS
+
+This script does not use the Nagios util library or any of the defaults that
+it provides, which makes it somewhat deficient as a Nagios plugin. This is
+intentional, though, since this script can be used with other monitoring
+systems as well. It's not clear what a good solution to this would be.
+
+=head1 SEE ALSO
+
+vos(1)
+
+The current version of this and other AFS monitoring plugins for Nagios are
+available from the AFS monitoring tools page at
+L<http://www.eyrie.org/~eagle/software/afs-monitor/>.
+
+=head1 AUTHORS
+
+Originally written by Susan Feng for use with mon. Updated by Quanah
+Gibson-Mount to work with Nagios, and then further updated by Russ Allbery
+<rra@stanford.edu> to support more standard options and to use a more
+uniform coding style.
+
+=head1 COPYRIGHT AND LICENSE
+
+Copyright 2003, 2004 Board of Trustees, Leland Stanford Jr. University.
+
+This program is free software; you may redistribute it and/or modify it
+under the same terms as Perl itself.
+
+=cut