install:
$(INSTALL) -d $(DSTPLUGINDIR)
- $(INSTALL) -m 0755 $(PLUGINDIR)/snmp__networkclients $(DSTPLUGINDIR)
+ $(INSTALL) -m 0755 $(PLUGINDIR)/* $(DSTPLUGINDIR)
.PHONY: install
+munin-plugins-local (0.4) unstable; urgency=low
+
+ * Add smart_byid_ until munin changeset 492fb56 makes it in to smart_
+ * Add smart_raw_ to graph the raw values of things
+
+ -- Michael Howe <michael@michaelhowe.org> Mon, 07 Jan 2013 12:28:45 +0000
+
munin-plugins-local (0.3) unstable; urgency=low
* Let us track individual MAC addresses
Package: munin-plugins-local
Architecture: all
Depends: munin-node, ${misc:Depends}
+Suggests: python
Description: network-wide graphing framework (local plugins for node)
Extra plugins for munin-node, for use on michaelhowe.org machines.
--- /dev/null
+#!/usr/bin/env python
+# -*- python -*-
+#
+# Wildcard-plugin to monitor S.M.A.R.T attribute values through smartctl,
+# which is part of smartmontools package:
+# http://smartmontools.sourceforge.net/
+#
+# To monitor a S.M.A.R.T device, link smart_<device> to this file.
+# E.g.
+# ln -s /usr/share/munin/plugins/smart_ /etc/munin/plugins/smart_hda
+# ...will monitor /dev/hda.
+#
+# Needs following minimal configuration in plugin-conf.d/munin-node:
+# [smart_*]
+# user root
+# group disk
+#
+# Parameters
+# smartpath - Specify path to smartctl program (Default: /usr/sbin/smartctl)
+# smartargs - Override '-a' argument passed to smartctl with '-A -i'+smartargs
+# ignorestandby - Ignore the standby state of the drive and perform SMART query. Default: False
+#
+# Parameters can be specified on a per-drive basis, eg:
+# [smart_hda]
+# user root
+# group disk
+# env.smartargs -H -c -l error -l selftest -l selective -d ata
+# env.smartpath /usr/local/sbin/smartctl
+#
+# [smart_twa0-1]
+# user root
+# group disk
+# env.smartargs -H -l error -d 3ware,1
+# env.ignorestandby True
+#
+# [smart_twa0-2]
+# user root
+# group disk
+# env.smartargs -H -l error -d 3ware,2
+#
+# Author: Nicolas Stransky <Nico@stransky.cx>
+#
+# v1.0 22/08/2004 - First draft
+# v1.2 28/08/2004 - Clean up the code, add a verbose option
+# v1.3 14/11/2004 - Compatibility with python<2.2. See comments in the code
+# v1.4 17/11/2004 - Deal with non zero exit codes of smartctl
+# - config now prints the critical thresholds, as reported by smartctl
+# v1.5 18/11/2004 - Plot smartctl_exit_code bitmask
+# v1.6 21/11/2004 - Add autoconf and suggest capabilities
+# - smartctl path can be passed through "smartpath" environment variable
+# - Additional smartctl args can be passed through "smartargs" environment variable
+# v1.7 29/11/2004 - Add suggest capabilities for NetBSD, OpenBSD, FreeBSD and SunOS.
+# - Allow to override completely the smartctl arguments with "smartargs"
+# v1.8 16/02/2005 - Exit status field now only triggers warnings, not criticals.
+# v1.9 07/07/2005 - Allow to query several drives on the same 3ware card.
+# - Correct a bug when '-i' was not listed in smartargs
+# - Don't fail if no value was obtained for hard drive model
+# v1.10 19/08/2005 - smartctl_exit_code is now a numerical value
+# v2.0 08/05/2009 - Correct bug in the interpretation of smartctl_exit_code
+# - New option to suppress SMART warnings in munin
+# - Temporary lack of output for previously existing drive now reports U
+# - The plugin now contains its own documentation for use with munindoc
+# - Removed python<2.2 compatibility comments
+# - Better autodetection of drives
+# - Don't spin up devices in a low-power mode.
+# v2.1 2012-02-14 - Add support for Darwin (Mac OS X).
+# - Print the last line of smartctl output to verbose
+# log to aid in understanding why smartctl exited with
+# a nonzero status.
+#
+# Copyright (c) 2004-2009 Nicolas Stransky.
+#
+# Permission to use, copy, and modify this software with or without fee
+# is hereby granted, provided that this entire notice is included in
+# all source code copies of any software which is or includes a copy or
+# modification of this software.
+#
+# THIS SOFTWARE IS BEING PROVIDED "AS IS", WITHOUT ANY EXPRESS OR
+# IMPLIED WARRANTY. IN PARTICULAR, NONE OF THE AUTHORS MAKES ANY
+# REPRESENTATION OR WARRANTY OF ANY KIND CONCERNING THE
+# MERCHANTABILITY OF THIS SOFTWARE OR ITS FITNESS FOR ANY PARTICULAR
+# PURPOSE.
+#
+#
+# Magic markers
+#%# capabilities=autoconf suggest
+#%# family=auto
+
+## You may edit the following 3 variables
+# Increase verbosity (True/False)
+verbose=False
+# Suppress SMART warnings (True/False)
+report_warnings=True
+# You may not modify anything below this line
+
+import os, sys, string, pickle
+from math import log
+plugin_version="2.1"
+statefiledir=os.environ['MUNIN_PLUGSTATE']
+
+def verboselog(s):
+ global plugin_name
+ sys.stderr.write(plugin_name+': '+s+'\n')
+
+if not verbose :
+ verboselog = lambda s: None
+
+def read_values(hard_drive):
+ global smart_values, emptyoutput
+ try :
+ verboselog('Reading S.M.A.R.T values')
+ os.putenv('LC_ALL','C')
+ device='/dev/'+hard_drive
+ if not os.path.exists(device):
+ device='/dev/disk/by-id/'+hard_drive
+ smart_output=os.popen(os.getenv('smartpath','/usr/sbin/smartctl')+' '+os.getenv('smartargs','-a')+(os.getenv('ignorestandby',False) and ' ' or ' -n standby ')+'-A -i '+device)
+ read_values=0
+ lastline=None
+ for l in smart_output :
+ lastline=l
+ if l[:-1]=='' :
+ read_values=0
+ elif l[:13]=='Device Model:' or l[:7]=='Device:' :
+ model_list=string.split(string.split(l,':')[1])
+ try: model_list.remove('Version')
+ except : None
+ model=string.join(model_list)
+ if read_values==1 :
+ smart_attribute=string.split(l)
+ smart_values[string.replace(smart_attribute[1],'-','_')]={"value":smart_attribute[3],"threshold":smart_attribute[5]}
+ elif l[:18]=="ID# ATTRIBUTE_NAME" :
+ # Start reading the Attributes block
+ read_values=1
+ exit_status=smart_output.close()
+ if exit_status!=None :
+ # smartctl exit code is a bitmask, check man page.
+ num_exit_status=int(exit_status/256) # Python convention
+ if int(log(num_exit_status,2))<=2 : # bit code
+ verboselog('smartctl cannot access S.M.A.R.T values on drive '+hard_drive+'. Command exited with code '+str(num_exit_status)+' (bit '+str(int(log(num_exit_status,2)))+')')
+ verboselog(lastline[:-1])
+ else :
+ verboselog('smartctl exited with code '+str(num_exit_status)+' (bit '+str(int(log(num_exit_status,2)))+'). '+hard_drive+' may be FAILING RIGHT NOW!')
+ else :
+ num_exit_status=0
+ except :
+ verboselog('Cannot access S.M.A.R.T values! Check user rights or proper smartmontools installation/arguments.')
+ sys.exit(1)
+ if smart_values=={} :
+ verboselog('Can\'t find any S.M.A.R.T values in smartctl output!')
+ emptyoutput=True
+ #sys.exit(1)
+ else : emptyoutput=False
+ smart_values["smartctl_exit_status"]={"value":str(num_exit_status),"threshold":"1"}
+ try : smart_values["model"]=model
+ # For some reason we may have no value for "model"
+ except : smart_values["model"]="unknown"
+ return(exit_status)
+
+def open_state_file(hard_drive,mode) :
+ global statefiledir
+ return open(statefiledir+'/smart-'+string.join(hard_drive,"-")+'.state',mode)
+
+def update_state_file(hard_drive) :
+ try:
+ verboselog('Saving statefile')
+ pickle.dump(smart_values,open_state_file(hard_drive,"w"))
+ except :
+ verboselog('Error trying to save state file! Check access rights')
+
+def print_plugin_values(hard_drive) :
+ global emptyoutput, smart_values
+ if not emptyoutput:
+ verboselog('Printing S.M.A.R.T values')
+ for key in smart_values.keys() :
+ if key=="model" : continue
+ print(key+".value "+smart_values[key]["value"])
+ else:
+ print_unknown_from_statefile(hard_drive,smart_values)
+
+def print_config(hard_drive) :
+ global report_warnings, smart_values, statefiledir
+ if os.path.exists(statefiledir+'/smart-'+string.join(hard_drive,"-")+'.state'):
+ try :
+ verboselog('Try to recall previous S.M.A.R.T attributes for '+string.join(hard_drive,","))
+ smart_values_state=pickle.load(open_state_file(hard_drive,"r"))
+ except :
+ verboselog('Error opening existing state file!')
+ sys.exit(1)
+ else :
+ verboselog('No state file, reading S.M.A.R.T values for the first time')
+ read_values(hard_drive[0])
+ pickle.dump(smart_values,open_state_file(hard_drive,"w"))
+ smart_values_state=smart_values
+
+ verboselog('Printing configuration')
+ print('graph_title S.M.A.R.T values for drive '+string.join(hard_drive,","))
+ print('graph_vlabel Attribute S.M.A.R.T value')
+ print('graph_args --base 1000 --lower-limit 0')
+ print('graph_category disk')
+ print('graph_info This graph shows the value of all S.M.A.R.T attributes of drive '+string.join(hard_drive,",")+' ('+smart_values_state['model']+'). smartctl_exit_status is the return value of smartctl. A non-zero return value indicates an error, a potential error, or a fault on the drive.')
+ attributes=smart_values_state.keys()
+ attributes.sort()
+ for key in attributes :
+ if key in ['smartctl_exit_status','model'] : continue
+ print(key+'.label '+key)
+ if report_warnings: print(key+'.critical '+smart_values_state[key]["threshold"]+':')
+ print('smartctl_exit_status.label smartctl_exit_status')
+ if report_warnings: print('smartctl_exit_status.warning '+smart_values_state['smartctl_exit_status']["threshold"])
+
+def print_unknown_from_statefile(hard_drive,smart_values) :
+ global statefiledir
+ if os.path.exists(statefiledir+'/smart-'+string.join(hard_drive,"-")+'.state'):
+ try :
+ verboselog('Failed to get S.M.A.R.T values from drive. Try to recall previous S.M.A.R.T attributes for '+string.join(hard_drive,","))
+ smart_values_state=pickle.load(open_state_file(hard_drive,"r"))
+ except :
+ verboselog('Error opening existing state file!')
+ sys.exit(1)
+ else :
+ verboselog('No state file, reading S.M.A.R.T values for the first time')
+ sys.exit(1)
+
+ verboselog('Printing unknown values for all attributes in state file')
+ attributes=smart_values_state.keys()
+ attributes.sort()
+ for key in attributes :
+ if key=='model' : continue
+ print(key+'.value U')
+
+def get_hard_drive_name() :
+ global plugin_name
+ try :
+ name=[plugin_name[string.index(plugin_name,'_')+1:]]
+ if os.uname()[0]=="SunOS" :
+ try :
+ # if hard_drive name starts with "rdsk" or "rmt", try to reconstruct the path
+ if name[0][0:4]=="rdsk":
+ name[0]=os.path.join("rdsk",name[0][4:])
+ elif name[0][0:3]=="rmt":
+ name[0]=os.path.join("rmt",name[0][3:])
+ except :
+ verboselog('Failed to find SunOS hard_drive')
+ if (not os.path.exists('/dev/'+name[0])) and (not os.path.exists('/dev/disk/by-id/'+name[0])):
+ # For 3ware cards, we have to set multiple plugins for the same hard drive name.
+ # Let's see if we find a '-' in the drive name.
+ if name[0].find('-')!=-1:
+ # Put the drive name and it's number in a list
+ name=[name[0][:string.rindex(name[0],'-')],name[0][string.rindex(name[0],'-')+1:]]
+ # Chech that the drive exists in /dev
+ if (not os.path.exists('/dev/'+name[0])) and (not os.path.exists('/dev/disk/by-id/'+name[0])):
+ verboselog('/dev/(disk/by-id/)?'+name[0]+' not found!')
+ sys.exit(1)
+ return(name)
+ except :
+ verboselog('No S.M.A.R.T device name found in plugin\'s symlink!')
+ sys.exit(1)
+
+def find_smart_drives() :
+ global emptyoutput
+ # Try to autodetect Linux, *BSD, SunOS drives. Don't try to autodetect drives on a 3Ware card.
+ drives=[]
+ if os.uname()[0]=="Linux" :
+ if os.path.exists('/sys/block/'):
+ # Running 2.6
+ try :
+ for drive in os.listdir('/sys/block/') :
+ if drive[:2] in ['md','fd','lo','ra','dm'] : continue # Ignore MD, Floppy, loop , RAM and LVM devices.
+ try :
+ verboselog('Trying '+drive+'...')
+ exit_status=read_values(drive)
+ if (exit_status==None or int(log(int(exit_status/256),2))>2) and not emptyoutput:
+ drives.append(drive)
+ except :
+ continue
+ except :
+ verboselog('Failed to list devices in /sys/block')
+ else :
+ verboselog('Not running linux2.6, failing back to /proc/partitions')
+ try :
+ partitions=open('/proc/partitions','r')
+ L=partitions.readlines()
+ for l in L :
+ words=string.split(l)
+ if len(words)==0 or words[0][0] not in string.digits : continue
+ if words[0] in ['1','9','58','254'] : continue # Ignore RAM, md, LVM and LVM2 devices
+ if words[-1][-1] not in string.digits :
+ try :
+ verboselog('Trying '+words[-1]+'...')
+ exit_status=read_values(words[-1])
+ if (exit_status==None or int(log(int(exit_status/256),2))>2) and not emptyoutput:
+ drives.append(words[-1])
+ except :
+ continue
+ verboselog('Found drives in /proc/partitions ! '+str(drives))
+ except :
+ verboselog('Failed to list devices in /proc/partitions')
+ elif os.uname()[0]=="OpenBSD" :
+ try :
+ sysctl_kerndisks=os.popen('sysctl hw.disknames')
+ kerndisks=string.strip(sysctl_kerndisks.readline())
+ for drive in string.split(kerndisks[string.rindex(kerndisks,'=')+1:],',') :
+ if drive[:2] in ['md','cd','fd'] : continue # Ignore Memory Disks, CD-ROM drives and Floppy
+ try :
+ verboselog('Trying '+drive+'c...')
+ exit_status=read_values(drive+'c')
+ if (exit_status==None or int(log(int(exit_status/256),2))>2) and not emptyoutput:
+ drives.append(drive+'c')
+ except :
+ continue
+ except :
+ verboselog('Failed to list OpenBSD disks')
+ elif os.uname()[0]=="FreeBSD" :
+ try :
+ sysctl_kerndisks=os.popen('sysctl kern.disks')
+ kerndisks=string.strip(sysctl_kerndisks.readline())
+ for drive in string.split(kerndisks)[1:] :
+ if drive[:2] in ['md','cd','fd'] : continue # Ignore Memory Disks, CD-ROM drives and Floppy
+ try :
+ verboselog('Trying '+drive+'...')
+ exit_status=read_values(drive)
+ if (exit_status==None or int(log(int(exit_status/256),2))>2) and not emptyoutput:
+ drives.append(drive)
+ except :
+ continue
+ except :
+ verboselog('Failed to list FreeBSD disks')
+ elif os.uname()[0]=="Darwin" :
+ try :
+ from glob import glob
+ for drivepath in glob('/dev/disk[0-9]') :
+ try :
+ drive=os.path.basename(drivepath)
+ verboselog('Trying '+drive+'...')
+ exit_status=read_values(drive)
+ if (exit_status==None or int(log(int(exit_status/256),2))>2) and not emptyoutput:
+ drives.append(drive)
+ except :
+ continue
+ except :
+ verboselog('Failed to list Darwin disks')
+ elif os.uname()[0]=="NetBSD" :
+ try :
+ sysctl_kerndisks=os.popen('sysctl hw.disknames')
+ kerndisks=string.strip(sysctl_kerndisks.readline())
+ for drive in string.split(kerndisks)[2:] :
+ if drive[:2] in ['md','cd','fd'] : continue # Ignore Memory Disks, CD-ROM drives and Floppy
+ try :
+ verboselog('Trying '+drive+'c...')
+ exit_status=read_values(drive+'c')
+ if (exit_status==None or int(log(int(exit_status/256),2))>2) and not emptyoutput:
+ drives.append(drive+'c')
+ except :
+ continue
+ except :
+ verboselog('Failed to list NetBSD disks')
+ elif os.uname()[0]=="SunOS" :
+ try :
+ from glob import glob
+ for drivepath in glob('/dev/rdsk/*s2') :
+ try :
+ drive=os.path.basename(drivepath)
+ verboselog('Trying rdsk'+drive+'...')
+ exit_status=read_values('rdsk'+drive)
+ if (exit_status==None or int(log(int(exit_status/256),2))>2) and not emptyoutput:
+ drives.append('rdsk'+drive)
+ except :
+ continue
+ for drivepath in glob('/dev/rmt/*') :
+ try :
+ drive=os.path.basename(drivepath)
+ verboselog('Trying rmt'+drive+'...')
+ exit_status=read_values('rmt'+drive)
+ if (exit_status==None or int(log(int(exit_status/256),2))>2) and not emptyoutput:
+ drives.append('rmt'+drive)
+ except :
+ continue
+ except :
+ verboselog('Failed to list SunOS disks')
+ return(drives)
+
+### Main part ###
+
+smart_values={}
+emptyoutput=False
+plugin_name=list(os.path.split(sys.argv[0]))[1]
+verboselog('plugins\' UID: '+str(os.geteuid())+' / plugins\' GID: '+str(os.getegid()))
+
+# Parse arguments
+if len(sys.argv)>1 :
+ if sys.argv[1]=="config" :
+ hard_drive=get_hard_drive_name()
+ print_config(hard_drive)
+ sys.exit(0)
+ elif sys.argv[1]=="autoconf" :
+ if os.path.exists(os.getenv('smartpath','/usr/sbin/smartctl')) :
+ if not find_smart_drives():
+ print('no (no drives accessible)')
+ else :
+ print('yes')
+ sys.exit(0)
+ else :
+ print('no (smartmontools not found)')
+ sys.exit(0)
+ elif sys.argv[1]=="suggest" :
+ for drive in find_smart_drives() :
+ print(drive)
+ sys.exit(0)
+ elif sys.argv[1]=="version" :
+ print('smart_ Munin plugin, version '+plugin_version)
+ sys.exit(0)
+ elif sys.argv[1]!="" :
+ verboselog('unknown argument "'+sys.argv[1]+'"')
+ sys.exit(1)
+
+# No argument given, doing the real job:
+hard_drive=get_hard_drive_name()
+read_values(hard_drive[0])
+if not emptyoutput: update_state_file(hard_drive)
+print_plugin_values(hard_drive)
+sys.exit(0)
+
+
+### The following is the smart_ plugin documentation, intended to be used with munindoc
+"""
+=head1 NAME
+
+smart_ - Munin wildcard-plugin to monitor S.M.A.R.T. attribute values through smartctl
+
+=head1 APPLICABLE SYSTEMS
+
+Node with B<Python> interpreter and B<smartmontools> (http://smartmontools.sourceforge.net/)
+installed and in function.
+
+=head1 CONFIGURATION
+
+=head2 Create link in service directory
+
+To monitor a S.M.A.R.T device, create a link in the service directory
+of the munin-node named smart_<device>, which is pointing to this file.
+
+E.g.
+
+ln -s /usr/share/munin/plugins/smart_ /etc/munin/plugins/smart_hda
+
+...will monitor /dev/hda.
+
+=head2 Grant privileges in munin-node
+
+The plugin must be run under high privileged user B<root>, to get access to the raw device.
+
+So following minimal configuration in plugin-conf.d/munin-node is needed.
+
+=over 2
+
+ [smart_*]
+ user root
+ group disk
+
+=back
+
+=head2 Set Parameter if needed
+
+ smartpath - Specify path to smartctl program (Default: /usr/sbin/smartctl)
+ smartargs - Override '-a' argument passed to smartctl with '-A -i'+smartargs
+ ignorestandby - Ignore the standby state of the drive and perform SMART query. Default: False
+
+Parameters can be specified on a per-drive basis, eg:
+
+=over 2
+
+ [smart_hda]
+ user root
+ env.smartargs -H -c -l error -l selftest -l selective -d ata
+ env.smartpath /usr/local/sbin/smartctl
+
+=back
+
+In particular, for SATA drives, with older versions of smartctl:
+
+=over 2
+
+ [smart_sda]
+ user root
+ env.smartargs -d ata -a
+
+ [smart_twa0-1]
+ user root
+ env.smartargs -H -l error -d 3ware,1
+ env.ignorestandby True
+
+ [smart_twa0-2]
+ user root
+ env.smartargs -H -l error -d 3ware,2
+
+=back
+
+=head1 INTERPRETATION
+
+If a device supports the B<Self-Monitoring, Analysis
+and Reporting Technology (S.M.A.R.T.)> it offers readable
+access to the attribute table. There you find the B<raw value>,
+a B<normalised value> and a B<threshold> (set by the vendor)
+for each attribute, that is supported by that device.
+
+The meaning and handling of the raw value is a secret of the
+vendors embedded S.M.A.R.T.-Software on the disk. The only
+relevant info from our external view is the B<normalised value>
+in comparison with the B<threshold>. If the attributes value is
+equal or below the threshold, it signals its failure and
+the B<health status> of the device will switch from B<passed> to B<failed>.
+
+This plugin fetches the B<normalised values of all SMART-Attributes>
+and draw a curve for each of them.
+It takes the vendors threshold as critical limit for the munin datafield.
+So you will see an alarm, if the value reaches the vendors threshold.
+
+Looking at the graph: It is a bad sign, if the curve starts
+to curl or to meander. The more horizontal it runs,
+the better. Of course it is normal, that the temperatures
+curve swings a bit. But the others should stay steady on
+their level if everything is ok.
+
+S.M.A.R.T. distinguishes between B<Pre-fail> and B<Old-age>
+Attributes. An old disk will have more curling curves
+because of degradation, especially for the B<Old-age> Attributes.
+You should then backup more often, run more selftests[1] and prepare
+the disks replacement.
+
+B<Act directly>, if a <Pre-Fail> Attribute goes below threshold.
+Immediately back-up your data and replace your hard disk drive.
+A failure may be imminent..
+
+[1] Consult the smartmontools manpages to learn about
+offline tests and automated selftests with smartd.
+Only with both activated, the values of the SMART-Attributes
+reflect the all over state of the device.
+
+Tutorials and articles about S.M.A.R.T. and smartmontools:
+http://smartmontools.sourceforge.net/doc.html#tutorials
+
+=head1 MAGIC MARKERS
+
+ #%# family=auto
+ #%# capabilities=autoconf suggest
+
+=head1 CALL OPTIONS
+
+B<none>
+
+=over 2
+
+Fetches values if called without arguments:
+
+E.g.: munin-run smart_hda
+
+=back
+
+B<config>
+
+=over 2
+
+Prints plugins configuration.
+
+E.g.: munin-run smart_hda config
+
+=back
+
+B<autoconf>
+
+=over 2
+
+Tries to find smartctl and outputs value 'yes' for success, 'no' if not.
+
+It's used by B<munin-node-configure> to see wether autoconfiguration is possible.
+
+=back
+
+B<suggest>
+
+=over 2
+
+Outputs the list of device names, that it found plugged to the system.
+
+B<munin-node-configure> use this to build the service links for this wildcard-plugin.
+
+=back
+
+=head1 VERSION
+
+Version 2.1
+
+=head1 BUGS
+
+None known
+
+=head1 AUTHOR
+
+(C) 2004-2009 Nicolas Stransky <Nico@stransky.cx>
+
+(C) 2008 Gabriele Pohl <contact@dipohl.de>
+Reformated existent documentation to POD-Style, added section Interpretation to the documentation.
+
+=head1 LICENSE
+
+GPLv2 (http://www.gnu.org/licenses/gpl-2.0.txt)
+
+=cut
+
+
+"""
--- /dev/null
+#!/usr/bin/env python
+# -*- python -*-
+#
+# Wildcard-plugin to monitor S.M.A.R.T attribute values through smartctl,
+# which is part of smartmontools package:
+# http://smartmontools.sourceforge.net/
+#
+# To monitor a S.M.A.R.T device, link smart_<device> to this file.
+# E.g.
+# ln -s /usr/share/munin/plugins/smart_ /etc/munin/plugins/smart_hda
+# ...will monitor /dev/hda.
+#
+# Needs following minimal configuration in plugin-conf.d/munin-node:
+# [smart_*]
+# user root
+# group disk
+#
+# Parameters
+# smartpath - Specify path to smartctl program (Default: /usr/sbin/smartctl)
+# smartargs - Override '-a' argument passed to smartctl with '-A -i'+smartargs
+# ignorestandby - Ignore the standby state of the drive and perform SMART query. Default: False
+#
+# Parameters can be specified on a per-drive basis, eg:
+# [smart_hda]
+# user root
+# group disk
+# env.smartargs -H -c -l error -l selftest -l selective -d ata
+# env.smartpath /usr/local/sbin/smartctl
+#
+# [smart_twa0-1]
+# user root
+# group disk
+# env.smartargs -H -l error -d 3ware,1
+# env.ignorestandby True
+#
+# [smart_twa0-2]
+# user root
+# group disk
+# env.smartargs -H -l error -d 3ware,2
+#
+# Author: Nicolas Stransky <Nico@stransky.cx>
+#
+# v1.0 22/08/2004 - First draft
+# v1.2 28/08/2004 - Clean up the code, add a verbose option
+# v1.3 14/11/2004 - Compatibility with python<2.2. See comments in the code
+# v1.4 17/11/2004 - Deal with non zero exit codes of smartctl
+# - config now prints the critical thresholds, as reported by smartctl
+# v1.5 18/11/2004 - Plot smartctl_exit_code bitmask
+# v1.6 21/11/2004 - Add autoconf and suggest capabilities
+# - smartctl path can be passed through "smartpath" environment variable
+# - Additional smartctl args can be passed through "smartargs" environment variable
+# v1.7 29/11/2004 - Add suggest capabilities for NetBSD, OpenBSD, FreeBSD and SunOS.
+# - Allow to override completely the smartctl arguments with "smartargs"
+# v1.8 16/02/2005 - Exit status field now only triggers warnings, not criticals.
+# v1.9 07/07/2005 - Allow to query several drives on the same 3ware card.
+# - Correct a bug when '-i' was not listed in smartargs
+# - Don't fail if no value was obtained for hard drive model
+# v1.10 19/08/2005 - smartctl_exit_code is now a numerical value
+# v2.0 08/05/2009 - Correct bug in the interpretation of smartctl_exit_code
+# - New option to suppress SMART warnings in munin
+# - Temporary lack of output for previously existing drive now reports U
+# - The plugin now contains its own documentation for use with munindoc
+# - Removed python<2.2 compatibility comments
+# - Better autodetection of drives
+# - Don't spin up devices in a low-power mode.
+# v2.1 2012-02-14 - Add support for Darwin (Mac OS X).
+# - Print the last line of smartctl output to verbose
+# log to aid in understanding why smartctl exited with
+# a nonzero status.
+#
+# Copyright (c) 2004-2009 Nicolas Stransky.
+#
+# Permission to use, copy, and modify this software with or without fee
+# is hereby granted, provided that this entire notice is included in
+# all source code copies of any software which is or includes a copy or
+# modification of this software.
+#
+# THIS SOFTWARE IS BEING PROVIDED "AS IS", WITHOUT ANY EXPRESS OR
+# IMPLIED WARRANTY. IN PARTICULAR, NONE OF THE AUTHORS MAKES ANY
+# REPRESENTATION OR WARRANTY OF ANY KIND CONCERNING THE
+# MERCHANTABILITY OF THIS SOFTWARE OR ITS FITNESS FOR ANY PARTICULAR
+# PURPOSE.
+#
+#
+# Magic markers
+#%# capabilities=autoconf suggest
+#%# family=auto
+
+## You may edit the following 3 variables
+# Increase verbosity (True/False)
+verbose=False
+# Suppress SMART warnings (True/False)
+report_warnings=True
+# You may not modify anything below this line
+
+import os, sys, string, pickle
+from math import log
+plugin_version="2.1"
+statefiledir=os.environ['MUNIN_PLUGSTATE']
+
+def verboselog(s):
+ global plugin_name
+ sys.stderr.write(plugin_name+': '+s+'\n')
+
+if not verbose :
+ verboselog = lambda s: None
+
+def read_values(hard_drive):
+ global smart_values, emptyoutput
+ try :
+ verboselog('Reading S.M.A.R.T values')
+ os.putenv('LC_ALL','C')
+ device='/dev/'+hard_drive
+ if not os.path.exists(device):
+ device='/dev/disk/by-id/'+hard_drive
+ smart_output=os.popen(os.getenv('smartpath','/usr/sbin/smartctl')+' '+os.getenv('smartargs','-a')+(os.getenv('ignorestandby',False) and ' ' or ' -n standby ')+'-A -i '+device)
+ read_values=0
+ lastline=None
+ for l in smart_output :
+ lastline=l
+ if l[:-1]=='' :
+ read_values=0
+ elif l[:13]=='Device Model:' or l[:7]=='Device:' :
+ model_list=string.split(string.split(l,':')[1])
+ try: model_list.remove('Version')
+ except : None
+ model=string.join(model_list)
+ if read_values==1 :
+ smart_attribute=string.split(l)
+ smart_values[string.replace(smart_attribute[1],'-','_')]={"value":smart_attribute[9]}
+ elif l[:18]=="ID# ATTRIBUTE_NAME" :
+ # Start reading the Attributes block
+ read_values=1
+ exit_status=smart_output.close()
+ if exit_status!=None :
+ # smartctl exit code is a bitmask, check man page.
+ num_exit_status=int(exit_status/256) # Python convention
+ if int(log(num_exit_status,2))<=2 : # bit code
+ verboselog('smartctl cannot access S.M.A.R.T values on drive '+hard_drive+'. Command exited with code '+str(num_exit_status)+' (bit '+str(int(log(num_exit_status,2)))+')')
+ verboselog(lastline[:-1])
+ else :
+ verboselog('smartctl exited with code '+str(num_exit_status)+' (bit '+str(int(log(num_exit_status,2)))+'). '+hard_drive+' may be FAILING RIGHT NOW!')
+ else :
+ num_exit_status=0
+ except :
+ verboselog('Cannot access S.M.A.R.T values! Check user rights or proper smartmontools installation/arguments.')
+ sys.exit(1)
+ if smart_values=={} :
+ verboselog('Can\'t find any S.M.A.R.T values in smartctl output!')
+ emptyoutput=True
+ #sys.exit(1)
+ else : emptyoutput=False
+ smart_values["smartctl_exit_status"]={"value":str(num_exit_status),"threshold":"1"}
+ try : smart_values["model"]=model
+ # For some reason we may have no value for "model"
+ except : smart_values["model"]="unknown"
+ return(exit_status)
+
+def open_state_file(hard_drive,mode) :
+ global statefiledir
+ return open(statefiledir+'/smart-'+string.join(hard_drive,"-")+'.state',mode)
+
+def update_state_file(hard_drive) :
+ try:
+ verboselog('Saving statefile')
+ pickle.dump(smart_values,open_state_file(hard_drive,"w"))
+ except :
+ verboselog('Error trying to save state file! Check access rights')
+
+def print_plugin_values(hard_drive) :
+ global emptyoutput, smart_values
+ if not emptyoutput:
+ verboselog('Printing S.M.A.R.T values')
+ for key in smart_values.keys() :
+ if key=="model" : continue
+ print(key+".value "+smart_values[key]["value"])
+ else:
+ print_unknown_from_statefile(hard_drive,smart_values)
+
+def print_config(hard_drive) :
+ global report_warnings, smart_values, statefiledir
+ if os.path.exists(statefiledir+'/smart-'+string.join(hard_drive,"-")+'.state'):
+ try :
+ verboselog('Try to recall previous S.M.A.R.T attributes for '+string.join(hard_drive,","))
+ smart_values_state=pickle.load(open_state_file(hard_drive,"r"))
+ except :
+ verboselog('Error opening existing state file!')
+ sys.exit(1)
+ else :
+ verboselog('No state file, reading S.M.A.R.T values for the first time')
+ read_values(hard_drive[0])
+ pickle.dump(smart_values,open_state_file(hard_drive,"w"))
+ smart_values_state=smart_values
+
+ verboselog('Printing configuration')
+ print('graph_title Raw S.M.A.R.T values for drive '+string.join(hard_drive,","))
+ print('graph_vlabel Attribute raw S.M.A.R.T value')
+ print('graph_args --base 1000 --lower-limit 0')
+ print('graph_category disk')
+ print('graph_info This graph shows the value of all S.M.A.R.T attributes of drive '+string.join(hard_drive,",")+' ('+smart_values_state['model']+'). smartctl_exit_status is the return value of smartctl. A non-zero return value indicates an error, a potential error, or a fault on the drive.')
+ attributes=smart_values_state.keys()
+ attributes.sort()
+ for key in attributes :
+ if key in ['smartctl_exit_status','model'] : continue
+ print(key+'.label '+key)
+# if report_warnings: print(key+'.critical '+smart_values_state[key]["threshold"]+':')
+ print('smartctl_exit_status.label smartctl_exit_status')
+ if report_warnings: print('smartctl_exit_status.warning '+smart_values_state['smartctl_exit_status']["threshold"])
+
+def print_unknown_from_statefile(hard_drive,smart_values) :
+ global statefiledir
+ if os.path.exists(statefiledir+'/smart-'+string.join(hard_drive,"-")+'.state'):
+ try :
+ verboselog('Failed to get S.M.A.R.T values from drive. Try to recall previous S.M.A.R.T attributes for '+string.join(hard_drive,","))
+ smart_values_state=pickle.load(open_state_file(hard_drive,"r"))
+ except :
+ verboselog('Error opening existing state file!')
+ sys.exit(1)
+ else :
+ verboselog('No state file, reading S.M.A.R.T values for the first time')
+ sys.exit(1)
+
+ verboselog('Printing unknown values for all attributes in state file')
+ attributes=smart_values_state.keys()
+ attributes.sort()
+ for key in attributes :
+ if key=='model' : continue
+ print(key+'.value U')
+
+def get_hard_drive_name() :
+ global plugin_name
+ try :
+ name=[plugin_name[string.index(plugin_name,'_')+1:]]
+ if os.uname()[0]=="SunOS" :
+ try :
+ # if hard_drive name starts with "rdsk" or "rmt", try to reconstruct the path
+ if name[0][0:4]=="rdsk":
+ name[0]=os.path.join("rdsk",name[0][4:])
+ elif name[0][0:3]=="rmt":
+ name[0]=os.path.join("rmt",name[0][3:])
+ except :
+ verboselog('Failed to find SunOS hard_drive')
+ if (not os.path.exists('/dev/'+name[0])) and (not os.path.exists('/dev/disk/by-id/'+name[0])):
+ # For 3ware cards, we have to set multiple plugins for the same hard drive name.
+ # Let's see if we find a '-' in the drive name.
+ if name[0].find('-')!=-1:
+ # Put the drive name and it's number in a list
+ name=[name[0][:string.rindex(name[0],'-')],name[0][string.rindex(name[0],'-')+1:]]
+ # Chech that the drive exists in /dev
+ if (not os.path.exists('/dev/'+name[0])) and (not os.path.exists('/dev/disk/by-id/'+name[0])):
+ verboselog('/dev/(disk/by-id/)?'+name[0]+' not found!')
+ sys.exit(1)
+ return(name)
+ except :
+ verboselog('No S.M.A.R.T device name found in plugin\'s symlink!')
+ sys.exit(1)
+
+def find_smart_drives() :
+ global emptyoutput
+ # Try to autodetect Linux, *BSD, SunOS drives. Don't try to autodetect drives on a 3Ware card.
+ drives=[]
+ if os.uname()[0]=="Linux" :
+ if os.path.exists('/sys/block/'):
+ # Running 2.6
+ try :
+ for drive in os.listdir('/sys/block/') :
+ if drive[:2] in ['md','fd','lo','ra','dm'] : continue # Ignore MD, Floppy, loop , RAM and LVM devices.
+ try :
+ verboselog('Trying '+drive+'...')
+ exit_status=read_values(drive)
+ if (exit_status==None or int(log(int(exit_status/256),2))>2) and not emptyoutput:
+ drives.append(drive)
+ except :
+ continue
+ except :
+ verboselog('Failed to list devices in /sys/block')
+ else :
+ verboselog('Not running linux2.6, failing back to /proc/partitions')
+ try :
+ partitions=open('/proc/partitions','r')
+ L=partitions.readlines()
+ for l in L :
+ words=string.split(l)
+ if len(words)==0 or words[0][0] not in string.digits : continue
+ if words[0] in ['1','9','58','254'] : continue # Ignore RAM, md, LVM and LVM2 devices
+ if words[-1][-1] not in string.digits :
+ try :
+ verboselog('Trying '+words[-1]+'...')
+ exit_status=read_values(words[-1])
+ if (exit_status==None or int(log(int(exit_status/256),2))>2) and not emptyoutput:
+ drives.append(words[-1])
+ except :
+ continue
+ verboselog('Found drives in /proc/partitions ! '+str(drives))
+ except :
+ verboselog('Failed to list devices in /proc/partitions')
+ elif os.uname()[0]=="OpenBSD" :
+ try :
+ sysctl_kerndisks=os.popen('sysctl hw.disknames')
+ kerndisks=string.strip(sysctl_kerndisks.readline())
+ for drive in string.split(kerndisks[string.rindex(kerndisks,'=')+1:],',') :
+ if drive[:2] in ['md','cd','fd'] : continue # Ignore Memory Disks, CD-ROM drives and Floppy
+ try :
+ verboselog('Trying '+drive+'c...')
+ exit_status=read_values(drive+'c')
+ if (exit_status==None or int(log(int(exit_status/256),2))>2) and not emptyoutput:
+ drives.append(drive+'c')
+ except :
+ continue
+ except :
+ verboselog('Failed to list OpenBSD disks')
+ elif os.uname()[0]=="FreeBSD" :
+ try :
+ sysctl_kerndisks=os.popen('sysctl kern.disks')
+ kerndisks=string.strip(sysctl_kerndisks.readline())
+ for drive in string.split(kerndisks)[1:] :
+ if drive[:2] in ['md','cd','fd'] : continue # Ignore Memory Disks, CD-ROM drives and Floppy
+ try :
+ verboselog('Trying '+drive+'...')
+ exit_status=read_values(drive)
+ if (exit_status==None or int(log(int(exit_status/256),2))>2) and not emptyoutput:
+ drives.append(drive)
+ except :
+ continue
+ except :
+ verboselog('Failed to list FreeBSD disks')
+ elif os.uname()[0]=="Darwin" :
+ try :
+ from glob import glob
+ for drivepath in glob('/dev/disk[0-9]') :
+ try :
+ drive=os.path.basename(drivepath)
+ verboselog('Trying '+drive+'...')
+ exit_status=read_values(drive)
+ if (exit_status==None or int(log(int(exit_status/256),2))>2) and not emptyoutput:
+ drives.append(drive)
+ except :
+ continue
+ except :
+ verboselog('Failed to list Darwin disks')
+ elif os.uname()[0]=="NetBSD" :
+ try :
+ sysctl_kerndisks=os.popen('sysctl hw.disknames')
+ kerndisks=string.strip(sysctl_kerndisks.readline())
+ for drive in string.split(kerndisks)[2:] :
+ if drive[:2] in ['md','cd','fd'] : continue # Ignore Memory Disks, CD-ROM drives and Floppy
+ try :
+ verboselog('Trying '+drive+'c...')
+ exit_status=read_values(drive+'c')
+ if (exit_status==None or int(log(int(exit_status/256),2))>2) and not emptyoutput:
+ drives.append(drive+'c')
+ except :
+ continue
+ except :
+ verboselog('Failed to list NetBSD disks')
+ elif os.uname()[0]=="SunOS" :
+ try :
+ from glob import glob
+ for drivepath in glob('/dev/rdsk/*s2') :
+ try :
+ drive=os.path.basename(drivepath)
+ verboselog('Trying rdsk'+drive+'...')
+ exit_status=read_values('rdsk'+drive)
+ if (exit_status==None or int(log(int(exit_status/256),2))>2) and not emptyoutput:
+ drives.append('rdsk'+drive)
+ except :
+ continue
+ for drivepath in glob('/dev/rmt/*') :
+ try :
+ drive=os.path.basename(drivepath)
+ verboselog('Trying rmt'+drive+'...')
+ exit_status=read_values('rmt'+drive)
+ if (exit_status==None or int(log(int(exit_status/256),2))>2) and not emptyoutput:
+ drives.append('rmt'+drive)
+ except :
+ continue
+ except :
+ verboselog('Failed to list SunOS disks')
+ return(drives)
+
+### Main part ###
+
+smart_values={}
+emptyoutput=False
+plugin_name=list(os.path.split(sys.argv[0]))[1]
+verboselog('plugins\' UID: '+str(os.geteuid())+' / plugins\' GID: '+str(os.getegid()))
+
+# Parse arguments
+if len(sys.argv)>1 :
+ if sys.argv[1]=="config" :
+ hard_drive=get_hard_drive_name()
+ print_config(hard_drive)
+ sys.exit(0)
+ elif sys.argv[1]=="autoconf" :
+ if os.path.exists(os.getenv('smartpath','/usr/sbin/smartctl')) :
+ if not find_smart_drives():
+ print('no (no drives accessible)')
+ else :
+ print('yes')
+ sys.exit(0)
+ else :
+ print('no (smartmontools not found)')
+ sys.exit(0)
+ elif sys.argv[1]=="suggest" :
+ for drive in find_smart_drives() :
+ print(drive)
+ sys.exit(0)
+ elif sys.argv[1]=="version" :
+ print('smart_ Munin plugin, version '+plugin_version)
+ sys.exit(0)
+ elif sys.argv[1]!="" :
+ verboselog('unknown argument "'+sys.argv[1]+'"')
+ sys.exit(1)
+
+# No argument given, doing the real job:
+hard_drive=get_hard_drive_name()
+read_values(hard_drive[0])
+if not emptyoutput: update_state_file(hard_drive)
+print_plugin_values(hard_drive)
+sys.exit(0)
+
+
+### The following is the smart_ plugin documentation, intended to be used with munindoc
+"""
+=head1 NAME
+
+smart_ - Munin wildcard-plugin to monitor S.M.A.R.T. attribute values through smartctl
+
+=head1 APPLICABLE SYSTEMS
+
+Node with B<Python> interpreter and B<smartmontools> (http://smartmontools.sourceforge.net/)
+installed and in function.
+
+=head1 CONFIGURATION
+
+=head2 Create link in service directory
+
+To monitor a S.M.A.R.T device, create a link in the service directory
+of the munin-node named smart_<device>, which is pointing to this file.
+
+E.g.
+
+ln -s /usr/share/munin/plugins/smart_ /etc/munin/plugins/smart_hda
+
+...will monitor /dev/hda.
+
+=head2 Grant privileges in munin-node
+
+The plugin must be run under high privileged user B<root>, to get access to the raw device.
+
+So following minimal configuration in plugin-conf.d/munin-node is needed.
+
+=over 2
+
+ [smart_*]
+ user root
+ group disk
+
+=back
+
+=head2 Set Parameter if needed
+
+ smartpath - Specify path to smartctl program (Default: /usr/sbin/smartctl)
+ smartargs - Override '-a' argument passed to smartctl with '-A -i'+smartargs
+ ignorestandby - Ignore the standby state of the drive and perform SMART query. Default: False
+
+Parameters can be specified on a per-drive basis, eg:
+
+=over 2
+
+ [smart_hda]
+ user root
+ env.smartargs -H -c -l error -l selftest -l selective -d ata
+ env.smartpath /usr/local/sbin/smartctl
+
+=back
+
+In particular, for SATA drives, with older versions of smartctl:
+
+=over 2
+
+ [smart_sda]
+ user root
+ env.smartargs -d ata -a
+
+ [smart_twa0-1]
+ user root
+ env.smartargs -H -l error -d 3ware,1
+ env.ignorestandby True
+
+ [smart_twa0-2]
+ user root
+ env.smartargs -H -l error -d 3ware,2
+
+=back
+
+=head1 INTERPRETATION
+
+If a device supports the B<Self-Monitoring, Analysis
+and Reporting Technology (S.M.A.R.T.)> it offers readable
+access to the attribute table. There you find the B<raw value>,
+a B<normalised value> and a B<threshold> (set by the vendor)
+for each attribute, that is supported by that device.
+
+The meaning and handling of the raw value is a secret of the
+vendors embedded S.M.A.R.T.-Software on the disk. The only
+relevant info from our external view is the B<normalised value>
+in comparison with the B<threshold>. If the attributes value is
+equal or below the threshold, it signals its failure and
+the B<health status> of the device will switch from B<passed> to B<failed>.
+
+This plugin fetches the B<normalised values of all SMART-Attributes>
+and draw a curve for each of them.
+It takes the vendors threshold as critical limit for the munin datafield.
+So you will see an alarm, if the value reaches the vendors threshold.
+
+Looking at the graph: It is a bad sign, if the curve starts
+to curl or to meander. The more horizontal it runs,
+the better. Of course it is normal, that the temperatures
+curve swings a bit. But the others should stay steady on
+their level if everything is ok.
+
+S.M.A.R.T. distinguishes between B<Pre-fail> and B<Old-age>
+Attributes. An old disk will have more curling curves
+because of degradation, especially for the B<Old-age> Attributes.
+You should then backup more often, run more selftests[1] and prepare
+the disks replacement.
+
+B<Act directly>, if a <Pre-Fail> Attribute goes below threshold.
+Immediately back-up your data and replace your hard disk drive.
+A failure may be imminent..
+
+[1] Consult the smartmontools manpages to learn about
+offline tests and automated selftests with smartd.
+Only with both activated, the values of the SMART-Attributes
+reflect the all over state of the device.
+
+Tutorials and articles about S.M.A.R.T. and smartmontools:
+http://smartmontools.sourceforge.net/doc.html#tutorials
+
+=head1 MAGIC MARKERS
+
+ #%# family=auto
+ #%# capabilities=autoconf suggest
+
+=head1 CALL OPTIONS
+
+B<none>
+
+=over 2
+
+Fetches values if called without arguments:
+
+E.g.: munin-run smart_hda
+
+=back
+
+B<config>
+
+=over 2
+
+Prints plugins configuration.
+
+E.g.: munin-run smart_hda config
+
+=back
+
+B<autoconf>
+
+=over 2
+
+Tries to find smartctl and outputs value 'yes' for success, 'no' if not.
+
+It's used by B<munin-node-configure> to see wether autoconfiguration is possible.
+
+=back
+
+B<suggest>
+
+=over 2
+
+Outputs the list of device names, that it found plugged to the system.
+
+B<munin-node-configure> use this to build the service links for this wildcard-plugin.
+
+=back
+
+=head1 VERSION
+
+Version 2.1
+
+=head1 BUGS
+
+None known
+
+=head1 AUTHOR
+
+(C) 2004-2009 Nicolas Stransky <Nico@stransky.cx>
+
+(C) 2008 Gabriele Pohl <contact@dipohl.de>
+Reformated existent documentation to POD-Style, added section Interpretation to the documentation.
+
+=head1 LICENSE
+
+GPLv2 (http://www.gnu.org/licenses/gpl-2.0.txt)
+
+=cut
+
+
+"""