[csw-devel] SF.net SVN: opencsw:[650] mirror
dmichelsen at users.sourceforge.net
dmichelsen at users.sourceforge.net
Mon Feb 11 11:40:50 CET 2013
Revision: 650
http://opencsw.svn.sourceforge.net/opencsw/?rev=650&view=rev
Author: dmichelsen
Date: 2013-02-11 10:40:49 +0000 (Mon, 11 Feb 2013)
Log Message:
-----------
Add check-zpool monitoring script for nagios
Added Paths:
-----------
mirror/
mirror/check-zpool
Added: mirror/check-zpool
===================================================================
--- mirror/check-zpool (rev 0)
+++ mirror/check-zpool 2013-02-11 10:40:49 UTC (rev 650)
@@ -0,0 +1,229 @@
+#!/opt/csw/bin/perl
+
+use strict;
+use warnings;
+use Time::Duration::Parse;
+use DateTime::Format::DateParse;
+use DateTime::Format::Human::Duration;
+use Nagios::Plugin;
+
+use Data::Dumper;
+
+# Pass:
+# - Name of pool
+
+# Actions:
+# 1. warn if scrub takes longer than x
+# 2. warn if scrub finished longer than x days ago
+# 3. warn if there were correctable errors in the pool
+# 4. warn if status is at least degraded, critical if status is faulted
+
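+# -p <pool>[,<pool>...]   pool(s) to check; defaults to all pools
+# -D <duration>           --warn-scrub-duration: warn if a running scrub takes longer (e.g. "12h")
+# -A <duration>           --warn-scrub-age: warn if the last scrub finished longer ago (e.g. "30d")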
+
+package ZFS::Pool;
+
+use strict;
+use warnings;
+
+# Construct a ZFS::Pool by parsing the output of `zpool status <pool>`.
+#
+# Options:
+#   pool => name of the pool to inspect (required)
+#
+# If a regular file named "<pool>.out" exists it is read instead of running
+# zpool, which allows the parser to be exercised with captured output.
+#
+# Returns a blessed hash with the keys:
+#   pool   => the pool name as given
+#   status => hashref mapping every "key:" header of the zpool output
+#             (e.g. state, scan, config, errors) to its text; continuation
+#             lines are appended, separated by newlines
+#
+# Dies if no pool name is given or if the status source cannot be opened.
+sub new {
+    my ($this, %options) = @_;
+    my $pool = $options{pool};
+    die "ZFS::Pool->new: no pool name given\n"
+        unless defined $pool && length $pool;
+
+    my $fh;
+    my $fixture = $pool . '.out';
+    if( -f $fixture ) {
+        open $fh, '<', $fixture
+            or die "Cannot read '$fixture': $!\n";
+    } else {
+        # List-form pipe open: the pool name is handed to zpool as a single
+        # argument and is never interpreted by a shell.
+        local $ENV{LC_ALL} = 'C';
+        open $fh, '-|', '/usr/sbin/zpool', 'status', $pool
+            or die "Cannot run '/usr/sbin/zpool status $pool': $!\n";
+    }
+
+    my $mode;
+    my %status;
+    while( my $line = <$fh> ) {
+        chomp $line;
+        # scan: scrub repaired 0 in 5h21m with 0 errors on Mon Nov 5 21:18:23 2012
+
+        # scan: scrub in progress since Tue Feb 5 16:15:54 2013
+        # 561G scanned out of 1.18T at 6.48M/s, 28h35m to go
+        # 54K repaired, 46.31% done
+
+        # A header is "key:" followed by whitespace or by the end of the line
+        # (a bare "config:" has nothing after the colon once chomped).
+        if( $line =~ s/^\s*(\S+):(?:\s+|$)// ) {
+            $mode = $1;
+            $status{$mode} = $line;
+        } elsif( defined $mode ) {
+            $status{$mode} .= "\n" . $line;
+        }
+        # Anything before the first header belongs to no section and is skipped.
+    }
+    # A failing zpool simply yields an empty status; callers see undefined sections.
+    close $fh;
+
+    return bless {
+        pool => $pool,
+        status => \%status,
+    }, $this;
+}
+
+# Return the start time of a scrub that is currently running.
+#
+# Looks for "scrub in progress since <date>" in the "scan" section of the
+# parsed status and hands <date> to DateTime::Format::DateParse.
+#
+# Returns the parsed start time, or undef when no scrub is in progress or the
+# status has no "scan" section at all.
+sub scrub_in_progress {
+    my ($this) = @_;
+    # scan: scrub in progress since Tue Feb 5 16:15:54 2013
+    my $scan = $this->{status}->{scan};
+    my $since;
+    # The section may be missing entirely; matching undef would only warn.
+    if( defined $scan && $scan =~ /scrub in progress since (.*)$/m ) {
+        $since = DateTime::Format::DateParse->parse_datetime( $1 );
+    }
+    return $since;
+}
+
+# Return the time at which the last completed scrub finished.
+#
+# Parses the "scan" section of the status, e.g.
+#   scrub repaired 0 in 5h21m with 0 errors on Mon Nov 5 21:18:23 2012
+# and hands the trailing date to DateTime::Format::DateParse.
+#
+# Returns undef when no scrub was requested, when a scrub is still running,
+# when the text is not recognised, or when there is no "scan" section.
+sub last_scrub {
+    my ($this) = @_;
+    my $last_scrub;
+
+    my $scan = $this->{status}->{scan};
+    if( !defined $scan || $scan =~ /none requested/ ) {
+        # No scan section, or "scan: none requested": nothing has finished
+    } elsif( $scan =~ /scrub repaired (\S+) in (\S+) with (\d+) errors on (.*)/ ) {
+        # scan: scrub repaired 0 in 5h21m with 0 errors on Mon Nov 5 21:18:23 2012
+        $last_scrub = DateTime::Format::DateParse->parse_datetime( $4 );
+    }
+    # Any other text (scrub in progress, unknown format) has no finish time.
+
+    return $last_scrub;
+}
+
+# Return the amount of data the last completed scrub repaired.
+#
+# The value is the token following "scrub repaired" in the "scan" section,
+# exactly as zpool printed it (e.g. "54K"). Returns 0 when that token is "0",
+# when no completed scrub is reported, or when there is no "scan" section.
+sub scrub_repairs {
+    my ($this) = @_;
+    my $scan = $this->{status}->{scan};
+    # Without a scan section there is nothing to match against.
+    return 0 unless defined $scan;
+
+    if( $scan =~ /scrub repaired (\S+) in (\S+) with (\d+) errors on (.*)/ ) {
+        return $1 if( $1 ne '0' );
+    }
+    return 0;
+}
+
+# Accessor for the "state" section of the parsed status, i.e. the text that
+# followed "state:" in the zpool output (undef if there was no such line).
+sub state {
+    my $self = shift;
+    return $self->{status}{state};
+}
+
+package main;
+
+# Plugin object and command line options.
+#
+# Both thresholds are free-form duration strings that are later converted to
+# seconds with Time::Duration::Parse::parse_duration, so the help text asks
+# for a duration with a unit (a bare number would be taken as seconds).
+my $np = Nagios::Plugin->new(
+    usage => 'Usage: %s [-v|--verbose] [-p|--pool <pool>] [-t|--timeout <timeout>] '
+        . '[-D|--warn-scrub-duration <duration>] [-A|--warn-scrub-age <duration>]',
+    );
+
+$np->add_arg(
+    spec => 'pool|p=s',
+    help => '-p | --pool <pool>[,<pool>]* . The name of the pool to check. Defaults to all pools. ',
+);
+
+$np->add_arg(
+    spec => 'warn-scrub-duration|D=s',
+    help => '-D | --warn-scrub-duration=<duration> . Warn if the scrub takes longer than the specified time, e.g. "12h". ',
+);
+
+$np->add_arg(
+    spec => 'warn-scrub-age|A=s',
+    help => '-A | --warn-scrub-age=<duration> . Warn if the last scrub finished longer ago than the specified time, e.g. "30d". ',
+);
+
+# Parse @ARGV and process standard arguments (e.g. usage, help, version)
+$np->getopts;
+
+# Pools to check: the comma separated list given with -p, otherwise every
+# pool that `zpool list` reports.
+my @pools;
+
+if( $np->opts->pool ) {
+    @pools = grep { length } split( /,/, $np->opts->pool );
+} else {
+    # Absolute path, as for `zpool status`, so the result does not depend on PATH.
+    chomp( @pools = `/usr/sbin/zpool list -Ho name` );
+}
+
+# Without any pool nothing would be checked and nothing reported.
+if( !@pools ) {
+    $np->nagios_exit( UNKNOWN, 'No ZFS pools found to check' );
+}
+
+# Thresholds: $sdt/$sat keep the text as given (used in messages),
+# $sd/$sa hold the same duration in seconds. parse_duration dies on text it
+# cannot parse, which is reported as UNKNOWN instead of a bare Perl error.
+my $sdt = $np->opts->get('warn-scrub-duration');
+my $sd;
+if( $sdt ) {
+    $sd = eval { parse_duration( $sdt ) };
+    $np->nagios_exit( UNKNOWN, "Invalid value '$sdt' for --warn-scrub-duration" )
+        unless defined $sd;
+}
+
+my $sat = $np->opts->get('warn-scrub-age');
+my $sa;
+if( $sat ) {
+    $sa = eval { parse_duration( $sat ) };
+    $np->nagios_exit( UNKNOWN, "Invalid value '$sat' for --warn-scrub-age" )
+        unless defined $sa;
+}
+
+my $fmt = DateTime::Format::Human::Duration->new();
+
+# Check every pool and collect all findings before exiting, so that the most
+# severe result over all pools and all checks decides the plugin status
+# (a scrub warning must not hide a FAULTED pool).
+my $now = DateTime->now;
+
+foreach my $poolname (@pools) {
+    my $p = eval { ZFS::Pool->new( pool => $poolname ) };
+    if( !$p ) {
+        $np->add_message( WARNING, "Could not read the status of the pool '$poolname'" );
+        next;
+    }
+
+    # 1. Warn if a running scrub takes longer than -D <duration>
+    my $since = $p->scrub_in_progress;
+    if( $sd && $since ) {
+        my $should_have_finished = $since->clone()->add( seconds => $sd );
+        if( DateTime->compare( $now, $should_have_finished ) > 0 ) {
+            $np->add_message( WARNING, "Scrub is running for the pool '$poolname' for "
+                . $fmt->format_duration( $now->subtract_datetime( $since ), significant_units => 2 )
+                . ' which is longer than ' . $sdt
+            );
+        }
+    }
+
+    # 2. Warn if the last scrub finished more than -A <duration> ago
+    my $last = $p->last_scrub;
+    if( $sa && $last ) {
+        my $should_have_scrubbed = $last->clone()->add( seconds => $sa );
+        if( DateTime->compare( $now, $should_have_scrubbed ) > 0 ) {
+            $np->add_message( WARNING, "Scrub for the pool '$poolname' was run "
+                . $fmt->format_duration( $now->subtract_datetime( $last ), significant_units => 2 )
+                . ' ago which is older than ' . $sat
+            );
+        }
+    }
+
+    # 3. Warn if there were correctable errors in the pool
+    my $repairs = $p->scrub_repairs;
+    if( $repairs ) {
+        $np->add_message( WARNING, "Scrub for the pool '$poolname' repaired $repairs" );
+    }
+
+    # 4. Warn if status is at least degraded, critical if faulted
+    my $state = $p->state;
+    $state = 'UNKNOWN' unless defined $state;   # no "state:" line in the output
+    if( $state eq 'ONLINE' ) {
+        $np->add_message( OK, "State of the pool '$poolname' is ONLINE" );
+    } elsif( $state eq 'FAULTED' ) {
+        $np->add_message( CRITICAL, "State of the pool '$poolname' is FAULTED" );
+    } elsif( $state eq 'DEGRADED' ) {
+        $np->add_message( WARNING, "State of the pool '$poolname' is DEGRADED" );
+    } else {
+        # State is UNKNOWN or... who knows?
+        $np->add_message( WARNING, "State of the pool '$poolname' is $state" );
+    }
+
+#    print "Last: ", $last || 'undef', ' ', ($last ? DateTime->now->delta_days( $last)->in_units( 'days' ) : ''), "\n";
+#    print "State: ", $p->state, "\n";
+}
+
+# Report the worst status found; OK messages are only shown when there is
+# neither a warning nor a critical message.
+my ($code, $message) = $np->check_messages( join => '; ' );
+$np->nagios_exit( $code, $message );
+
+
+__END__
+# NOTE: everything below the __END__ marker is never compiled or executed.
+# It is a scratch version of the scan-status parsing that prints its findings
+# instead of returning them. It cannot run as it stands: it reads %status,
+# which exists only as a lexical inside ZFS::Pool::new. Its pattern for a
+# completed scrub also uses (\d+) for the repaired amount, whereas the
+# ZFS::Pool methods use (\S+).
+# scan: scrub in progress since Tue Feb 5 16:15:54 2013
+my $scrub = $status{'scan'};
+if( $scrub =~ /scrub in progress since (.*)$/m ) {
+ # scan: scrub in progress since Tue Feb 5 16:15:54 2013
+ # 58.5K repaired, 46.54% done
+ my $since = DateTime::Format::DateParse->parse_datetime( $1 );
+ my $howlong = DateTime->now - $since;
+ my ( $days, $hours, $minutes ) = $howlong->in_units('days','hours', 'minutes');
+ $hours += $days * 24;
+ my ($done) = ($scrub =~ / (\S+)% done/);
+ print "Scrub since ", sprintf( "%d:%02d", ${hours}, ${minutes} ), " hours, $done % done\n";
+} elsif( $scrub =~ /none requested/ ) {
+ # scan: none requested
+ print "No scrub\n";
+} elsif( $scrub =~ /scrub repaired (\d+) in (\S+) with (\d+) errors on (.*)/ ) {
+ # scan: scrub repaired 0 in 5h21m with 0 errors on Mon Nov 5 21:18:23 2012
+ my ($repairs, $scrubtime, $errors, $finished) = ($1, $2, $3, $4);
+ my $lastfinished = DateTime::Format::DateParse->parse_datetime( $finished );
+ my $days = DateTime->now->delta_days( $lastfinished );
+ print "Last scrub was ", $days->in_units( 'days' ), " days ago\n";
+} else {
+ print "Could not parse scan status:\n";
+ print $scrub;
+}
Property changes on: mirror/check-zpool
___________________________________________________________________
Added: svn:executable
+ *
This was sent by the SourceForge.net collaborative development platform, the world's largest Open Source development site.
More information about the devel
mailing list