[csw-devel] SF.net SVN: opencsw:[650] mirror

dmichelsen at users.sourceforge.net dmichelsen at users.sourceforge.net
Mon Feb 11 11:40:50 CET 2013


Revision: 650
          http://opencsw.svn.sourceforge.net/opencsw/?rev=650&view=rev
Author:   dmichelsen
Date:     2013-02-11 10:40:49 +0000 (Mon, 11 Feb 2013)
Log Message:
-----------
Add check-zpool monitoring script for nagios

Added Paths:
-----------
    mirror/
    mirror/check-zpool

Added: mirror/check-zpool
===================================================================
--- mirror/check-zpool	                        (rev 0)
+++ mirror/check-zpool	2013-02-11 10:40:49 UTC (rev 650)
@@ -0,0 +1,229 @@
+#!/opt/csw/bin/perl
+
+use strict;
+use warnings;
+use Time::Duration::Parse;
+use DateTime::Format::DateParse;
+use DateTime::Format::Human::Duration;
+use Nagios::Plugin;
+
+use Data::Dumper;
+
+# Pass:
+# - Name of pool
+
+# Actions:
+# 1. warn if scrub takes longer than x
+# 2. warn if scrub finished longer than x days ago
+# 3. warn if there were correctable errors in the pool
+# 4. warn if status is at least degraded, critical if status is faulted
+
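+# -p <pool>[,<pool>]*   (default: all pools)
+# -D <duration>  warn if a running scrub takes longer (--warn-scrub-duration)
+# -A <duration>  warn if the last scrub is older (--warn-scrub-age)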
+
+package ZFS::Pool;
+
+use strict;
+use warnings;
+
+# Parse `zpool status <pool>` into a hash of its "key: value" sections.
+# If "<pool>.out" exists it is read instead (captured output for testing).
+# Takes pool => <name>; dies when the input cannot be opened.
+sub new {
+  my ($this, %options) = @_;
+  my $pool = $options{pool};
+  my $fh;
+  if( -f $pool . '.out' ) {
+    open $fh, '<', $pool . '.out' or die "Cannot read $pool.out: $!";
+  } else {
+    # List form bypasses the shell, so the pool name is passed verbatim;
+    # LC_ALL=C keeps the output in the format parsed below.
+    local $ENV{LC_ALL} = 'C';
+    open $fh, '-|', '/usr/sbin/zpool', 'status', $pool
+      or die "Cannot run zpool status $pool: $!";
+  }
+  my $mode;
+  my %status;
+  while( <$fh> ) {
+    chomp;
+    # "  scan: scrub in progress since ..." starts a new section; a bare
+    # "config:" has nothing after the colon, hence the (?:\s+|$).
+    if( s/^\s*(\S+):(?:\s+|$)// ) {
+      $mode = $1;
+      $status{$mode} = $_;
+    } elsif( defined $mode ) {
+      # Continuation line, e.g. "    54K repaired, 46.31% done"
+      $status{$mode} .= "\n" . $_;
+    }
+  }
+  close $fh;
+
+  return bless { pool => $pool, status => \%status }, $this;
+}
+
+# Return the start time of a running scrub as a DateTime, or undef when the
+# scan section is missing or does not say "scrub in progress since <date>".
+sub scrub_in_progress {
+  my ($this) = @_;
+  my $scan = $this->{status}->{scan};
+  # /m: the date ends the first line of the section; progress lines follow.
+  return undef unless defined $scan
+    && $scan =~ /scrub in progress since (.*)$/m;
+  return DateTime::Format::DateParse->parse_datetime( $1 );
+}
+
+# Return the completion time of the last finished scrub as a DateTime.
+# Returns undef when no scrub was requested, when a scrub is still running,
+# or when the scan section is missing or in an unrecognised format.
+sub last_scrub {
+  my ($this) = @_;
+  my $last_scrub;
+  my $scan = $this->{status}->{scan};
+
+  # scan: scrub repaired 0 in 5h21m with 0 errors on Mon Nov  5 21:18:23 2012
+  # The duration may consist of several words ("0 days 00:10:18"), hence .+?
+  if( defined $scan
+    && $scan =~ /scrub repaired \S+ in .+? with \d+ errors on (.*)/ ) {
+    $last_scrub = DateTime::Format::DateParse->parse_datetime( $1 );
+  }
+  # "scan: none requested", a running scrub and unparsable text all leave
+  # $last_scrub undefined.
+  return $last_scrub;
+}
+
+# Amount the last finished scrub repaired (e.g. "54K"), or 0 if it repaired
+# nothing (printed as "0" or with a unit, "0B") or no finished scrub is shown.
+sub scrub_repairs {
+  my ($this) = @_;
+  my $scan = $this->{status}->{scan};
+  return 0 unless defined $scan && $scan =~ /scrub repaired (\S+) in .+? with \d+ errors on /;
+  return $1 =~ /^0[A-Za-z]*$/ ? 0 : $1;
+}
+
+# Accessor for the pool's "state:" section (undef if the status has none).
+sub state {
+  return $_[0]{status}{state};
+}
+
+package main;
+
+# Plugin object used for option parsing and for reporting the check result.
+my $np = Nagios::Plugin->new(
+  usage => 'Usage: %s [-v|--verbose]  [-p|--pool <pool>] [-t|--timeout <timeout>] '
+       . '[--warn-scrub-duration <duration>] [--warn-scrub-age <duration>]',
+);
+
+$np->add_arg(
+  spec => 'pool|p=s',
+  help => '-p | --pool <pool>[,<pool>]* .  The name of the pool to check. Defaults to all pools. ',
+);
+
+# Both thresholds are fed to parse_duration() below, so they take a duration
+# string such as "6h" or "30d", not a plain count of hours or days.
+$np->add_arg(
+  spec => 'warn-scrub-duration|D=s',
+  help => '-D | --warn-scrub-duration=<duration> .  Warn if the scrub takes longer than the specified time, e.g. 6h. ',
+);
+
+$np->add_arg(
+  spec => 'warn-scrub-age|A=s',
+  help => '-A | --warn-scrub-age=<duration> .  Warn if the last scrub finished longer ago than the specified time, e.g. 30d. ',
+);
+
+# Parse @ARGV and process standard arguments (e.g. usage, help, version)
+$np->getopts;
+
+# Pools to check: the comma-separated -p list, or every pool on the system.
+my @pools;
+if( $np->opts->pool ) {
+  @pools = split( /,/, $np->opts->pool );
+} else {
+  # Same absolute path as used for "zpool status"; do not depend on PATH.
+  chomp( @pools = `/usr/sbin/zpool list -Ho name` );
+}
+$np->nagios_exit( UNKNOWN, 'No pools found to check' ) unless @pools;
+
+# $sdt/$sat keep the thresholds as typed (for messages), $sd/$sa in seconds.
+my $sdt = $np->opts->get('warn-scrub-duration');
+my $sd = $sdt ? parse_duration( $sdt ) : undef;
+
+my $sat = $np->opts->get('warn-scrub-age');
+my $sa = $sat ? parse_duration( $sat ) : undef;
+
+my $fmt = DateTime::Format::Human::Duration->new();
+
+# Check every pool and collect the findings, so that a WARNING on one pool
+# cannot hide a CRITICAL on a later one; the worst status is reported.
+foreach my $poolname (@pools) {
+  my $p = ZFS::Pool->new( pool => $poolname );
+  my $now = DateTime->now;
+
+  # 1. warn if a running scrub takes longer than -D <duration>
+  my $since = $p->scrub_in_progress;
+  if( $sd && $since
+    && DateTime->compare( $now, $since->clone()->add( seconds => $sd ) ) > 0 ) {
+    $np->add_message( WARNING, "Scrub is running for the pool '$poolname' for "
+      . $fmt->format_duration( $now->subtract_datetime( $since ), significant_units => 2 )
+      . ' which is longer than ' . $sdt );
+  }
+
+  # 2. warn if the last scrub finished more than -A <duration> ago
+  my $last = $p->last_scrub;
+  if( $sa && $last
+    && DateTime->compare( $now, $last->clone()->add( seconds => $sa ) ) > 0 ) {
+    $np->add_message( WARNING, "Scrub for the pool '$poolname' was run "
+      . $fmt->format_duration( $now->subtract_datetime( $last ), significant_units => 2 )
+      . ' ago which is older than ' . $sat );
+  }
+
+  # 3. warn if there were correctable errors in the pool
+  my $repairs = $p->scrub_repairs;
+  if( $repairs ) {
+    $np->add_message( WARNING, "Scrub for the pool '$poolname' repaired $repairs" );
+  }
+
+  # 4. warn if status is at least degraded, critical if faulted
+  my $state = $p->state;
+  $state = 'UNKNOWN' unless defined $state;   # no "state:" line was parsed
+  if( $state eq 'ONLINE' ) {
+    $np->add_message( OK, "pool '$poolname' is ONLINE" );
+  } elsif( $state eq 'FAULTED' ) {
+    $np->add_message( CRITICAL, "State of the pool '$poolname' is FAULTED" );
+  } else {
+    # DEGRADED, or some other state (UNKNOWN or... who knows?)
+    $np->add_message( WARNING, "State of the pool '$poolname' is $state" );
+  }
+}
+
+# Exit with the most severe status collected above and its messages; the OK
+# messages are only shown when there is no warning or critical finding.
+my ($code, $message) = $np->check_messages( join => '; ' );
+$np->nagios_exit( $code, $message );
+
+
+__END__
+# Unused draft of the scan parsing (never compiled: it follows __END__).
+my $scrub = $status{'scan'};
+if( $scrub =~ /scrub in progress since (.*)$/m ) {
+  #  scan: scrub in progress since Tue Feb  5 16:15:54 2013
+  #     58.5K repaired, 46.54% done
+  my $since = DateTime::Format::DateParse->parse_datetime( $1 );
+  my $howlong = DateTime->now - $since;
+  my ( $days, $hours, $minutes ) = $howlong->in_units('days','hours', 'minutes');
+  $hours += $days * 24;
+  my ($done) = ($scrub =~ / (\S+)% done/);
+  print "Scrub since ", sprintf( "%d:%02d", ${hours}, ${minutes} ), " hours, $done % done\n";
+} elsif( $scrub =~ /none requested/ ) {
+  # scan: none requested
+  print "No scrub\n";
+} elsif( $scrub =~ /scrub repaired (\d+) in (\S+) with (\d+) errors on (.*)/ ) {
+  # scan: scrub repaired 0 in 5h21m with 0 errors on Mon Nov  5 21:18:23 2012
+  my ($repairs, $scrubtime, $errors, $finished) = ($1, $2, $3, $4);
+  my $lastfinished = DateTime::Format::DateParse->parse_datetime( $finished );
+  my $days = DateTime->now->delta_days( $lastfinished );
+  print "Last scrub was ", $days->in_units( 'days' ), " days ago\n";
+} else {
+  print "Could not parse scan status:\n";
+  print $scrub;
+}


Property changes on: mirror/check-zpool
___________________________________________________________________
Added: svn:executable
   + *

This was sent by the SourceForge.net collaborative development platform, the world's largest Open Source development site.



More information about the devel mailing list