[csw-devel] SF.net SVN: opencsw:[658] mirror/check-zpool
dmichelsen at users.sourceforge.net
dmichelsen at users.sourceforge.net
Wed Feb 20 16:17:29 CET 2013
Revision: 658
http://opencsw.svn.sourceforge.net/opencsw/?rev=658&view=rev
Author: dmichelsen
Date: 2013-02-20 15:17:27 +0000 (Wed, 20 Feb 2013)
Log Message:
-----------
check-zpool: Collect messages, add performance data
Modified Paths:
--------------
mirror/check-zpool
Modified: mirror/check-zpool
===================================================================
--- mirror/check-zpool 2013-02-19 14:57:26 UTC (rev 657)
+++ mirror/check-zpool 2013-02-20 15:17:27 UTC (rev 658)
@@ -63,7 +63,7 @@
}, $this;
}
-sub scrub_in_progress {
+sub scrub_since {
my ($this) = @_;
my $scan = $this->{status}->{scan};
@@ -76,6 +76,33 @@
return $since;
}
+sub scrub_speed {
+ my ($this) = @_;
+ my $scan = $this->{status}->{scan};
+
+ # 993G scanned out of 1.20T at 9.65M/s, 7h3m to go
+ my $speed;
+ if( $scan =~ /scanned out of \S+ at (\d+(?:\.\d+)?)([^\/]*)\/s, /m ) {
+ $speed = $1;
+ $speed *= 1024 if( $2 eq 'K' );
+ $speed *= 1024 * 1024 if( $2 eq 'M' );
+ $speed *= 1024 * 1024 * 1024 if( $2 eq 'G' );
+ }
+ return int( $speed );
+}
+
+sub scrub_done {
+ my ($this) = @_;
+ my $scan = $this->{status}->{scan};
+
+ # 0 repaired, 80.58% done
+ my $done;
+ if( $scan =~ /, (\d+(?:\.\d+)?)\% done/m ) {
+ $done = $1;
+ }
+ return $done;
+}
+
sub last_scrub {
my ($this) = @_;
my $last_scrub;
@@ -87,7 +114,7 @@
} elsif( $scan =~ /scrub repaired (\S+) in (\S+) with (\d+) errors on (.*)/ ) {
# scan: scrub repaired 0 in 5h21m with 0 errors on Mon Nov 5 21:18:23 2012
$last_scrub = DateTime::Format::DateParse->parse_datetime( $4 );
- } elsif( $scan ||= $this->scrub_in_progress ) {
+ } elsif( $scan ||= $this->scrub_since ) {
} else {
# Could not parse format
}
@@ -158,50 +185,63 @@
my @status;
+my @levels = (qw(CRITICAL WARNING UNKNOWN OK));
+
+my %exit = map { $_ => [] } @levels;
+
foreach my $poolname (@pools) {
my $p = ZFS::Pool->new( pool => $poolname );
- # Warn if scrub takes longer then -D <duration>
- my $since = $p->scrub_in_progress;
+ # Scrub is in progress
+ my $since = $p->scrub_since;
my $scrub_duration;
if( $since ) {
+ my $scrub_speed = $p->scrub_speed;
+ if( $scrub_speed ) {
+ $np->add_perfdata( label => "${poolname}_scrub_speed", value => $scrub_speed, uom => "B" );
+ }
+
+ my $scrub_done = $p->scrub_done;
+ if( $scrub_done ) {
+ $np->add_perfdata( label => "${poolname}_scrub_done", value => $scrub_done, uom => "%" );
+ }
+
$scrub_duration = $fmt->format_duration( DateTime->now->subtract_datetime( $since ), significant_units => 2 );
if( $sd ) {
+ # Warn if scrub takes longer than -D <duration>
my $should_have_finished = $since->clone()->add_duration( $sd );
if( DateTime->compare( DateTime->now, $should_have_finished ) > 0 ) {
- $np->nagios_exit( WARNING, "Scrub is running for the pool '$poolname' for " . $scrub_duration
- . ' which is longer than ' . $fmt->format_duration( $sd, significant_units => 2 )
- );
+ push @{$exit{WARNING}}, "Scrub is running for the pool '$poolname' for " . $scrub_duration
+ . ' which is longer than ' . $fmt->format_duration( $sd, significant_units => 2 );
}
}
- push @status, "pool '$poolname' is scrubbing for " . $scrub_duration;
+ push @{$exit{OK}}, "Pool '$poolname' is scrubbing for " . $scrub_duration;
}
- # Warn if last scrub finished more -A <duration> ago
+ # Scrub has been run in the past
my $last = $p->last_scrub;
if( $last ) {
my $no_scrub_since = $fmt->format_duration( DateTime->now->subtract_datetime( $last ), significant_units => 2 );
if( $sa ) {
+ # Warn if last scrub finished more than -A <duration> ago
my $should_have_scrubbed = $last->clone()->add_duration( $sa );
if( $sa && $last && DateTime->compare( DateTime->now, $should_have_scrubbed ) > 0 ) {
- $np->nagios_exit( WARNING, "Scrub for the pool '$poolname' was run " . $no_scrub_since
- . ' ago which is older than ' . $fmt->format_duration( $sa, significant_units => 2 )
- );
+ push @{$exit{WARNING}}, "Scrub for the pool '$poolname' was run " . $no_scrub_since
+ . ' ago which is older than ' . $fmt->format_duration( $sa, significant_units => 2 );
}
}
- push @status, "pool '$poolname' was scrubbed $no_scrub_since ago";
+ push @{$exit{OK}}, "Pool '$poolname' was scrubbed $no_scrub_since ago";
} else {
if( !$since ) {
- $np->nagios_exit( WARNING, "Scrub for the pool '$poolname' was never run"
- . ' but must not be older than ' . $fmt->format_duration( $sa, significant_units => 2 )
- );
+ push @{$exit{WARNING}}, "Scrub for the pool '$poolname' was never run"
+ . ' but must not be older than ' . $fmt->format_duration( $sa, significant_units => 2 );
}
}
# 3. warn if there were correctable errors in the pool
my $repairs = $p->scrub_repairs;
if( $repairs ) {
- $np->nagios_exit( WARNING, "Scrub for the pool '$poolname' repaired $repairs" );
+ push @{$exit{WARNING}}, "Scrub for the pool '$poolname' repaired $repairs";
}
# 4. warn if status is at least degraded, critical if faulted
@@ -209,14 +249,19 @@
if( $state eq 'ONLINE' ) {
# Ok
} elsif( $state eq 'FAULTED' ) {
- $np->nagios_exit( CRITICAL, "State of the pool '$poolname' is FAULTED" );
+ push @{$exit{CRITICAL}}, "State of the pool '$poolname' is FAULTED";
} elsif( $state eq 'DEGRADED' ) {
- $np->nagios_exit( WARNING, "State of the pool '$poolname' is DEGRADED" );
+ push @{$exit{WARNING}}, "State of the pool '$poolname' is DEGRADED";
} else {
# State is UNKNOWN or... who knows?
- $np->nagios_exit( WARNING, "State of the pool '$poolname' is $state" );
+ push @{$exit{UNKNOWN}}, "State of the pool '$poolname' is $state";
}
}
-$np->nagios_exit( OK, 'Pool status ' . join( ', ', @status ) );
+# Report higher levels of problems first
+foreach my $l (@levels) {
+ if( @{$exit{$l}} ) {
+ $np->nagios_exit( $l, join( ', ', @{$exit{$l}} ) );
+ }
+}
This was sent by the SourceForge.net collaborative development platform, the world's largest Open Source development site.
More information about the devel
mailing list