[csw-devel] SF.net SVN: opencsw:[647] buildfarm/bin/rest-interface/getcatalog
dmichelsen at users.sourceforge.net
dmichelsen at users.sourceforge.net
Sun Jan 27 13:32:44 CET 2013
Revision: 647
http://opencsw.svn.sourceforge.net/opencsw/?rev=647&view=rev
Author: dmichelsen
Date: 2013-01-27 12:32:44 +0000 (Sun, 27 Jan 2013)
Log Message:
-----------
Dependency graph: Enhance robustness, make single threaded again, the code is already too complex for the task
Modified Paths:
--------------
buildfarm/bin/rest-interface/getcatalog
Modified: buildfarm/bin/rest-interface/getcatalog
===================================================================
--- buildfarm/bin/rest-interface/getcatalog 2012-12-29 13:05:03 UTC (rev 646)
+++ buildfarm/bin/rest-interface/getcatalog 2013-01-27 12:32:44 UTC (rev 647)
@@ -1,5 +1,12 @@
#!/opt/csw/bin/perl
+# This script generates an enhanced catalog which also contains the bundle for each package.
+# It does so by querying the REST interface for the current Solaris 10 Sparc Unstable catalog
+# and following up on the md5s of all packages to query for the package stats including the
+# bundle.
+# As the whole querying is really slow the package stats in JSON are cached in a simple
+# DB_File tied hash which maps md5s to the JSON stats as string.
+
use strict;
use warnings;
@@ -7,7 +14,6 @@
use JSON;
use DB_File;
use Data::Dumper;
-use Parallel::ForkManager;
use LWP::UserAgent;
my $start = time;
@@ -19,23 +25,30 @@
my $db = tie %md5cache, "DB_File", "pkgcache.dbfile";
-print "Grabbing catalog...\n";
+local $| = 1;
+
+my $ctime = time;
+
+# Phase 1: Get the md5s for all packages in the catalog
+# --------
+
+print "Grabbing catalog";
my $response = $ua->get( 'http://buildfarm.opencsw.org/pkgdb/rest/catalogs/unstable/sparc/SunOS5.10/?quick=true' );
+print " (", time - $ctime, " seconds)";
my $json_string;
if ($response->is_success) {
-print "Decoding JSON...\n";
$json_string = $response->decoded_content; # or whatever
} else {
- print Dumper( $response );
- die $response->status_line;
+ die " failed with " . $response->status_line;
}
-#my $url = 'http://buildfarm.opencsw.org/pkgdb/rest/catalogs/unstable/sparc/SunOS5.10/';
-#my $json_string = get $url;
-# die "Couldn't get $url" unless defined $json_string;
+print " -> JSON";
+$ctime = time;
my $catalog_data = decode_json $json_string;
+print " (", time - $ctime, " seconds)";
+print "\n";
-# print Dumper($catalog_data);
+# The retrieved catalog data looks like this:
#$VAR1 = {
# 'catalogname' => 'dovecot_dev',
@@ -58,71 +71,86 @@
# 'rev' => '2012.01.20'
# };
+# Phase 2: Iterate over all md5s in the catalog and get the additional information from the JSON data
+# --------
-my $pm = new Parallel::ForkManager(10, '/home/web/bin/rest-interface/tmp' );
+open( C, ">catalog.new" );
-# data structure retrieval and handling
-$pm -> run_on_finish ( # called BEFORE the first call to start()
- sub {
- my ($pid, $exit_code, $ident, $exit_signal, $core_dump, $data_structure_reference) = @_;
-
- # retrieve data structure from child
- if (defined($data_structure_reference)) {
- my ($md5, $stats) = @{$data_structure_reference};
- $md5cache{$md5} = $stats;
- $db->sync;
- } else {
- print qq|No message received from child process $pid!\n|;
- }
- }
-);
-
my ($having, $retrieving) = (0, 0);
+my $json_stats;
# Retrieve the missing data
foreach my $pkg (@$catalog_data) {
my $md5 = $pkg->{md5_sum};
+ print $md5;
+
my $url = "http://buildfarm.opencsw.org/pkgdb/rest/srv4/${md5}/pkg-stats/";
+ # See if we have the data in the cache
+ my $getit = 1;
if( exists $md5cache{$md5} ) {
- $having++;
- print "Having $md5\n";
- } else {
+ $getit = 0;
+ my $stats = $md5cache{$md5};
+ eval {
+ $json_stats = decode_json( $stats );
+ };
+ # There have been times when cached data was invalid.
+ # Make sure we can decode it or retrieve it again.
+ if( $@ ) {
+ print " -> Cache invalid ($@)";
+ delete $md5cache{$md5};
+ $db->sync;
+ $getit = 1;
+ } else {
+ print " -> Cached";
+ $having++;
+ }
+ }
+
+ # The data is not in the cache, retrieve it via REST
+ if( $getit ) {
$retrieving++;
- $pm->start() and next;
-
- print "Retreiving $md5\n";
- my $stats = get $url;
- print "Done $md5\n";
- $pm->finish( 0, [ $md5, $stats ] );
+ print " -> Retreiving";
+ my $rtime = time;
+ my $response = $ua->get( $url );
+ if ($response->is_success) {
+ my $stats = $response->decoded_content;
+ print " (", time - $rtime, " seconds)";
+ print " -> JSON";
+ $rtime = time;
+ eval {
+ $json_stats = decode_json( $stats );
+ };
+ if( $@ ) {
+ die "failed ($@ after ", time - $rtime, " seconds)";
+ }
+ $md5cache{$md5} = $stats;
+ $db->sync;
+ } else {
+ die " -> Failed for $url with ", $response->status_line;
+ }
}
-}
-$pm->wait_all_children;
+ # The data is now available in the Perl struct $json_stats.
+ # If there had been a fatal error we would not get here, as the script would have died before.
-open( C, ">catalog.new" );
-
-foreach my $pkg (@$catalog_data) {
- my $md5 = $pkg->{md5_sum};
- my $stats = $md5cache{$md5};
- my $json_stats = decode_json( $stats );
-
my @depends = map { $_->[0] } @{$json_stats->{depends}};
my @i_depends = @{$json_stats->{i_depends}};
-
-# print C join( ' ', $pkg->{catalogname}, $pkg->{version}, $pkg->{pkgname}, $pkg->{file_basename}, $pkg->{md5_sum}, $pkg->{size},
print C join( ' ', $pkg->{catalogname}, $pkg->{version}, $json_stats->{pkginfo}->{PKG}, $pkg->{file_basename}, $pkg->{md5_sum}, $pkg->{size},
join( '|', @depends ) || 'none', join( '|', @i_depends ) || 'none', 'none', $json_stats->{pkginfo}->{OPENCSW_BUNDLE} || 'none' ), "\n";
+ print "\n";
}
close C;
+# Make the new enhanced catalog the current one
unlink( "catalog" );
link( "catalog.new", "catalog" );
unlink( "catalog.new" );
+# Send result as passive check to Nagios
my $runtime = time - $start;
open S, "| /opt/csw/nagios/bin/send_nsca -H 192.168.1.50 -d ';' -c /etc/opt/csw/nagios/send_nsca.cfg";
This was sent by the SourceForge.net collaborative development platform, the world's largest Open Source development site.
More information about the devel
mailing list