[csw-devel] SF.net SVN: opencsw:[647] buildfarm/bin/rest-interface/getcatalog

dmichelsen at users.sourceforge.net dmichelsen at users.sourceforge.net
Sun Jan 27 13:32:44 CET 2013


Revision: 647
          http://opencsw.svn.sourceforge.net/opencsw/?rev=647&view=rev
Author:   dmichelsen
Date:     2013-01-27 12:32:44 +0000 (Sun, 27 Jan 2013)
Log Message:
-----------
Dependency graph: Enhance robustness, make single threaded again, the code is already too complex for the task

Modified Paths:
--------------
    buildfarm/bin/rest-interface/getcatalog

Modified: buildfarm/bin/rest-interface/getcatalog
===================================================================
--- buildfarm/bin/rest-interface/getcatalog	2012-12-29 13:05:03 UTC (rev 646)
+++ buildfarm/bin/rest-interface/getcatalog	2013-01-27 12:32:44 UTC (rev 647)
@@ -1,5 +1,12 @@
 #!/opt/csw/bin/perl
 
+# This script generates an enhanced catalog which also contains the bundle for each package.
+# It does so by querying the REST interface for the current Solaris 10 Sparc Unstable catalog
+# and following up on the md5s of all packages to query for the package stats including the
+# bundle.
+# As the whole querying is really slow the package stats in JSON are cached in a simple
+# DB_File tied hash which maps md5s to the JSON stats as string.
+
 use strict;
 use warnings;
 
@@ -7,7 +14,6 @@
 use JSON;
 use DB_File;
 use Data::Dumper;
-use Parallel::ForkManager;
 use LWP::UserAgent;
 
 my $start = time;
@@ -19,23 +25,30 @@
 
 my $db = tie %md5cache, "DB_File", "pkgcache.dbfile";
 
-print "Grabbing catalog...\n";
+local $| = 1;
+
+my $ctime = time;
+
+# Phase 1: Get the md5s for all packages in the catalog
+# --------
+
+print "Grabbing catalog";
 my $response = $ua->get( 'http://buildfarm.opencsw.org/pkgdb/rest/catalogs/unstable/sparc/SunOS5.10/?quick=true' );
+print " (", time - $ctime, " seconds)";
 my $json_string;
 if ($response->is_success) {
-print "Decoding JSON...\n";
   $json_string = $response->decoded_content;  # or whatever
 } else {
-  print Dumper( $response );
-  die $response->status_line;
+  die " failed with " . $response->status_line;
 }
 
-#my $url = 'http://buildfarm.opencsw.org/pkgdb/rest/catalogs/unstable/sparc/SunOS5.10/';
-#my $json_string = get $url;
-# die "Couldn't get $url" unless defined $json_string;
+print " -> JSON";
+$ctime = time;
 my $catalog_data = decode_json $json_string;
+print " (", time - $ctime, " seconds)";
+print "\n";
 
-# print Dumper($catalog_data);
+# The retrieved catalog data looks like this:
 
 #$VAR1 = {
 #          'catalogname' => 'dovecot_dev',
@@ -58,71 +71,86 @@
 #          'rev' => '2012.01.20'
 #        };
 
+# Phase 2: Iterate over all md5s in the catalog and get the additional information from the JSON data
+# --------
 
-my $pm = new Parallel::ForkManager(10, '/home/web/bin/rest-interface/tmp' );
+open( C, ">catalog.new" );
 
-# data structure retrieval and handling
-$pm -> run_on_finish ( # called BEFORE the first call to start()
-  sub {
-    my ($pid, $exit_code, $ident, $exit_signal, $core_dump, $data_structure_reference) = @_;
-
-    # retrieve data structure from child
-    if (defined($data_structure_reference)) {
-      my ($md5, $stats) = @{$data_structure_reference};
-      $md5cache{$md5} = $stats;
-      $db->sync;
-    } else {
-      print qq|No message received from child process $pid!\n|;
-    }
-  }
-);
-
 my ($having, $retrieving) = (0, 0);
+my $json_stats;
 
 # Retreive the missing data
 foreach my $pkg (@$catalog_data) {
   my $md5 = $pkg->{md5_sum};
 
+  print $md5;
+
   my $url = "http://buildfarm.opencsw.org/pkgdb/rest/srv4/${md5}/pkg-stats/";
 
+  # See if we have the data in the cache
+  my $getit = 1;
   if( exists $md5cache{$md5} ) {
-    $having++;
-    print "Having $md5\n";
-  } else {
+    $getit = 0;
+    my $stats = $md5cache{$md5};
+    eval {
+      $json_stats = decode_json( $stats );
+    };
+    # There have been times where cached data was invalid.
+    # Make sure we can decode it or retrieve it again.
+    if( $@ ) {
+      print " -> Cache invalid ($@)";
+      delete $md5cache{$md5};
+      $db->sync;
+      $getit = 1;
+    } else {
+      print " -> Cached";
+      $having++;
+    }
+  }
+
+  # The data is not in the cache, retrieve it via REST
+  if( $getit ) {
     $retrieving++;
-    $pm->start() and next;
-
-    print "Retreiving $md5\n";
-    my $stats = get $url;
-    print "Done $md5\n";
-    $pm->finish( 0, [ $md5, $stats ] );
+    print " -> Retreiving";
+    my $rtime = time;
+    my $response = $ua->get( $url );
+    if ($response->is_success) {
+      my $stats = $response->decoded_content;
+      print " (", time - $rtime, " seconds)";
+      print " -> JSON";
+      $rtime = time;
+      eval {
+        $json_stats = decode_json( $stats );
+      };
+      if( $@ ) {
+        die "failed ($@ after ", time - $rtime, " seconds)";
+      }
+      $md5cache{$md5} = $stats;
+      $db->sync;
+    } else {
+      die " -> Failed for $url with ", $response->status_line;
+    }
   }
-}
 
-$pm->wait_all_children;
+  # The data is now available in the Perl struct $json_stats.
+  # If there had been a fatal error, the script would already have died before reaching this point.
 
-open( C, ">catalog.new" );
-
-foreach my $pkg (@$catalog_data) {
-  my $md5 = $pkg->{md5_sum};
-  my $stats = $md5cache{$md5};
-  my $json_stats = decode_json( $stats );
-
   my @depends = map { $_->[0] } @{$json_stats->{depends}};
   my @i_depends = @{$json_stats->{i_depends}};
 
-
-#  print C join( ' ', $pkg->{catalogname}, $pkg->{version}, $pkg->{pkgname}, $pkg->{file_basename}, $pkg->{md5_sum}, $pkg->{size}, 
   print C join( ' ', $pkg->{catalogname}, $pkg->{version}, $json_stats->{pkginfo}->{PKG}, $pkg->{file_basename}, $pkg->{md5_sum}, $pkg->{size}, 
     join( '|', @depends ) || 'none', join( '|', @i_depends ) || 'none', 'none', $json_stats->{pkginfo}->{OPENCSW_BUNDLE} || 'none' ), "\n";
+  print "\n";
 }
 
 close C;
 
+# Make the new enhanced catalog the current one
 unlink( "catalog" );
 link( "catalog.new", "catalog" );
 unlink( "catalog.new" );
 
+# Send result as passive check to Nagios
 my $runtime = time - $start;
 
 open S, "| /opt/csw/nagios/bin/send_nsca -H 192.168.1.50 -d ';' -c /etc/opt/csw/nagios/send_nsca.cfg";

This was sent by the SourceForge.net collaborative development platform, the world's largest Open Source development site.



More information about the devel mailing list