[csw-devel] SF.net SVN: gar:[17664] csw/mgar/gar/v2/lib

wahwah at users.sourceforge.net wahwah at users.sourceforge.net
Sat Apr 14 13:32:23 CEST 2012


Revision: 17664
          http://gar.svn.sourceforge.net/gar/?rev=17664&view=rev
Author:   wahwah
Date:     2012-04-14 11:32:23 +0000 (Sat, 14 Apr 2012)
Log Message:
-----------
pkgdb: Use cjson, it's 10 times faster

Modified Paths:
--------------
    csw/mgar/gar/v2/lib/python/integrate_catalogs.py
    csw/mgar/gar/v2/lib/python/rest.py
    csw/mgar/gar/v2/lib/web/pkgdb_web.py

Modified: csw/mgar/gar/v2/lib/python/integrate_catalogs.py
===================================================================
--- csw/mgar/gar/v2/lib/python/integrate_catalogs.py	2012-04-14 10:38:14 UTC (rev 17663)
+++ csw/mgar/gar/v2/lib/python/integrate_catalogs.py	2012-04-14 11:32:23 UTC (rev 17664)
@@ -11,6 +11,7 @@
 """
 
 from Cheetah import Template
+import cjson
 import json
 import catalog
 import common_constants
@@ -189,12 +190,51 @@
   if options.from_json:
     with open(options.from_json, "rb") as fd:
       logging.info("Loading %s", options.from_json)
-      diffs_by_catalogname = json.load(fd)
+      bundles_by_md5, jsonable_catalogs, diffs_by_catalogname = cjson.decode(fd.read())
+      catalogs = dict((tuple(cjson.decode(x)), jsonable_catalogs[x]) for x in jsonable_catalogs)
   else:
-    diffs_by_catalogname = GetDiffsByCatalogname(
+    catalogs, diffs_by_catalogname = GetDiffsByCatalogname(
         catrel_from, catrel_to, options.include_downgrades,
         options.include_version_changes)
+    bundles_by_md5 = {}
+    bundles_missing = set()
+    cp = CachedPkgstats("pkgstats.db")
+    for key in catalogs:
+      for pkg in catalogs[key]:
+        # logging.debug("%r", pkg)
+        md5 = pkg["md5_sum"]
+        if md5 not in bundles_by_md5 and md5 not in bundles_missing:
+          stats = cp.GetPkgstats(md5)
+          bundle_key = "OPENCSW_BUNDLE"
+          # pprint.pprint(stats)
+          if stats:
+            if bundle_key in stats["pkginfo"]:
+              bundles_by_md5[md5] = stats["pkginfo"][bundle_key]
+            else:
+              logging.info(
+                  "%r (%r) does not have the bundle set",
+                  stats["basic_stats"]["pkg_basename"], md5)
+              bundles_missing.add(md5)
+  # Here's a good place to calculate the mapping between catalognames and
+  # bundle names.
+  change_types = "new_pkgs", "removed_pkgs", "updated_pkgs"
+  bundles_by_catalogname = {}
+  for catalogname in diffs_by_catalogname:
+    l = bundles_by_catalogname.setdefault(catalogname, set())
+    for change_type in change_types:
+      if change_type in diffs_by_catalogname[catalogname]:
+        for change_info in diffs_by_catalogname[catalogname][change_type]:
+          pkg = change_info[2]
+          if "to" in pkg:
+            md5s = [x["md5_sum"] for x in (pkg["from"], pkg["to"])]
+          else:
+            md5s = [pkg["md5_sum"]]
+          for md5 in md5s:
+            if md5 in bundles_by_md5:
+              l.add(bundles_by_md5[md5])
   namespace = {
+      "bundles_by_catalogname": bundles_by_catalogname,
+      "bundles_by_md5": bundles_by_md5,
       "diffs_by_catalogname": diffs_by_catalogname,
       "catrel_to": catrel_to,
       "catrel_from": catrel_from,
@@ -202,7 +242,9 @@
   }
   if options.save_json:
     with open(options.save_json, "wb") as fd:
-      json.dump(diffs_by_catalogname, fd)
+      jsonable_catalogs = dict((cjson.encode(x), catalogs[x]) for x in catalogs)
+      fd.write(cjson.encode(
+        (bundles_by_md5, jsonable_catalogs, diffs_by_catalogname)))
   t = Template.Template(CATALOG_MOD_TMPL, searchList=[namespace])
   if options.output_file:
     logging.info("Saving output to %s", options.output_file)

Modified: csw/mgar/gar/v2/lib/python/rest.py
===================================================================
--- csw/mgar/gar/v2/lib/python/rest.py	2012-04-14 10:38:14 UTC (rev 17663)
+++ csw/mgar/gar/v2/lib/python/rest.py	2012-04-14 11:32:23 UTC (rev 17664)
@@ -2,7 +2,7 @@
 
 import logging
 import urllib2
-import json
+import cjson
 
 DEFAULT_URL = "http://buildfarm.opencsw.org"
 
@@ -27,7 +27,7 @@
     logging.debug("GetPkgByMd5(): GET %s", url)
     try:
       data = urllib2.urlopen(url).read()
-      return json.loads(data)
+      return cjson.decode(data)
     except urllib2.HTTPError, e:
       logging.warning("%s -- %s", url, e)
       if e.code == 404:
@@ -53,7 +53,7 @@
     logging.debug("GetCatalog(): GET %s", url)
     try:
       data = urllib2.urlopen(url).read()
-      return json.loads(data)
+      return cjson.decode(data)
     except urllib2.HTTPError, e:
       logging.warning("%s -- %s", url, e)
       return None
@@ -67,4 +67,4 @@
     # The server is no longer returning 404 when the package is absent.  If
     # a HTTP error code is returned, we're letting the application fail.
     data = urllib2.urlopen(url).read()
-    return json.loads(data)
+    return cjson.decode(data)

Modified: csw/mgar/gar/v2/lib/web/pkgdb_web.py
===================================================================
--- csw/mgar/gar/v2/lib/web/pkgdb_web.py	2012-04-14 10:38:14 UTC (rev 17663)
+++ csw/mgar/gar/v2/lib/web/pkgdb_web.py	2012-04-14 11:32:23 UTC (rev 17664)
@@ -6,11 +6,12 @@
 import os
 sys.path.append(os.path.join(os.path.split(__file__)[0], "..", ".."))
 
-import web
-import sqlobject
+import cjson
 import json
 import logging
 import pprint
+import sqlobject
+import web
 
 from lib.python import models
 from lib.python import configuration
@@ -56,6 +57,7 @@
 render = web.template.render(templatedir)
 
 
+# TODO(maciej): Convert this extension to cjson.
 class PkgStatsEncoder(json.JSONEncoder):
   """Maps frozensets to lists."""
   def default(self, obj):
@@ -211,7 +213,7 @@
 class RestMaintainerDetail(object):
   def GET(self, id):
     maintainer = models.Maintainer.selectBy(id=id).getOne()
-    return json.dumps(maintainer.GetRestRepr())
+    return cjson.encode(maintainer.GetRestRepr())
 
 
 class MaintainerCheckpkgReport(object):
@@ -275,7 +277,7 @@
       raise web.notfound()
     web.header('Content-type', 'application/x-vnd.opencsw.pkg;type=srv4-list')
     pkgs_data = [p.GetRestRepr(quick)[1] for p in pkgs]
-    return json.dumps(pkgs_data)
+    return cjson.encode(pkgs_data)
 
 
 class PkgnameByFilename(object):
@@ -292,7 +294,7 @@
       web.header('X-Rest-Info', 'I could tell you about the format, but I won\'t')
       web.header('Content-Disposition',
                  'attachment; filename=%s' % send_filename)
-      return json.dumps(sorted(pkgs))
+      return cjson.encode(sorted(pkgs))
     except sqlobject.main.SQLObjectNotFound, e:
       raise web.notfound()
 
@@ -313,7 +315,7 @@
           'application/x-vnd.opencsw.pkg;type=pkgname-list')
       web.header('Content-Disposition',
                  'attachment; filename=%s' % send_filename)
-      return json.dumps(data)
+      return cjson.encode(data)
     except sqlobject.main.SQLObjectNotFound, e:
       raise web.notfound()
 
@@ -326,7 +328,7 @@
       mimetype, data_structure = pkg.GetRestRepr()
       web.header('Content-type', mimetype)
       web.header('Access-Control-Allow-Origin', '*')
-      return json.dumps(data_structure)
+      return cjson.encode(data_structure)
     except sqlobject.main.SQLObjectNotFound, e:
       raise web.notfound()
 
@@ -346,7 +348,7 @@
             "line": file_obj.line,
         }
       serializable_files = [FileDict(x) for x in files]
-      return json.dumps(serializable_files)
+      return cjson.encode(serializable_files)
     except sqlobject.main.SQLObjectNotFound, e:
       raise web.notfound()
 
@@ -389,9 +391,9 @@
       mimetype, data = srv4.GetRestRepr()
       web.header('Content-type', mimetype)
       web.header('Access-Control-Allow-Origin', '*')
-      return json.dumps(data)
+      return cjson.encode(data)
     except sqlobject.main.SQLObjectNotFound:
-      return json.dumps(None)
+      return cjson.encode(None)
     except sqlobject.dberrors.OperationalError, e:
       raise web.internalerror(e)
 
@@ -424,9 +426,9 @@
       srv4 = res.getOne()
       mimetype, data = srv4.GetRestRepr()
       web.header('Content-type', mimetype)
-      return json.dumps(data)
+      return cjson.encode(data)
     except sqlobject.main.SQLObjectNotFound:
-      return json.dumps(None)
+      return cjson.encode(None)
     except sqlobject.dberrors.OperationalError, e:
       raise web.internalerror(e)
 

This was sent by the SourceForge.net collaborative development platform, the world's largest Open Source development site.



More information about the devel mailing list