[csw-devel] SF.net SVN: gar:[17664] csw/mgar/gar/v2/lib
wahwah at users.sourceforge.net
wahwah at users.sourceforge.net
Sat Apr 14 13:32:23 CEST 2012
Revision: 17664
http://gar.svn.sourceforge.net/gar/?rev=17664&view=rev
Author: wahwah
Date: 2012-04-14 11:32:23 +0000 (Sat, 14 Apr 2012)
Log Message:
-----------
pkgdb: Use cjson; it's 10 times faster
Modified Paths:
--------------
csw/mgar/gar/v2/lib/python/integrate_catalogs.py
csw/mgar/gar/v2/lib/python/rest.py
csw/mgar/gar/v2/lib/web/pkgdb_web.py
Modified: csw/mgar/gar/v2/lib/python/integrate_catalogs.py
===================================================================
--- csw/mgar/gar/v2/lib/python/integrate_catalogs.py 2012-04-14 10:38:14 UTC (rev 17663)
+++ csw/mgar/gar/v2/lib/python/integrate_catalogs.py 2012-04-14 11:32:23 UTC (rev 17664)
@@ -11,6 +11,7 @@
"""
from Cheetah import Template
+import cjson
import json
import catalog
import common_constants
@@ -189,12 +190,51 @@
if options.from_json:
with open(options.from_json, "rb") as fd:
logging.info("Loading %s", options.from_json)
- diffs_by_catalogname = json.load(fd)
+ bundles_by_md5, jsonable_catalogs, diffs_by_catalogname = cjson.decode(fd.read())
+ catalogs = dict((tuple(cjson.decode(x)), jsonable_catalogs[x]) for x in jsonable_catalogs)
else:
- diffs_by_catalogname = GetDiffsByCatalogname(
+ catalogs, diffs_by_catalogname = GetDiffsByCatalogname(
catrel_from, catrel_to, options.include_downgrades,
options.include_version_changes)
+ bundles_by_md5 = {}
+ bundles_missing = set()
+ cp = CachedPkgstats("pkgstats.db")
+ for key in catalogs:
+ for pkg in catalogs[key]:
+ # logging.debug("%r", pkg)
+ md5 = pkg["md5_sum"]
+ if md5 not in bundles_by_md5 and md5 not in bundles_missing:
+ stats = cp.GetPkgstats(md5)
+ bundle_key = "OPENCSW_BUNDLE"
+ # pprint.pprint(stats)
+ if stats:
+ if bundle_key in stats["pkginfo"]:
+ bundles_by_md5[md5] = stats["pkginfo"][bundle_key]
+ else:
+ logging.info(
+ "%r (%r) does not have the bundle set",
+ stats["basic_stats"]["pkg_basename"], md5)
+ bundles_missing.add(md5)
+ # Here's a good place to calculate the mapping between catalognames and
+ # bundle names.
+ change_types = "new_pkgs", "removed_pkgs", "updated_pkgs"
+ bundles_by_catalogname = {}
+ for catalogname in diffs_by_catalogname:
+ l = bundles_by_catalogname.setdefault(catalogname, set())
+ for change_type in change_types:
+ if change_type in diffs_by_catalogname[catalogname]:
+ for change_info in diffs_by_catalogname[catalogname][change_type]:
+ pkg = change_info[2]
+ if "to" in pkg:
+ md5s = [x["md5_sum"] for x in (pkg["from"], pkg["to"])]
+ else:
+ md5s = [pkg["md5_sum"]]
+ for md5 in md5s:
+ if md5 in bundles_by_md5:
+ l.add(bundles_by_md5[md5])
namespace = {
+ "bundles_by_catalogname": bundles_by_catalogname,
+ "bundles_by_md5": bundles_by_md5,
"diffs_by_catalogname": diffs_by_catalogname,
"catrel_to": catrel_to,
"catrel_from": catrel_from,
@@ -202,7 +242,9 @@
}
if options.save_json:
with open(options.save_json, "wb") as fd:
- json.dump(diffs_by_catalogname, fd)
+ jsonable_catalogs = dict((cjson.encode(x), catalogs[x]) for x in catalogs)
+ fd.write(cjson.encode(
+ (bundles_by_md5, jsonable_catalogs, diffs_by_catalogname)))
t = Template.Template(CATALOG_MOD_TMPL, searchList=[namespace])
if options.output_file:
logging.info("Saving output to %s", options.output_file)
Modified: csw/mgar/gar/v2/lib/python/rest.py
===================================================================
--- csw/mgar/gar/v2/lib/python/rest.py 2012-04-14 10:38:14 UTC (rev 17663)
+++ csw/mgar/gar/v2/lib/python/rest.py 2012-04-14 11:32:23 UTC (rev 17664)
@@ -2,7 +2,7 @@
import logging
import urllib2
-import json
+import cjson
DEFAULT_URL = "http://buildfarm.opencsw.org"
@@ -27,7 +27,7 @@
logging.debug("GetPkgByMd5(): GET %s", url)
try:
data = urllib2.urlopen(url).read()
- return json.loads(data)
+ return cjson.decode(data)
except urllib2.HTTPError, e:
logging.warning("%s -- %s", url, e)
if e.code == 404:
@@ -53,7 +53,7 @@
logging.debug("GetCatalog(): GET %s", url)
try:
data = urllib2.urlopen(url).read()
- return json.loads(data)
+ return cjson.decode(data)
except urllib2.HTTPError, e:
logging.warning("%s -- %s", url, e)
return None
@@ -67,4 +67,4 @@
# The server is no longer returning 404 when the package is absent. If
# a HTTP error code is returned, we're letting the application fail.
data = urllib2.urlopen(url).read()
- return json.loads(data)
+ return cjson.decode(data)
Modified: csw/mgar/gar/v2/lib/web/pkgdb_web.py
===================================================================
--- csw/mgar/gar/v2/lib/web/pkgdb_web.py 2012-04-14 10:38:14 UTC (rev 17663)
+++ csw/mgar/gar/v2/lib/web/pkgdb_web.py 2012-04-14 11:32:23 UTC (rev 17664)
@@ -6,11 +6,12 @@
import os
sys.path.append(os.path.join(os.path.split(__file__)[0], "..", ".."))
-import web
-import sqlobject
+import cjson
import json
import logging
import pprint
+import sqlobject
+import web
from lib.python import models
from lib.python import configuration
@@ -56,6 +57,7 @@
render = web.template.render(templatedir)
+# TODO(maciej): Convert this extension to cjson.
class PkgStatsEncoder(json.JSONEncoder):
"""Maps frozensets to lists."""
def default(self, obj):
@@ -211,7 +213,7 @@
class RestMaintainerDetail(object):
def GET(self, id):
maintainer = models.Maintainer.selectBy(id=id).getOne()
- return json.dumps(maintainer.GetRestRepr())
+ return cjson.encode(maintainer.GetRestRepr())
class MaintainerCheckpkgReport(object):
@@ -275,7 +277,7 @@
raise web.notfound()
web.header('Content-type', 'application/x-vnd.opencsw.pkg;type=srv4-list')
pkgs_data = [p.GetRestRepr(quick)[1] for p in pkgs]
- return json.dumps(pkgs_data)
+ return cjson.encode(pkgs_data)
class PkgnameByFilename(object):
@@ -292,7 +294,7 @@
web.header('X-Rest-Info', 'I could tell you about the format, but I won\'t')
web.header('Content-Disposition',
'attachment; filename=%s' % send_filename)
- return json.dumps(sorted(pkgs))
+ return cjson.encode(sorted(pkgs))
except sqlobject.main.SQLObjectNotFound, e:
raise web.notfound()
@@ -313,7 +315,7 @@
'application/x-vnd.opencsw.pkg;type=pkgname-list')
web.header('Content-Disposition',
'attachment; filename=%s' % send_filename)
- return json.dumps(data)
+ return cjson.encode(data)
except sqlobject.main.SQLObjectNotFound, e:
raise web.notfound()
@@ -326,7 +328,7 @@
mimetype, data_structure = pkg.GetRestRepr()
web.header('Content-type', mimetype)
web.header('Access-Control-Allow-Origin', '*')
- return json.dumps(data_structure)
+ return cjson.encode(data_structure)
except sqlobject.main.SQLObjectNotFound, e:
raise web.notfound()
@@ -346,7 +348,7 @@
"line": file_obj.line,
}
serializable_files = [FileDict(x) for x in files]
- return json.dumps(serializable_files)
+ return cjson.encode(serializable_files)
except sqlobject.main.SQLObjectNotFound, e:
raise web.notfound()
@@ -389,9 +391,9 @@
mimetype, data = srv4.GetRestRepr()
web.header('Content-type', mimetype)
web.header('Access-Control-Allow-Origin', '*')
- return json.dumps(data)
+ return cjson.encode(data)
except sqlobject.main.SQLObjectNotFound:
- return json.dumps(None)
+ return cjson.encode(None)
except sqlobject.dberrors.OperationalError, e:
raise web.internalerror(e)
@@ -424,9 +426,9 @@
srv4 = res.getOne()
mimetype, data = srv4.GetRestRepr()
web.header('Content-type', mimetype)
- return json.dumps(data)
+ return cjson.encode(data)
except sqlobject.main.SQLObjectNotFound:
- return json.dumps(None)
+ return cjson.encode(None)
except sqlobject.dberrors.OperationalError, e:
raise web.internalerror(e)
This was sent by the SourceForge.net collaborative development platform, the world's largest Open Source development site.
More information about the devel mailing list