[csw-devel] SF.net SVN: gar:[20893] csw/mgar/gar/v2/lib

wahwah at users.sourceforge.net wahwah at users.sourceforge.net
Sun Apr 28 20:23:20 CEST 2013


Revision: 20893
          http://gar.svn.sourceforge.net/gar/?rev=20893&view=rev
Author:   wahwah
Date:     2013-04-28 18:23:19 +0000 (Sun, 28 Apr 2013)
Log Message:
-----------
pkgdb: Catalog generation information

Storing catalog information in a small table.

This change speeds up catalog generation by no longer fetching the
currently gigantic full package stats.

Modified Paths:
--------------
    csw/mgar/gar/v2/lib/python/database.py
    csw/mgar/gar/v2/lib/python/generate_catalog_file.py
    csw/mgar/gar/v2/lib/python/models.py
    csw/mgar/gar/v2/lib/python/package_stats.py
    csw/mgar/gar/v2/lib/python/rest.py
    csw/mgar/gar/v2/lib/sh/db_privileges.sh
    csw/mgar/gar/v2/lib/web/pkgdb_web.py

Modified: csw/mgar/gar/v2/lib/python/database.py
===================================================================
--- csw/mgar/gar/v2/lib/python/database.py	2013-04-28 18:23:02 UTC (rev 20892)
+++ csw/mgar/gar/v2/lib/python/database.py	2013-04-28 18:23:19 UTC (rev 20893)
@@ -10,7 +10,7 @@
 import system_pkgmap
 
 CONFIG_DB_SCHEMA = "db_schema_version"
-DB_SCHEMA_VERSION = 12L
+DB_SCHEMA_VERSION = 13L
 TABLES_THAT_NEED_UPDATES = (m.CswFile,)
 
 # This list of tables is sensitive to the order in which tables are created.
@@ -25,6 +25,7 @@
           m.OsRelease,
           m.Pkginst,
           m.Srv4FileStatsBlob,
+          m.CatalogGenData,
           m.Srv4FileStats,
           m.CheckpkgErrorTag,
 ) + TABLES_THAT_NEED_UPDATES + (

Modified: csw/mgar/gar/v2/lib/python/generate_catalog_file.py
===================================================================
--- csw/mgar/gar/v2/lib/python/generate_catalog_file.py	2013-04-28 18:23:02 UTC (rev 20892)
+++ csw/mgar/gar/v2/lib/python/generate_catalog_file.py	2013-04-28 18:23:19 UTC (rev 20893)
@@ -50,15 +50,14 @@
     return self._catalog
 
   def ComposeCatalogLine(self, pkg_data):
-    deps_data = self.pkgcache.GetDeps(pkg_data["md5_sum"])
-    pkg_stats = self.pkgcache.GetPkgstats(pkg_data["md5_sum"])
-    i_deps = pkg_stats["i_depends"]
+    catalog_data = self.rest_client.GetCatalogData(pkg_data["md5_sum"])
+    i_deps = catalog_data["i_deps"]
     if i_deps:
       i_deps = "|".join(i_deps)
     else:
       i_deps = "none"
     deps = []
-    for dep, _ in deps_data["deps"]:
+    for dep, _ in catalog_data["deps"]:
       if "CSW" in dep:
         deps.append(dep)
     if deps:
@@ -68,7 +67,7 @@
     items = [
         pkg_data["catalogname"],
         pkg_data["version_string"],
-        deps_data["pkgname"],
+        catalog_data["pkgname"],
         pkg_data["basename"],
         pkg_data["md5_sum"],
         unicode(pkg_data["size"]),
@@ -77,7 +76,6 @@
         i_deps]
     return " ".join(items)
 
-
   def GenerateCatalog(self, out_dir):
     out_file = os.path.join(out_dir, CATALOG_FN)
     if os.path.exists(out_file):
@@ -96,8 +94,8 @@
     lines = []
     if self.catalog:
       for pkg_data in self.catalog:
-        pkg_stats = self.pkgcache.GetPkgstats(pkg_data["md5_sum"])
-        lines.append(pkg_stats["pkginfo"]["NAME"])
+        catalog_data = self.pkgcache.GetDeps(pkg_data["md5_sum"])
+        lines.append(catalog_data['pkginfo_name'])
     with open(out_file, "w") as fd:
       fd.write("\n".join(lines))
 

Modified: csw/mgar/gar/v2/lib/python/models.py
===================================================================
--- csw/mgar/gar/v2/lib/python/models.py	2013-04-28 18:23:02 UTC (rev 20892)
+++ csw/mgar/gar/v2/lib/python/models.py	2013-04-28 18:23:19 UTC (rev 20893)
@@ -150,6 +150,19 @@
   srv4_file = sqlobject.SingleJoin('Srv4FileStats')
 
 
+class CatalogGenData(sqlobject.SQLObject):
+  """Fields required to generate the catalog.
+
+  Having this smaller table lets us avoid fetching the main big data
+  structure.
+  """
+  deps = sqlobject.UnicodeCol(notNone=True, length=(2 ** 14 - 1))
+  i_deps = sqlobject.UnicodeCol(notNone=True, length=(2 ** 14 - 1))
+  pkginfo_name = sqlobject.UnicodeCol(notNone=True, length=(2 ** 14 - 1))
+  pkgname = sqlobject.UnicodeCol(default=None, length=250)
+  md5_sum = sqlobject.UnicodeCol(notNone=True, unique=True, length=32)
+
+
 class Srv4FileStats(sqlobject.SQLObject):
   """Represents a srv4 file.
 

Modified: csw/mgar/gar/v2/lib/python/package_stats.py
===================================================================
--- csw/mgar/gar/v2/lib/python/package_stats.py	2013-04-28 18:23:02 UTC (rev 20892)
+++ csw/mgar/gar/v2/lib/python/package_stats.py	2013-04-28 18:23:19 UTC (rev 20893)
@@ -348,6 +348,13 @@
     for override_dict in pkg_stats["overrides"]:
       m.CheckpkgOverride(srv4_file=db_pkg_stats,
                              **override_dict)
+    # Adding the catalog generation info.
+    catalog_gen_data = m.CatalogGenData(
+        md5_sum=pkg_stats["basic_stats"]["md5_sum"],
+        deps=cjson.encode(pkg_stats["depends"]),
+        pkgname=pkg_stats["basic_stats"]["pkgname"],
+        i_deps=cjson.encode(pkg_stats["i_depends"]),
+        pkginfo_name=pkg_stats["pkginfo"]["NAME"])
     return db_pkg_stats
 
   @classmethod

Modified: csw/mgar/gar/v2/lib/python/rest.py
===================================================================
--- csw/mgar/gar/v2/lib/python/rest.py	2013-04-28 18:23:02 UTC (rev 20892)
+++ csw/mgar/gar/v2/lib/python/rest.py	2013-04-28 18:23:19 UTC (rev 20893)
@@ -76,6 +76,16 @@
         # Other HTTP errors are should be thrown.
         raise
 
+  def GetCatalogData(self, md5_sum):
+    self.ValidateMd5(md5_sum)
+    url = self.rest_url + self.PKGDB_APP + "/srv4/%s/catalog-data/" % md5_sum
+    try:
+      data = urllib2.urlopen(url).read()
+      return cjson.decode(data)
+    except urllib2.HTTPError as e:
+      logging.warning("Could not fetch catalog data for %r: %r", url, e)
+      raise
+
   def GetMaintainerByMd5(self, md5_sum):
     self.ValidateMd5(md5_sum)
     pkg = self.GetPkgByMd5(md5_sum)
@@ -269,12 +279,11 @@
     if str(md5) in self.deps:
       return cjson.decode(self.deps[md5])
     else:
-      pkgstats = self.GetPkgstats(md5)
-      data = {"deps": pkgstats["depends"],
-              "pkgname": pkgstats["basic_stats"]["pkgname"]}
+      data = self.rest_client.GetCatalogData(md5)
       self.deps[md5] = cjson.encode(data)
       return data
 
+
 def GetUsernameAndPassword():
   username = os.environ["LOGNAME"]
   password = None

Modified: csw/mgar/gar/v2/lib/sh/db_privileges.sh
===================================================================
--- csw/mgar/gar/v2/lib/sh/db_privileges.sh	2013-04-28 18:23:02 UTC (rev 20892)
+++ csw/mgar/gar/v2/lib/sh/db_privileges.sh	2013-04-28 18:23:19 UTC (rev 20893)
@@ -41,6 +41,7 @@
 )
 TABLES_REGULAR=(
   pkginst
+  catalog_gen_data
   checkpkg_error_tag
   checkpkg_override
   srv4_depends_on

Modified: csw/mgar/gar/v2/lib/web/pkgdb_web.py
===================================================================
--- csw/mgar/gar/v2/lib/web/pkgdb_web.py	2013-04-28 18:23:02 UTC (rev 20892)
+++ csw/mgar/gar/v2/lib/web/pkgdb_web.py	2013-04-28 18:23:19 UTC (rev 20893)
@@ -51,6 +51,7 @@
   r'/rest/srv4/([0-9a-f]{32})/', 'RestSrv4Detail',
   r'/rest/srv4/([0-9a-f]{32})/files/', 'RestSrv4DetailFiles',
   r'/rest/srv4/([0-9a-f]{32})/pkg-stats/', 'RestSrv4FullStats',
+  r'/rest/srv4/([0-9a-f]{32})/catalog-data/', 'RestSvr4CatalogData',
 )
 urls = urls_html + urls_rest
 
@@ -503,6 +504,22 @@
     return cjson.encode(catalogs)
 
 
+class RestSvr4CatalogData(object):
+
+  def GET(self, md5_sum):
+    try:
+      cat_gen_data = models.CatalogGenData.selectBy(md5_sum=md5_sum).getOne()
+    except sqlobject.main.SQLObjectNotFound:
+      raise web.notfound("RestSvr4CatalogData for %r not found" % md5_sum)
+    simple_data = {
+        'deps': cjson.decode(cat_gen_data.deps),
+        'i_deps': cjson.decode(cat_gen_data.i_deps),
+        'pkginfo_name': cat_gen_data.pkginfo_name,
+        'pkgname': cat_gen_data.pkgname,
+    }
+    return cjson.encode(simple_data)
+
+
 web.webapi.internalerror = web.debugerror
 
 

This was sent by the SourceForge.net collaborative development platform, the world's largest Open Source development site.



More information about the devel mailing list