[csw-devel] SF.net SVN: gar:[21086] csw/mgar/gar/v2/lib

wahwah at users.sourceforge.net wahwah at users.sourceforge.net
Wed May 15 01:15:17 CEST 2013


Revision: 21086
          http://gar.svn.sourceforge.net/gar/?rev=21086&view=rev
Author:   wahwah
Date:     2013-05-14 23:15:16 +0000 (Tue, 14 May 2013)
Log Message:
-----------
pkgdb: Faster catalog generation

Instead of lots of back-and-forth over the REST interface, we now have
a single endpoint which gives us complete catalog information. All we need to
do is to write that to a text file and call it a catalog.

The server side has to do a little more work, for example, filtering out all
non-CSW packages from dependencies.

Modified Paths:
--------------
    csw/mgar/gar/v2/lib/python/generate_catalog_file.py
    csw/mgar/gar/v2/lib/python/generate_catalog_file_test.py
    csw/mgar/gar/v2/lib/python/models.py
    csw/mgar/gar/v2/lib/python/representations.py
    csw/mgar/gar/v2/lib/python/rest.py
    csw/mgar/gar/v2/lib/web/pkgdb_web.py

Modified: csw/mgar/gar/v2/lib/python/generate_catalog_file.py
===================================================================
--- csw/mgar/gar/v2/lib/python/generate_catalog_file.py	2013-05-14 20:59:53 UTC (rev 21085)
+++ csw/mgar/gar/v2/lib/python/generate_catalog_file.py	2013-05-14 23:15:16 UTC (rev 21086)
@@ -27,6 +27,7 @@
 import logging
 import sys
 import datetime
+import representations
 
 
 class Error(Exception):
@@ -35,81 +36,61 @@
 class CatalogFileGenerator(object):
 
   def __init__(self, catrel, arch, osrel,
-               pkgcache=None, rest_client=None):
+               rest_client=None):
     self.catrel = catrel
     self.arch = arch
     self.osrel = osrel
     home_dir = os.environ['HOME']
-    self.pkgcache = pkgcache or rest.CachedPkgstats(os.path.join(home_dir, "pkgstats"))
     self.rest_client = rest_client or rest.RestClient()
     self._catalog = None
 
   @property
   def catalog(self):
     if not self._catalog:
-      self._catalog = self.rest_client.GetCatalog(self.catrel, self.arch, self.osrel)
+      self._catalog = self.rest_client.GetCatalogForGeneration(self.catrel,
+                                                               self.arch,
+                                                               self.osrel)
+      try:
+        self._catalog = [representations.CatalogEntry._make(x)
+                         for x in self._catalog]
+      except TypeError:
+        print self._catalog
+        raise
     return self._catalog
 
-  def ComposeCatalogLine(self, pkg_data):
-    catalog_data = self.rest_client.GetCatalogData(pkg_data["md5_sum"])
-    i_deps = catalog_data["i_deps"]
-    if i_deps:
-      i_deps = "|".join(i_deps)
-    else:
-      i_deps = "none"
-    deps = []
-    for dep, _ in catalog_data["deps"]:
-      if "CSW" in dep:
-        deps.append(dep)
-    if deps:
-      deps = "|".join(deps)
-    else:
-      deps = "none"
-    items = [
-        pkg_data["catalogname"],
-        pkg_data["version_string"],
-        catalog_data["pkgname"],
-        pkg_data["basename"],
-        pkg_data["md5_sum"],
-        unicode(pkg_data["size"]),
-        deps,
-        "none",
-        i_deps]
+  def ComposeCatalogLine(self, catalog_entry):
+    items = tuple(catalog_entry)[:9]
     return " ".join(items)
 
   def GenerateCatalog(self, out_dir):
-    out_file = os.path.join(out_dir, CATALOG_FN)
-    if os.path.exists(out_file):
-      raise Error("File %s already exists." % out_file)
-    lines = self._GenerateCatalogAsLines()
-    with open(out_file, "w") as fd:
+    out_catalog = os.path.join(out_dir, CATALOG_FN)
+    out_desc = os.path.join(out_dir, DESC_FN)
+    if os.path.exists(out_catalog):
+      raise Error("File %s already exists." % out_catalog)
+    if os.path.exists(out_desc):
+      raise Error("File %s already exists." % out_desc)
+    lines, descriptions = self._GenerateCatalogAsLines()
+    with open(out_catalog, "w") as fd:
       fd.write("\n".join(lines).encode('utf-8'))
+    with open(out_desc, "w") as fd:
+      fd.write("\n".join(descriptions).encode('utf-8'))
 
   def _GenerateCatalogAsLines(self):
     """Return the complete catalog as a list of lines."""
     lines = []
-    lines.append("# CREATIONDATE " + datetime.datetime.utcnow().replace(microsecond=0).isoformat() + "Z")
+    descriptions = []
+    date_iso = datetime.datetime.utcnow().replace(microsecond=0).isoformat()
+    lines.append("# CREATIONDATE %sZ" % date_iso)
 
     # Potential additional lines might go here.
     # lines.append("...")
     if self.catalog:  # the catalog might be None
-      for pkg_data in self.catalog:
-        lines.append(self.ComposeCatalogLine(pkg_data))
-    return lines
+      for catalog_entry in self.catalog:
+        lines.append(self.ComposeCatalogLine(catalog_entry))
+        descriptions.append(catalog_entry.desc)
+    return lines, descriptions
 
-  def GenerateDescriptions(self, out_dir):
-    out_file = os.path.join(out_dir, DESC_FN)
-    if os.path.exists(out_file):
-      raise Error("File %s already exists." % out_file)
-    lines = []
-    if self.catalog:
-      for pkg_data in self.catalog:
-        catalog_data = self.pkgcache.GetDeps(pkg_data["md5_sum"])
-        lines.append(catalog_data['pkginfo_name'])
-    with open(out_file, "w") as fd:
-      fd.write("\n".join(lines).encode('utf-8'))
 
-
 def main():
   logging.basicConfig(level=logging.DEBUG)
   parser = optparse.OptionParser()
@@ -120,7 +101,6 @@
   options, args = parser.parse_args()
   cfg = CatalogFileGenerator(options.catrel, options.arch, options.osrel)
   cfg.GenerateCatalog(options.out_dir)
-  cfg.GenerateDescriptions(options.out_dir)
 
 
 if __name__ == '__main__':

Modified: csw/mgar/gar/v2/lib/python/generate_catalog_file_test.py
===================================================================
--- csw/mgar/gar/v2/lib/python/generate_catalog_file_test.py	2013-05-14 20:59:53 UTC (rev 21085)
+++ csw/mgar/gar/v2/lib/python/generate_catalog_file_test.py	2013-05-14 23:15:16 UTC (rev 21086)
@@ -9,28 +9,15 @@
 
 from lib.python import generate_catalog_file
 
-PKG_DATA_1 = {
-        "basename": "389_admin-1.1.29,REV=2012.05.02-SunOS5.10-sparc-CSW.pkg.gz",
-        "catalogname": "389_admin",
-        "file_basename": "389_admin-1.1.29,REV=2012.05.02-SunOS5.10-sparc-CSW.pkg.gz",
-        "md5_sum": "fdb7912713da36afcbbe52266c15cb3f",
-        "mtime": "2012-05-02 12:06:38",
-        "rev": "2012.05.02",
-        "size": 395802,
-        "version": "1.1.29,REV=2012.05.02",
-        "version_string": "1.1.29,REV=2012.05.02"
-}
+PKG_DATA_1 = [
+        ["389_admin", "1.1.29,REV=2012.05.02",
+         "CSW389-admin-mock",
+         "389_admin-1.1.29,REV=2012.05.02-SunOS5.10-sparc-CSW.pkg.gz",
+         "fdb7912713da36afcbbe52266c15cb3f",
+         "395802", "CSWfoo|CSWbar", "none", "none",
+         "389_admin - The 389 LDAP server Admin Tools"],
+]
 
-FAKE_CATALOG_DATA = {
-    "deps": [
-      ["CSWfoo", ""],
-      ["CSWbar", ""],
-    ],
-    "i_deps": [],
-    "pkginfo_name": "389_admin - The 389 LDAP server Admin Tools",
-    "pkgname": "CSW389-admin-mock",
-}
-
 EXPECTED_LINE = ("389_admin 1.1.29,REV=2012.05.02 CSW389-admin-mock "
                  "389_admin-1.1.29,REV=2012.05.02-SunOS5.10-sparc-CSW.pkg.gz "
                  "fdb7912713da36afcbbe52266c15cb3f 395802 CSWfoo|CSWbar "
@@ -39,7 +26,6 @@
 class CatalogFileGeneratorUnitTest(mox.MoxTestBase, unittest.TestCase):
 
   def testComposeCatalogLineBasic(self):
-    mock_pkgcache = self.mox.CreateMock(rest.CachedPkgstats)
     mock_rest = self.mox.CreateMock(rest.RestClient)
     # Catalog format:
     #   http://wiki.opencsw.org/catalog-format
@@ -51,14 +37,11 @@
     cfg = generate_catalog_file.CatalogFileGenerator("dublin",
                                                      "sparc",
                                                      "SunOS5.10",
-                                                     mock_pkgcache, mock_rest)
-    md5_sum = 'fdb7912713da36afcbbe52266c15cb3f'
-    mock_rest.GetCatalogData(md5_sum).AndReturn(FAKE_CATALOG_DATA)
+                                                     mock_rest)
     self.mox.ReplayAll()
-    self.assertEquals(EXPECTED_LINE, cfg.ComposeCatalogLine(PKG_DATA_1))
+    self.assertEquals(EXPECTED_LINE, cfg.ComposeCatalogLine(PKG_DATA_1[0]))
 
-  def testGenerateCatalogAsLines(self):
-    mock_pkgcache = self.mox.CreateMock(rest.CachedPkgstats)
+  def testGenerateCatalogEmpty(self):
     mock_rest = self.mox.CreateMock(rest.RestClient)
     fake_datetime = datetime.datetime(year=2013, month=4, day=1,
                                       hour=11, minute=11, second=11)
@@ -67,47 +50,50 @@
     cfg = generate_catalog_file.CatalogFileGenerator("dublin",
                                                      "sparc",
                                                      "SunOS5.10",
-                                                     mock_pkgcache, mock_rest)
-    md5_sum = 'fdb7912713da36afcbbe52266c15cb3f'
-    mock_rest.GetCatalog('dublin', 'sparc', 'SunOS5.10').AndReturn([PKG_DATA_1])
-    mock_rest.GetCatalogData(md5_sum).AndReturn(FAKE_CATALOG_DATA)
+                                                     mock_rest)
+    mock_rest.GetCatalogForGeneration('dublin', 'sparc', 'SunOS5.10').AndReturn([])
     self.mox.ReplayAll()
-    self.assertEquals([
+    catalog_lines, descriptions = cfg._GenerateCatalogAsLines()
+    expected_lines = [
       "# CREATIONDATE 2013-04-01T11:11:11Z",
-      EXPECTED_LINE,
-    ], cfg._GenerateCatalogAsLines())
+    ]
+    self.assertEquals(expected_lines, catalog_lines)
+    self.assertEquals([], descriptions)
 
-  def testGenerateCatalog(self):
-    mock_pkgcache = self.mox.CreateMock(rest.CachedPkgstats)
+  def testGenerateCatalogAsLines(self):
     mock_rest = self.mox.CreateMock(rest.RestClient)
-    self.mox.StubOutWithMock(__builtin__, 'open')
+    fake_datetime = datetime.datetime(year=2013, month=4, day=1,
+                                      hour=11, minute=11, second=11)
+    self.mox.StubOutWithMock(datetime, 'datetime')
+    datetime.datetime.utcnow().AndReturn(fake_datetime)
     cfg = generate_catalog_file.CatalogFileGenerator("dublin",
                                                      "sparc",
                                                      "SunOS5.10",
-                                                     mock_pkgcache, mock_rest)
-    mock_rest.GetCatalog('dublin', 'sparc', 'SunOS5.10').AndReturn([PKG_DATA_1])
-    md5_sum = 'fdb7912713da36afcbbe52266c15cb3f'
-    mock_rest.GetCatalogData(md5_sum).AndReturn(FAKE_CATALOG_DATA)
-    fake_file = io.BytesIO()
-    open('fake-dir/catalog', 'w').AndReturn(fake_file)
+                                                     mock_rest)
+    mock_rest.GetCatalogForGeneration('dublin', 'sparc', 'SunOS5.10').AndReturn(PKG_DATA_1)
     self.mox.ReplayAll()
-    cfg.GenerateCatalog('fake-dir')
+    catalog_lines, descriptions = cfg._GenerateCatalogAsLines()
+    expected_lines = [
+      "# CREATIONDATE 2013-04-01T11:11:11Z",
+      EXPECTED_LINE,
+    ]
+    self.assertEquals(expected_lines, catalog_lines)
+    self.assertEquals(['389_admin - The 389 LDAP server Admin Tools'], descriptions)
 
-  def testGenerateDescriptions(self):
-    mock_pkgcache = self.mox.CreateMock(rest.CachedPkgstats)
+  def testGenerateCatalog(self):
     mock_rest = self.mox.CreateMock(rest.RestClient)
     self.mox.StubOutWithMock(__builtin__, 'open')
     cfg = generate_catalog_file.CatalogFileGenerator("dublin",
                                                      "sparc",
                                                      "SunOS5.10",
-                                                     mock_pkgcache, mock_rest)
-    md5_sum = 'fdb7912713da36afcbbe52266c15cb3f'
-    mock_pkgcache.GetDeps(md5_sum).AndReturn(FAKE_CATALOG_DATA)
-    mock_rest.GetCatalog('dublin', 'sparc', 'SunOS5.10').AndReturn([PKG_DATA_1])
+                                                     mock_rest)
+    mock_rest.GetCatalogForGeneration('dublin', 'sparc', 'SunOS5.10').AndReturn(PKG_DATA_1)
     fake_file = io.BytesIO()
-    open('fake-dir/descriptions', 'w').AndReturn(fake_file)
+    fake_desc_file = io.BytesIO()
+    open('fake-dir/catalog', 'w').AndReturn(fake_file)
+    open('fake-dir/descriptions', 'w').AndReturn(fake_desc_file)
     self.mox.ReplayAll()
-    cfg.GenerateDescriptions('fake-dir')
+    cfg.GenerateCatalog('fake-dir')
 
 
 if __name__ == '__main__':

Modified: csw/mgar/gar/v2/lib/python/models.py
===================================================================
--- csw/mgar/gar/v2/lib/python/models.py	2013-05-14 20:59:53 UTC (rev 21085)
+++ csw/mgar/gar/v2/lib/python/models.py	2013-05-14 23:15:16 UTC (rev 21086)
@@ -511,9 +511,15 @@
         Srv4FileStats.q.use_to_generate_catalogs==True,
   )
   select = sqlbuilder.Select(
-      ['catalogname', 'version_string', 'pkgname', 'basename',
+      ['catalogname',
+       'version_string',
+       'pkgname',
+       'basename',
        'srv4_file_stats.md5_sum',
-       'size', 'deps', 'i_deps'],
+       'size',
+       'deps',
+       'i_deps',
+       'pkginfo_name'],
       where=where,
       orderBy='catalogname',
       join=join)

Modified: csw/mgar/gar/v2/lib/python/representations.py
===================================================================
--- csw/mgar/gar/v2/lib/python/representations.py	2013-05-14 20:59:53 UTC (rev 21085)
+++ csw/mgar/gar/v2/lib/python/representations.py	2013-05-14 23:15:16 UTC (rev 21086)
@@ -3,4 +3,4 @@
 # Full catalog entry, enough to write a line of a catalog file.
 CatalogEntry = collections.namedtuple(
     'CatalogEntry', 'catalogname version pkgname basename '
-                    'md5_sum size deps category i_deps')
+                    'md5_sum size deps category i_deps desc')

Modified: csw/mgar/gar/v2/lib/python/rest.py
===================================================================
--- csw/mgar/gar/v2/lib/python/rest.py	2013-05-14 20:59:53 UTC (rev 21085)
+++ csw/mgar/gar/v2/lib/python/rest.py	2013-05-14 23:15:16 UTC (rev 21086)
@@ -247,7 +247,13 @@
     logging.debug("SavePkgstats(): url=%r", url)
     return self._CurlPut(url, [('pkgstats', cjson.encode(pkgstats))])
 
+  def GetCatalogForGeneration(self, catrel, arch, osrel):
+    url = (self.rest_url + self.PKGDB_APP + "/catalogs/%s/%s/%s/for-generation/"
+           % (catrel, arch, osrel))
+    data = urllib2.urlopen(url).read()
+    return cjson.decode(data)
 
+
 class CachedPkgstats(object):
   """Class responsible for holding and caching package stats.
 

Modified: csw/mgar/gar/v2/lib/web/pkgdb_web.py
===================================================================
--- csw/mgar/gar/v2/lib/web/pkgdb_web.py	2013-05-14 20:59:53 UTC (rev 21085)
+++ csw/mgar/gar/v2/lib/web/pkgdb_web.py	2013-05-14 23:15:16 UTC (rev 21086)
@@ -527,16 +527,28 @@
         osrel_name, arch_name, catrel_name)
     rows = list(models.GetCatalogGenerationResult(sqo_osrel, sqo_arch, sqo_catrel))
     def GenCatalogEntry(row):
+      i_deps = cjson.decode(row[7])
+      if i_deps:
+        i_deps_str = "|".join(i_deps)
+      else:
+        i_deps_str = "none"
+      deps_with_desc = cjson.decode(row[6])
+      deps = [x[0] for x in deps_with_desc if x[0].startswith('CSW')]
+      if deps:
+        deps_str = '|'.join(deps)
+      else:
+        deps_str = "none"
       entry = representations.CatalogEntry(
-          catalogname=row[0],
-          version=row[1],
-          pkgname=row[2],
-          basename=row[3],
-          md5_sum=row[4],
-          size=row[5],
-          deps="|".join([x[0] for x in cjson.decode(row[6])]),
-          category="none",
-          i_deps="|".join(cjson.decode(row[7])),
+          catalogname=row[0],  # 0
+          version=row[1],      # 1
+          pkgname=row[2],      # 2
+          basename=row[3],     # 3
+          md5_sum=row[4],      # 4
+          size=str(row[5]),    # 5
+          deps=deps_str,       # 6
+          category="none",     # 7
+          i_deps=i_deps_str,   # 8
+          desc=row[8], # 9
       )
       return entry
     entries_list = [GenCatalogEntry(row) for row in rows]

This was sent by the SourceForge.net collaborative development platform, the world's largest Open Source development site.



More information about the devel mailing list