[csw-devel] SF.net SVN: gar:[21086] csw/mgar/gar/v2/lib
wahwah at users.sourceforge.net
wahwah at users.sourceforge.net
Wed May 15 01:15:17 CEST 2013
Revision: 21086
http://gar.svn.sourceforge.net/gar/?rev=21086&view=rev
Author: wahwah
Date: 2013-05-14 23:15:16 +0000 (Tue, 14 May 2013)
Log Message:
-----------
pkgdb: Faster catalog generation
Instead of lots of back-and-forth over the REST interface, we now have
a single endpoint which gives us complete catalog information. All we need to
do is to write that to a text file and call it a catalog.
The server side has to do a little more work, for example, filtering out all
non-CSW packages from dependencies.
Modified Paths:
--------------
csw/mgar/gar/v2/lib/python/generate_catalog_file.py
csw/mgar/gar/v2/lib/python/generate_catalog_file_test.py
csw/mgar/gar/v2/lib/python/models.py
csw/mgar/gar/v2/lib/python/representations.py
csw/mgar/gar/v2/lib/python/rest.py
csw/mgar/gar/v2/lib/web/pkgdb_web.py
Modified: csw/mgar/gar/v2/lib/python/generate_catalog_file.py
===================================================================
--- csw/mgar/gar/v2/lib/python/generate_catalog_file.py 2013-05-14 20:59:53 UTC (rev 21085)
+++ csw/mgar/gar/v2/lib/python/generate_catalog_file.py 2013-05-14 23:15:16 UTC (rev 21086)
@@ -27,6 +27,7 @@
import logging
import sys
import datetime
+import representations
class Error(Exception):
@@ -35,81 +36,61 @@
class CatalogFileGenerator(object):
def __init__(self, catrel, arch, osrel,
- pkgcache=None, rest_client=None):
+ rest_client=None):
self.catrel = catrel
self.arch = arch
self.osrel = osrel
home_dir = os.environ['HOME']
- self.pkgcache = pkgcache or rest.CachedPkgstats(os.path.join(home_dir, "pkgstats"))
self.rest_client = rest_client or rest.RestClient()
self._catalog = None
@property
def catalog(self):
if not self._catalog:
- self._catalog = self.rest_client.GetCatalog(self.catrel, self.arch, self.osrel)
+ self._catalog = self.rest_client.GetCatalogForGeneration(self.catrel,
+ self.arch,
+ self.osrel)
+ try:
+ self._catalog = [representations.CatalogEntry._make(x)
+ for x in self._catalog]
+ except TypeError:
+ print self._catalog
+ raise
return self._catalog
- def ComposeCatalogLine(self, pkg_data):
- catalog_data = self.rest_client.GetCatalogData(pkg_data["md5_sum"])
- i_deps = catalog_data["i_deps"]
- if i_deps:
- i_deps = "|".join(i_deps)
- else:
- i_deps = "none"
- deps = []
- for dep, _ in catalog_data["deps"]:
- if "CSW" in dep:
- deps.append(dep)
- if deps:
- deps = "|".join(deps)
- else:
- deps = "none"
- items = [
- pkg_data["catalogname"],
- pkg_data["version_string"],
- catalog_data["pkgname"],
- pkg_data["basename"],
- pkg_data["md5_sum"],
- unicode(pkg_data["size"]),
- deps,
- "none",
- i_deps]
+ def ComposeCatalogLine(self, catalog_entry):
+ items = tuple(catalog_entry)[:9]
return " ".join(items)
def GenerateCatalog(self, out_dir):
- out_file = os.path.join(out_dir, CATALOG_FN)
- if os.path.exists(out_file):
- raise Error("File %s already exists." % out_file)
- lines = self._GenerateCatalogAsLines()
- with open(out_file, "w") as fd:
+ out_catalog = os.path.join(out_dir, CATALOG_FN)
+ out_desc = os.path.join(out_dir, DESC_FN)
+ if os.path.exists(out_catalog):
+ raise Error("File %s already exists." % out_catalog)
+ if os.path.exists(out_desc):
+ raise Error("File %s already exists." % out_desc)
+ lines, descriptions = self._GenerateCatalogAsLines()
+ with open(out_catalog, "w") as fd:
fd.write("\n".join(lines).encode('utf-8'))
+ with open(out_desc, "w") as fd:
+ fd.write("\n".join(descriptions).encode('utf-8'))
def _GenerateCatalogAsLines(self):
"""Return the complete catalog as a list of lines."""
lines = []
- lines.append("# CREATIONDATE " + datetime.datetime.utcnow().replace(microsecond=0).isoformat() + "Z")
+ descriptions = []
+ date_iso = datetime.datetime.utcnow().replace(microsecond=0).isoformat()
+ lines.append("# CREATIONDATE %sZ" % date_iso)
# Potential additional lines might go here.
# lines.append("...")
if self.catalog: # the catalog might be None
- for pkg_data in self.catalog:
- lines.append(self.ComposeCatalogLine(pkg_data))
- return lines
+ for catalog_entry in self.catalog:
+ lines.append(self.ComposeCatalogLine(catalog_entry))
+ descriptions.append(catalog_entry.desc)
+ return lines, descriptions
- def GenerateDescriptions(self, out_dir):
- out_file = os.path.join(out_dir, DESC_FN)
- if os.path.exists(out_file):
- raise Error("File %s already exists." % out_file)
- lines = []
- if self.catalog:
- for pkg_data in self.catalog:
- catalog_data = self.pkgcache.GetDeps(pkg_data["md5_sum"])
- lines.append(catalog_data['pkginfo_name'])
- with open(out_file, "w") as fd:
- fd.write("\n".join(lines).encode('utf-8'))
-
def main():
logging.basicConfig(level=logging.DEBUG)
parser = optparse.OptionParser()
@@ -120,7 +101,6 @@
options, args = parser.parse_args()
cfg = CatalogFileGenerator(options.catrel, options.arch, options.osrel)
cfg.GenerateCatalog(options.out_dir)
- cfg.GenerateDescriptions(options.out_dir)
if __name__ == '__main__':
Modified: csw/mgar/gar/v2/lib/python/generate_catalog_file_test.py
===================================================================
--- csw/mgar/gar/v2/lib/python/generate_catalog_file_test.py 2013-05-14 20:59:53 UTC (rev 21085)
+++ csw/mgar/gar/v2/lib/python/generate_catalog_file_test.py 2013-05-14 23:15:16 UTC (rev 21086)
@@ -9,28 +9,15 @@
from lib.python import generate_catalog_file
-PKG_DATA_1 = {
- "basename": "389_admin-1.1.29,REV=2012.05.02-SunOS5.10-sparc-CSW.pkg.gz",
- "catalogname": "389_admin",
- "file_basename": "389_admin-1.1.29,REV=2012.05.02-SunOS5.10-sparc-CSW.pkg.gz",
- "md5_sum": "fdb7912713da36afcbbe52266c15cb3f",
- "mtime": "2012-05-02 12:06:38",
- "rev": "2012.05.02",
- "size": 395802,
- "version": "1.1.29,REV=2012.05.02",
- "version_string": "1.1.29,REV=2012.05.02"
-}
+PKG_DATA_1 = [
+ ["389_admin", "1.1.29,REV=2012.05.02",
+ "CSW389-admin-mock",
+ "389_admin-1.1.29,REV=2012.05.02-SunOS5.10-sparc-CSW.pkg.gz",
+ "fdb7912713da36afcbbe52266c15cb3f",
+ "395802", "CSWfoo|CSWbar", "none", "none",
+ "389_admin - The 389 LDAP server Admin Tools"],
+]
-FAKE_CATALOG_DATA = {
- "deps": [
- ["CSWfoo", ""],
- ["CSWbar", ""],
- ],
- "i_deps": [],
- "pkginfo_name": "389_admin - The 389 LDAP server Admin Tools",
- "pkgname": "CSW389-admin-mock",
-}
-
EXPECTED_LINE = ("389_admin 1.1.29,REV=2012.05.02 CSW389-admin-mock "
"389_admin-1.1.29,REV=2012.05.02-SunOS5.10-sparc-CSW.pkg.gz "
"fdb7912713da36afcbbe52266c15cb3f 395802 CSWfoo|CSWbar "
@@ -39,7 +26,6 @@
class CatalogFileGeneratorUnitTest(mox.MoxTestBase, unittest.TestCase):
def testComposeCatalogLineBasic(self):
- mock_pkgcache = self.mox.CreateMock(rest.CachedPkgstats)
mock_rest = self.mox.CreateMock(rest.RestClient)
# Catalog format:
# http://wiki.opencsw.org/catalog-format
@@ -51,14 +37,11 @@
cfg = generate_catalog_file.CatalogFileGenerator("dublin",
"sparc",
"SunOS5.10",
- mock_pkgcache, mock_rest)
- md5_sum = 'fdb7912713da36afcbbe52266c15cb3f'
- mock_rest.GetCatalogData(md5_sum).AndReturn(FAKE_CATALOG_DATA)
+ mock_rest)
self.mox.ReplayAll()
- self.assertEquals(EXPECTED_LINE, cfg.ComposeCatalogLine(PKG_DATA_1))
+ self.assertEquals(EXPECTED_LINE, cfg.ComposeCatalogLine(PKG_DATA_1[0]))
- def testGenerateCatalogAsLines(self):
- mock_pkgcache = self.mox.CreateMock(rest.CachedPkgstats)
+ def testGenerateCatalogEmpty(self):
mock_rest = self.mox.CreateMock(rest.RestClient)
fake_datetime = datetime.datetime(year=2013, month=4, day=1,
hour=11, minute=11, second=11)
@@ -67,47 +50,50 @@
cfg = generate_catalog_file.CatalogFileGenerator("dublin",
"sparc",
"SunOS5.10",
- mock_pkgcache, mock_rest)
- md5_sum = 'fdb7912713da36afcbbe52266c15cb3f'
- mock_rest.GetCatalog('dublin', 'sparc', 'SunOS5.10').AndReturn([PKG_DATA_1])
- mock_rest.GetCatalogData(md5_sum).AndReturn(FAKE_CATALOG_DATA)
+ mock_rest)
+ mock_rest.GetCatalogForGeneration('dublin', 'sparc', 'SunOS5.10').AndReturn([])
self.mox.ReplayAll()
- self.assertEquals([
+ catalog_lines, descriptions = cfg._GenerateCatalogAsLines()
+ expected_lines = [
"# CREATIONDATE 2013-04-01T11:11:11Z",
- EXPECTED_LINE,
- ], cfg._GenerateCatalogAsLines())
+ ]
+ self.assertEquals(expected_lines, catalog_lines)
+ self.assertEquals([], descriptions)
- def testGenerateCatalog(self):
- mock_pkgcache = self.mox.CreateMock(rest.CachedPkgstats)
+ def testGenerateCatalogAsLines(self):
mock_rest = self.mox.CreateMock(rest.RestClient)
- self.mox.StubOutWithMock(__builtin__, 'open')
+ fake_datetime = datetime.datetime(year=2013, month=4, day=1,
+ hour=11, minute=11, second=11)
+ self.mox.StubOutWithMock(datetime, 'datetime')
+ datetime.datetime.utcnow().AndReturn(fake_datetime)
cfg = generate_catalog_file.CatalogFileGenerator("dublin",
"sparc",
"SunOS5.10",
- mock_pkgcache, mock_rest)
- mock_rest.GetCatalog('dublin', 'sparc', 'SunOS5.10').AndReturn([PKG_DATA_1])
- md5_sum = 'fdb7912713da36afcbbe52266c15cb3f'
- mock_rest.GetCatalogData(md5_sum).AndReturn(FAKE_CATALOG_DATA)
- fake_file = io.BytesIO()
- open('fake-dir/catalog', 'w').AndReturn(fake_file)
+ mock_rest)
+ mock_rest.GetCatalogForGeneration('dublin', 'sparc', 'SunOS5.10').AndReturn(PKG_DATA_1)
self.mox.ReplayAll()
- cfg.GenerateCatalog('fake-dir')
+ catalog_lines, descriptions = cfg._GenerateCatalogAsLines()
+ expected_lines = [
+ "# CREATIONDATE 2013-04-01T11:11:11Z",
+ EXPECTED_LINE,
+ ]
+ self.assertEquals(expected_lines, catalog_lines)
+ self.assertEquals(['389_admin - The 389 LDAP server Admin Tools'], descriptions)
- def testGenerateDescriptions(self):
- mock_pkgcache = self.mox.CreateMock(rest.CachedPkgstats)
+ def testGenerateCatalog(self):
mock_rest = self.mox.CreateMock(rest.RestClient)
self.mox.StubOutWithMock(__builtin__, 'open')
cfg = generate_catalog_file.CatalogFileGenerator("dublin",
"sparc",
"SunOS5.10",
- mock_pkgcache, mock_rest)
- md5_sum = 'fdb7912713da36afcbbe52266c15cb3f'
- mock_pkgcache.GetDeps(md5_sum).AndReturn(FAKE_CATALOG_DATA)
- mock_rest.GetCatalog('dublin', 'sparc', 'SunOS5.10').AndReturn([PKG_DATA_1])
+ mock_rest)
+ mock_rest.GetCatalogForGeneration('dublin', 'sparc', 'SunOS5.10').AndReturn(PKG_DATA_1)
fake_file = io.BytesIO()
- open('fake-dir/descriptions', 'w').AndReturn(fake_file)
+ fake_desc_file = io.BytesIO()
+ open('fake-dir/catalog', 'w').AndReturn(fake_file)
+ open('fake-dir/descriptions', 'w').AndReturn(fake_desc_file)
self.mox.ReplayAll()
- cfg.GenerateDescriptions('fake-dir')
+ cfg.GenerateCatalog('fake-dir')
if __name__ == '__main__':
Modified: csw/mgar/gar/v2/lib/python/models.py
===================================================================
--- csw/mgar/gar/v2/lib/python/models.py 2013-05-14 20:59:53 UTC (rev 21085)
+++ csw/mgar/gar/v2/lib/python/models.py 2013-05-14 23:15:16 UTC (rev 21086)
@@ -511,9 +511,15 @@
Srv4FileStats.q.use_to_generate_catalogs==True,
)
select = sqlbuilder.Select(
- ['catalogname', 'version_string', 'pkgname', 'basename',
+ ['catalogname',
+ 'version_string',
+ 'pkgname',
+ 'basename',
'srv4_file_stats.md5_sum',
- 'size', 'deps', 'i_deps'],
+ 'size',
+ 'deps',
+ 'i_deps',
+ 'pkginfo_name'],
where=where,
orderBy='catalogname',
join=join)
Modified: csw/mgar/gar/v2/lib/python/representations.py
===================================================================
--- csw/mgar/gar/v2/lib/python/representations.py 2013-05-14 20:59:53 UTC (rev 21085)
+++ csw/mgar/gar/v2/lib/python/representations.py 2013-05-14 23:15:16 UTC (rev 21086)
@@ -3,4 +3,4 @@
# Full catalog entry, enough to write a line of a catalog file.
CatalogEntry = collections.namedtuple(
'CatalogEntry', 'catalogname version pkgname basename '
- 'md5_sum size deps category i_deps')
+ 'md5_sum size deps category i_deps desc')
Modified: csw/mgar/gar/v2/lib/python/rest.py
===================================================================
--- csw/mgar/gar/v2/lib/python/rest.py 2013-05-14 20:59:53 UTC (rev 21085)
+++ csw/mgar/gar/v2/lib/python/rest.py 2013-05-14 23:15:16 UTC (rev 21086)
@@ -247,7 +247,13 @@
logging.debug("SavePkgstats(): url=%r", url)
return self._CurlPut(url, [('pkgstats', cjson.encode(pkgstats))])
+ def GetCatalogForGeneration(self, catrel, arch, osrel):
+ url = (self.rest_url + self.PKGDB_APP + "/catalogs/%s/%s/%s/for-generation/"
+ % (catrel, arch, osrel))
+ data = urllib2.urlopen(url).read()
+ return cjson.decode(data)
+
class CachedPkgstats(object):
"""Class responsible for holding and caching package stats.
Modified: csw/mgar/gar/v2/lib/web/pkgdb_web.py
===================================================================
--- csw/mgar/gar/v2/lib/web/pkgdb_web.py 2013-05-14 20:59:53 UTC (rev 21085)
+++ csw/mgar/gar/v2/lib/web/pkgdb_web.py 2013-05-14 23:15:16 UTC (rev 21086)
@@ -527,16 +527,28 @@
osrel_name, arch_name, catrel_name)
rows = list(models.GetCatalogGenerationResult(sqo_osrel, sqo_arch, sqo_catrel))
def GenCatalogEntry(row):
+ i_deps = cjson.decode(row[7])
+ if i_deps:
+ i_deps_str = "|".join(i_deps)
+ else:
+ i_deps_str = "none"
+ deps_with_desc = cjson.decode(row[6])
+ deps = [x[0] for x in deps_with_desc if x[0].startswith('CSW')]
+ if deps:
+ deps_str = '|'.join(deps)
+ else:
+ deps_str = "none"
entry = representations.CatalogEntry(
- catalogname=row[0],
- version=row[1],
- pkgname=row[2],
- basename=row[3],
- md5_sum=row[4],
- size=row[5],
- deps="|".join([x[0] for x in cjson.decode(row[6])]),
- category="none",
- i_deps="|".join(cjson.decode(row[7])),
+ catalogname=row[0], # 0
+ version=row[1], # 1
+ pkgname=row[2], # 2
+ basename=row[3], # 3
+ md5_sum=row[4], # 4
+ size=str(row[5]), # 5
+ deps=deps_str, # 6
+ category="none", # 7
+ i_deps=i_deps_str, # 8
+ desc=row[8], # 9
)
return entry
entries_list = [GenCatalogEntry(row) for row in rows]
This was sent by the SourceForge.net collaborative development platform, the world's largest Open Source development site.
More information about the devel mailing list