SF.net SVN: gar:[23102] csw/mgar/gar/v2/lib/python

wahwah at users.sourceforge.net wahwah at users.sourceforge.net
Sun Mar 2 12:16:35 CET 2014


Revision: 23102
          http://sourceforge.net/p/gar/code/23102
Author:   wahwah
Date:     2014-03-02 11:16:33 +0000 (Sun, 02 Mar 2014)
Log Message:
-----------
find-obsolete-pkgs: Adapt to the new code

Additional changes:

- Instead of sometimes using CatSubSet and sometimes pkgname, let's always use
  CatSubSet
- Split the code into smaller functions

TODO ideas:
1. Provide a per-maintainer report, so that maintainers know which packages
   they can drop or need to rebuild.
2. Use the for-generation REST endpoint, because it's fast, provides all
   the information necessary for the calculation, and avoids the problem of
   the reverse dependency cache being stale.

Modified Paths:
--------------
    csw/mgar/gar/v2/lib/python/catalog_notifier.py
    csw/mgar/gar/v2/lib/python/find_obsolete_pkgs.py
    csw/mgar/gar/v2/lib/python/safe_remove_package.py

Modified: csw/mgar/gar/v2/lib/python/catalog_notifier.py
===================================================================
--- csw/mgar/gar/v2/lib/python/catalog_notifier.py	2014-03-01 22:57:25 UTC (rev 23101)
+++ csw/mgar/gar/v2/lib/python/catalog_notifier.py	2014-03-02 11:16:33 UTC (rev 23102)
@@ -113,6 +113,7 @@
       )
       for label, pkg_list in labels_and_lists:
         for pkg in pkg_list:
+          # This is inefficient: it pulls down the whole package just to fetch the maintainer's email.
           maintainer = rest_client.GetMaintainerByMd5(pkg["md5sum"])
           maintainer_email = maintainer["maintainer_email"]
           pkgs_by_maintainer.setdefault(maintainer_email, {})

Modified: csw/mgar/gar/v2/lib/python/find_obsolete_pkgs.py
===================================================================
--- csw/mgar/gar/v2/lib/python/find_obsolete_pkgs.py	2014-03-01 22:57:25 UTC (rev 23101)
+++ csw/mgar/gar/v2/lib/python/find_obsolete_pkgs.py	2014-03-02 11:16:33 UTC (rev 23102)
@@ -1,110 +1,203 @@
 #!/opt/csw/bin/python -t -O
+"""Compare two catalog releases and show:
 
-''' Compare two different catalog releases and show:
-    - Which packages need rebuilding (so they don't depend on the _stub any more)
-    - Which _stub packages can be removed
-    - Which packages can declare incompatibility on the old packages, so that the old packages can be removed
+- Which packages need rebuilding (so they don't depend on the _stub any more)
+- Which _stub packages can be removed
+- Which packages can declare incompatibility on the old packages, so that the
+  old packages can be removed
 
-    set PYTHONPATH=/path/to/.buildsys/v2
-    alternatively can used: sys.path.append('/path/to/.buildsys/v2')
+set PYTHONPATH=/path/to/.buildsys/v2
+alternatively can used: sys.path.append('/path/to/.buildsys/v2')
 
-    ToDO:
-    - read the sub hint from catalog
-      * if stub has consumer -> rebuild
-      * if stub has no consumer and the stub is present in the old/"from" catalog -> remove
-      * if stub has no consumer and the stub does not yet present in the old/"from" catalog -> keep
-'''
+Overview: 
+- read the stub hint from catalog
+  * if any packages depend on the stub -> rebuild them
+  * if nothing depends on the stub and the stub is present in the old/"from"
+    catalog -> remove
+  * if nothing depends on the stub and the stub does not yet present in the
+    old/"from" catalog -> keep
 
-import optparse
-import pprint
+TODO:
+- Group packages to rebuild by maintainer, and provide a per-maintainer report.
+"""
+
+import cjson
 import gdbm
 import logging
-import sys
+import optparse
 import os
+import pprint
+import re
 import subprocess
+import sys
+
 from collections import namedtuple
-import re
-import cjson
 
-from lib.python import rest
 from lib.python import common_constants
 from lib.python import configuration
+from lib.python import rest
 from lib.python.safe_remove_package import RevDeps
 
-logging.basicConfig(format='%(asctime)s %(levelname)s:%(message)s')
+logging.basicConfig(format='%(levelname)s %(asctime)s %(filename)s:%(lineno)d %(message)s')
 logger = logging.getLogger(__name__)
 
-oldcatrel = ''
-newcatrel = ''
 datadir=configuration.CHECKPKG_DIR % os.environ
 # fn_revdep = os.path.join(datadir,'RevDeps_%s_%s_%s.json')
 fn_cat = os.path.join(datadir,'catalog_%s_%s_%s.json')
 fn_removelst = 'PkgsToRemoveFrom_%s_%s_%s.lst'
 fn_rebuildlst = 'PkgsToRebuildFrom_%s_%s_%s.lst'
-revdeplst = {}
 
-CatSubSet = namedtuple('CatSubSet','catalogname md5_sum version dependlist')
+CatSubSet = namedtuple('CatSubSet',
+                       'pkgname, catalogname, md5_sum, version, dependlist')
 
 class CompCatalog(object):
 
-    def __init__(self,name,arch,osrel):
-        self.rest_client = rest.RestClient()
+    def __init__(self, name, arch, osrel):
+        config = configuration.GetConfig()
+        username, password = rest.GetUsernameAndPassword()
+        self.rest_client = rest.RestClient(
+            pkgdb_url=config.get('rest', 'pkgdb'),
+            releases_url=config.get('rest', 'releases'),
+            username=username,
+            password=password)
+
         self.catrel = name
         self.arch = arch
         self.osrel = osrel
 
     def __getCat(self, name,arch,osrel):
         ''' get dependcy list from catalog, read cached list if available '''
-        catlst = {}
-        if os.path.exists(fn_cat % (name,osrel,arch)):
+        pkg_by_pkgname = {}
+        disk_cache_path = fn_cat % (name, osrel, arch)
+        if os.path.exists(disk_cache_path):
             logger.info('CompCatalog::getCat: use cached data: %s' % (fn_cat % (name,osrel,arch)))
-            with open(fn_cat % (name,osrel,arch), "r") as fd:
-                catlst = cjson.decode(fd.read())
+            with open(disk_cache_path) as fd:
+                data = cjson.decode(fd.read())
+            pkg_by_pkgname = {}
+            for pkgname, lst in data.iteritems():
+              lst[4] = tuple(lst[4])
+              pkg_by_pkgname[pkgname] = CatSubSet(*lst)
         else:
             cat = self.rest_client.GetCatalog(name,arch,osrel)
             for pkg in cat:
+                pkgitems = self.rest_client.GetPkgstatsByMd5(pkg['md5_sum'])
+                pkgname = pkgitems['basic_stats']['pkgname']
                 try:
-                    pkgitems = self.rest_client.GetPkgstatsByMd5(pkg['md5_sum'])
                     pkgdeplst = [ i[0] for i in pkgitems['depends']]
-                    catlst[pkgitems['basic_stats']['pkgname']] = CatSubSet(pkg['catalogname'],pkg['md5_sum'],pkg['version'],pkgdeplst)
-                except Exception as inst:
-                    logger.error("CompCatalog::getPkgStat: %s %s %s" , type(inst),pkg['catalogname'],pkg['md5_sum'])
-            with open(fn_cat % (name,osrel,arch), "w") as fd:
-                fd.write(cjson.encode(catlst))
-                logger.info('CompCatalog::getCat: write cache file: %s' % (fn_cat % (name,osrel,arch)))
-        return catlst
+                    pkg_by_pkgname[pkgname] = CatSubSet(pkgname, pkg['catalogname'],
+                                                        pkg['md5_sum'], pkg['version'],
+                                                        tuple(pkgdeplst))
+                except Exception as exc:
+                    logger.error("CompCatalog::getPkgStat: %s %s %s",
+                                 type(exc), pkg.catalogname, pkg.md5_sum)
+            with open(disk_cache_path, "w") as fd:
+                fd.write(cjson.encode(pkg_by_pkgname))
+                logger.info('CompCatalog::getCat: write cache file: %s'
+                            % (fn_cat % (name,osrel,arch)))
+        return pkg_by_pkgname
 
     def getCatalog(self):
         return self.__getCat(self.catrel,self.arch,self.osrel)
 
+
 def processCat(catrel,arch,osrel):
-    revdeplst = {}
-
-    logger.info("processCat: -> %s %s %s" % (catrel, arch, osrel))
+    logger.info("processCat: -> %r %r %r" % (catrel, arch, osrel))
     cc = CompCatalog(catrel,arch,osrel)
-    catlst = cc.getCatalog()
-    logger.info("processCat: iterate on %s" % (catrel))
+    pkg_by_pkgname = cc.getCatalog()
+    logger.info("processCat: iterate on %r" % (catrel))
 
-    ''' build reverse dependency list '''
-    rd = RevDeps()
-    for p in catlst.keys():
-        revdeplst[p] = rd.RevDepsByPkg(catrel,arch,osrel,p)
+    # build reverse dependency list
+    rev_deps_access = RevDeps()
+    rev_deps_by_pkg = {}
+    for pkgname in pkg_by_pkgname:
+        pkg = pkg_by_pkgname[pkgname]
+        # logger.info('pkg: %r', pkg)
+        # RevDepsByPkg returns only md5 sums and pkgnames, so we need to map
+        # them back to CatSubSet
+        revdeps = rev_deps_access.RevDepsByPkg(catrel, arch, osrel, pkgname)
+        # rev_deps_by_pkg[pkg] = [pkg_by_pkgname[pkgname] for _, pkgname in revdeps]
+        revdep_pkgs = []
+        for _, pkgname in revdeps:
+          revdep_pkg = pkg_by_pkgname[pkgname]
+          revdep_pkgs.append(revdep_pkg)
+        try:
+          rev_deps_by_pkg[pkg] = revdep_pkgs
+        except TypeError as exc:
+          logging.fatal('pkg: %r', pkg)
+          raise
 
-    logger.info("processCat: <- %s %s %s" % (catrel, arch, osrel))
-    return catlst, revdeplst
+    logger.info("processCat: <- %r %r %r" % (catrel, arch, osrel))
+    return pkg_by_pkgname, rev_deps_by_pkg
 
-def main():
+
+def ComputeRemoveAndRebuild(oldcatrel, newcatrel, arch, osrel):
+    newcatlst, newrevdeplst = processCat(newcatrel,arch,osrel)
+    oldcatlst, oldrevdeplst = processCat(oldcatrel,arch,osrel)
+
+    to_remove_candidates = []
+    rebuildlst = set()
+    logger.debug(' process dependecies in %s' % newcatrel)
+    for pkg in newrevdeplst:
+        # Checking stub packages
+        catalogname = pkg.catalogname
+        if catalogname.endswith("_stub"):
+            if not newrevdeplst[pkg]:
+                # Stub has no reverse dependencies, so it will be considered for removal.
+                to_remove_candidates.append(pkg)
+                logger.debug("{0}({1}) has no consumer".format(pkg.pkgname, catalogname))
+            else:
+                # Reverse dependencies of this stub need to be rebuilt.
+                for new_rev_dep in newrevdeplst[pkg]:
+                    is_newpkg_stub = new_rev_dep.catalogname.endswith("_stub")
+                    if new_rev_dep not in rebuildlst and not is_newpkg_stub:
+                          rebuildlst.add(new_rev_dep)
+                          logger.info(" REBUILD: {3}, it still depends on {0} ({1}) in {2}"
+                                .format(pkg.pkgname, pkg.catalogname, newcatrel,
+                                  '%s/%s' % (new_rev_dep.pkgname, new_rev_dep.catalogname)))
+    pkgs_to_drop = []
+    logger.debug(' process dependecies in %s' % newcatrel)
+
+    for pkg in to_remove_candidates:
+        if pkg in oldrevdeplst:
+            # this package is already a _stub in the old catalog,
+            # and therefore can be dropped
+            pkgs_to_drop.append(pkg)
+            logger.info(" DROP   : {0}/{1} from {2}"
+                        .format(pkg.pkgname, pkg.catalogname, newcatrel))
+        else:
+            logger.info(" KEEP   : {0} not a _stub package in {1}"
+                        .format(pkg.pkgname, oldcatrel))
+    return pkgs_to_drop, rebuildlst
+
+
+def WriteToTextFiles(pkgs_to_drop, pkgs_to_rebuild, newcatrel, arch, osrel):
+    print ('write %s' % (fn_removelst % (newcatrel,osrel,arch)))
+    with open(fn_removelst % (newcatrel, osrel, arch), "w") as fd:
+        for pkg in sorted(pkgs_to_drop, key=lambda p: p.catalogname):
+            fd.write(pkg.catalogname + '\n')
+    logger.info("number of packages to remove: %d" % len(pkgs_to_drop))
+    print ('write %s' % (fn_pkgs_to_rebuild % (newcatrel,osrel,arch)))
+    with open(fn_pkgs_to_rebuild % (newcatrel,osrel,arch), "w") as fd:
+        for pkg in sorted(pkgs_to_rebuild, key=lambda p: p.catalogname):
+            fd.write(pkg.catalogname+'\n')
+    logger.info("packages to rebuild: %d" % len(pkgs_to_rebuild))
+
+
+def GetCLIOptions():
     parser = optparse.OptionParser()
     parser.add_option("--debug", dest="debug", action="store_true")
     parser.add_option("--verbose", dest="verbose", action="store_true")
-    parser.add_option("--to-catalog-release", dest="newcatalog", default='kiel',
+    parser.add_option("--to-catalog-release", dest="newcatalog", default='unstable',
                     help='set name of catalog to fetch', metavar = 'catalogname')
-    parser.add_option("--from-catalog-release", dest="oldcatalog", default='dublin',
-                    help='set name of previous (older) catalog to fetch', metavar = 'old catalogname')
+    parser.add_option("--from-catalog-release", dest="oldcatalog", default='kiel',
+                    help='set name of previous (older) catalog to fetch',
+                    metavar = 'old catalogname')
     parser.add_option("--os-arch", dest="arch", default='i386',
-                    help='set name of architecture (sparc|i386) to fetch', metavar = 'OS Architecture')
+                    help='set name of architecture (sparc|i386) to fetch',
+                    metavar = 'OS Architecture')
     parser.add_option("--os-release", dest="osrel", default='SunOS5.10',
-                    help='set os release to fetch (SunOS5.10|SunOS5.11)', metavar = 'OS Release')
+                    help='set os release to fetch (SunOS5.10|SunOS5.11)',
+                    metavar = 'OS Release')
     options, args = parser.parse_args()
     opterror = False
     if options.verbose:
@@ -133,55 +226,15 @@
         opterror = True
     if opterror:
         sys.exit(1)
+    return oldcatrel, newcatrel, arch, osrel
 
-    newcatlst, newrevdeplst = processCat(newcatrel,arch,osrel)
-    oldcatlst, oldrevdeplst = processCat(oldcatrel,arch,osrel)
 
-    to_remove_candidates = []
-    rebuildlst = []
-    logger.debug(' process dependecies in %s' % newcatrel)
-    for p in newrevdeplst.keys():
-        ''' check stub packages '''
-        catalogname = CatSubSet(*newcatlst[p]).catalogname
-        if catalogname.endswith("_stub"):
-            if not newrevdeplst[p]:
-                to_remove_candidates.append(p)
-                logger.debug("{0}({1}) has no consumer".format(p,catalogname))
-            else:
-                for dp in newrevdeplst[p]:
-                    dpkg = dp[1]
-                    if dpkg not in rebuildlst and not CatSubSet(*newcatlst[dpkg]).catalogname.endswith("_stub"):
-                          rebuildlst.append(dpkg)
-                          logger.info(" REBUILD: {3}\n\t\t\tthese still use {0} ({1}) in {2}\n"
-                                .format(p,CatSubSet(*newcatlst[p]).catalogname,newcatrel,
-                                        [ dp[1] for dp in newrevdeplst[p]]))
-    reallyremovelst = []
-    logger.debug(' process dependecies in %s' % newcatrel)
+def main():
+    oldcatrel, newcatrel, arch, osrel = GetCLIOptions()
+    reallyremovelst, rebuildlst = ComputeRemoveAndRebuild(oldcatrel, newcatrel,
+                                                          arch, osrel)
+    WriteToTextFiles(reallyremovelst, rebuildlst, newcatrel, arch, osrel)
 
-    for p in to_remove_candidates:
-        if p in oldrevdeplst: # this package is already a _stub in oldcatalog -> drop
-            reallyremovelst.append(p)
-            logger.info(" DROP   : %s from %s" % (p,newcatrel))
-        else:
-            logger.info(" KEEP   : {0} not a _stub package in {1}".format(p,oldcatrel))
 
-    print ('write %s' % (fn_removelst % (newcatrel,osrel,arch)))
-    rmcnt = 0
-    remove_catnames = sorted(CatSubSet(*newcatlst[rp]).catalogname for rp in reallyremovelst)
-    with open(fn_removelst % (newcatrel,osrel,arch), "w") as fd:
-        for c in remove_catnames:
-            fd.write(c+'\n')
-            rmcnt = rmcnt + 1
-    logger.info("packages to remove: %d" % rmcnt)
-    print ('write %s' % (fn_rebuildlst % (newcatrel,osrel,arch)))
-    rbcnt = 0
-    rebuild_catnames = sorted(CatSubSet(*newcatlst[rp]).catalogname for rp in rebuildlst)
-    with open(fn_rebuildlst % (newcatrel,osrel,arch), "w") as fd:
-        for c in rebuild_catnames:
-            fd.write(c+'\n')
-            rbcnt = rbcnt + 1
-    logger.info("packages to rebuild: %d" % rbcnt)
-
-
 if __name__ == '__main__':
     main()

Modified: csw/mgar/gar/v2/lib/python/safe_remove_package.py
===================================================================
--- csw/mgar/gar/v2/lib/python/safe_remove_package.py	2014-03-01 22:57:25 UTC (rev 23101)
+++ csw/mgar/gar/v2/lib/python/safe_remove_package.py	2014-03-02 11:16:33 UTC (rev 23102)
@@ -45,9 +45,10 @@
 UNSTABLE = "unstable"
 EVERY_N_DOTS = 100
 datadir = configuration.CHECKPKG_DIR % os.environ
-fn_revdeps = os.path.join(datadir,'revdeps-%s-%s-%s.json')
-fn_pkgstatsdb = os.path.join(datadir,"pkgstats")
+fn_revdeps = os.path.join(datadir, 'revdeps-%s-%s-%s.json')
+fn_pkgstatsdb = os.path.join(datadir, 'pkgstats')
 
+
 class Error(Exception):
   """A generic error."""
 
@@ -62,10 +63,18 @@
     RevDepsSet = namedtuple('RevDepsSet','md5_sum pkgname')
   '''
 
-  def __init__(self):
+  def __init__(self, rest_client=None):
     self.cached_catalogs = {}
-    self.rest_client = rest.RestClient()
     self.cp = rest.CachedPkgstats(fn_pkgstatsdb)
+    self.rest_client = rest_client
+    if self.rest_client is None:
+      config = configuration.GetConfig()
+      username, password = rest.GetUsernameAndPassword()
+      self.rest_client = rest.RestClient(
+          pkgdb_url=config.get('rest', 'pkgdb'),
+          releases_url=config.get('rest', 'releases'),
+          username=username,
+          password=password)
 
   def MakeRevIndex(self, catrel, arch, osrel, quiet=False):
     key = (catrel, arch, osrel)
@@ -88,7 +97,7 @@
       short_data = self.cp.GetDeps(md5)
       pkgname = short_data["pkgname"]
       for dep_pkgname, _ in short_data["deps"]:
-        rev_dep_set = rev_deps.setdefault(dep_pkgname, list())
+        rev_dep_set = rev_deps.setdefault(dep_pkgname, [])
         rev_dep_set.append((md5, pkgname))
       if not quiet and not counter % EVERY_N_DOTS:
         sys.stdout.write(".")

This was sent by the SourceForge.net collaborative development platform, the world's largest Open Source development site.



More information about the devel mailing list