[csw-devel] SF.net SVN: gar:[8571] csw/mgar/gar/v2-checkpkg-stats

wahwah at users.sourceforge.net wahwah at users.sourceforge.net
Tue Feb 16 10:05:51 CET 2010


Revision: 8571
          http://gar.svn.sourceforge.net/gar/?rev=8571&view=rev
Author:   wahwah
Date:     2010-02-16 09:05:51 +0000 (Tue, 16 Feb 2010)

Log Message:
-----------
mGAR v2-checkpkg-stats: Using yaml data for checks. All modules work except the library module, which is big and needs more work.

Modified Paths:
--------------
    csw/mgar/gar/v2-checkpkg-stats/bin/checkpkg
    csw/mgar/gar/v2-checkpkg-stats/bin/checkpkg.d/checkpkg-actionclasses.py
    csw/mgar/gar/v2-checkpkg-stats/bin/checkpkg.d/checkpkg-archall.py
    csw/mgar/gar/v2-checkpkg-stats/bin/checkpkg.d/checkpkg-license.py
    csw/mgar/gar/v2-checkpkg-stats/bin/checkpkg.d/checkpkg-missing-symbols.py
    csw/mgar/gar/v2-checkpkg-stats/bin/checkpkg.d/checkpkg-obsolete-deps.py
    csw/mgar/gar/v2-checkpkg-stats/bin/checkpkg.d/checkpkg-you-can-write-your-own.py
    csw/mgar/gar/v2-checkpkg-stats/bin/checkpkg_collect_stats.py
    csw/mgar/gar/v2-checkpkg-stats/lib/python/checkpkg.py
    csw/mgar/gar/v2-checkpkg-stats/lib/python/opencsw.py

Modified: csw/mgar/gar/v2-checkpkg-stats/bin/checkpkg
===================================================================
--- csw/mgar/gar/v2-checkpkg-stats/bin/checkpkg	2010-02-16 05:46:13 UTC (rev 8570)
+++ csw/mgar/gar/v2-checkpkg-stats/bin/checkpkg	2010-02-16 09:05:51 UTC (rev 8571)
@@ -130,8 +130,9 @@
 	shift
 fi
 if [[ "$1" == "-h" ]] ; then
-	print 'Usage: checkpkg [-e] pkg1 [pkg2 ....]'
-	print '   -e = "exit on warnings"'
+	print 'Usage: checkpkg [-d] [-e] pkg1 [pkg2 ....]'
+	print '   -d  display debug messages'
+	print '   -e  exit on warnings (soon to be obsolete)'
 	shift
 fi
 
@@ -543,13 +544,14 @@
 set_variables_for_individual_package_check "$f"
 
 test_suite_ok=1
-plugindir=${command_basedir}/checkpkg.d
+checkpkg_module_dir=${command_basedir}/checkpkg.d
 checkpkg_module_tag="checkpkg-"
+checkpkg_stats_basedir="${HOME}/.checkpkg/stats"
 
 # Cleaning up old *.pyc files which can cause grief.  This is because of the
 # move of Python libraries.
-for pyc_file in ${plugindir}/opencsw.pyc \
-                ${plugindir}/checkpkg.pyc; do
+for pyc_file in ${checkpkg_module_dir}/opencsw.pyc \
+                ${checkpkg_module_dir}/checkpkg.pyc; do
   if [ -f "${pyc_file}" ]; then
     echo "Removing old pyc file: '${pyc_file}'"
     rm "${pyc_file}"
@@ -564,12 +566,13 @@
 if [[ "${DEBUG}" != "" ]]; then
 	extra_options="--debug"
 fi
-debugmsg "plugindir: '$plugindir'"
+debugmsg "checkpkg_module_dir: '$checkpkg_module_dir'"
 log_files=""
 module_name_format="%-40s"
-if [[ -d "$plugindir" ]]; then
-	echo "Running modular tests"
-	for plugin in "${plugindir}/${checkpkg_module_tag}"*; do
+md5sums=`gmd5sum "$@" | awk '{print $1}'`
+if [[ -d "${checkpkg_module_dir}" ]]; then
+	echo "Running modular tests in ${checkpkg_module_dir}"
+	for plugin in "${checkpkg_module_dir}/${checkpkg_module_tag}"*; do
 		if [[ -x "${plugin}" ]]; then
 			plugin_base_name=`basename ${plugin}`
 			plugin_log="${EXTRACTDIR}/${plugin_base_name}.log"
@@ -577,12 +580,12 @@
 			plugin_name="`echo ${plugin} | sed -e 's+.*/checkpkg-++' | sed -e 's+\.py$++'`"
 			error_tag_file="tags.${plugin_name}"
 			printf "${BOLD}${module_name_format}${COLOR_RESET} running..." "${plugin_name}"
-			debugmsg "Executing: ${plugin} $extra_options -e \"${EXTRACTDIR}\" ${pkgnames}"
+			debugmsg "Executing: ${plugin} $extra_options -b \"${checkpkg_stats_basedir}\" -o \"${EXTRACTDIR}/${error_tag_file}\" ${md5sums}"
 			${plugin} \
 				$extra_options \
-				-e "${EXTRACTDIR}" \
+				-b "${checkpkg_stats_basedir}" \
 				-o "${EXTRACTDIR}/${error_tag_file}" \
-				${pkgnames} \
+				${md5sums} \
 				> "${plugin_log}" 2>&1
 			if [[ "$?" -ne 0 ]]; then
 				printf "\r${module_name_format} ${RED}[ERROR]${COLOR_RESET}        \\n" "${plugin_name}"
@@ -595,7 +598,7 @@
 		fi
 	done
 else
-	debugmsg "plugin dir ${plugindir} does not exist"
+	debugmsg "module dir ${checkpkg_module_dir} does not exist"
 fi
 
 for log_file in ${log_files}; do

Modified: csw/mgar/gar/v2-checkpkg-stats/bin/checkpkg.d/checkpkg-actionclasses.py
===================================================================
--- csw/mgar/gar/v2-checkpkg-stats/bin/checkpkg.d/checkpkg-actionclasses.py	2010-02-16 05:46:13 UTC (rev 8570)
+++ csw/mgar/gar/v2-checkpkg-stats/bin/checkpkg.d/checkpkg-actionclasses.py	2010-02-16 09:05:51 UTC (rev 8571)
@@ -22,13 +22,16 @@
 import opencsw
 
 
-def CheckActionClasses(pkg, debug):
+def CheckActionClasses(pkg_data, debug):
   """Checks the consistency between classes in the prototype and pkginfo."""
   errors = []
-  pkginfo = pkg.GetParsedPkginfo()
-  pkgmap = pkg.GetPkgmap()
+  pkginfo = pkg_data["pkginfo"]
+  pkgmap = pkg_data["pkgmap"]
   pkginfo_classes = set(re.split(opencsw.WS_RE, pkginfo["CLASSES"]))
-  pkgmap_classes = pkgmap.GetClasses()
+  pkgmap_classes = set()
+  for entry in pkgmap:
+    if entry["class"]:  # might be None
+      pkgmap_classes.add(entry["class"])
   only_in_pkginfo = pkginfo_classes.difference(pkgmap_classes)
   only_in_pkgmap = pkgmap_classes.difference(pkginfo_classes)
   for action_class in only_in_pkginfo:
@@ -46,10 +49,12 @@
 
 def main():
   options, args = checkpkg.GetOptions()
-  pkgnames = args
+  md5sums = args
+  # CheckpkgManager class abstracts away things such as the collection of
+  # results.
   check_manager = checkpkg.CheckpkgManager(CHECKPKG_MODULE_NAME,
-                                           options.extractdir,
-                                           pkgnames,
+                                           options.stats_basedir,
+                                           md5sums,
                                            options.debug)
   # Registering functions defined above.
   check_manager.RegisterIndividualCheck(CheckActionClasses)

Modified: csw/mgar/gar/v2-checkpkg-stats/bin/checkpkg.d/checkpkg-archall.py
===================================================================
--- csw/mgar/gar/v2-checkpkg-stats/bin/checkpkg.d/checkpkg-archall.py	2010-02-16 05:46:13 UTC (rev 8570)
+++ csw/mgar/gar/v2-checkpkg-stats/bin/checkpkg.d/checkpkg-archall.py	2010-02-16 09:05:51 UTC (rev 8571)
@@ -16,11 +16,11 @@
 sys.path.append(os.path.join(*path_list))
 import checkpkg
 
-def CheckArchitectureVsContents(pkg, debug):
+def CheckArchitectureVsContents(pkg_data, debug):
   """Verifies the relationship between package contents and architecture."""
   errors = []
-  binaries = pkg.ListBinaries()
-  pkginfo = pkg.GetParsedPkginfo()
+  binaries = pkg_data["binaries"]
+  pkginfo = pkg_data["pkginfo"]
   arch = pkginfo["ARCH"]
   if binaries and arch == "all":
     for binary in binaries:
@@ -38,10 +38,12 @@
 
 def main():
   options, args = checkpkg.GetOptions()
-  pkgnames = args
+  md5sums = args
+  # CheckpkgManager class abstracts away things such as the collection of
+  # results.
   check_manager = checkpkg.CheckpkgManager(CHECKPKG_MODULE_NAME,
-                                           options.extractdir,
-                                           pkgnames,
+                                           options.stats_basedir,
+                                           md5sums,
                                            options.debug)
 
   check_manager.RegisterIndividualCheck(CheckArchitectureVsContents)

Modified: csw/mgar/gar/v2-checkpkg-stats/bin/checkpkg.d/checkpkg-license.py
===================================================================
--- csw/mgar/gar/v2-checkpkg-stats/bin/checkpkg.d/checkpkg-license.py	2010-02-16 05:46:13 UTC (rev 8570)
+++ csw/mgar/gar/v2-checkpkg-stats/bin/checkpkg.d/checkpkg-license.py	2010-02-16 09:05:51 UTC (rev 8571)
@@ -19,16 +19,17 @@
 
 LICENSE_TMPL = "/opt/csw/share/doc/%s/license"
 
-def CheckLicenseFile(pkg, debug):
+def CheckLicenseFile(pkg_data, debug):
   """Checks for the presence of the license file."""
   errors = []
-  pkgmap = pkg.GetPkgmap()
-  catalogname = pkg.GetCatalogname()
+  pkgmap = pkg_data["pkgmap"]
+  catalogname = pkg_data["basic_stats"]["catalogname"]
   license_path = LICENSE_TMPL % catalogname
-  if license_path not in pkgmap.entries_by_path:
+  pkgmap_paths = [x["path"] for x in pkgmap]
+  if license_path not in pkgmap_paths:
     errors.append(
         checkpkg.CheckpkgTag(
-          pkg.pkgname,
+          pkg_data["basic_stats"]["pkgname"],
           "license-missing",
           msg="See http://sourceforge.net/apps/trac/gar/wiki/CopyRight"))
   return errors
@@ -36,10 +37,12 @@
 
 def main():
   options, args = checkpkg.GetOptions()
-  pkgnames = args
+  md5sums = args
+  # CheckpkgManager class abstracts away things such as the collection of
+  # results.
   check_manager = checkpkg.CheckpkgManager(CHECKPKG_MODULE_NAME,
-                                           options.extractdir,
-                                           pkgnames,
+                                           options.stats_basedir,
+                                           md5sums,
                                            options.debug)
   # Registering functions defined above.
   check_manager.RegisterIndividualCheck(CheckLicenseFile)

Modified: csw/mgar/gar/v2-checkpkg-stats/bin/checkpkg.d/checkpkg-missing-symbols.py
===================================================================
--- csw/mgar/gar/v2-checkpkg-stats/bin/checkpkg.d/checkpkg-missing-symbols.py	2010-02-16 05:46:13 UTC (rev 8570)
+++ csw/mgar/gar/v2-checkpkg-stats/bin/checkpkg.d/checkpkg-missing-symbols.py	2010-02-16 09:05:51 UTC (rev 8571)
@@ -22,21 +22,13 @@
 
 # Defining checking functions.
 
-def CheckForMissingSymbols(pkg, debug):
+def CheckForMissingSymbols(pkg_data, debug):
   """Looks for "symbol not found" in ldd -r output."""
   errors = []
-  binaries = pkg.ListBinaries()
+  binaries = pkg_data["binaries"]
   symbol_re = re.compile(r"symbol not found:")
   for binary in binaries:
-    # this could be potentially moved into the DirectoryFormatPackage class.
-    args = ["ldd", "-r", binary]
-    ldd_proc = subprocess.Popen(
-        args,
-        stdout=subprocess.PIPE,
-        stderr=subprocess.PIPE)
-    stdout, stderr = ldd_proc.communicate()
-    retcode = ldd_proc.wait()
-    lines = stdout.splitlines()
+    lines = pkg_data["ldd_dash_r"][binary]
     missing_symbols = False
     for line in lines:
       if re.search(symbol_re, line):
@@ -49,12 +41,12 @@
 
 def main():
   options, args = checkpkg.GetOptions()
-  pkgnames = args
+  md5sums = args
   # CheckpkgManager class abstracts away things such as the collection of
   # results.
   check_manager = checkpkg.CheckpkgManager(CHECKPKG_MODULE_NAME,
-                                           options.extractdir,
-                                           pkgnames,
+                                           options.stats_basedir,
+                                           md5sums,
                                            options.debug)
   # Registering functions defined above.
   check_manager.RegisterIndividualCheck(CheckForMissingSymbols)

Modified: csw/mgar/gar/v2-checkpkg-stats/bin/checkpkg.d/checkpkg-obsolete-deps.py
===================================================================
--- csw/mgar/gar/v2-checkpkg-stats/bin/checkpkg.d/checkpkg-obsolete-deps.py	2010-02-16 05:46:13 UTC (rev 8570)
+++ csw/mgar/gar/v2-checkpkg-stats/bin/checkpkg.d/checkpkg-obsolete-deps.py	2010-02-16 09:05:51 UTC (rev 8571)
@@ -8,6 +8,8 @@
 import os.path
 import sys
 
+CHECKPKG_MODULE_NAME = "obsolete dependencies"
+
 # The following bit of code sets the correct path to Python libraries
 # distributed with GAR.
 path_list = [os.path.dirname(__file__),
@@ -26,10 +28,10 @@
     },
 }
 
-def CheckObsoleteDeps(pkg, debug):
+def CheckObsoleteDeps(pkg_data, debug):
   """Checks for obsolete dependencies."""
   errors = []
-  deps = set(pkg.GetDependencies())
+  deps = set(pkg_data["depends"])
   obsolete_pkg_deps = deps.intersection(set(OBSOLETE_DEPS))
   if obsolete_pkg_deps:
     for obsolete_pkg in obsolete_pkg_deps:
@@ -50,10 +52,12 @@
 
 def main():
   options, args = checkpkg.GetOptions()
-  pkgnames = args
-  check_manager = checkpkg.CheckpkgManager("obsolete dependencies",
-                                           options.extractdir,
-                                           pkgnames,
+  md5sums = args
+  # CheckpkgManager class abstracts away things such as the collection of
+  # results.
+  check_manager = checkpkg.CheckpkgManager(CHECKPKG_MODULE_NAME,
+                                           options.stats_basedir,
+                                           md5sums,
                                            options.debug)
   check_manager.RegisterIndividualCheck(CheckObsoleteDeps)
   # Running the checks, reporting and exiting.

Modified: csw/mgar/gar/v2-checkpkg-stats/bin/checkpkg.d/checkpkg-you-can-write-your-own.py
===================================================================
--- csw/mgar/gar/v2-checkpkg-stats/bin/checkpkg.d/checkpkg-you-can-write-your-own.py	2010-02-16 05:46:13 UTC (rev 8570)
+++ csw/mgar/gar/v2-checkpkg-stats/bin/checkpkg.d/checkpkg-you-can-write-your-own.py	2010-02-16 09:05:51 UTC (rev 8571)
@@ -21,7 +21,7 @@
 # Defining the checking functions.  They come in two flavors: individual
 # package checks and set checks.
 
-def MyCheckForAsinglePackage(pkg, debug):
+def MyCheckForAsinglePackage(pkg_data, debug):
   """Checks an individual package.
   
   Gets a DirctoryFormatPackage as an argument, and returns a list of errors.
@@ -40,11 +40,13 @@
   # Here's how to report an error:
   something_is_wrong = False
   if something_is_wrong:
-    errors.append(checkpkg.CheckpkgTag(pkg.pkgname, "example-problem", "thing"))
+    errors.append(checkpkg.CheckpkgTag(
+      pkg_data["basic_stats"]["pkgname"],
+      "example-problem", "thing"))
   return errors
 
 
-def MyCheckForAsetOfPackages(pkgs, debug):
+def MyCheckForAsetOfPackages(pkgs_data, debug):
   """Checks a set of packages.
 
   Sometimes individual checks aren't enough. If you need to write code which
@@ -59,12 +61,12 @@
 
 def main():
   options, args = checkpkg.GetOptions()
-  pkgnames = args
+  md5sums = args
   # CheckpkgManager class abstracts away things such as the collection of
   # results.
   check_manager = checkpkg.CheckpkgManager(CHECKPKG_MODULE_NAME,
-                                           options.extractdir,
-                                           pkgnames,
+                                           options.stats_basedir,
+                                           md5sums,
                                            options.debug)
   # Registering functions defined above.
   check_manager.RegisterIndividualCheck(MyCheckForAsinglePackage)

Modified: csw/mgar/gar/v2-checkpkg-stats/bin/checkpkg_collect_stats.py
===================================================================
--- csw/mgar/gar/v2-checkpkg-stats/bin/checkpkg_collect_stats.py	2010-02-16 05:46:13 UTC (rev 8570)
+++ csw/mgar/gar/v2-checkpkg-stats/bin/checkpkg_collect_stats.py	2010-02-16 09:05:51 UTC (rev 8571)
@@ -5,15 +5,12 @@
 # Collects statistics about a package and saves to a directory, for later use
 # by checkpkg modules.
 
-import copy
-import errno
 import logging
 import optparse
 import os
 import os.path
 import subprocess
 import sys
-import yaml
 
 # The following bit of code sets the correct path to Python libraries
 # distributed with GAR.
@@ -24,145 +21,6 @@
 import opencsw
 
 
-STATS_VERSION = 1L
-
-
-class PackageStats(object):
-  """Collects stats about a package and saves it.
-
-  base-stats.yml
-  binaries.yml
-  """
-
-  def __init__(self, srv4_pkg):
-    self.srv4_pkg = srv4_pkg
-    self.md5sum = None
-    self.dir_format_pkg = None
-    self.stats_path = None
-
-  def GetMd5sum(self):
-    if not self.md5sum:
-    	self.md5sum = self.srv4_pkg.GetMd5sum()
-    return self.md5sum
-
-  def GetStatsPath(self, home=None):
-    if not self.stats_path:
-      if not home:
-        home = os.environ["HOME"]
-      md5sum = self.GetMd5sum()
-      two_chars = md5sum[0:2]
-      parts = [home, ".checkpkg", "stats", two_chars, md5sum]
-      self.stats_path = os.path.join(*parts)
-    return self.stats_path
-
-  def StatsExist(self):
-    """Checks if statistics of a package exist.
-
-    Returns:
-      bool
-    """
-    if not self.StatsDirExists():
-    	return False
-    # More checks can be added in the future.
-    return True
-
-  def StatsDirExists(self):
-    return os.path.isdir(self.GetStatsPath())
-
-  def GetDirFormatPkg(self):
-    if not self.dir_format_pkg:
-    	self.dir_format_pkg = self.srv4_pkg.GetDirFormatPkg()
-    return self.dir_format_pkg
-
-  def MakeStatsDir(self):
-    """mkdir -p equivalent.
-
-    http://stackoverflow.com/questions/600268/mkdir-p-functionality-in-python
-    """
-    stats_path = self.GetStatsPath()
-    try:
-      os.makedirs(stats_path)
-    except OSError, e:
-      if e.errno == errno.EEXIST:
-      	pass
-      else:
-      	raise
-
-  def GetBinaryDumpInfo(self):
-    dir_pkg = self.GetDirFormatPkg()
-    # Binaries. This could be split off to a separate function.
-    # man ld.so.1 for more info on this hack
-    env = copy.copy(os.environ)
-    env["LD_NOAUXFLTR"] = "1"
-    binaries_dump_info = []
-    for binary in dir_pkg.ListBinaries():
-      binary_abs_path = os.path.join(dir_pkg.directory, "root", binary)
-      binary_base_name = os.path.basename(binary)
-      args = [checkpkg.DUMP_BIN, "-Lv", binary_abs_path]
-      dump_proc = subprocess.Popen(args, stdout=subprocess.PIPE, env=env)
-      stdout, stderr = dump_proc.communicate()
-      ret = dump_proc.wait()
-      binary_data = checkpkg.ParseDumpOutput(stdout)
-      binary_data["path"] = binary
-      binary_data["soname_guessed"] = False
-      binary_data["base_name"] = binary_base_name
-      if checkpkg.SONAME not in binary_data:
-        logging.debug("The %s binary doesn't provide a SONAME. "
-                      "(It might be an executable)",
-                     binary_base_name)
-        # The binary doesn't tell its SONAME.  We're guessing it's the
-        # same as the base file name.
-        binary_data[checkpkg.SONAME] = binary_base_name
-        binary_data["soname_guessed"] = True
-      binaries_dump_info.append(binary_data)
-    return binaries_dump_info
-
-  def GetBasicStats(self):
-    dir_pkg = self.GetDirFormatPkg()
-    basic_stats = {}
-    basic_stats["stats_version"] = STATS_VERSION
-    basic_stats["pkg_path"] = self.srv4_pkg.pkg_path
-    basic_stats["pkg_basename"] = os.path.basename(self.srv4_pkg.pkg_path)
-    basic_stats["parsed_basename"] = opencsw.ParsePackageFileName(basic_stats["pkg_basename"])
-    basic_stats["pkgname"] = dir_pkg.pkgname
-    basic_stats["catalogname"] = dir_pkg.GetCatalogname()
-    return basic_stats
-
-  def GetOverrides(self):
-    dir_pkg = self.GetDirFormatPkg()
-    overrides = dir_pkg.GetOverrides()
-    def OverrideToDict(override):
-      d = {}
-      d["pkgname"] = override.pkgname
-      d["tag_name"] = override.tag_name
-      d["tag_info"] = override.tag_info
-      return d
-    overrides_simple = [OverrideToDict(x) for x in overrides]
-    return overrides_simple
-
-  def CollectStats(self):
-    stats_path = self.GetStatsPath()
-    self.MakeStatsDir()
-    dir_pkg = self.GetDirFormatPkg()
-    self.DumpObject(dir_pkg.GetParsedPkginfo(), "pkginfo")
-    self.DumpObject(dir_pkg.GetPkgmap().entries, "pkgmap")
-    self.DumpObject(dir_pkg.ListBinaries(), "binaries")
-    self.DumpObject(dir_pkg.GetDependencies(), "depends")
-    self.DumpObject(dir_pkg.GetAllFilenames(), "all_filenames")
-    self.DumpObject(checkpkg.GetIsalist(), "isalist")
-    self.DumpObject(self.GetOverrides, "overrides")
-    self.DumpObject(self.GetBasicStats(), "basic_stats")
-    self.DumpObject(self.GetBinaryDumpInfo(), "binaries_dump_info")
-
-  def DumpObject(self, obj, name):
-    stats_path = self.GetStatsPath()
-    out_file_name = os.path.join(self.GetStatsPath(), "%s.yml" % name)
-    logging.debug("DumpObject(): writing %s", repr(out_file_name))
-    f = open(out_file_name, "w")
-    f.write(yaml.safe_dump(obj))
-    f.close()
-
-
 def main():
   debug = True
   logging.basicConfig(level=logging.DEBUG)
@@ -172,7 +30,7 @@
   logging.info("Collecting statistics about given package files.")
   logging.debug("args: %s", args)
   packages = [opencsw.CswSrv4File(x, debug) for x in args]
-  stats_list = [PackageStats(pkg) for pkg in packages]
+  stats_list = [checkpkg.PackageStats(pkg) for pkg in packages]
   for pkg_stats in stats_list:
   	pkg_stats.CollectStats()
 

Modified: csw/mgar/gar/v2-checkpkg-stats/lib/python/checkpkg.py
===================================================================
--- csw/mgar/gar/v2-checkpkg-stats/lib/python/checkpkg.py	2010-02-16 05:46:13 UTC (rev 8570)
+++ csw/mgar/gar/v2-checkpkg-stats/lib/python/checkpkg.py	2010-02-16 09:05:51 UTC (rev 8571)
@@ -3,6 +3,8 @@
 # This is the checkpkg library, common for all checkpkg tests written in
 # Python.
 
+import copy
+import errno
 import itertools
 import logging
 import optparse
@@ -12,6 +14,7 @@
 import socket
 import sqlite3
 import subprocess
+import yaml
 from Cheetah import Template
 import opencsw
 
@@ -27,8 +30,8 @@
 DUMP_BIN = "/usr/ccs/bin/dump"
 
 SYSTEM_SYMLINKS = (
-    ("/opt/csw/bdb4", ["/opt/csw/bdb42"]),
-    ("/64", ["/amd64", "/sparcv9"]),
+    ("/opt/csw/bdb4",     ["/opt/csw/bdb42"]),
+    ("/64",               ["/amd64", "/sparcv9"]),
     ("/opt/csw/lib/i386", ["/opt/csw/lib"]),
 )
 
@@ -36,10 +39,10 @@
 # Solaris 8 on i386.  It's okay if it's missing.
 ALLOWED_ORPHAN_SONAMES = set([u"libm.so.2"])
 DEPENDENCY_FILENAME_REGEXES = (
-    (r".*\.pl", u"CSWperl"),
-    (r".*\.pm", u"CSWperl"),
-    (r".*\.py", u"CSWpython"),
-    (r".*\.rb", u"CSWruby"),
+    (r".*\.pl$", u"CSWperl"),
+    (r".*\.pm$", u"CSWperl"),
+    (r".*\.py$", u"CSWpython"),
+    (r".*\.rb$", u"CSWruby"),
 )
 
 REPORT_TMPL = u"""#if $missing_deps or $surplus_deps or $orphan_sonames
@@ -76,7 +79,7 @@
 #end for
 #else
 #if $debug
-OK: $name found no problems.
+OK: $repr($name) module found no problems.
 #end if
 #end if
 """
@@ -107,16 +110,19 @@
 
 def GetOptions():
   parser = optparse.OptionParser()
-  parser.add_option("-e", dest="extractdir",
-                    help="The directory into which the package has been extracted")
+  parser.add_option("-b", dest="stats_basedir",
+                    help=("The base directory with package statistics "
+                          "in yaml format"))
   parser.add_option("-d", "--debug", dest="debug",
                     default=False, action="store_true",
                     help="Turn on debugging messages")
   parser.add_option("-o", "--output", dest="output",
                     help="Output error tag file")
   (options, args) = parser.parse_args()
-  if not options.extractdir:
-    raise ConfigurationError("ERROR: -e option is missing.")
+  if not options.stats_basedir:
+    raise ConfigurationError("ERROR: the -b option is missing.")
+  if not options.output:
+    raise ConfigurationError("ERROR: the -o option is missing.")
   # Using set() to make the arguments unique.
   return options, set(args)
 
@@ -561,11 +567,11 @@
 class CheckpkgManager(object):
   """Takes care of calling checking functions"""
 
-  def __init__(self, name, extractdir, pkgname_list, debug=False):
+  def __init__(self, name, stats_basedir, md5sum_list, debug=False):
     self.debug = debug
     self.name = name
-    self.extractdir = extractdir
-    self.pkgname_list = pkgname_list
+    self.md5sum_list = md5sum_list
+    self.stats_basedir = stats_basedir
     self.errors = []
     self.individual_checks = []
     self.set_checks = []
@@ -577,23 +583,24 @@
   def RegisterSetCheck(self, function):
     self.set_checks.append(function)
 
-  def GetDirectoryFormatPackages(self):
-    packages = []
-    for pkgname in self.pkgname_list:
-        pkg_path = os.path.join(self.extractdir, pkgname)
-        packages.append(opencsw.DirectoryFormatPackage(pkg_path))
-    return packages
+  def GetPackageStatsList(self):
+    stats_list = []
+    for md5sum in self.md5sum_list:
+      stats_list.append(PackageStats(None, self.stats_basedir, md5sum))
+    return stats_list
 
-  def GetAllTags(self, packages):
+  def GetAllTags(self, packages_data):
     errors = {}
-    for pkg in packages:
+    for pkg_data in packages_data:
       for function in self.individual_checks:
-        errors_for_pkg = function(pkg, debug=self.debug)
+        all_stats = pkg_data.GetAllStats()
+        errors_for_pkg = function(all_stats, debug=self.debug)
         if errors_for_pkg:
-          errors[pkg.pkgname] = errors_for_pkg
+          errors[all_stats["basic_stats"]["pkgname"]] = errors_for_pkg
     # Set checks
     for function in self.set_checks:
-      set_errors = function(packages, debug=self.debug)
+      set_errors = function([x.GetAllStats() for x in packages_data],
+                            debug=self.debug)
       if set_errors:
         # These were generated by a set, but are likely to be bound to specific
         # packages. We'll try to preserve the package assignments.
@@ -625,8 +632,8 @@
 
     Returns a tuple of an exit code and a report.
     """
-    packages = self.GetDirectoryFormatPackages()
-    errors = self.GetAllTags(packages)
+    packages_data = self.GetPackageStatsList()
+    errors = self.GetAllTags(packages_data)
     screen_report, tags_report = self.FormatReports(errors)
     exit_code = 0
     return (exit_code, screen_report, tags_report)
@@ -726,3 +733,201 @@
     logging.error("Calling isalist has failed.")
   isalist = re.split(r"\s+", stdout.strip())
   return isalist
+
+
+class PackageStats(object):
+  """Collects stats about a package and saves it."""
+  STATS_VERSION = 1L
+  # This list needs to be synchronized with the CollectStats() method.
+  STAT_FILES = [
+      "all_filenames",
+      "basic_stats",
+      "binaries",
+      "binaries_dump_info",
+      "depends",
+      "isalist",
+      "ldd_dash_r",
+      "overrides",
+      "pkginfo",
+      "pkgmap",
+  ]
+
+  def __init__(self, srv4_pkg, stats_basedir=None, md5sum=None):
+    self.srv4_pkg = srv4_pkg
+    self.md5sum = md5sum
+    self.dir_format_pkg = None
+    self.stats_path = None
+    self.all_stats = {}
+    self.stats_basedir = stats_basedir
+    if not self.stats_basedir:
+      home = os.environ["HOME"]
+      parts = [home, ".checkpkg", "stats"]
+      self.stats_basedir = os.path.join(*parts)
+
+  def GetMd5sum(self):
+    if not self.md5sum:
+      self.md5sum = self.srv4_pkg.GetMd5sum()
+    return self.md5sum
+
+  def GetStatsPath(self):
+    if not self.stats_path:
+      md5sum = self.GetMd5sum()
+      two_chars = md5sum[0:2]
+      parts = [self.stats_basedir, two_chars, md5sum]
+      self.stats_path = os.path.join(*parts)
+    return self.stats_path
+
+  def StatsExist(self):
+    """Checks if statistics of a package exist.
+
+    Returns:
+      bool
+    """
+    if not self.StatsDirExists():
+      return False
+    # More checks can be added in the future.
+    return True
+
+  def StatsDirExists(self):
+    return os.path.isdir(self.GetStatsPath())
+
+  def GetDirFormatPkg(self):
+    if not self.dir_format_pkg:
+      self.dir_format_pkg = self.srv4_pkg.GetDirFormatPkg()
+    return self.dir_format_pkg
+
+  def MakeStatsDir(self):
+    """mkdir -p equivalent.
+
+    http://stackoverflow.com/questions/600268/mkdir-p-functionality-in-python
+    """
+    stats_path = self.GetStatsPath()
+    try:
+      os.makedirs(stats_path)
+    except OSError, e:
+      if e.errno == errno.EEXIST:
+        pass
+      else:
+        raise
+
+  def GetBinaryDumpInfo(self):
+    dir_pkg = self.GetDirFormatPkg()
+    # Binaries. This could be split off to a separate function.
+    # man ld.so.1 for more info on this hack
+    env = copy.copy(os.environ)
+    env["LD_NOAUXFLTR"] = "1"
+    binaries_dump_info = []
+    for binary in dir_pkg.ListBinaries():
+      binary_abs_path = os.path.join(dir_pkg.directory, "root", binary)
+      binary_base_name = os.path.basename(binary)
+      args = [DUMP_BIN, "-Lv", binary_abs_path]
+      dump_proc = subprocess.Popen(args, stdout=subprocess.PIPE, env=env)
+      stdout, stderr = dump_proc.communicate()
+      ret = dump_proc.wait()
+      binary_data = ParseDumpOutput(stdout)
+      binary_data["path"] = binary
+      binary_data["soname_guessed"] = False
+      binary_data["base_name"] = binary_base_name
+      if SONAME not in binary_data:
+        logging.debug("The %s binary doesn't provide a SONAME. "
+                      "(It might be an executable)",
+                     binary_base_name)
+        # The binary doesn't tell its SONAME.  We're guessing it's the
+        # same as the base file name.
+        binary_data[SONAME] = binary_base_name
+        binary_data["soname_guessed"] = True
+      binaries_dump_info.append(binary_data)
+    return binaries_dump_info
+
+  def GetBasicStats(self):
+    dir_pkg = self.GetDirFormatPkg()
+    basic_stats = {}
+    basic_stats["stats_version"] = self.STATS_VERSION
+    basic_stats["pkg_path"] = self.srv4_pkg.pkg_path
+    basic_stats["pkg_basename"] = os.path.basename(self.srv4_pkg.pkg_path)
+    basic_stats["parsed_basename"] = opencsw.ParsePackageFileName(basic_stats["pkg_basename"])
+    basic_stats["pkgname"] = dir_pkg.pkgname
+    basic_stats["catalogname"] = dir_pkg.GetCatalogname()
+    return basic_stats
+
+  def GetOverrides(self):
+    dir_pkg = self.GetDirFormatPkg()
+    overrides = dir_pkg.GetOverrides()
+    def OverrideToDict(override):
+      d = {}
+      d["pkgname"] = override.pkgname
+      d["tag_name"] = override.tag_name
+      d["tag_info"] = override.tag_info
+      return d
+    overrides_simple = [OverrideToDict(x) for x in overrides]
+    return overrides_simple
+
+  def GetLddMinusRlines(self):
+    """Returns ldd -r output."""
+    dir_pkg = self.GetDirFormatPkg()
+    binaries = dir_pkg.ListBinaries()
+    ldd_output = {}
+    for binary in binaries:
+      binary_abspath = os.path.join(dir_pkg.directory, "root", binary)
+      # this could be potentially moved into the DirectoryFormatPackage class.
+      # ldd needs the binary to be executable
+      os.chmod(binary_abspath, 0755)
+      args = ["ldd", "-r", binary_abspath]
+      ldd_proc = subprocess.Popen(
+          args,
+          stdout=subprocess.PIPE,
+          stderr=subprocess.PIPE)
+      stdout, stderr = ldd_proc.communicate()
+      retcode = ldd_proc.wait()
+      if retcode:
+        logging.error("%s returned an error: %s", args, stderr)
+      lines = stdout.splitlines()
+      ldd_output[binary] = lines
+    return ldd_output
+
+
+  def CollectStats(self):
+    stats_path = self.GetStatsPath()
+    self.MakeStatsDir()
+    dir_pkg = self.GetDirFormatPkg()
+    self.DumpObject(dir_pkg.GetAllFilenames(), "all_filenames")
+    self.DumpObject(self.GetBasicStats(), "basic_stats")
+    self.DumpObject(dir_pkg.ListBinaries(), "binaries")
+    self.DumpObject(self.GetBinaryDumpInfo(), "binaries_dump_info")
+    self.DumpObject(dir_pkg.GetDependencies(), "depends")
+    self.DumpObject(GetIsalist(), "isalist")
+    self.DumpObject(self.GetOverrides(), "overrides")
+    self.DumpObject(dir_pkg.GetParsedPkginfo(), "pkginfo")
+    self.DumpObject(dir_pkg.GetPkgmap().entries, "pkgmap")
+    self.DumpObject(self.GetLddMinusRlines(), "ldd_dash_r")
+
+  def DumpObject(self, obj, name):
+    stats_path = self.GetStatsPath()
+    out_file_name = os.path.join(stats_path, "%s.yml" % name)
+    logging.debug("DumpObject(): writing %s", repr(out_file_name))
+    f = open(out_file_name, "w")
+    f.write(yaml.safe_dump(obj))
+    f.close()
+    self.all_stats[name] = obj
+
+  def GetAllStats(self):
+    if self.StatsExist():
+      self.all_stats = self.ReadSavedStats()
+    else:
+      self.CollectStats()
+    return self.all_stats
+
+  def ReadObject(self, name):
+    stats_path = self.GetStatsPath()
+    in_file_name = os.path.join(stats_path, "%s.yml" % name)
+    logging.debug("ReadObject(): reading %s", repr(in_file_name))
+    f = open(in_file_name, "r")
+    obj = yaml.safe_load(f)
+    f.close()
+    return obj
+
+  def ReadSavedStats(self):
+    all_stats = {}
+    for name in self.STAT_FILES:
+      all_stats[name] = self.ReadObject(name)
+    return all_stats

Modified: csw/mgar/gar/v2-checkpkg-stats/lib/python/opencsw.py
===================================================================
--- csw/mgar/gar/v2-checkpkg-stats/lib/python/opencsw.py	2010-02-16 05:46:13 UTC (rev 8570)
+++ csw/mgar/gar/v2-checkpkg-stats/lib/python/opencsw.py	2010-02-16 09:05:51 UTC (rev 8571)
@@ -568,10 +568,14 @@
 
 
 class DirectoryFormatPackage(ShellMixin, object):
+  """Represents a package in the directory format.
 
+  Allows some read-write operations.
+  """
+
   def __init__(self, directory):
     self.directory = directory
-    self.pkgname = os.path.split(directory)[1]
+    self.pkgname = os.path.basename(directory)
     self.pkgpath = self.directory
     self.pkginfo_dict = None
     self.binaries = None


This was sent by the SourceForge.net collaborative development platform, the world's largest Open Source development site.


More information about the devel mailing list