[csw-devel] SF.net SVN: gar:[11224] csw/mgar/gar/v2

wahwah at users.sourceforge.net wahwah at users.sourceforge.net
Sun Oct 10 22:38:42 CEST 2010


Revision: 11224
          http://gar.svn.sourceforge.net/gar/?rev=11224&view=rev
Author:   wahwah
Date:     2010-10-10 20:38:42 +0000 (Sun, 10 Oct 2010)

Log Message:
-----------
mGAR v2: More refactoring, needs testing and pychecker.

Modified Paths:
--------------
    csw/mgar/gar/v2/bin/analyze_module_results.py
    csw/mgar/gar/v2/bin/checkpkg_collect_stats.py
    csw/mgar/gar/v2/lib/python/catalog.py
    csw/mgar/gar/v2/lib/python/checkpkg.py
    csw/mgar/gar/v2/lib/python/checkpkg_test.py
    csw/mgar/gar/v2/lib/python/opencsw.py
    csw/mgar/gar/v2/lib/python/package.py
    csw/mgar/gar/v2/tests/run_tests.py

Added Paths:
-----------
    csw/mgar/gar/v2/lib/python/database.py
    csw/mgar/gar/v2/lib/python/package_stats.py

Modified: csw/mgar/gar/v2/bin/analyze_module_results.py
===================================================================
--- csw/mgar/gar/v2/bin/analyze_module_results.py	2010-10-10 20:37:57 UTC (rev 11223)
+++ csw/mgar/gar/v2/bin/analyze_module_results.py	2010-10-10 20:38:42 UTC (rev 11224)
@@ -17,6 +17,7 @@
 sys.path.append(os.path.join(*path_list))
 import checkpkg
 import overrides
+import package_stats
 
 BEFORE_OVERRIDES = """If any of the reported errors were false positives, you
 can override them pasting the lines below to the GAR recipe."""
@@ -48,7 +49,7 @@
   # It might be a good idea to store the error tags in the database and
   # eliminate the need to access the directory with the error tag files.
 
-  pkgstats = checkpkg.StatsListFromCatalog(filenames, options.catalog)
+  pkgstats = package_stats.StatsListFromCatalog(filenames, options.catalog)
   overrides_list = [pkg.GetSavedOverrides() for pkg in pkgstats]
   override_list = reduce(operator.add, overrides_list)
   error_tags = reduce(operator.add, [stat.GetSavedErrorTags() for stat in pkgstats])

Modified: csw/mgar/gar/v2/bin/checkpkg_collect_stats.py
===================================================================
--- csw/mgar/gar/v2/bin/checkpkg_collect_stats.py	2010-10-10 20:37:57 UTC (rev 11223)
+++ csw/mgar/gar/v2/bin/checkpkg_collect_stats.py	2010-10-10 20:38:42 UTC (rev 11224)
@@ -21,6 +21,7 @@
 sys.path.append(os.path.join(*path_list))
 import checkpkg
 import opencsw
+import package_stats
 
 def main():
   parser = optparse.OptionParser()
@@ -43,7 +44,7 @@
     args_display = args_display[:5] + ["...more..."]
   file_list = args
   logging.debug("Processing: %s, please be patient", args_display)
-  stats_list = checkpkg.StatsListFromCatalog(
+  stats_list = package_stats.StatsListFromCatalog(
       file_list, options.catalog, options.debug)
   # Reversing the item order in the list, so that the pop() method can be used
   # to get packages, and the order of processing still matches the one in the

Modified: csw/mgar/gar/v2/lib/python/catalog.py
===================================================================
--- csw/mgar/gar/v2/lib/python/catalog.py	2010-10-10 20:37:57 UTC (rev 11223)
+++ csw/mgar/gar/v2/lib/python/catalog.py	2010-10-10 20:38:42 UTC (rev 11224)
@@ -1,7 +1,18 @@
 #!/usr/bin/env python2.6
 
+import os
 import re
+import logging
 
+
+class Error(Exception):
+  pass
+
+
+class CatalogLineParseError(Error):
+  pass
+
+
 class OpencswCatalogBuilder(object):
 
   def __init__(self, product_dir, catalog_dir):

Modified: csw/mgar/gar/v2/lib/python/checkpkg.py
===================================================================
--- csw/mgar/gar/v2/lib/python/checkpkg.py	2010-10-10 20:37:57 UTC (rev 11223)
+++ csw/mgar/gar/v2/lib/python/checkpkg.py	2010-10-10 20:38:42 UTC (rev 11224)
@@ -24,16 +24,18 @@
 import textwrap
 import yaml
 from Cheetah import Template
+import database
+
 import opencsw
 import overrides
 import package_checks
+import package_stats
 import models as m
 import configuration as c
 import tag
 
+
 DEBUG_BREAK_PKGMAP_AFTER = False
-DB_SCHEMA_VERSION = 5L
-PACKAGE_STATS_VERSION = 9L
 SYSTEM_PKGMAP = "/var/sadm/install/contents"
 NEEDED_SONAMES = "needed sonames"
 RUNPATH = "runpath"
@@ -194,58 +196,7 @@
   return m.group("username") if m else None
 
 
-class DatabaseClient(object):
-
-  CHECKPKG_DIR = ".checkpkg"
-  SQLITE3_DBNAME_TMPL = "checkpkg-db-%(fqdn)s"
-  TABLES_THAT_NEED_UPDATES = (m.CswFile,)
-  TABLES = TABLES_THAT_NEED_UPDATES + (
-            m.Pkginst,
-            m.CswConfig,
-            m.Srv4FileStats,
-            m.CheckpkgOverride,
-            m.CheckpkgErrorTag,
-            m.Architecture,
-            m.OsRelease,
-            m.Maintainer)
-  sqo_conn = None
-  db_path = None
-
-  def __init__(self, debug=False):
-    self.debug = debug
-
-  @classmethod
-  def GetDatabasePath(cls):
-    if not cls.db_path:
-      dbname_dict = {'fqdn': socket.getfqdn()}
-      db_filename = cls.SQLITE3_DBNAME_TMPL % dbname_dict
-      home_dir = os.environ["HOME"]
-      cls.db_path = os.path.join(home_dir, cls.CHECKPKG_DIR, db_filename)
-    return cls.db_path
-
-  @classmethod
-  def InitializeSqlobject(cls):
-    """Establishes a database connection and stores it as a class member.
-
-    The idea is to share the database connection between instances.  It would
-    be solved even better if the connection was passed to the class
-    constructor.
-    """
-    if not cls.sqo_conn:
-      db_path = cls.GetDatabasePath()
-      cls.sqo_conn = sqlobject.connectionForURI('sqlite:%s' % db_path)
-      sqlobject.sqlhub.processConnection = cls.sqo_conn
-
-  def CreateTables(self):
-    for table in self.TABLES:
-      table.createTable(ifNotExists=True)
-
-  def IsDatabaseGoodSchema(self):
-    good_version = self.GetDatabaseSchemaVersion() >= DB_SCHEMA_VERSION
-    return good_version
-
-
-class SystemPkgmap(DatabaseClient):
+class SystemPkgmap(database.DatabaseClient):
   """A class to hold and manipulate the /var/sadm/install/contents file."""
 
   STOP_PKGS = ["SUNWbcp", "SUNWowbcp", "SUNWucb"]
@@ -457,7 +408,7 @@
     try:
       config_option = m.CswConfig.select(
           m.CswConfig.q.option_key==CONFIG_DB_SCHEMA).getOne()
-      config_option.int_value = DB_SCHEMA_VERSION
+      config_option.int_value = database.DB_SCHEMA_VERSION
     except sqlobject.main.SQLObjectNotFound, e:
       version = m.CswConfig(option_key=CONFIG_DB_SCHEMA,
                             int_value=DB_SCHEMA_VERSION)
@@ -578,7 +529,7 @@
     # subsequent checkpkg runs won't pick up the last change.
     # I don't expect pkgadd to run under 1s.
     fresh = f_mtime <= d_mtime
-    good_version = self.GetDatabaseSchemaVersion() >= DB_SCHEMA_VERSION
+    good_version = self.GetDatabaseSchemaVersion() >= database.DB_SCHEMA_VERSION
     logging.debug("IsDatabaseUpToDate: good_version=%s, fresh=%s",
                   repr(good_version), repr(fresh))
     return fresh and good_version
@@ -775,7 +726,8 @@
     self.packages = []
 
   def GetPackageStatsList(self):
-    return [PackageStats(None, self.stats_basedir, x) for x in self.md5sum_list]
+    return [package_stats.PackageStats(None, self.stats_basedir, x)
+            for x in self.md5sum_list]
 
   def FormatReports(self, errors, messages, gar_lines):
     namespace = {
@@ -802,7 +754,7 @@
       else:
         if "package-set" not in errors:
           errors["package-set"] = []
-        errors["package-set"].append(error)
+        errors["package-set"].append(tag)
     return errors
 
   def GetOptimizedAllStats(self, stats_obj_list):
@@ -1083,425 +1035,6 @@
   return tuple(isalist)
 
 
-class PackageStats(DatabaseClient):
-  """Collects stats about a package and saves it.
-
-  TODO: Maintain a global database connection instead of creating one for each
-  instantiated object.
-  TODO: Store overrides in a separate table for performance.
-  """
-
-  def __init__(self, srv4_pkg, stats_basedir=None, md5sum=None, debug=False):
-    super(PackageStats, self).__init__(debug=debug)
-    self.srv4_pkg = srv4_pkg
-    self.md5sum = md5sum
-    self.dir_format_pkg = None
-    self.all_stats = {}
-    self.stats_basedir = stats_basedir
-    self.db_pkg_stats = None
-    if not self.stats_basedir:
-      home = os.environ["HOME"]
-      parts = [home, ".checkpkg", "stats"]
-      self.stats_basedir = os.path.join(*parts)
-    self.InitializeSqlobject()
-
-  def GetPkgchkData(self):
-    ret, stdout, stderr = self.srv4_pkg.GetPkgchkOutput()
-    data = {
-        'return_code': ret,
-        'stdout_lines': stdout.splitlines(),
-        'stderr_lines': stderr.splitlines(),
-    }
-    return data
-
-  def GetMd5sum(self):
-    if not self.md5sum:
-      self.md5sum = self.srv4_pkg.GetMd5sum()
-    return self.md5sum
-
-  def GetDbObject(self):
-    if not self.db_pkg_stats:
-      md5_sum = self.GetMd5sum()
-      res = m.Srv4FileStats.select(m.Srv4FileStats.q.md5_sum==md5_sum)
-      if not res.count():
-        # TODO: Change this bit to throw an exception if the object is not
-        # found.
-        return None
-      else:
-        self.db_pkg_stats = res.getOne()
-    return self.db_pkg_stats
-
-
-  def StatsExist(self):
-    """Checks if statistics of a package exist.
-
-    Returns:
-      bool
-    """
-    pkg_stats = self.GetDbObject()
-    if not pkg_stats:
-      return False
-    if pkg_stats.stats_version != PACKAGE_STATS_VERSION:
-      pkg_stats.destroySelf()
-    else:
-      return True
-    return False
-
-  def GetDirFormatPkg(self):
-    if not self.dir_format_pkg:
-      self.dir_format_pkg = self.srv4_pkg.GetDirFormatPkg()
-    return self.dir_format_pkg
-
-  def GetMtime(self):
-    return self.srv4_pkg.GetMtime()
-
-  def _MakeDirP(self, dir_path):
-    """mkdir -p equivalent.
-
-    http://stackoverflow.com/questions/600268/mkdir-p-functionality-in-python
-    """
-    try:
-      os.makedirs(dir_path)
-    except OSError, e:
-      if e.errno == errno.EEXIST:
-        pass
-      else:
-        raise
-
-  def GetBinaryDumpInfo(self):
-    dir_pkg = self.GetDirFormatPkg()
-    # Binaries. This could be split off to a separate function.
-    # man ld.so.1 for more info on this hack
-    env = copy.copy(os.environ)
-    env["LD_NOAUXFLTR"] = "1"
-    binaries_dump_info = []
-    for binary in dir_pkg.ListBinaries():
-      binary_abs_path = os.path.join(dir_pkg.directory, "root", binary)
-      binary_base_name = os.path.basename(binary)
-      args = [DUMP_BIN, "-Lv", binary_abs_path]
-      dump_proc = subprocess.Popen(args, stdout=subprocess.PIPE, env=env)
-      stdout, stderr = dump_proc.communicate()
-      ret = dump_proc.wait()
-      binary_data = ParseDumpOutput(stdout)
-      binary_data["path"] = binary
-      binary_data["base_name"] = binary_base_name
-      binaries_dump_info.append(binary_data)
-    return binaries_dump_info
-
-  def GetBasicStats(self):
-    dir_pkg = self.GetDirFormatPkg()
-    basic_stats = {}
-    basic_stats["stats_version"] = PACKAGE_STATS_VERSION
-    basic_stats["pkg_path"] = self.srv4_pkg.pkg_path
-    basic_stats["pkg_basename"] = os.path.basename(self.srv4_pkg.pkg_path)
-    basic_stats["parsed_basename"] = opencsw.ParsePackageFileName(
-        basic_stats["pkg_basename"])
-    basic_stats["pkgname"] = dir_pkg.pkgname
-    basic_stats["catalogname"] = dir_pkg.GetCatalogname()
-    basic_stats["md5_sum"] = self.GetMd5sum()
-    return basic_stats
-
-  def GetOverrides(self):
-    dir_pkg = self.GetDirFormatPkg()
-    override_list = dir_pkg.GetOverrides()
-    def OverrideToDict(override):
-      return {
-        "pkgname":  override.pkgname,
-        "tag_name":  override.tag_name,
-        "tag_info":  override.tag_info,
-      }
-    overrides_simple = [OverrideToDict(x) for x in override_list]
-    return overrides_simple
-
-  def GetLddMinusRlines(self):
-    """Returns ldd -r output."""
-    dir_pkg = self.GetDirFormatPkg()
-    binaries = dir_pkg.ListBinaries()
-    ldd_output = {}
-    for binary in binaries:
-      binary_abspath = os.path.join(dir_pkg.directory, "root", binary)
-      # this could be potentially moved into the DirectoryFormatPackage class.
-      # ldd needs the binary to be executable
-      os.chmod(binary_abspath, 0755)
-      args = ["ldd", "-r", binary_abspath]
-      ldd_proc = subprocess.Popen(
-          args,
-          stdout=subprocess.PIPE,
-          stderr=subprocess.PIPE)
-      stdout, stderr = ldd_proc.communicate()
-      retcode = ldd_proc.wait()
-      if retcode:
-        logging.error("%s returned an error: %s", args, stderr)
-      ldd_info = []
-      for line in stdout.splitlines():
-        ldd_info.append(self._ParseLddDashRline(line))
-      ldd_output[binary] = ldd_info
-    return ldd_output
-
-  def GetDefinedSymbols(self):
-    """Returns text symbols (i.e. defined functions) for packaged ELF objects
-
-    To do this we parse output lines from nm similar to the following. "T"s are
-    the definitions which we are after.
-
-      0000104000 D _lib_version
-      0000986980 D _libiconv_version
-      0000000000 U abort
-      0000097616 T aliases_lookup
-    """
-    dir_pkg = self.GetDirFormatPkg()
-    binaries = dir_pkg.ListBinaries()
-    defined_symbols = {}
-
-    for binary in binaries:
-      binary_abspath = os.path.join(dir_pkg.directory, "root", binary)
-      # Get parsable, ld.so.1 relevant SHT_DYNSYM symbol information
-      args = ["/usr/ccs/bin/nm", "-p", "-D", binary_abspath]
-      nm_proc = subprocess.Popen(
-          args,
-          stdout=subprocess.PIPE,
-          stderr=subprocess.PIPE)
-      stdout, stderr = nm_proc.communicate()
-      retcode = nm_proc.wait()
-      if retcode:
-        logging.error("%s returned an error: %s", args, stderr)
-        continue
-      nm_out = stdout.splitlines()
-
-      defined_symbols[binary] = []
-      for line in nm_out:
-        sym = self._ParseNmSymLine(line)
-        if not sym:
-          continue
-        if sym['type'] not in ("T", "D", "B"):
-          continue
-        defined_symbols[binary].append(sym['name'])
-
-    return defined_symbols
-
-  def _ParseNmSymLine(self, line):
-    re_defined_symbol =  re.compile('[0-9]+ [ABDFNSTU] \S+')
-    m = re_defined_symbol.match(line)
-    if not m:
-      return None
-    fields = line.split()
-    sym = { 'address': fields[0], 'type': fields[1], 'name': fields[2] }
-    return sym
-
-  def CollectStats(self, force=False):
-    """Lazy stats collection."""
-    if force or not self.StatsExist():
-      return self._CollectStats()
-    return self.ReadSavedStats()
-
-  def _CollectStats(self):
-    """The list of variables needs to be synchronized with the one
-    at the top of this class.
-    """
-    dir_pkg = self.GetDirFormatPkg()
-    logging.debug("Collecting %s package statistics.", repr(dir_pkg.pkgname))
-    override_dicts = self.GetOverrides()
-    pkg_stats = {
-        "binaries": dir_pkg.ListBinaries(),
-        "binaries_dump_info": self.GetBinaryDumpInfo(),
-        "depends": dir_pkg.GetDependencies(),
-        "isalist": GetIsalist(),
-        "overrides": override_dicts,
-        "pkgchk": self.GetPkgchkData(),
-        "pkginfo": dir_pkg.GetParsedPkginfo(),
-        "pkgmap": dir_pkg.GetPkgmap().entries,
-        "bad_paths": dir_pkg.GetFilesContaining(BAD_CONTENT_REGEXES),
-        "basic_stats": self.GetBasicStats(),
-        "files_metadata": dir_pkg.GetFilesMetadata(),
-        "mtime": self.GetMtime(),
-    }
-    pkgname = pkg_stats["basic_stats"]["pkgname"]
-    # Getting sqlobject representations.
-    try:
-      pkginst = m.Pkginst.select(m.Pkginst.q.pkgname==pkgname).getOne()
-    except sqlobject.main.SQLObjectNotFound, e:
-      logging.debug(e)
-      pkginst = m.Pkginst(pkgname=pkgname)
-    try:
-      res = m.Architecture.select(
-          m.Architecture.q.name==pkg_stats["pkginfo"]["ARCH"])
-      arch = res.getOne()
-    except sqlobject.main.SQLObjectNotFound, e:
-      logging.debug(e)
-      arch = m.Architecture(name=pkg_stats["pkginfo"]["ARCH"])
-    parsed_basename = pkg_stats["basic_stats"]["parsed_basename"]
-    os_rel_name = parsed_basename["osrel"]
-    try:
-      os_rel = m.OsRelease.select(
-          m.OsRelease.q.short_name==os_rel_name).getOne()
-    except sqlobject.main.SQLObjectNotFound, e:
-      logging.debug(e)
-      os_rel = m.OsRelease(short_name=os_rel_name, full_name=os_rel_name)
-    try:
-      maint_email = pkg_stats["pkginfo"]["EMAIL"]
-      maintainer = m.Maintainer.select(
-          m.Maintainer.q.email==maint_email).getOne()
-    except sqlobject.main.SQLObjectNotFound, e:
-      logging.debug(e)
-      maintainer = m.Maintainer(email=maint_email)
-
-    # If there are any previous records of the same pkginst, arch and os_rel,
-    # we're marking them as not-latest.
-    # This assumes that the packages are examined in a chronological order.
-    res = m.Srv4FileStats.select(sqlobject.AND(
-        m.Srv4FileStats.q.pkginst==pkginst,
-        m.Srv4FileStats.q.arch==arch,
-        m.Srv4FileStats.q.os_rel==os_rel))
-    for obj in res:
-      obj.latest = False
-
-    rev=None
-    if "revision_info" in parsed_basename:
-      if "REV" in parsed_basename["revision_info"]:
-        rev = parsed_basename["revision_info"]["REV"]
-    # Creating the object in the database.
-    db_pkg_stats = m.Srv4FileStats(
-        md5_sum=self.GetMd5sum(),
-        pkginst=pkginst,
-        catalogname=pkg_stats["basic_stats"]["catalogname"],
-        stats_version=PACKAGE_STATS_VERSION,
-        os_rel=os_rel,
-        arch=arch,
-        basename=pkg_stats["basic_stats"]["pkg_basename"],
-        maintainer=maintainer,
-        latest=True,
-        version_string=parsed_basename["full_version_string"],
-        rev=rev,
-        mtime=self.GetMtime(),
-        data=cPickle.dumps(pkg_stats))
-    # Inserting overrides as rows into the database
-    for override_dict in override_dicts:
-      o = m.CheckpkgOverride(srv4_file=db_pkg_stats,
-                             **override_dict)
-
-    # The ldd -r reporting breaks on bigger packages during yaml saving.
-    # It might work when yaml is disabled
-    # self.DumpObject(self.GetLddMinusRlines(), "ldd_dash_r")
-    # This check is currently disabled, let's save time by not collecting
-    # these data.
-    # self.DumpObject(self.GetDefinedSymbols(), "defined_symbols")
-    # This one should be last, so that if the collection is interrupted
-    # in one of the previous runs, the basic_stats.pickle file is not there
-    # or not updated, and the collection is started again.
-
-    logging.debug("Statistics of %s have been collected.", repr(dir_pkg.pkgname))
-    return pkg_stats
-
-  def GetAllStats(self):
-    if not self.all_stats and self.StatsExist():
-      self.all_stats = self.ReadSavedStats()
-    elif not self.all_stats:
-      self.all_stats = self.CollectStats()
-    return self.all_stats
-
-  def GetSavedOverrides(self):
-    if not self.StatsExist():
-      raise PackageError("Package stats not ready.")
-    pkg_stats = self.GetDbObject()
-    res = m.CheckpkgOverride.select(m.CheckpkgOverride.q.srv4_file==pkg_stats)
-    override_list = []
-    for db_override in res:
-      d = {
-          'pkgname': db_override.pkgname,
-          'tag_name': db_override.tag_name,
-          'tag_info': db_override.tag_info,
-      }
-      override_list.append(overrides.Override(**d))
-    return override_list
-
-  def GetSavedErrorTags(self):
-    pkg_stats = self.GetDbObject()
-    res = m.CheckpkgErrorTag.select(m.CheckpkgErrorTag.q.srv4_file==pkg_stats)
-    tag_list = [tag.CheckpkgTag(x.pkgname, x.tag_name, x.tag_info, x.msg)
-                for x in res]
-    return tag_list
-
-  def ReadSavedStats(self):
-    if not self.all_stats:
-      md5_sum = self.GetMd5sum()
-      res = m.Srv4FileStats.select(m.Srv4FileStats.q.md5_sum==md5_sum)
-      self.all_stats = cPickle.loads(str(res.getOne().data))
-    return self.all_stats
-
-  def _ParseLddDashRline(self, line):
-    found_re = r"^\t(?P<soname>\S+)\s+=>\s+(?P<path_found>\S+)"
-    symbol_not_found_re = (r"^\tsymbol not found:\s(?P<symbol>\S+)\s+"
-                           r"\((?P<path_not_found>\S+)\)")
-    only_so = r"^\t(?P<path_only>\S+)$"
-    version_so = (r'^\t(?P<soname_version_not_found>\S+) '
-                  r'\((?P<lib_name>\S+)\) =>\t \(version not found\)')
-    stv_protected = (r'^\trelocation \S+ symbol: (?P<relocation_symbol>\S+): '
-                     r'file (?P<relocation_path>\S+): '
-                     r'relocation bound to a symbol '
-                     r'with STV_PROTECTED visibility$')
-    sizes_differ = (r'^\trelocation \S+ sizes differ: '
-                    r'(?P<sizes_differ_symbol>\S+)$')
-    sizes_info = (r'^\t\t\(file (?P<sizediff_file1>\S+) size=(?P<size1>0x\w+); '
-                  r'file (?P<sizediff_file2>\S+) size=(?P<size2>0x\w+)\)$')
-    sizes_one_used = (r'^\t\t(?P<sizediffused_file>\S+) size used; '
-                      r'possible insufficient data copied$')
-    common_re = (r"(%s|%s|%s|%s|%s|%s|%s|%s)"
-                 % (found_re, symbol_not_found_re, only_so, version_so,
-                    stv_protected, sizes_differ, sizes_info, sizes_one_used))
-    m = re.match(common_re, line)
-    response = {}
-    if m:
-      d = m.groupdict()
-      if "soname" in d and d["soname"]:
-        # it was found
-        response["state"] = "OK"
-        response["soname"] = d["soname"]
-        response["path"] = d["path_found"]
-        response["symbol"] = None
-      elif "symbol" in d and d["symbol"]:
-        response["state"] = "symbol-not-found"
-        response["soname"] = None
-        response["path"] = d["path_not_found"]
-        response["symbol"] = d["symbol"]
-      elif d["path_only"]:
-        response["state"] = "OK"
-        response["soname"] = None
-        response["path"] = d["path_only"]
-        response["symbol"] = None
-      elif d["soname_version_not_found"]:
-        response["state"] = "version-not-found"
-        response["soname"] = d["soname_version_not_found"]
-        response["path"] = None
-        response["symbol"] = None
-      elif d["relocation_symbol"]:
-        response["state"] = 'relocation-bound-to-a-symbol-with-STV_PROTECTED-visibility'
-        response["soname"] = None
-        response["path"] = d["relocation_path"]
-        response["symbol"] = d["relocation_symbol"]
-      elif d["sizes_differ_symbol"]:
-        response["state"] = 'sizes-differ'
-        response["soname"] = None
-        response["path"] = None
-        response["symbol"] = d["sizes_differ_symbol"]
-      elif d["sizediff_file1"]:
-        response["state"] = 'sizes-diff-info'
-        response["soname"] = None
-        response["path"] = "%s %s" % (d["sizediff_file1"], d["sizediff_file2"])
-        response["symbol"] = None
-      elif d["sizediffused_file"]:
-        response["state"] = 'sizes-diff-one-used'
-        response["soname"] = None
-        response["path"] = "%s" % (d["sizediffused_file"])
-        response["symbol"] = None
-      else:
-        raise StdoutSyntaxError("Could not parse %s with %s"
-                                % (repr(line), common_re))
-    else:
-      raise StdoutSyntaxError("Could not parse %s with %s"
-                              % (repr(line), common_re))
-    return response
-
-
 def ErrorTagsFromFile(file_name):
   fd = open(file_name)
   error_tags = []
@@ -1513,20 +1046,6 @@
   return error_tags
 
 
-def StatsListFromCatalog(file_name_list, catalog_file_name=None, debug=False):
-  packages = [opencsw.CswSrv4File(x, debug) for x in file_name_list]
-  if catalog_file_name:
-    catalog = opencsw.OpencswCatalog(catalog_file_name)
-    md5s_by_basename = catalog.GetDataByBasename()
-    for pkg in packages:
-      basename = os.path.basename(pkg.pkg_path)
-      # It might be the case that a file is present on disk, but missing from
-      # the catalog file.
-      if basename in md5s_by_basename:
-        pkg.md5sum = md5s_by_basename[basename]["md5sum"]
-  stats_list = [PackageStats(pkg) for pkg in packages]
-  return stats_list
-
 def SliceList(l, size):
   """Trasforms a list into a list of lists."""
   idxes = xrange(0, len(l), size)
@@ -1546,14 +1065,14 @@
       md5s.append(arg)
     else:
       filenames.append(arg)
-  srv4_pkgs = [opencsw.CswSrv4File(x) for x in filenames]
+  srv4_pkgs = [package.CswSrv4File(x) for x in filenames]
   pkgstat_objs = []
   bar = progressbar.ProgressBar()
   bar.maxval = len(md5s) + len(srv4_pkgs)
   bar.start()
   counter = itertools.count()
   for pkg in srv4_pkgs:
-    pkgstat_objs.append(PackageStats(pkg, debug=debug))
+    pkgstat_objs.append(package_stats.PackageStats(pkg, debug=debug))
     bar.update(counter.next())
   for md5 in md5s:
     pkgstat_objs.append(PackageStats(None, md5sum=md5, debug=debug))

Modified: csw/mgar/gar/v2/lib/python/checkpkg_test.py
===================================================================
--- csw/mgar/gar/v2/lib/python/checkpkg_test.py	2010-10-10 20:37:57 UTC (rev 11223)
+++ csw/mgar/gar/v2/lib/python/checkpkg_test.py	2010-10-10 20:38:42 UTC (rev 11224)
@@ -23,18 +23,6 @@
 SELECT * FROM systempkgmap WHERE basename = 'libncursesw.so.5';
 """
 
-LDD_R_OUTPUT_1 =  """\tlibc.so.1 =>  /lib/libc.so.1
-\tsymbol not found: check_encoding_conversion_args    (/opt/csw/lib/postgresql/8.4/utf8_and_gbk.so)
-\tsymbol not found: LocalToUtf    (/opt/csw/lib/postgresql/8.4/utf8_and_gbk.so)
-\tsymbol not found: UtfToLocal    (/opt/csw/lib/postgresql/8.4/utf8_and_gbk.so)
-\tlibm.so.2 =>   /lib/libm.so.2
-\t/usr/lib/secure/s8_preload.so.1
-\tlibXext.so.0 (SUNW_1.1) =>\t (version not found)
-\trelocation R_SPARC_COPY symbol: ASN1_OCTET_STRING_it: file /opt/csw/lib/sparcv8plus+vis/libcrypto.so.0.9.8: relocation bound to a symbol with STV_PROTECTED visibility
-\trelocation R_SPARC_COPY sizes differ: _ZTI7QWidget
-\t\t(file /tmp/pkg_GqCk0P/CSWkdeartworkgcc/root/opt/csw/kde-gcc/bin/kslideshow.kss size=0x28; file /opt/csw/kde-gcc/lib/libqt-mt.so.3 size=0x20)
-"""
-
 class GetLinesBySonameUnitTest(unittest.TestCase):
 
   def setUp(self):
@@ -279,116 +267,6 @@
     self.assertEqual(expected, spkgmap._InferPackagesFromPkgmapLine(line))
 
 
-class PackageStatsUnitTest(unittest.TestCase):
-
-  def setUp(self):
-    self.pkgstats = checkpkg.PackageStats(None)
-
-  def test_ParseNmSymLineGoodLine(self):
-    line = '0000097616 T aliases_lookup'
-    expected = {
-        'address': '0000097616',
-        'type': 'T',
-        'name': 'aliases_lookup',
-    }
-    self.assertEqual(expected, self.pkgstats._ParseNmSymLine(line))
-
-  def test_ParseNmSymLineBadLine(self):
-    line = 'foo'
-    self.assertEqual(None, self.pkgstats._ParseNmSymLine(line))
-
-  def test_ParseLddDashRlineFound(self):
-    line = '\tlibc.so.1 =>  /lib/libc.so.1'
-    expected = {
-        'state': 'OK',
-        'soname': 'libc.so.1',
-        'path': '/lib/libc.so.1',
-        'symbol': None,
-    }
-    self.assertEqual(expected, self.pkgstats._ParseLddDashRline(line))
-
-  def test_ParseLddDashRlineSymbolMissing(self):
-    line = ('\tsymbol not found: check_encoding_conversion_args    '
-            '(/opt/csw/lib/postgresql/8.4/utf8_and_gbk.so)')
-    expected = {
-        'state': 'symbol-not-found',
-        'soname': None,
-        'path': '/opt/csw/lib/postgresql/8.4/utf8_and_gbk.so',
-        'symbol': 'check_encoding_conversion_args',
-    }
-    self.assertEqual(expected, self.pkgstats._ParseLddDashRline(line))
-
-  def test_ParseLddDashRlineFound(self):
-    line = '\t/usr/lib/secure/s8_preload.so.1'
-    expected = {
-        'state': 'OK',
-        'soname': None,
-        'path': '/usr/lib/secure/s8_preload.so.1',
-        'symbol': None,
-    }
-    self.assertEqual(expected, self.pkgstats._ParseLddDashRline(line))
-
-  def test_ParseLdd_VersionNotFound(self):
-    line = '\tlibXext.so.0 (SUNW_1.1) =>\t (version not found)'
-    expected = {
-        'symbol': None,
-        'soname': 'libXext.so.0',
-        'path': None,
-        'state': 'version-not-found',
-    }
-    self.assertEqual(expected, self.pkgstats._ParseLddDashRline(line))
-
-  def test_ParseLdd_StvProtectedVisibility(self):
-    line = ('\trelocation R_SPARC_COPY symbol: ASN1_OCTET_STRING_it: '
-            'file /opt/csw/lib/sparcv8plus+vis/libcrypto.so.0.9.8: '
-            'relocation bound to a symbol with STV_PROTECTED visibility')
-    expected = {
-        'symbol': 'ASN1_OCTET_STRING_it',
-        'soname': None,
-        'path': '/opt/csw/lib/sparcv8plus+vis/libcrypto.so.0.9.8',
-        'state': 'relocation-bound-to-a-symbol-with-STV_PROTECTED-visibility',
-    }
-    self.assertEqual(expected, self.pkgstats._ParseLddDashRline(line))
-
-  def test_ParseLdd_SizesDiffer(self):
-    line = '\trelocation R_SPARC_COPY sizes differ: _ZTI7QWidget'
-    expected = {
-        'symbol': '_ZTI7QWidget',
-        'soname': None,
-        'path': None,
-        'state': 'sizes-differ',
-    }
-    self.assertEqual(expected, self.pkgstats._ParseLddDashRline(line))
-
-  def test_ParseLdd_SizesDifferInfo(self):
-    line = ('\t\t(file /tmp/pkg_GqCk0P/CSWkdeartworkgcc/root/opt/csw/kde-gcc/bin/'
-            'kslideshow.kss size=0x28; '
-            'file /opt/csw/kde-gcc/lib/libqt-mt.so.3 size=0x20)')
-    expected = {
-        'symbol': None,
-        'path': ('/tmp/pkg_GqCk0P/CSWkdeartworkgcc/root/opt/csw/kde-gcc/'
-                 'bin/kslideshow.kss /opt/csw/kde-gcc/lib/libqt-mt.so.3'),
-        'state': 'sizes-diff-info',
-        'soname': None,
-    }
-    self.assertEqual(expected, self.pkgstats._ParseLddDashRline(line))
-
-  def test_ParseLdd_SizesDifferOneUsed(self):
-    line = ('\t\t/opt/csw/kde-gcc/lib/libqt-mt.so.3 size used; '
-            'possible insufficient data copied')
-    expected = {
-        'symbol': None,
-        'path': '/opt/csw/kde-gcc/lib/libqt-mt.so.3',
-        'state': 'sizes-diff-one-used',
-        'soname': None,
-    }
-    self.assertEqual(expected, self.pkgstats._ParseLddDashRline(line))
-
-  def test_ParseLddDashRlineManyLines(self):
-    for line in LDD_R_OUTPUT_1.splitlines():
-      parsed = self.pkgstats._ParseLddDashRline(line)
-
-
 class ExtractorsUnitTest(unittest.TestCase):
 
   def testExtractDescriptionFromGoodData(self):

Added: csw/mgar/gar/v2/lib/python/database.py
===================================================================
--- csw/mgar/gar/v2/lib/python/database.py	                        (rev 0)
+++ csw/mgar/gar/v2/lib/python/database.py	2010-10-10 20:38:42 UTC (rev 11224)
@@ -0,0 +1,57 @@
+import socket
+import os
+import sqlobject
+import models as m
+
+DB_SCHEMA_VERSION = 5L
+
+
+class DatabaseClient(object):
+
+  CHECKPKG_DIR = ".checkpkg"
+  SQLITE3_DBNAME_TMPL = "checkpkg-db-%(fqdn)s"
+  TABLES_THAT_NEED_UPDATES = (m.CswFile,)
+  TABLES = TABLES_THAT_NEED_UPDATES + (
+            m.Pkginst,
+            m.CswConfig,
+            m.Srv4FileStats,
+            m.CheckpkgOverride,
+            m.CheckpkgErrorTag,
+            m.Architecture,
+            m.OsRelease,
+            m.Maintainer)
+  sqo_conn = None
+  db_path = None
+
+  def __init__(self, debug=False):
+    self.debug = debug
+
+  @classmethod
+  def GetDatabasePath(cls):
+    if not cls.db_path:
+      dbname_dict = {'fqdn': socket.getfqdn()}
+      db_filename = cls.SQLITE3_DBNAME_TMPL % dbname_dict
+      home_dir = os.environ["HOME"]
+      cls.db_path = os.path.join(home_dir, cls.CHECKPKG_DIR, db_filename)
+    return cls.db_path
+
+  @classmethod
+  def InitializeSqlobject(cls):
+    """Establishes a database connection and stores it as a class member.
+
+    The idea is to share the database connection between instances.  It would
+    be solved even better if the connection was passed to the class
+    constructor.
+    """
+    if not cls.sqo_conn:
+      db_path = cls.GetDatabasePath()
+      cls.sqo_conn = sqlobject.connectionForURI('sqlite:%s' % db_path)
+      sqlobject.sqlhub.processConnection = cls.sqo_conn
+
+  def CreateTables(self):
+    for table in self.TABLES:
+      table.createTable(ifNotExists=True)
+
+  def IsDatabaseGoodSchema(self):
+    good_version = self.GetDatabaseSchemaVersion() >= DB_SCHEMA_VERSION
+    return good_version

Modified: csw/mgar/gar/v2/lib/python/opencsw.py
===================================================================
--- csw/mgar/gar/v2/lib/python/opencsw.py	2010-10-10 20:37:57 UTC (rev 11223)
+++ csw/mgar/gar/v2/lib/python/opencsw.py	2010-10-10 20:38:42 UTC (rev 11224)
@@ -102,10 +102,6 @@
   pass
 
 
-class CatalogLineParseError(Error):
-  pass
-
-
 def ParsePackageFileName(p):
   if p.endswith(".gz"):
     p = p[:-3]

Modified: csw/mgar/gar/v2/lib/python/package.py
===================================================================
--- csw/mgar/gar/v2/lib/python/package.py	2010-10-10 20:37:57 UTC (rev 11223)
+++ csw/mgar/gar/v2/lib/python/package.py	2010-10-10 20:38:42 UTC (rev 11224)
@@ -1,7 +1,10 @@
 #!/usr/bin/env python2.6
 
+import hashlib
+import logging
+import subprocess
+
 import magic
-import subprocess
 import hachoir_parser as hp
 
 # Suppress unhelpful warnings

Added: csw/mgar/gar/v2/lib/python/package_stats.py
===================================================================
--- csw/mgar/gar/v2/lib/python/package_stats.py	                        (rev 0)
+++ csw/mgar/gar/v2/lib/python/package_stats.py	2010-10-10 20:38:42 UTC (rev 11224)
@@ -0,0 +1,446 @@
+#!/usr/bin/env python2.6
+
import cPickle
import copy
import errno
import logging
import os
import re
import subprocess

import sqlobject

import catalog
import database
import models as m
import opencsw
import overrides
import package
import tag
+
+PACKAGE_STATS_VERSION = 9L
+
class PackageStats(database.DatabaseClient):
  """Collects stats about a package and saves it.

  TODO: Maintain a global database connection instead of creating one for each
  instantiated object.
  TODO: Store overrides in a separate table for performance.
  """

  def __init__(self, srv4_pkg, stats_basedir=None, md5sum=None, debug=False):
    """Wraps a srv4 package; stats_basedir defaults to ~/.checkpkg/stats."""
    super(PackageStats, self).__init__(debug=debug)
    self.srv4_pkg = srv4_pkg
    self.md5sum = md5sum
    # Lazily-populated caches.
    self.dir_format_pkg = None
    self.all_stats = {}
    self.db_pkg_stats = None
    if stats_basedir:
      self.stats_basedir = stats_basedir
    else:
      home = os.environ["HOME"]
      self.stats_basedir = os.path.join(home, ".checkpkg", "stats")
    self.InitializeSqlobject()
+
+  def GetPkgchkData(self):
+    ret, stdout, stderr = self.srv4_pkg.GetPkgchkOutput()
+    data = {
+        'return_code': ret,
+        'stdout_lines': stdout.splitlines(),
+        'stderr_lines': stderr.splitlines(),
+    }
+    return data
+
+  def GetMd5sum(self):
+    if not self.md5sum:
+      self.md5sum = self.srv4_pkg.GetMd5sum()
+    return self.md5sum
+
+  def GetDbObject(self):
+    if not self.db_pkg_stats:
+      md5_sum = self.GetMd5sum()
+      res = m.Srv4FileStats.select(m.Srv4FileStats.q.md5_sum==md5_sum)
+      if not res.count():
+        # TODO: Change this bit to throw an exception if the object is not
+        # found.
+        return None
+      else:
+        self.db_pkg_stats = res.getOne()
+    return self.db_pkg_stats
+
+
+  def StatsExist(self):
+    """Checks if statistics of a package exist.
+
+    Returns:
+      bool
+    """
+    pkg_stats = self.GetDbObject()
+    if not pkg_stats:
+      return False
+    if pkg_stats.stats_version != PACKAGE_STATS_VERSION:
+      pkg_stats.destroySelf()
+    else:
+      return True
+    return False
+
+  def GetDirFormatPkg(self):
+    if not self.dir_format_pkg:
+      self.dir_format_pkg = self.srv4_pkg.GetDirFormatPkg()
+    return self.dir_format_pkg
+
+  def GetMtime(self):
+    return self.srv4_pkg.GetMtime()
+
+  def _MakeDirP(self, dir_path):
+    """mkdir -p equivalent.
+
+    http://stackoverflow.com/questions/600268/mkdir-p-functionality-in-python
+    """
+    try:
+      os.makedirs(dir_path)
+    except OSError, e:
+      if e.errno == errno.EEXIST:
+        pass
+      else:
+        raise
+
  def GetBinaryDumpInfo(self):
    """Runs dump -Lv over each packaged binary and parses the output.

    Returns:
      A list of dicts, one per binary, as produced by ParseDumpOutput(),
      with "path" and "base_name" keys added.
    """
    dir_pkg = self.GetDirFormatPkg()
    # Binaries. This could be split off to a separate function.
    # man ld.so.1 for more info on this hack
    env = copy.copy(os.environ)
    env["LD_NOAUXFLTR"] = "1"
    binaries_dump_info = []
    for binary in dir_pkg.ListBinaries():
      binary_abs_path = os.path.join(dir_pkg.directory, "root", binary)
      binary_base_name = os.path.basename(binary)
      # NOTE(review): DUMP_BIN and ParseDumpOutput are not defined in this
      # module; presumably they live in checkpkg.py -- confirm and import.
      args = [DUMP_BIN, "-Lv", binary_abs_path]
      dump_proc = subprocess.Popen(args, stdout=subprocess.PIPE, env=env)
      stdout, stderr = dump_proc.communicate()
      ret = dump_proc.wait()
      binary_data = ParseDumpOutput(stdout)
      binary_data["path"] = binary
      binary_data["base_name"] = binary_base_name
      binaries_dump_info.append(binary_data)
    return binaries_dump_info
+
+  def GetBasicStats(self):
+    dir_pkg = self.GetDirFormatPkg()
+    basic_stats = {}
+    basic_stats["stats_version"] = PACKAGE_STATS_VERSION
+    basic_stats["pkg_path"] = self.srv4_pkg.pkg_path
+    basic_stats["pkg_basename"] = os.path.basename(self.srv4_pkg.pkg_path)
+    basic_stats["parsed_basename"] = opencsw.ParsePackageFileName(
+        basic_stats["pkg_basename"])
+    basic_stats["pkgname"] = dir_pkg.pkgname
+    basic_stats["catalogname"] = dir_pkg.GetCatalogname()
+    basic_stats["md5_sum"] = self.GetMd5sum()
+    return basic_stats
+
+  def GetOverrides(self):
+    dir_pkg = self.GetDirFormatPkg()
+    override_list = dir_pkg.GetOverrides()
+    def OverrideToDict(override):
+      return {
+        "pkgname":  override.pkgname,
+        "tag_name":  override.tag_name,
+        "tag_info":  override.tag_info,
+      }
+    overrides_simple = [OverrideToDict(x) for x in override_list]
+    return overrides_simple
+
+  def GetLddMinusRlines(self):
+    """Returns ldd -r output."""
+    dir_pkg = self.GetDirFormatPkg()
+    binaries = dir_pkg.ListBinaries()
+    ldd_output = {}
+    for binary in binaries:
+      binary_abspath = os.path.join(dir_pkg.directory, "root", binary)
+      # this could be potentially moved into the DirectoryFormatPackage class.
+      # ldd needs the binary to be executable
+      os.chmod(binary_abspath, 0755)
+      args = ["ldd", "-r", binary_abspath]
+      ldd_proc = subprocess.Popen(
+          args,
+          stdout=subprocess.PIPE,
+          stderr=subprocess.PIPE)
+      stdout, stderr = ldd_proc.communicate()
+      retcode = ldd_proc.wait()
+      if retcode:
+        logging.error("%s returned an error: %s", args, stderr)
+      ldd_info = []
+      for line in stdout.splitlines():
+        ldd_info.append(self._ParseLddDashRline(line))
+      ldd_output[binary] = ldd_info
+    return ldd_output
+
+  def GetDefinedSymbols(self):
+    """Returns text symbols (i.e. defined functions) for packaged ELF objects
+
+    To do this we parse output lines from nm similar to the following. "T"s are
+    the definitions which we are after.
+
+      0000104000 D _lib_version
+      0000986980 D _libiconv_version
+      0000000000 U abort
+      0000097616 T aliases_lookup
+    """
+    dir_pkg = self.GetDirFormatPkg()
+    binaries = dir_pkg.ListBinaries()
+    defined_symbols = {}
+
+    for binary in binaries:
+      binary_abspath = os.path.join(dir_pkg.directory, "root", binary)
+      # Get parsable, ld.so.1 relevant SHT_DYNSYM symbol information
+      args = ["/usr/ccs/bin/nm", "-p", "-D", binary_abspath]
+      nm_proc = subprocess.Popen(
+          args,
+          stdout=subprocess.PIPE,
+          stderr=subprocess.PIPE)
+      stdout, stderr = nm_proc.communicate()
+      retcode = nm_proc.wait()
+      if retcode:
+        logging.error("%s returned an error: %s", args, stderr)
+        continue
+      nm_out = stdout.splitlines()
+
+      defined_symbols[binary] = []
+      for line in nm_out:
+        sym = self._ParseNmSymLine(line)
+        if not sym:
+          continue
+        if sym['type'] not in ("T", "D", "B"):
+          continue
+        defined_symbols[binary].append(sym['name'])
+
+    return defined_symbols
+
+  def _ParseNmSymLine(self, line):
+    re_defined_symbol =  re.compile('[0-9]+ [ABDFNSTU] \S+')
+    m = re_defined_symbol.match(line)
+    if not m:
+      return None
+    fields = line.split()
+    sym = { 'address': fields[0], 'type': fields[1], 'name': fields[2] }
+    return sym
+
+  def CollectStats(self, force=False):
+    """Lazy stats collection."""
+    if force or not self.StatsExist():
+      return self._CollectStats()
+    return self.ReadSavedStats()
+
  def _CollectStats(self):
    """Collects all package statistics and stores them in the database.

    The list of variables needs to be synchronized with the one
    at the top of this class.

    Returns:
      A dict with all collected statistics (also pickled into the
      Srv4FileStats row).
    """
    dir_pkg = self.GetDirFormatPkg()
    logging.debug("Collecting %s package statistics.", repr(dir_pkg.pkgname))
    override_dicts = self.GetOverrides()
    pkg_stats = {
        "binaries": dir_pkg.ListBinaries(),
        "binaries_dump_info": self.GetBinaryDumpInfo(),
        "depends": dir_pkg.GetDependencies(),
        # NOTE(review): GetIsalist and BAD_CONTENT_REGEXES (below) are not
        # defined in this module; presumably from checkpkg.py -- confirm.
        "isalist": GetIsalist(),
        "overrides": override_dicts,
        "pkgchk": self.GetPkgchkData(),
        "pkginfo": dir_pkg.GetParsedPkginfo(),
        "pkgmap": dir_pkg.GetPkgmap().entries,
        "bad_paths": dir_pkg.GetFilesContaining(BAD_CONTENT_REGEXES),
        "basic_stats": self.GetBasicStats(),
        "files_metadata": dir_pkg.GetFilesMetadata(),
        "mtime": self.GetMtime(),
    }
    pkgname = pkg_stats["basic_stats"]["pkgname"]
    # Getting sqlobject representations.  Each lookup is a get-or-create:
    # the SQLObjectNotFound handler creates the missing row.
    try:
      pkginst = m.Pkginst.select(m.Pkginst.q.pkgname==pkgname).getOne()
    except sqlobject.main.SQLObjectNotFound, e:
      logging.debug(e)
      pkginst = m.Pkginst(pkgname=pkgname)
    try:
      res = m.Architecture.select(
          m.Architecture.q.name==pkg_stats["pkginfo"]["ARCH"])
      arch = res.getOne()
    except sqlobject.main.SQLObjectNotFound, e:
      logging.debug(e)
      arch = m.Architecture(name=pkg_stats["pkginfo"]["ARCH"])
    parsed_basename = pkg_stats["basic_stats"]["parsed_basename"]
    os_rel_name = parsed_basename["osrel"]
    try:
      os_rel = m.OsRelease.select(
          m.OsRelease.q.short_name==os_rel_name).getOne()
    except sqlobject.main.SQLObjectNotFound, e:
      logging.debug(e)
      os_rel = m.OsRelease(short_name=os_rel_name, full_name=os_rel_name)
    try:
      maint_email = pkg_stats["pkginfo"]["EMAIL"]
      maintainer = m.Maintainer.select(
          m.Maintainer.q.email==maint_email).getOne()
    except sqlobject.main.SQLObjectNotFound, e:
      logging.debug(e)
      maintainer = m.Maintainer(email=maint_email)

    # If there are any previous records of the same pkginst, arch and os_rel,
    # we're marking them as not-latest.
    # This assumes that the packages are examined in a chronological order.
    res = m.Srv4FileStats.select(sqlobject.AND(
        m.Srv4FileStats.q.pkginst==pkginst,
        m.Srv4FileStats.q.arch==arch,
        m.Srv4FileStats.q.os_rel==os_rel))
    for obj in res:
      obj.latest = False

    rev=None
    if "revision_info" in parsed_basename:
      if "REV" in parsed_basename["revision_info"]:
        rev = parsed_basename["revision_info"]["REV"]
    # Creating the object in the database.
    db_pkg_stats = m.Srv4FileStats(
        md5_sum=self.GetMd5sum(),
        pkginst=pkginst,
        catalogname=pkg_stats["basic_stats"]["catalogname"],
        stats_version=PACKAGE_STATS_VERSION,
        os_rel=os_rel,
        arch=arch,
        basename=pkg_stats["basic_stats"]["pkg_basename"],
        maintainer=maintainer,
        latest=True,
        version_string=parsed_basename["full_version_string"],
        rev=rev,
        mtime=self.GetMtime(),
        data=cPickle.dumps(pkg_stats))
    # Inserting overrides as rows into the database
    for override_dict in override_dicts:
      o = m.CheckpkgOverride(srv4_file=db_pkg_stats,
                             **override_dict)

    # The ldd -r reporting breaks on bigger packages during yaml saving.
    # It might work when yaml is disabled
    # self.DumpObject(self.GetLddMinusRlines(), "ldd_dash_r")
    # This check is currently disabled, let's save time by not collecting
    # these data.
    # self.DumpObject(self.GetDefinedSymbols(), "defined_symbols")
    # This one should be last, so that if the collection is interrupted
    # in one of the previous runs, the basic_stats.pickle file is not there
    # or not updated, and the collection is started again.

    logging.debug("Statistics of %s have been collected.", repr(dir_pkg.pkgname))
    return pkg_stats
+
+  def GetAllStats(self):
+    if not self.all_stats and self.StatsExist():
+      self.all_stats = self.ReadSavedStats()
+    elif not self.all_stats:
+      self.all_stats = self.CollectStats()
+    return self.all_stats
+
  def GetSavedOverrides(self):
    """Returns the overrides stored in the database for this package.

    Returns:
      A list of overrides.Override instances.

    Raises:
      PackageError: if the stats have not been collected yet.
    """
    if not self.StatsExist():
      # NOTE(review): PackageError is not defined or imported in this
      # module -- presumably it lives in package.py; confirm.
      raise PackageError("Package stats not ready.")
    pkg_stats = self.GetDbObject()
    res = m.CheckpkgOverride.select(m.CheckpkgOverride.q.srv4_file==pkg_stats)
    override_list = []
    for db_override in res:
      d = {
          'pkgname': db_override.pkgname,
          'tag_name': db_override.tag_name,
          'tag_info': db_override.tag_info,
      }
      # NOTE(review): the overrides module is not imported here -- confirm.
      override_list.append(overrides.Override(**d))
    return override_list
+
+  def GetSavedErrorTags(self):
+    pkg_stats = self.GetDbObject()
+    res = m.CheckpkgErrorTag.select(m.CheckpkgErrorTag.q.srv4_file==pkg_stats)
+    tag_list = [tag.CheckpkgTag(x.pkgname, x.tag_name, x.tag_info, x.msg)
+                for x in res]
+    return tag_list
+
+  def ReadSavedStats(self):
+    if not self.all_stats:
+      md5_sum = self.GetMd5sum()
+      res = m.Srv4FileStats.select(m.Srv4FileStats.q.md5_sum==md5_sum)
+      self.all_stats = cPickle.loads(str(res.getOne().data))
+    return self.all_stats
+
  def _ParseLddDashRline(self, line):
    """Parses a single line of `ldd -r` output.

    Tries a union of known line formats (library resolved, symbol not
    found, version not found, STV_PROTECTED relocation, size mismatch
    diagnostics) and normalizes the match into a fixed-key dict.

    Returns:
      A dict with the keys "state", "soname", "path" and "symbol";
      keys that do not apply to the matched format are set to None.

    Raises:
      StdoutSyntaxError: when no known format matches.
        NOTE(review): StdoutSyntaxError is not defined or imported in
        this module -- confirm where it lives (checkpkg.py?).
    """
    found_re = r"^\t(?P<soname>\S+)\s+=>\s+(?P<path_found>\S+)"
    symbol_not_found_re = (r"^\tsymbol not found:\s(?P<symbol>\S+)\s+"
                           r"\((?P<path_not_found>\S+)\)")
    only_so = r"^\t(?P<path_only>\S+)$"
    version_so = (r'^\t(?P<soname_version_not_found>\S+) '
                  r'\((?P<lib_name>\S+)\) =>\t \(version not found\)')
    stv_protected = (r'^\trelocation \S+ symbol: (?P<relocation_symbol>\S+): '
                     r'file (?P<relocation_path>\S+): '
                     r'relocation bound to a symbol '
                     r'with STV_PROTECTED visibility$')
    sizes_differ = (r'^\trelocation \S+ sizes differ: '
                    r'(?P<sizes_differ_symbol>\S+)$')
    sizes_info = (r'^\t\t\(file (?P<sizediff_file1>\S+) size=(?P<size1>0x\w+); '
                  r'file (?P<sizediff_file2>\S+) size=(?P<size2>0x\w+)\)$')
    sizes_one_used = (r'^\t\t(?P<sizediffused_file>\S+) size used; '
                      r'possible insufficient data copied$')
    # One alternation; exactly one branch's named groups will be non-None.
    common_re = (r"(%s|%s|%s|%s|%s|%s|%s|%s)"
                 % (found_re, symbol_not_found_re, only_so, version_so,
                    stv_protected, sizes_differ, sizes_info, sizes_one_used))
    m = re.match(common_re, line)
    response = {}
    if m:
      d = m.groupdict()
      if "soname" in d and d["soname"]:
        # it was found
        response["state"] = "OK"
        response["soname"] = d["soname"]
        response["path"] = d["path_found"]
        response["symbol"] = None
      elif "symbol" in d and d["symbol"]:
        response["state"] = "symbol-not-found"
        response["soname"] = None
        response["path"] = d["path_not_found"]
        response["symbol"] = d["symbol"]
      elif d["path_only"]:
        response["state"] = "OK"
        response["soname"] = None
        response["path"] = d["path_only"]
        response["symbol"] = None
      elif d["soname_version_not_found"]:
        response["state"] = "version-not-found"
        response["soname"] = d["soname_version_not_found"]
        response["path"] = None
        response["symbol"] = None
      elif d["relocation_symbol"]:
        response["state"] = 'relocation-bound-to-a-symbol-with-STV_PROTECTED-visibility'
        response["soname"] = None
        response["path"] = d["relocation_path"]
        response["symbol"] = d["relocation_symbol"]
      elif d["sizes_differ_symbol"]:
        response["state"] = 'sizes-differ'
        response["soname"] = None
        response["path"] = None
        response["symbol"] = d["sizes_differ_symbol"]
      elif d["sizediff_file1"]:
        response["state"] = 'sizes-diff-info'
        response["soname"] = None
        response["path"] = "%s %s" % (d["sizediff_file1"], d["sizediff_file2"])
        response["symbol"] = None
      elif d["sizediffused_file"]:
        response["state"] = 'sizes-diff-one-used'
        response["soname"] = None
        response["path"] = "%s" % (d["sizediffused_file"])
        response["symbol"] = None
      else:
        # Matched the alternation but no recognized group was set.
        raise StdoutSyntaxError("Could not parse %s with %s"
                                % (repr(line), common_re))
    else:
      raise StdoutSyntaxError("Could not parse %s with %s"
                              % (repr(line), common_re))
    return response
+
+
def StatsListFromCatalog(file_name_list, catalog_file_name=None, debug=False):
  """Creates PackageStats objects for files, attaching catalog md5 sums.

  Args:
    file_name_list: Paths to srv4 package files.
    catalog_file_name: Optional catalog file; when given, md5 sums found
      there are attached to the packages, saving recomputation.
    debug: Propagated to the created objects.

  Returns:
    A list of PackageStats instances, one per input file.
  """
  packages = [package.CswSrv4File(x, debug) for x in file_name_list]
  if catalog_file_name:
    catalog_obj = catalog.OpencswCatalog(catalog_file_name)
    md5s_by_basename = catalog_obj.GetDataByBasename()
    for pkg in packages:
      basename = os.path.basename(pkg.pkg_path)
      # It might be the case that a file is present on disk, but missing from
      # the catalog file.
      if basename in md5s_by_basename:
        pkg.md5sum = md5s_by_basename[basename]["md5sum"]
  # Bug fix: the debug flag was accepted but never forwarded to PackageStats.
  return [PackageStats(pkg, debug=debug) for pkg in packages]

Modified: csw/mgar/gar/v2/tests/run_tests.py
===================================================================
--- csw/mgar/gar/v2/tests/run_tests.py	2010-10-10 20:37:57 UTC (rev 11223)
+++ csw/mgar/gar/v2/tests/run_tests.py	2010-10-10 20:38:42 UTC (rev 11224)
@@ -18,6 +18,7 @@
 from sharedlib_utils_test    import *
 from catalog_test            import *
 from package_test            import *
+from package_stats_test      import *
 
 # These are very slow GAR tests, which I'm disabling for now.
 # from example_test            import *


This was sent by the SourceForge.net collaborative development platform, the world's largest Open Source development site.


More information about the devel mailing list