[csw-devel] SF.net SVN: gar:[10435] csw/mgar/gar/v2-sqlite

wahwah at users.sourceforge.net
Mon Jul 5 16:14:07 CEST 2010


Revision: 10435
          http://gar.svn.sourceforge.net/gar/?rev=10435&view=rev
Author:   wahwah
Date:     2010-07-05 14:14:07 +0000 (Mon, 05 Jul 2010)

Log Message:
-----------
mGAR v2-sqlite: First working version; needs more performance tweaks.

Modified Paths:
--------------
    csw/mgar/gar/v2-sqlite/bin/checkpkg_collect_stats.py
    csw/mgar/gar/v2-sqlite/lib/python/checkpkg.py
    csw/mgar/gar/v2-sqlite/lib/python/models.py
    csw/mgar/gar/v2-sqlite/lib/python/overrides.py

Modified: csw/mgar/gar/v2-sqlite/bin/checkpkg_collect_stats.py
===================================================================
--- csw/mgar/gar/v2-sqlite/bin/checkpkg_collect_stats.py	2010-07-05 14:12:48 UTC (rev 10434)
+++ csw/mgar/gar/v2-sqlite/bin/checkpkg_collect_stats.py	2010-07-05 14:14:07 UTC (rev 10435)
@@ -39,7 +39,7 @@
   args_display = args
   if len(args_display) > 5:
     args_display = args_display[:5] + ["...more..."]
-  logging.debug("Calling: %s, please be patient", args_display)
+  logging.debug("Processing: %s, please be patient", args_display)
   packages = [opencsw.CswSrv4File(x, options.debug) for x in args]
   if options.catalog_file:
     # Using cached md5sums to save time: injecting md5sums

Modified: csw/mgar/gar/v2-sqlite/lib/python/checkpkg.py
===================================================================
--- csw/mgar/gar/v2-sqlite/lib/python/checkpkg.py	2010-07-05 14:12:48 UTC (rev 10434)
+++ csw/mgar/gar/v2-sqlite/lib/python/checkpkg.py	2010-07-05 14:14:07 UTC (rev 10435)
@@ -24,13 +24,14 @@
 import yaml
 from Cheetah import Template
 import opencsw
+import overrides
 import package_checks
 import models as m
 import configuration as c
 import tag
 
 DEBUG_BREAK_PKGMAP_AFTER = False
-DB_SCHEMA_VERSION = 3L
+DB_SCHEMA_VERSION = 4L
 PACKAGE_STATS_VERSION = 6L
 SYSTEM_PKGMAP = "/var/sadm/install/contents"
 NEEDED_SONAMES = "needed sonames"
@@ -204,22 +205,48 @@
     return None
 
 
-class SystemPkgmap(object):
-  """A class to hold and manipulate the /var/sadm/install/contents file."""
+class DatabaseClient(object):
 
-  STOP_PKGS = ["SUNWbcp", "SUNWowbcp", "SUNWucb"]
   CHECKPKG_DIR = ".checkpkg"
   SQLITE3_DBNAME_TMPL = "var-sadm-install-contents-cache-%s"
+  TABLES = (m.CswConfig,
+            m.CswFile,
+            m.CswPackage,
+            m.Srv4FileStats)
 
+  def __init__(self, debug=False):
+    self.fqdn = socket.getfqdn()
+    self.checkpkg_dir = os.path.join(os.environ["HOME"], self.CHECKPKG_DIR)
+    self.db_path = os.path.join(self.checkpkg_dir,
+                                self.SQLITE3_DBNAME_TMPL % self.fqdn)
+    self.debug = debug
+
+  def InitializeSqlobject(self):
+    logging.debug("Connecting to the %s database.", self.db_path)
+    self.sqo_conn = sqlobject.connectionForURI(
+        'sqlite:%s' % self.db_path, debug=(self.debug and False))
+    sqlobject.sqlhub.processConnection = self.sqo_conn
+
+  def CreateTables(self):
+    for table in self.TABLES:
+      table.createTable(ifNotExists=True)
+
+  def IsDatabaseGoodSchema(self):
+    good_version = self.GetDatabaseSchemaVersion() >= DB_SCHEMA_VERSION
+    return good_version
+
+
+class SystemPkgmap(DatabaseClient):
+  """A class to hold and manipulate the /var/sadm/install/contents file."""
+
+  STOP_PKGS = ["SUNWbcp", "SUNWowbcp", "SUNWucb"]
+
   def __init__(self, system_pkgmap_files=None, debug=False):
     """There is no need to re-parse it each time.
 
     Read it slowly the first time and cache it for later."""
+    super(SystemPkgmap, self).__init__(debug=debug)
     self.cache = {}
-    self.checkpkg_dir = os.path.join(os.environ["HOME"], self.CHECKPKG_DIR)
-    self.fqdn = socket.getfqdn()
-    self.db_path = os.path.join(self.checkpkg_dir,
-                                self.SQLITE3_DBNAME_TMPL % self.fqdn)
     self.file_mtime = None
     self.cache_mtime = None
     self.initialized = False
@@ -227,25 +254,11 @@
       self.system_pkgmap_files = [SYSTEM_PKGMAP]
     else:
       self.system_pkgmap_files = system_pkgmap_files
-    self.debug = debug
 
   def _LazyInitializeDatabase(self):
     if not self.initialized:
       self.InitializeDatabase()
 
-  def InitializeSqlobject(self):
-    if True:
-      logging.debug("Connecting to the %s database.", self.db_path)
-      self.sqo_conn = sqlobject.connectionForURI(
-          'sqlite:%s' % self.db_path, debug=(self.debug and False))
-    else:
-      # TODO: Use a configuration file to store the credentials
-      logging.debug("Connecting MySQL.")
-      self.sqo_conn = sqlobject.connectionForURI(
-          'mysql://checkpkg:Nid3owlOn@mysql/checkpkg',
-          debug=(self.debug and False))
-    sqlobject.sqlhub.processConnection = self.sqo_conn
-
   def InitializeRawDb(self):
     """It's necessary for low level operations."""
     if True:
@@ -277,11 +290,6 @@
       self.PopulateDatabase()
     self.initialized = True
 
-  def CreateTables(self):
-    m.CswConfig.createTable(ifNotExists=True)
-    m.CswPackage.createTable(ifNotExists=True)
-    m.CswFile.createTable(ifNotExists=True)
-
   def PopulateDatabase(self):
     """Imports data into the database.
 
@@ -496,10 +504,6 @@
       schema_on_disk = res.getOne().int_value
     return schema_on_disk
 
-  def IsDatabaseGoodSchema(self):
-    good_version = self.GetDatabaseSchemaVersion() >= DB_SCHEMA_VERSION
-    return good_version
-
   def IsDatabaseUpToDate(self):
     f_mtime = self.GetFileMtime()
     d_mtime = self.GetDatabaseMtime()
@@ -516,24 +520,14 @@
                   repr(good_version), repr(fresh))
     return fresh and good_version
 
-  def SoftDropTable(self, tablename):
-    c = self.conn.cursor()
-    try:
-      # This doesn't accept placeholders.
-      c.execute("DROP TABLE %s;" % tablename)
-    except sqlite3.OperationalError, e:
-      logging.warn("sqlite3.OperationalError: %s", e)
-
   def PurgeDatabase(self, drop_tables=False):
     if drop_tables:
-      # for table_name in ("config", "systempkgmap", "packages"):
-      #   self.SoftDropTable(table_name)
-      for table in (m.CswConfig, m.CswFile, m.CswPackage):
+      for table in self.TABLES:
         if table.tableExists():
           table.dropTable()
     else:
-      logging.info("Deleting all rows from the cache database")
-      for table in (m.CswConfig, m.CswFile, m.CswPackage):
+      logging.info("Truncating all tables")
+      for table in self.TABLES:
         table.clearTable()
 
   def GetInstalledPackages(self):
@@ -896,8 +890,13 @@
   return isalist
 
 
-class PackageStats(object):
-  """Collects stats about a package and saves it."""
+class PackageStats(DatabaseClient):
+  """Collects stats about a package and saves it.
+
+  TODO: Maintain a global database connection instead of creating one for each
+  instantiated object.
+  TODO: Store overrides in a separate table for performance.
+  """
   # This list needs to be synchronized with the CollectStats() method.
   STAT_FILES = [
       "bad_paths",
@@ -917,7 +916,8 @@
       "files_metadata",
   ]
 
-  def __init__(self, srv4_pkg, stats_basedir=None, md5sum=None):
+  def __init__(self, srv4_pkg, stats_basedir=None, md5sum=None, debug=False):
+    super(PackageStats, self).__init__(debug=debug)
     self.srv4_pkg = srv4_pkg
     self.md5sum = md5sum
     self.dir_format_pkg = None
@@ -928,6 +928,7 @@
       home = os.environ["HOME"]
       parts = [home, ".checkpkg", "stats"]
       self.stats_basedir = os.path.join(*parts)
+    self.InitializeSqlobject()
 
   def GetPkgchkData(self):
     ret, stdout, stderr = self.srv4_pkg.GetPkgchkOutput()
@@ -957,14 +958,18 @@
     Returns:
       bool
     """
-    if not self.StatsDirExists():
-      return False
     # More checks can be added in the future.
-    return True
+    md5_sum = self.GetMd5sum()
+    logging.debug("StatsExist() md5_sum=%s", md5_sum)
+    res = m.Srv4FileStats.select(m.Srv4FileStats.q.md5_sum==md5_sum)
+    if not res.count():
+      logging.debug("%s are not in the db", md5_sum)
+      return False
+    else:
+      logging.debug("%s are in the db", md5_sum)
+      pkg_stats = res.getOne()
+      return pkg_stats.stats_version == PACKAGE_STATS_VERSION
 
-  def StatsDirExists(self):
-    return os.path.isdir(self.GetStatsPath())
-
   def GetDirFormatPkg(self):
     if not self.dir_format_pkg:
       self.dir_format_pkg = self.srv4_pkg.GetDirFormatPkg()
@@ -1114,21 +1119,11 @@
 
   def CollectStats(self, force=False):
     """Lazy stats collection."""
-    if not self.StatsDirExists() or force:
-      self._CollectStats()
-      return
-    for stats_name in self.STAT_FILES + ["basic_stats"]:
-      file_name = in_file_name_pickle = os.path.join(
-          self.GetStatsPath(), "%s.pickle" % stats_name)
-      if not os.path.exists(file_name):
-        self._CollectStats()
-        return
-    f = open(file_name, "r")
-    obj = cPickle.load(f)
-    f.close()
-    saved_version = obj["stats_version"]
-    if saved_version < PACKAGE_STATS_VERSION:
-      self._CollectStats()
+    if force:
+      return self._CollectStats()
+    if not self.StatsExist():
+      return self._CollectStats()
+    return self.ReadSavedStats()
 
   def _CollectStats(self):
     """The list of variables needs to be synchronized with the one
@@ -1142,41 +1137,50 @@
     self.MakeStatsDir()
     dir_pkg = self.GetDirFormatPkg()
     logging.info("Collecting %s package statistics.", repr(dir_pkg.pkgname))
-    self.DumpObject(dir_pkg.ListBinaries(), "binaries")
-    self.DumpObject(self.GetBinaryDumpInfo(), "binaries_dump_info")
-    self.DumpObject(dir_pkg.GetDependencies(), "depends")
-    self.DumpObject(GetIsalist(), "isalist")
-    self.DumpObject(self.GetOverrides(), "overrides")
-    self.DumpObject(self.GetPkgchkData(), "pkgchk")
-    self.DumpObject(dir_pkg.GetParsedPkginfo(), "pkginfo")
-    self.DumpObject(dir_pkg.GetPkgmap().entries, "pkgmap")
+    pkg_stats = {
+        "binaries": dir_pkg.ListBinaries(),
+        "binaries_dump_info": self.GetBinaryDumpInfo(),
+        "depends": dir_pkg.GetDependencies(),
+        "isalist": GetIsalist(),
+        "overrides": self.GetOverrides(),
+        "pkgchk": self.GetPkgchkData(),
+        "pkginfo": dir_pkg.GetParsedPkginfo(),
+        "pkgmap": dir_pkg.GetPkgmap().entries,
+        "bad_paths": dir_pkg.GetFilesContaining(BAD_CONTENT_REGEXES),
+        "basic_stats": self.GetBasicStats(),
+        "files_metadata": dir_pkg.GetFilesMetadata(),
+    }
+    db_pkg_stats = m.Srv4FileStats(md5_sum=self.GetMd5sum(),
+                                   pkgname=pkg_stats["basic_stats"]["pkgname"],
+                                   stats_version=PACKAGE_STATS_VERSION,
+                                   data=cPickle.dumps(pkg_stats))
     # The ldd -r reporting breaks on bigger packages during yaml saving.
     # It might work when yaml is disabled
     # self.DumpObject(self.GetLddMinusRlines(), "ldd_dash_r")
     # This check is currently disabled, let's save time by not collecting
     # these data.
     # self.DumpObject(self.GetDefinedSymbols(), "defined_symbols")
-    self.DumpObject(dir_pkg.GetFilesContaining(BAD_CONTENT_REGEXES), "bad_paths")
     # This one should be last, so that if the collection is interrupted
     # in one of the previous runs, the basic_stats.pickle file is not there
     # or not updated, and the collection is started again.
-    self.DumpObject(self.GetBasicStats(), "basic_stats")
-    self.DumpObject(dir_pkg.GetFilesMetadata(), "files_metadata")
+
     logging.debug("Statistics of %s have been collected.", repr(dir_pkg.pkgname))
+    return pkg_stats
 
   def GetAllStats(self):
-    if self.StatsExist():
+    logging.debug("GetAllStats()")
+    if not self.all_stats and self.StatsExist():
       self.all_stats = self.ReadSavedStats()
-    else:
-      self.CollectStats()
+    elif not self.all_stats:
+      self.all_stats = self.CollectStats()
     return self.all_stats
 
   def GetSavedOverrides(self):
     if not self.StatsExist():
       raise PackageError("Package stats not ready.")
-    override_stats = self.ReadObject("overrides")
-    overrides = [Override(**x) for x in override_stats]
-    return overrides
+    override_stats = self.GetAllStats()["overrides"]
+    override_list = [overrides.Override(**x) for x in override_stats]
+    return override_list
 
   def DumpObject(self, obj, name):
     """Saves an object."""
@@ -1196,35 +1200,18 @@
     f.close()
     self.all_stats[name] = obj
 
-  def ReadObject(self, name):
-    """Reads an object."""
-    stats_path = self.GetStatsPath()
-    in_file_name = os.path.join(stats_path, "%s.yml" % name)
-    in_file_name_pickle = os.path.join(stats_path, "%s.pickle" % name)
-    if os.path.exists(in_file_name_pickle):
-      f = open(in_file_name_pickle, "r")
-      obj = cPickle.load(f)
-      f.close()
-    elif os.path.exists(in_file_name):
-      f = open(in_file_name, "r")
-      obj = yaml.safe_load(f)
-      f.close()
-    else:
-      raise PackageError("Can't read %s nor %s."
-                         % (in_file_name, in_file_name_pickle))
-    return obj
-
   def ReadSavedStats(self):
-    all_stats = {}
-    for name in self.STAT_FILES:
-      all_stats[name] = self.ReadObject(name)
-    return all_stats
+    md5_sum = self.GetMd5sum()
+    res = m.Srv4FileStats.select(m.Srv4FileStats.q.md5_sum==md5_sum)
+    return cPickle.loads(str(res.getOne().data))
 
   def _ParseLddDashRline(self, line):
     found_re = r"^\t(?P<soname>\S+)\s+=>\s+(?P<path_found>\S+)"
-    symbol_not_found_re = r"^\tsymbol not found:\s(?P<symbol>\S+)\s+\((?P<path_not_found>\S+)\)"
+    symbol_not_found_re = (r"^\tsymbol not found:\s(?P<symbol>\S+)\s+"
+                           r"\((?P<path_not_found>\S+)\)")
     only_so = r"^\t(?P<path_only>\S+)$"
-    version_so = r'^\t(?P<soname_version_not_found>\S+) \((?P<lib_name>\S+)\) =>\t \(version not found\)'
+    version_so = (r'^\t(?P<soname_version_not_found>\S+) '
+                  r'\((?P<lib_name>\S+)\) =>\t \(version not found\)')
     stv_protected = (r'^\trelocation \S+ symbol: (?P<relocation_symbol>\S+): '
                      r'file (?P<relocation_path>\S+): '
                      r'relocation bound to a symbol with STV_PROTECTED visibility$')
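
Editor's note: the heart of the change is visible above. Instead of pickling
each statistic into its own file under ~/.checkpkg/stats, the whole stats
dictionary is now pickled into a single row of the new Srv4FileStats table,
keyed by the package's md5 sum. Below is a minimal, self-contained sketch of
that round trip, assuming SQLObject's sqlite backend; the database path and
md5 value are hypothetical (the real code derives the path from $HOME and the
host's FQDN):

    import cPickle
    import sqlobject

    class Srv4FileStats(sqlobject.SQLObject):
        # Mirrors the model added in lib/python/models.py below.
        md5_sum = sqlobject.UnicodeCol(notNone=True, unique=True)
        pkgname = sqlobject.UnicodeCol(length=255, notNone=True)
        stats_version = sqlobject.IntCol(notNone=True)
        data = sqlobject.UnicodeCol(notNone=True)

    # Connect and create the table, as DatabaseClient.InitializeSqlobject()
    # and CreateTables() do.
    sqlobject.sqlhub.processConnection = sqlobject.connectionForURI(
        'sqlite:/tmp/checkpkg-sketch.db')
    Srv4FileStats.createTable(ifNotExists=True)

    # Store: one row per package, as _CollectStats() does.
    pkg_stats = {"basic_stats": {"pkgname": "CSWfoo"}}
    md5 = "d41d8cd98f00b204e9800998ecf8427e"
    Srv4FileStats(md5_sum=md5,
                  pkgname=pkg_stats["basic_stats"]["pkgname"],
                  stats_version=6,  # PACKAGE_STATS_VERSION in checkpkg.py
                  data=cPickle.dumps(pkg_stats))

    # Fetch: look the row up by md5 sum and unpickle, as ReadSavedStats() does.
    res = Srv4FileStats.select(Srv4FileStats.q.md5_sum == md5)
    print cPickle.loads(str(res.getOne().data))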

Modified: csw/mgar/gar/v2-sqlite/lib/python/models.py
===================================================================
--- csw/mgar/gar/v2-sqlite/lib/python/models.py	2010-07-05 14:12:48 UTC (rev 10434)
+++ csw/mgar/gar/v2-sqlite/lib/python/models.py	2010-07-05 14:14:07 UTC (rev 10435)
@@ -40,3 +40,9 @@
   path = sqlobject.UnicodeCol(notNone=True)
   line = sqlobject.UnicodeCol(notNone=True)
   basename_idx = sqlobject.DatabaseIndex('basename')
+
+class Srv4FileStats(sqlobject.SQLObject):
+  md5_sum = sqlobject.UnicodeCol(notNone=True, unique=True)
+  pkgname = sqlobject.UnicodeCol(length=255, notNone=True)
+  stats_version = sqlobject.IntCol(notNone=True)
+  data = sqlobject.UnicodeCol(notNone=True)
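
Editor's note: the new table is deliberately simple. The unique md5_sum column
is the lookup key, and data carries the entire stats dictionary as a
protocol-0 cPickle string, which is plain ASCII and therefore survives the
round trip through a UnicodeCol. Continuing the sketch above, a cache hit
reduces to:

    # Lookup by the unique column; StatsExist() additionally compares
    # stats_version against PACKAGE_STATS_VERSION before trusting the row.
    row = Srv4FileStats.select(Srv4FileStats.q.md5_sum == md5).getOne()
    stats = cPickle.loads(str(row.data))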

Modified: csw/mgar/gar/v2-sqlite/lib/python/overrides.py
===================================================================
--- csw/mgar/gar/v2-sqlite/lib/python/overrides.py	2010-07-05 14:12:48 UTC (rev 10434)
+++ csw/mgar/gar/v2-sqlite/lib/python/overrides.py	2010-07-05 14:14:07 UTC (rev 10435)
@@ -53,17 +53,17 @@
     return basket_a == basket_b
 
 
-def ApplyOverrides(error_tags, overrides):
+def ApplyOverrides(error_tags, override_list):
   """Filters out all the error tags that overrides apply to.
 
   O(N * M), but N and M are always small.
   """
   tags_after_overrides = []
   applied_overrides = set([])
-  provided_overrides = set(copy.copy(overrides))
+  provided_overrides = set(copy.copy(override_list))
   for tag in error_tags:
     override_applies = False
-    for override in overrides:
+    for override in override_list:
       if override.DoesApply(tag):
         override_applies = True
         applied_overrides.add(override)
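
Editor's note: the parameter rename here is more than cosmetic. checkpkg.py
now does "import overrides", so a parameter named overrides would shadow the
module inside any function that takes it. A minimal illustration of the hazard
the rename avoids, using the standard os module as a stand-in (hypothetical
names):

    import os  # stands in for the newly imported overrides module

    def Broken(filename, os):
        # The parameter shadows the module: this os.path lookup happens on
        # whatever was passed in, typically raising AttributeError.
        return os.path.basename(filename)

    # Broken("/etc/passwd", object())  # would raise AttributeError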

