[csw-devel] SF.net SVN: gar:[10435] csw/mgar/gar/v2-sqlite
wahwah at users.sourceforge.net
wahwah at users.sourceforge.net
Mon Jul 5 16:14:07 CEST 2010
Revision: 10435
http://gar.svn.sourceforge.net/gar/?rev=10435&view=rev
Author: wahwah
Date: 2010-07-05 14:14:07 +0000 (Mon, 05 Jul 2010)
Log Message:
-----------
mGAR v2-sqlite: First working version, needs more performance tweaks.
Modified Paths:
--------------
csw/mgar/gar/v2-sqlite/bin/checkpkg_collect_stats.py
csw/mgar/gar/v2-sqlite/lib/python/checkpkg.py
csw/mgar/gar/v2-sqlite/lib/python/models.py
csw/mgar/gar/v2-sqlite/lib/python/overrides.py
Modified: csw/mgar/gar/v2-sqlite/bin/checkpkg_collect_stats.py
===================================================================
--- csw/mgar/gar/v2-sqlite/bin/checkpkg_collect_stats.py 2010-07-05 14:12:48 UTC (rev 10434)
+++ csw/mgar/gar/v2-sqlite/bin/checkpkg_collect_stats.py 2010-07-05 14:14:07 UTC (rev 10435)
@@ -39,7 +39,7 @@
args_display = args
if len(args_display) > 5:
args_display = args_display[:5] + ["...more..."]
- logging.debug("Calling: %s, please be patient", args_display)
+ logging.debug("Processing: %s, please be patient", args_display)
packages = [opencsw.CswSrv4File(x, options.debug) for x in args]
if options.catalog_file:
# Using cached md5sums to save time: injecting md5sums
Modified: csw/mgar/gar/v2-sqlite/lib/python/checkpkg.py
===================================================================
--- csw/mgar/gar/v2-sqlite/lib/python/checkpkg.py 2010-07-05 14:12:48 UTC (rev 10434)
+++ csw/mgar/gar/v2-sqlite/lib/python/checkpkg.py 2010-07-05 14:14:07 UTC (rev 10435)
@@ -24,13 +24,14 @@
import yaml
from Cheetah import Template
import opencsw
+import overrides
import package_checks
import models as m
import configuration as c
import tag
DEBUG_BREAK_PKGMAP_AFTER = False
-DB_SCHEMA_VERSION = 3L
+DB_SCHEMA_VERSION = 4L
PACKAGE_STATS_VERSION = 6L
SYSTEM_PKGMAP = "/var/sadm/install/contents"
NEEDED_SONAMES = "needed sonames"
@@ -204,22 +205,48 @@
return None
-class SystemPkgmap(object):
- """A class to hold and manipulate the /var/sadm/install/contents file."""
+class DatabaseClient(object):
- STOP_PKGS = ["SUNWbcp", "SUNWowbcp", "SUNWucb"]
CHECKPKG_DIR = ".checkpkg"
SQLITE3_DBNAME_TMPL = "var-sadm-install-contents-cache-%s"
+ TABLES = (m.CswConfig,
+ m.CswFile,
+ m.CswPackage,
+ m.Srv4FileStats)
+ def __init__(self, debug=False):
+ self.fqdn = socket.getfqdn()
+ self.checkpkg_dir = os.path.join(os.environ["HOME"], self.CHECKPKG_DIR)
+ self.db_path = os.path.join(self.checkpkg_dir,
+ self.SQLITE3_DBNAME_TMPL % self.fqdn)
+ self.debug = debug
+
+ def InitializeSqlobject(self):
+ logging.debug("Connecting to the %s database.", self.db_path)
+ self.sqo_conn = sqlobject.connectionForURI(
+ 'sqlite:%s' % self.db_path, debug=(self.debug and False))
+ sqlobject.sqlhub.processConnection = self.sqo_conn
+
+ def CreateTables(self):
+ for table in self.TABLES:
+ table.createTable(ifNotExists=True)
+
+ def IsDatabaseGoodSchema(self):
+ good_version = self.GetDatabaseSchemaVersion() >= DB_SCHEMA_VERSION
+ return good_version
+
+
+class SystemPkgmap(DatabaseClient):
+ """A class to hold and manipulate the /var/sadm/install/contents file."""
+
+ STOP_PKGS = ["SUNWbcp", "SUNWowbcp", "SUNWucb"]
+
def __init__(self, system_pkgmap_files=None, debug=False):
"""There is no need to re-parse it each time.
Read it slowly the first time and cache it for later."""
+ super(SystemPkgmap, self).__init__(debug=debug)
self.cache = {}
- self.checkpkg_dir = os.path.join(os.environ["HOME"], self.CHECKPKG_DIR)
- self.fqdn = socket.getfqdn()
- self.db_path = os.path.join(self.checkpkg_dir,
- self.SQLITE3_DBNAME_TMPL % self.fqdn)
self.file_mtime = None
self.cache_mtime = None
self.initialized = False
@@ -227,25 +254,11 @@
self.system_pkgmap_files = [SYSTEM_PKGMAP]
else:
self.system_pkgmap_files = system_pkgmap_files
- self.debug = debug
def _LazyInitializeDatabase(self):
if not self.initialized:
self.InitializeDatabase()
- def InitializeSqlobject(self):
- if True:
- logging.debug("Connecting to the %s database.", self.db_path)
- self.sqo_conn = sqlobject.connectionForURI(
- 'sqlite:%s' % self.db_path, debug=(self.debug and False))
- else:
- # TODO: Use a configuration file to store the credentials
- logging.debug("Connecting MySQL.")
- self.sqo_conn = sqlobject.connectionForURI(
- 'mysql://checkpkg:Nid3owlOn@mysql/checkpkg',
- debug=(self.debug and False))
- sqlobject.sqlhub.processConnection = self.sqo_conn
-
def InitializeRawDb(self):
"""It's necessary for low level operations."""
if True:
@@ -277,11 +290,6 @@
self.PopulateDatabase()
self.initialized = True
- def CreateTables(self):
- m.CswConfig.createTable(ifNotExists=True)
- m.CswPackage.createTable(ifNotExists=True)
- m.CswFile.createTable(ifNotExists=True)
-
def PopulateDatabase(self):
"""Imports data into the database.
@@ -496,10 +504,6 @@
schema_on_disk = res.getOne().int_value
return schema_on_disk
- def IsDatabaseGoodSchema(self):
- good_version = self.GetDatabaseSchemaVersion() >= DB_SCHEMA_VERSION
- return good_version
-
def IsDatabaseUpToDate(self):
f_mtime = self.GetFileMtime()
d_mtime = self.GetDatabaseMtime()
@@ -516,24 +520,14 @@
repr(good_version), repr(fresh))
return fresh and good_version
- def SoftDropTable(self, tablename):
- c = self.conn.cursor()
- try:
- # This doesn't accept placeholders.
- c.execute("DROP TABLE %s;" % tablename)
- except sqlite3.OperationalError, e:
- logging.warn("sqlite3.OperationalError: %s", e)
-
def PurgeDatabase(self, drop_tables=False):
if drop_tables:
- # for table_name in ("config", "systempkgmap", "packages"):
- # self.SoftDropTable(table_name)
- for table in (m.CswConfig, m.CswFile, m.CswPackage):
+ for table in self.TABLES:
if table.tableExists():
table.dropTable()
else:
- logging.info("Deleting all rows from the cache database")
- for table in (m.CswConfig, m.CswFile, m.CswPackage):
+ logging.info("Truncating all tables")
+ for table in self.TABLES:
table.clearTable()
def GetInstalledPackages(self):
@@ -896,8 +890,13 @@
return isalist
-class PackageStats(object):
- """Collects stats about a package and saves it."""
+class PackageStats(DatabaseClient):
+ """Collects stats about a package and saves it.
+
+ TODO: Maintain a global database connection instead of creating one for each
+ instantiated object.
+ TODO: Store overrides in a separate table for performance.
+ """
# This list needs to be synchronized with the CollectStats() method.
STAT_FILES = [
"bad_paths",
@@ -917,7 +916,8 @@
"files_metadata",
]
- def __init__(self, srv4_pkg, stats_basedir=None, md5sum=None):
+ def __init__(self, srv4_pkg, stats_basedir=None, md5sum=None, debug=False):
+ super(PackageStats, self).__init__(debug=debug)
self.srv4_pkg = srv4_pkg
self.md5sum = md5sum
self.dir_format_pkg = None
@@ -928,6 +928,7 @@
home = os.environ["HOME"]
parts = [home, ".checkpkg", "stats"]
self.stats_basedir = os.path.join(*parts)
+ self.InitializeSqlobject()
def GetPkgchkData(self):
ret, stdout, stderr = self.srv4_pkg.GetPkgchkOutput()
@@ -957,14 +958,18 @@
Returns:
bool
"""
- if not self.StatsDirExists():
- return False
# More checks can be added in the future.
- return True
+ md5_sum = self.GetMd5sum()
+ logging.debug("StatsExist() md5_sum=%s", md5_sum)
+ res = m.Srv4FileStats.select(m.Srv4FileStats.q.md5_sum==md5_sum)
+ if not res.count():
+ logging.debug("%s are not in the db", md5_sum)
+ return False
+ else:
+ logging.debug("%s are in the db", md5_sum)
+ pkg_stats = res.getOne()
+ return pkg_stats.stats_version == PACKAGE_STATS_VERSION
- def StatsDirExists(self):
- return os.path.isdir(self.GetStatsPath())
-
def GetDirFormatPkg(self):
if not self.dir_format_pkg:
self.dir_format_pkg = self.srv4_pkg.GetDirFormatPkg()
@@ -1114,21 +1119,11 @@
def CollectStats(self, force=False):
"""Lazy stats collection."""
- if not self.StatsDirExists() or force:
- self._CollectStats()
- return
- for stats_name in self.STAT_FILES + ["basic_stats"]:
- file_name = in_file_name_pickle = os.path.join(
- self.GetStatsPath(), "%s.pickle" % stats_name)
- if not os.path.exists(file_name):
- self._CollectStats()
- return
- f = open(file_name, "r")
- obj = cPickle.load(f)
- f.close()
- saved_version = obj["stats_version"]
- if saved_version < PACKAGE_STATS_VERSION:
- self._CollectStats()
+ if force:
+ return self._CollectStats()
+ if not self.StatsExist():
+ return self._CollectStats()
+ return self.ReadSavedStats()
def _CollectStats(self):
"""The list of variables needs to be synchronized with the one
@@ -1142,41 +1137,50 @@
self.MakeStatsDir()
dir_pkg = self.GetDirFormatPkg()
logging.info("Collecting %s package statistics.", repr(dir_pkg.pkgname))
- self.DumpObject(dir_pkg.ListBinaries(), "binaries")
- self.DumpObject(self.GetBinaryDumpInfo(), "binaries_dump_info")
- self.DumpObject(dir_pkg.GetDependencies(), "depends")
- self.DumpObject(GetIsalist(), "isalist")
- self.DumpObject(self.GetOverrides(), "overrides")
- self.DumpObject(self.GetPkgchkData(), "pkgchk")
- self.DumpObject(dir_pkg.GetParsedPkginfo(), "pkginfo")
- self.DumpObject(dir_pkg.GetPkgmap().entries, "pkgmap")
+ pkg_stats = {
+ "binaries": dir_pkg.ListBinaries(),
+ "binaries_dump_info": self.GetBinaryDumpInfo(),
+ "depends": dir_pkg.GetDependencies(),
+ "isalist": GetIsalist(),
+ "overrides": self.GetOverrides(),
+ "pkgchk": self.GetPkgchkData(),
+ "pkginfo": dir_pkg.GetParsedPkginfo(),
+ "pkgmap": dir_pkg.GetPkgmap().entries,
+ "bad_paths": dir_pkg.GetFilesContaining(BAD_CONTENT_REGEXES),
+ "basic_stats": self.GetBasicStats(),
+ "files_metadata": dir_pkg.GetFilesMetadata(),
+ }
+ db_pkg_stats = m.Srv4FileStats(md5_sum=self.GetMd5sum(),
+ pkgname=pkg_stats["basic_stats"]["pkgname"],
+ stats_version=PACKAGE_STATS_VERSION,
+ data=cPickle.dumps(pkg_stats))
# The ldd -r reporting breaks on bigger packages during yaml saving.
# It might work when yaml is disabled
# self.DumpObject(self.GetLddMinusRlines(), "ldd_dash_r")
# This check is currently disabled, let's save time by not collecting
# these data.
# self.DumpObject(self.GetDefinedSymbols(), "defined_symbols")
- self.DumpObject(dir_pkg.GetFilesContaining(BAD_CONTENT_REGEXES), "bad_paths")
# This one should be last, so that if the collection is interrupted
# in one of the previous runs, the basic_stats.pickle file is not there
# or not updated, and the collection is started again.
- self.DumpObject(self.GetBasicStats(), "basic_stats")
- self.DumpObject(dir_pkg.GetFilesMetadata(), "files_metadata")
+
logging.debug("Statistics of %s have been collected.", repr(dir_pkg.pkgname))
+ return pkg_stats
def GetAllStats(self):
- if self.StatsExist():
+ logging.debug("GetAllStats()")
+ if not self.all_stats and self.StatsExist():
self.all_stats = self.ReadSavedStats()
- else:
- self.CollectStats()
+ elif not self.all_stats:
+ self.all_stats = self.CollectStats()
return self.all_stats
def GetSavedOverrides(self):
if not self.StatsExist():
raise PackageError("Package stats not ready.")
- override_stats = self.ReadObject("overrides")
- overrides = [Override(**x) for x in override_stats]
- return overrides
+ override_stats = self.GetAllStats()["overrides"]
+ override_list = [overrides.Override(**x) for x in override_stats]
+ return override_list
def DumpObject(self, obj, name):
"""Saves an object."""
@@ -1196,35 +1200,18 @@
f.close()
self.all_stats[name] = obj
- def ReadObject(self, name):
- """Reads an object."""
- stats_path = self.GetStatsPath()
- in_file_name = os.path.join(stats_path, "%s.yml" % name)
- in_file_name_pickle = os.path.join(stats_path, "%s.pickle" % name)
- if os.path.exists(in_file_name_pickle):
- f = open(in_file_name_pickle, "r")
- obj = cPickle.load(f)
- f.close()
- elif os.path.exists(in_file_name):
- f = open(in_file_name, "r")
- obj = yaml.safe_load(f)
- f.close()
- else:
- raise PackageError("Can't read %s nor %s."
- % (in_file_name, in_file_name_pickle))
- return obj
-
def ReadSavedStats(self):
- all_stats = {}
- for name in self.STAT_FILES:
- all_stats[name] = self.ReadObject(name)
- return all_stats
+ md5_sum = self.GetMd5sum()
+ res = m.Srv4FileStats.select(m.Srv4FileStats.q.md5_sum==md5_sum)
+ return cPickle.loads(str(res.getOne().data))
def _ParseLddDashRline(self, line):
found_re = r"^\t(?P<soname>\S+)\s+=>\s+(?P<path_found>\S+)"
- symbol_not_found_re = r"^\tsymbol not found:\s(?P<symbol>\S+)\s+\((?P<path_not_found>\S+)\)"
+ symbol_not_found_re = (r"^\tsymbol not found:\s(?P<symbol>\S+)\s+"
+ r"\((?P<path_not_found>\S+)\)")
only_so = r"^\t(?P<path_only>\S+)$"
- version_so = r'^\t(?P<soname_version_not_found>\S+) \((?P<lib_name>\S+)\) =>\t \(version not found\)'
+ version_so = (r'^\t(?P<soname_version_not_found>\S+) '
+ r'\((?P<lib_name>\S+)\) =>\t \(version not found\)')
stv_protected = (r'^\trelocation \S+ symbol: (?P<relocation_symbol>\S+): '
r'file (?P<relocation_path>\S+): '
r'relocation bound to a symbol with STV_PROTECTED visibility$')
Modified: csw/mgar/gar/v2-sqlite/lib/python/models.py
===================================================================
--- csw/mgar/gar/v2-sqlite/lib/python/models.py 2010-07-05 14:12:48 UTC (rev 10434)
+++ csw/mgar/gar/v2-sqlite/lib/python/models.py 2010-07-05 14:14:07 UTC (rev 10435)
@@ -40,3 +40,9 @@
path = sqlobject.UnicodeCol(notNone=True)
line = sqlobject.UnicodeCol(notNone=True)
basename_idx = sqlobject.DatabaseIndex('basename')
+
+class Srv4FileStats(sqlobject.SQLObject):
+ md5_sum = sqlobject.UnicodeCol(notNone=True, unique=True)
+ pkgname = sqlobject.UnicodeCol(length=255, notNone=True)
+ stats_version = sqlobject.IntCol(notNone=True)
+ data = sqlobject.UnicodeCol(notNone=True)
Modified: csw/mgar/gar/v2-sqlite/lib/python/overrides.py
===================================================================
--- csw/mgar/gar/v2-sqlite/lib/python/overrides.py 2010-07-05 14:12:48 UTC (rev 10434)
+++ csw/mgar/gar/v2-sqlite/lib/python/overrides.py 2010-07-05 14:14:07 UTC (rev 10435)
@@ -53,17 +53,17 @@
return basket_a == basket_b
-def ApplyOverrides(error_tags, overrides):
+def ApplyOverrides(error_tags, override_list):
"""Filters out all the error tags that overrides apply to.
O(N * M), but N and M are always small.
"""
tags_after_overrides = []
applied_overrides = set([])
- provided_overrides = set(copy.copy(overrides))
+ provided_overrides = set(copy.copy(override_list))
for tag in error_tags:
override_applies = False
- for override in overrides:
+ for override in override_list:
if override.DoesApply(tag):
override_applies = True
applied_overrides.add(override)
This was sent by the SourceForge.net collaborative development platform, the world's largest Open Source development site.
More information about the csw-devel mailing list