[csw-devel] SF.net SVN: gar:[9447] csw/mgar/gar/v2

wahwah at users.sourceforge.net wahwah at users.sourceforge.net
Mon Mar 29 18:44:43 CEST 2010


Revision: 9447
          http://gar.svn.sourceforge.net/gar/?rev=9447&view=rev
Author:   wahwah
Date:     2010-03-29 16:44:43 +0000 (Mon, 29 Mar 2010)

Log Message:
-----------
mGAR v2: checkpkg, optimizations for the whole catalog run.  More parsing capabilities for ldd -r, but it still can't handle KDE packages.

Modified Paths:
--------------
    csw/mgar/gar/v2/bin/analyze_module_results.py
    csw/mgar/gar/v2/bin/checkpkg
    csw/mgar/gar/v2/bin/checkpkg_collect_stats.py
    csw/mgar/gar/v2/lib/python/checkpkg.py
    csw/mgar/gar/v2/lib/python/checkpkg_test.py
    csw/mgar/gar/v2/lib/python/opencsw.py
    csw/mgar/gar/v2/lib/python/package_checks.py

Modified: csw/mgar/gar/v2/bin/analyze_module_results.py
===================================================================
--- csw/mgar/gar/v2/bin/analyze_module_results.py	2010-03-29 15:56:54 UTC (rev 9446)
+++ csw/mgar/gar/v2/bin/analyze_module_results.py	2010-03-29 16:44:43 UTC (rev 9447)
@@ -34,8 +34,8 @@
    unapplied_overrides) = checkpkg.ApplyOverrides(error_tags, overrides)
   exit_code = bool(tags_after_overrides)
   if tags_after_overrides:
-    print "There were errors reported."
-    print "If you know they are false positives, you can override them:"
+    print "If any of the reported errors were false positives, you can"
+    print "override them pasting the lines below to the GAR recipe."
     for tag in tags_after_overrides:
       print tag.ToGarSyntax()
   if unapplied_overrides:

Modified: csw/mgar/gar/v2/bin/checkpkg
===================================================================
--- csw/mgar/gar/v2/bin/checkpkg	2010-03-29 15:56:54 UTC (rev 9446)
+++ csw/mgar/gar/v2/bin/checkpkg	2010-03-29 16:44:43 UTC (rev 9447)
@@ -240,11 +240,20 @@
 	extra_options="--debug"
 fi
 
+if [[ -n "${MD5_SUMS_CATALOG_FILE}" ]]; then
+	catalog_options="--catalog=${MD5_SUMS_CATALOG_FILE}"
+else
+	catalog_options=""
+fi
+
 # /var/sadm/install/contents cache update
 ${command_basedir}/update_contents_cache.py
 if [[ "${SKIP_STATS_COLLECTION}" -eq 0 ]]; then
-  # Collects package stats to be later analyzed
-  ${command_basedir}/checkpkg_collect_stats.py ${extra_options} "$@"
+  # Collects package stats to be analyzed later
+  ${command_basedir}/checkpkg_collect_stats.py \
+      ${catalog_options} \
+      ${extra_options} \
+      "$@"
   if [[ "$?" -ne 0 ]]; then
     errmsg "Stats collection phase has failed."
   fi

Modified: csw/mgar/gar/v2/bin/checkpkg_collect_stats.py
===================================================================
--- csw/mgar/gar/v2/bin/checkpkg_collect_stats.py	2010-03-29 15:56:54 UTC (rev 9446)
+++ csw/mgar/gar/v2/bin/checkpkg_collect_stats.py	2010-03-29 16:44:43 UTC (rev 9447)
@@ -27,15 +27,30 @@
   parser.add_option("-d", "--debug", dest="debug",
                     default=False, action="store_true",
                     help="Turn on debugging messages")
+  parser.add_option("-c", "--catalog", dest="catalog_file",
+                    help="Catalog file")
   options, args = parser.parse_args()
   if options.debug:
     logging.basicConfig(level=logging.DEBUG)
   else:
     logging.basicConfig(level=logging.INFO)
   logging.debug("Collecting statistics about given package files.")
-  logging.debug("calling: %s, please be patient", args)
+  args_display = args
+  if len(args_display) > 5:
+    args_display = args_display[:5] + ["...more..."]
+  logging.debug("Calling: %s, please be patient", args_display)
   packages = [opencsw.CswSrv4File(x, options.debug) for x in args]
+  if options.catalog_file:
+    # Using cached md5sums to save time: injecting md5sums
+    # from the catalog.
+    catalog = opencsw.OpencswCatalog(options.catalog_file)
+    md5s_by_basename = catalog.GetDataByBasename()
+    for pkg in packages:
+      basename = os.path.basename(pkg.pkg_path)
+      pkg.md5sum = md5s_by_basename[basename]["md5sum"]
   stats_list = [checkpkg.PackageStats(pkg) for pkg in packages]
+  md5s_by_basename = None # To free memory
+  catalog = None          # To free memory
   del(packages)
   stats_list.reverse()
   total_packages = len(stats_list)

Modified: csw/mgar/gar/v2/lib/python/checkpkg.py
===================================================================
--- csw/mgar/gar/v2/lib/python/checkpkg.py	2010-03-29 15:56:54 UTC (rev 9446)
+++ csw/mgar/gar/v2/lib/python/checkpkg.py	2010-03-29 16:44:43 UTC (rev 9447)
@@ -31,6 +31,7 @@
 RUNPATH = "runpath"
 SONAME = "soname"
 CONFIG_MTIME = "mtime"
+WRITE_YAML = False
 DO_NOT_REPORT_SURPLUS = set([u"CSWcommon", u"CSWcswclassutils", u"CSWisaexec"])
 DO_NOT_REPORT_MISSING = set([])
 DO_NOT_REPORT_MISSING_RE = [r"SUNW.*", r"\*SUNW.*"]
@@ -115,8 +116,8 @@
 #end if
 #if $gar_lines
 
-# Checkpkg suggests adding the following lines to the GAR recipe,
-# see above for details:
+# Checkpkg suggests adding the following lines to the GAR recipe:
+# This is a summary; see above for details.
 #for $line in $gar_lines
 $line
 #end for
@@ -603,6 +604,7 @@
                repr(self.tag_info)))
 
   def ToGarSyntax(self):
+    """Presents the error tag using GAR syntax."""
     msg_lines = []
     if self.msg:
       msg_lines.extend(textwrap(self.msg, 70,
@@ -945,7 +947,6 @@
   STAT_FILES = [
       "all_filenames",
       "bad_paths",
-      "basic_stats",
       "binaries",
       "binaries_dump_info",
       # "defined_symbols",
@@ -956,6 +957,9 @@
       "pkgchk",
       "pkginfo",
       "pkgmap",
+      # This entry needs to be last because of the assumption in the
+      # CollectStats() function.
+      "basic_stats",
   ]
 
   def __init__(self, srv4_pkg, stats_basedir=None, md5sum=None):
@@ -1161,9 +1165,13 @@
     if not self.StatsDirExists() or force:
       self._CollectStats()
       return
-    basic_stats_file = in_file_name_pickle = os.path.join(
-        self.GetStatsPath(), "basic_stats.pickle")
-    f = open(basic_stats_file, "r")
+    for stats_name in self.STAT_FILES + ["basic_stats"]:
+      file_name = in_file_name_pickle = os.path.join(
+          self.GetStatsPath(), "%s.pickle" % stats_name)
+      if not os.path.exists(file_name):
+        self._CollectStats()
+        return
+    f = open(file_name, "r")
     obj = cPickle.load(f)
     f.close()
     saved_version = obj["stats_version"]
@@ -1192,6 +1200,7 @@
     self.DumpObject(dir_pkg.GetParsedPkginfo(), "pkginfo")
     self.DumpObject(dir_pkg.GetPkgmap().entries, "pkgmap")
     # The ldd -r reporting breaks on bigger packages during yaml saving.
+    # It might work when yaml is disabled
     # self.DumpObject(self.GetLddMinusRlines(), "ldd_dash_r")
     # This check is currently disabled, let's save time by not collecting
     # these data.
@@ -1221,13 +1230,15 @@
     """Saves an object."""
     stats_path = self.GetStatsPath()
     # yaml
-    out_file_name = os.path.join(stats_path, "%s.yml" % name)
-    logging.debug("DumpObject(): writing %s", repr(out_file_name))
-    f = open(out_file_name, "w")
-    f.write(yaml.safe_dump(obj))
-    f.close()
+    if WRITE_YAML:
+      out_file_name = os.path.join(stats_path, "%s.yml" % name)
+      logging.debug("DumpObject(): writing %s", repr(out_file_name))
+      f = open(out_file_name, "w")
+      f.write(yaml.safe_dump(obj))
+      f.close()
     # pickle
     out_file_name_pickle = os.path.join(stats_path, "%s.pickle" % name)
+    logging.debug("DumpObject(): writing %s", repr(out_file_name_pickle))
     f = open(out_file_name_pickle, "wb")
     cPickle.dump(obj, f)
     f.close()
@@ -1242,10 +1253,13 @@
       f = open(in_file_name_pickle, "r")
       obj = cPickle.load(f)
       f.close()
-    else:
+    elif os.path.exists(in_file_name):
       f = open(in_file_name, "r")
       obj = yaml.safe_load(f)
       f.close()
+    else:
+      raise PackageError("Can't read %s nor %s."
+                         % (in_file_name, in_file_name_pickle))
     return obj
 
   def ReadSavedStats(self):
@@ -1262,9 +1276,15 @@
     stv_protected = (r'^\trelocation \S+ symbol: (?P<relocation_symbol>\S+): '
                      r'file (?P<relocation_path>\S+): '
                      r'relocation bound to a symbol with STV_PROTECTED visibility$')
-    common_re = (r"(%s|%s|%s|%s|%s)"
+    sizes_differ = (r'^\trelocation \S+ sizes differ: (?P<sizes_differ_symbol>\S+)$')
+    sizes_info = (r'^\t\t\(file (?P<sizediff_file1>\S+) size=(?P<size1>0x\w+); '
+                  r'file (?P<sizediff_file2>\S+) size=(?P<size2>0x\w+)\)$')
+    sizes_one_used = (
+        r'^\t\t(?P<sizediffused_file>\S+) size used; '
+        'possible insufficient data copied$')
+    common_re = (r"(%s|%s|%s|%s|%s|%s|%s|%s)"
                  % (found_re, symbol_not_found_re, only_so, version_so,
-                    stv_protected))
+                    stv_protected, sizes_differ, sizes_info, sizes_one_used))
     m = re.match(common_re, line)
     response = {}
     if m:
@@ -1295,6 +1315,21 @@
         response["soname"] = None
         response["path"] = d["relocation_path"]
         response["symbol"] = d["relocation_symbol"]
+      elif d["sizes_differ_symbol"]:
+        response["state"] = 'sizes-differ'
+        response["soname"] = None
+        response["path"] = None
+        response["symbol"] = d["sizes_differ_symbol"]
+      elif d["sizediff_file1"]:
+        response["state"] = 'sizes-diff-info'
+        response["soname"] = None
+        response["path"] = "%s %s" % (d["sizediff_file1"], d["sizediff_file2"])
+        response["symbol"] = None
+      elif d["sizediffused_file"]:
+        response["state"] = 'sizes-diff-one-used'
+        response["soname"] = None
+        response["path"] = "%s %s" % (d["sizediff_file1"], d["sizediff_file2"])
+        response["symbol"] = None
       else:
         raise StdoutSyntaxError("Could not parse %s with %s"
                                 % (repr(line), common_re))

Modified: csw/mgar/gar/v2/lib/python/checkpkg_test.py
===================================================================
--- csw/mgar/gar/v2/lib/python/checkpkg_test.py	2010-03-29 15:56:54 UTC (rev 9446)
+++ csw/mgar/gar/v2/lib/python/checkpkg_test.py	2010-03-29 16:44:43 UTC (rev 9447)
@@ -30,6 +30,9 @@
 \tlibm.so.2 =>   /lib/libm.so.2
 \t/usr/lib/secure/s8_preload.so.1
 \tlibXext.so.0 (SUNW_1.1) =>\t (version not found)
+\trelocation R_SPARC_COPY symbol: ASN1_OCTET_STRING_it: file /opt/csw/lib/sparcv8plus+vis/libcrypto.so.0.9.8: relocation bound to a symbol with STV_PROTECTED visibility
+\trelocation R_SPARC_COPY sizes differ: _ZTI7QWidget
+\t\t(file /tmp/pkg_GqCk0P/CSWkdeartworkgcc/root/opt/csw/kde-gcc/bin/kslideshow.kss size=0x28; file /opt/csw/kde-gcc/lib/libqt-mt.so.3 size=0x20)
 """
 
 class GetLinesBySonameUnitTest(unittest.TestCase):
@@ -413,6 +416,40 @@
     }
     self.assertEqual(expected, self.pkgstats._ParseLddDashRline(line))
 
+  def test_ParseLdd_SizesDiffer(self):
+    line = '\trelocation R_SPARC_COPY sizes differ: _ZTI7QWidget'
+    expected = {
+        'symbol': '_ZTI7QWidget',
+        'soname': None,
+        'path': None,
+        'state': 'sizes-differ',
+    }
+    self.assertEqual(expected, self.pkgstats._ParseLddDashRline(line))
+
+  def test_ParseLdd_SizesDifferInfo(self):
+    line = ('\t\t(file /tmp/pkg_GqCk0P/CSWkdeartworkgcc/root/opt/csw/kde-gcc/bin/'
+            'kslideshow.kss size=0x28; '
+            'file /opt/csw/kde-gcc/lib/libqt-mt.so.3 size=0x20)')
+    expected = {
+        'symbol': None,
+        'path': ('/tmp/pkg_GqCk0P/CSWkdeartworkgcc/root/opt/csw/kde-gcc/'
+                 'bin/kslideshow.kss /opt/csw/kde-gcc/lib/libqt-mt.so.3'),
+        'state': 'sizes-diff-info',
+        'soname': None,
+    }
+    self.assertEqual(expected, self.pkgstats._ParseLddDashRline(line))
+
+  def test_ParseLdd_SizesDifferOneUsed(self):
+    line = ('\t\t/opt/csw/kde-gcc/lib/libqt-mt.so.3 size used; '
+            'possible insufficient data copied')
+    expected = {
+        'symbol': None,
+        'path': '/opt/csw/kde-gcc/lib/libqt-mt.so.3',
+        'state': 'sizes-diff-one-used',
+        'soname': None,
+    }
+    self.assertEqual(expected, self.pkgstats._ParseLddDashRline(line))
+
   def test_ParseLddDashRlineManyLines(self):
     for line in LDD_R_OUTPUT_1.splitlines():
       parsed = self.pkgstats._ParseLddDashRline(line)

Modified: csw/mgar/gar/v2/lib/python/opencsw.py
===================================================================
--- csw/mgar/gar/v2/lib/python/opencsw.py	2010-03-29 15:56:54 UTC (rev 9446)
+++ csw/mgar/gar/v2/lib/python/opencsw.py	2010-03-29 16:44:43 UTC (rev 9447)
@@ -406,6 +406,7 @@
     self.dir_format_pkg = None
     self.debug = debug
     self.pkgname = None
+    self.md5sum = None
 
   def __repr__(self):
     return u"CswSrv4File(%s)" % repr(self.pkg_path)
@@ -508,12 +509,14 @@
     return dir_format_pkg.GetPkgmap(analyze_permissions, strip)
 
   def GetMd5sum(self):
-    logging.debug("GetMd5sum() (%s)", repr(self.pkg_path))
-    fp = open(self.pkg_path)
-    hash = hashlib.md5()
-    hash.update(fp.read())
-    fp.close()
-    return hash.hexdigest()
+    if not self.md5sum:
+      logging.debug("GetMd5sum() reading file %s", repr(self.pkg_path))
+      fp = open(self.pkg_path)
+      hash = hashlib.md5()
+      hash.update(fp.read())
+      fp.close()
+      self.md5sum = hash.hexdigest()
+    return self.md5sum
 
   def GetPkgchkOutput(self):
     """Returns: (exit code, stdout, stderr)."""
@@ -1046,3 +1049,53 @@
     result = os.path.exists(srv4_path)
     logging.debug("Srv4Exists(%s) => %s, %s", pkg_dir, repr(srv4_path), result)
     return result
+
+
+class OpencswCatalog(object):
+  """Represents a catalog file."""
+
+  def __init__(self, file_name):
+    self.file_name = file_name
+    self.by_basename = {}
+    self.catalog_data = []
+
+  def _GetCatalogData(self, fd):
+    cline_re_str = (
+        r"^"
+        r"(?P<catalogname>\S+)"
+        r"\s+"
+        r"(?P<version>\S+)"
+        r"\s+"
+        r"(?P<pkgname>\S+)"
+        r"\s+"
+        r"(?P<file_basename>\S+)"
+        r"\s+"
+        r"(?P<md5sum>\S+)"
+        r"\s+"
+        r"(?P<size>\S+)"
+        r"\s+"
+        r"(?P<deps>\S+)"
+        r"\s+"
+        r"(?P<none_thing>\S+)$"
+    )
+    cline_re = re.compile(cline_re_str)
+    for line in fd:
+      m = cline_re.match(line)
+      if m:
+        d = m.groupdict()
+        self.catalog_data.append(d)
+      else:
+        logging.debug("%s did not match the regex", repr(line))
+
+  def GetCatalogData(self):
+    if not self.catalog_data:
+      fd = open(self.file_name, "r")
+      self._GetCatalogData(fd)
+    return self.catalog_data
+
+  def GetDataByBasename(self):
+    if not self.by_basename:
+      cd = self.GetCatalogData()
+      for d in cd:
+        self.by_basename[d["file_basename"]] = d
+    return self.by_basename

Modified: csw/mgar/gar/v2/lib/python/package_checks.py
===================================================================
--- csw/mgar/gar/v2/lib/python/package_checks.py	2010-03-29 15:56:54 UTC (rev 9446)
+++ csw/mgar/gar/v2/lib/python/package_checks.py	2010-03-29 16:44:43 UTC (rev 9447)
@@ -626,11 +626,11 @@
   return errors
 
 
-def DisableCheckForMissingSymbolsDumb(pkg_data, error_mgr, logger, messenger):
+def DisabledCheckForMissingSymbolsDumb(pkg_data, error_mgr, logger, messenger):
   """Analyzes missing symbols reported by ldd -r.
 
   So far only made sense for perl modules.  Disables because it falls over on
-  big KDE packages.
+  big KDE packages.  During pickling (serialization), Python runs out of memory.
   """
   pkgname = pkg_data["basic_stats"]["pkgname"]
   if not re.match(SYMBOLS_CHECK_ONLY_FOR, pkgname):


This was sent by the SourceForge.net collaborative development platform, the world's largest Open Source development site.


More information about the devel mailing list