[csw-devel] SF.net SVN: gar:[11220] csw/mgar/gar/v2

wahwah at users.sourceforge.net wahwah at users.sourceforge.net
Sun Oct 10 22:36:33 CEST 2010


Revision: 11220
          http://gar.svn.sourceforge.net/gar/?rev=11220&view=rev
Author:   wahwah
Date:     2010-10-10 20:36:33 +0000 (Sun, 10 Oct 2010)

Log Message:
-----------
mGAR v2: checkpkg, refactoring: Split off the hachoir-dependent bits into
a separate module.

Modified Paths:
--------------
    csw/mgar/gar/v2/lib/python/opencsw.py
    csw/mgar/gar/v2/lib/python/opencsw_test.py
    csw/mgar/gar/v2/lib/python/package_checks.py
    csw/mgar/gar/v2/lib/python/package_checks_test.py
    csw/mgar/gar/v2/lib/python/sharedlib_utils.py
    csw/mgar/gar/v2/lib/python/sharedlib_utils_test.py
    csw/mgar/gar/v2/tests/run_tests.py

Added Paths:
-----------
    csw/mgar/gar/v2/lib/python/catalog.py
    csw/mgar/gar/v2/lib/python/catalog_test.py
    csw/mgar/gar/v2/lib/python/package.py
    csw/mgar/gar/v2/lib/python/package_test.py

Added: csw/mgar/gar/v2/lib/python/catalog.py
===================================================================
--- csw/mgar/gar/v2/lib/python/catalog.py	                        (rev 0)
+++ csw/mgar/gar/v2/lib/python/catalog.py	2010-10-10 20:36:33 UTC (rev 11220)
@@ -0,0 +1,139 @@
+#!/usr/bin/env python2.6
+
+import re
+
+class OpencswCatalogBuilder(object):
+
+  def __init__(self, product_dir, catalog_dir):
+    self.product_dir = product_dir
+    self.catalog_dir = catalog_dir
+
+  def Run(self):
+    pkg_dirs = os.listdir(self.product_dir)
+    for pkg_dir in pkg_dirs:
+      pkg_path = os.path.join(self.product_dir, pkg_dir)
+      pkginfo_path = os.path.join(pkg_path, "pkginfo")
+      if (os.path.isdir(pkg_path)
+            and
+          os.path.exists(pkginfo_path)):
+        if not self.Srv4Exists(pkg_path):
+          pkg = None
+          tmpdir = None
+          try:
+            tmpdir = tempfile.mkdtemp(prefix="sunw-pkg-")
+            logging.debug("Copying %s to %s", repr(pkg_path), repr(tmpdir))
+            tmp_pkg_dir = os.path.join(tmpdir, pkg_dir)
+            shutil.copytree(pkg_path, tmp_pkg_dir, symlinks=True)
+            pkg = DirectoryFormatPackage(tmp_pkg_dir)
+            # Replacing NAME= in the pkginfo, setting it to the catalog name.
+            pkg.ResetNameProperty()
+            pkg.ToSrv4(self.catalog_dir)
+          except IOError, e:
+            logging.warn("%s has failed: %s", pkg_path, e)
+          finally:
+            if pkg:
+              del(pkg)
+            if os.path.exists(tmpdir):
+              shutil.rmtree(tmpdir)
+        else:
+          logging.warn("srv4 file for %s already exists, skipping", pkg_path)
+      else:
+        logging.warn("%s is not a directory.", pkg_path)
+
+
+  def Srv4Exists(self, pkg_dir):
+    pkg = DirectoryFormatPackage(pkg_dir)
+    srv4_name = pkg.GetSrv4FileName()
+    srv4_name += ".gz"
+    srv4_path = os.path.join(self.catalog_dir, srv4_name)
+    result = os.path.exists(srv4_path)
+    logging.debug("Srv4Exists(%s) => %s, %s", pkg_dir, repr(srv4_path), result)
+    return result
+
+
+class OpencswCatalog(object):
+  """Represents a catalog file."""
+
+  def __init__(self, file_name):
+    self.file_name = file_name
+    self.by_basename = None
+    self.catalog_data = None
+
+  def _ParseCatalogLine(self, line):
+    cline_re_str_list = [
+        (
+            r"^"
+            # tmux
+            r"(?P<catalogname>\S+)"
+            r"\s+"
+            # 1.2,REV=2010.05.17
+            r"(?P<version>\S+)"
+            r"\s+"
+            # CSWtmux
+            r"(?P<pkgname>\S+)"
+            r"\s+"
+            # tmux-1.2,REV=2010.05.17-SunOS5.9-sparc-CSW.pkg.gz
+            r"(?P<file_basename>\S+)"
+            r"\s+"
+            # 145351cf6186fdcadcd169b66387f72f
+            r"(?P<md5sum>\S+)"
+            r"\s+"
+            # 214091
+            r"(?P<size>\S+)"
+            r"\s+"
+            # CSWcommon|CSWlibevent
+            r"(?P<deps>\S+)"
+            r"\s+"
+            # none
+            r"(?P<none_thing_1>\S+)"
+            # An optional empty field.
+            r"("
+              r"\s+"
+              # none\n'
+              r"(?P<none_thing_2>\S+)"
+            r")?"
+            r"$"
+        ),
+    ]
+    cline_re_list = [re.compile(x) for x in cline_re_str_list]
+    matched = False
+    d = None
+    for cline_re in cline_re_list:
+      m = cline_re.match(line)
+      if m:
+        d = m.groupdict()
+        matched = True
+        if not d:
+          raise CatalogLineParseError("Parsed %s data is empty" % repr(line))
+    if not matched:
+      raise CatalogLineParseError("No regexes matched %s" % repr(line))
+    return d
+
+  def _GetCatalogData(self, fd):
+    catalog_data = []
+    for line in fd:
+      try:
+        parsed = self._ParseCatalogLine(line)
+        catalog_data.append(parsed)
+      except CatalogLineParseError, e:
+        logging.debug("Could not parse %s, %s", repr(line), e)
+    return catalog_data
+
+  def GetCatalogData(self):
+    if not self.catalog_data:
+      fd = open(self.file_name, "r")
+      self.catalog_data = self._GetCatalogData(fd)
+    return self.catalog_data
+
+  def GetDataByBasename(self):
+    if not self.by_basename:
+      self.by_basename = {}
+      cd = self.GetCatalogData()
+      for d in cd:
+        if "file_basename" not in d:
+          logging.error("%s is missing the file_basename field", d)
+        self.by_basename[d["file_basename"]] = d
+    return self.by_basename
+
+
+

Added: csw/mgar/gar/v2/lib/python/catalog_test.py
===================================================================
--- csw/mgar/gar/v2/lib/python/catalog_test.py	                        (rev 0)
+++ csw/mgar/gar/v2/lib/python/catalog_test.py	2010-10-10 20:36:33 UTC (rev 11220)
@@ -0,0 +1,29 @@
+#!/usr/bin/env python2.6
+
+import unittest
+import catalog
+
+class OpencswCatalogUnitTest(unittest.TestCase):
+
+  def test_ParseCatalogLine_1(self):
+    line = (
+        'tmux 1.2,REV=2010.05.17 CSWtmux '
+        'tmux-1.2,REV=2010.05.17-SunOS5.9-sparc-CSW.pkg.gz '
+        '145351cf6186fdcadcd169b66387f72f 214091 '
+        'CSWcommon|CSWlibevent none none\n')
+    oc = catalog.OpencswCatalog(None)
+    parsed = oc._ParseCatalogLine(line)
+    expected = {'catalogname': 'tmux',
+                'deps': 'CSWcommon|CSWlibevent',
+                'file_basename': 'tmux-1.2,REV=2010.05.17-SunOS5.9-sparc-CSW.pkg.gz',
+                'md5sum': '145351cf6186fdcadcd169b66387f72f',
+                'none_thing_1': 'none',
+                'none_thing_2': 'none',
+                'pkgname': 'CSWtmux',
+                'size': '214091',
+                'version': '1.2,REV=2010.05.17'}
+    self.assertEquals(expected, parsed)
+
+
+if __name__ == '__main__':
+  unittest.main()

Modified: csw/mgar/gar/v2/lib/python/opencsw.py
===================================================================
--- csw/mgar/gar/v2/lib/python/opencsw.py	2010-10-10 20:35:47 UTC (rev 11219)
+++ csw/mgar/gar/v2/lib/python/opencsw.py	2010-10-10 20:36:33 UTC (rev 11220)
@@ -15,9 +15,7 @@
 import copy
 import datetime
 import difflib
-import hachoir_parser as hp
 import hashlib
-import magic
 import logging
 import os
 import os.path
@@ -30,12 +28,8 @@
 import overrides
 import configuration as c
 from Cheetah import Template
+import sharedlib_utils as su
 
-# Suppress unhelpful warnings
-# http://bitbucket.org/haypo/hachoir/issue/23
-import hachoir_core.config
-hachoir_core.config.quiet = True
-
 ARCH_SPARC = "sparc"
 ARCH_i386 = "i386"
 ARCH_ALL = "all"
@@ -425,184 +419,6 @@
     return editor
 
 
-class ShellMixin(object):
-
-  def ShellCommand(self, args, quiet=False):
-    logging.debug("Calling: %s", repr(args))
-    if quiet:
-      process = subprocess.Popen(args,
-                                 stdout=subprocess.PIPE,
-                                 stderr=subprocess.PIPE)
-      stdout, stderr = process.communicate()
-      retcode = process.wait()
-    else:
-      retcode = subprocess.call(args)
-    if retcode:
-      raise Error("Running %s has failed." % repr(args))
-    return retcode
-
-
-class CswSrv4File(ShellMixin, object):
-  """Represents a package in the srv4 format (pkg)."""
-
-  def __init__(self, pkg_path, debug=False):
-    self.pkg_path = pkg_path
-    self.workdir = None
-    self.gunzipped_path = None
-    self.transformed = False
-    self.dir_format_pkg = None
-    self.debug = debug
-    self.pkgname = None
-    self.md5sum = None
-    self.mtime = None
-
-  def __repr__(self):
-    return u"CswSrv4File(%s)" % repr(self.pkg_path)
-
-  def GetWorkDir(self):
-    if not self.workdir:
-      self.workdir = tempfile.mkdtemp(prefix="pkg_")
-      fd = open(os.path.join(self.workdir, "admin"), "w")
-      fd.write(ADMIN_FILE_CONTENT)
-      fd.close()
-    return self.workdir
-
-  def GetAdminFilePath(self):
-    return os.path.join(self.GetWorkDir(), "admin")
-
-  def GetGunzippedPath(self):
-    if not self.gunzipped_path:
-      gzip_suffix = ".gz"
-      pkg_suffix = ".pkg"
-      if self.pkg_path.endswith("%s%s" % (pkg_suffix, gzip_suffix)):
-        # Causing the class to stat the .gz file.  This call throws away the
-        # result, but the result will be cached as a class instance member.
-        self.GetMtime()
-        base_name_gz = os.path.split(self.pkg_path)[1]
-        shutil.copy(self.pkg_path, self.GetWorkDir())
-        self.pkg_path = os.path.join(self.GetWorkDir(), base_name_gz)
-        args = ["gunzip", "-f", self.pkg_path]
-        unused_retcode = self.ShellCommand(args)
-        self.gunzipped_path = self.pkg_path[:(-len(gzip_suffix))]
-      elif self.pkg_path.endswith(pkg_suffix):
-        self.gunzipped_path = self.pkg_path
-      else:
-        raise Error("The file name should end in either "
-                    "%s or %s, but it's %s."
-                    % (gzip_suffix, pkg_suffix, repr(self.pkg_path)))
-    return self.gunzipped_path
-
-  def Pkgtrans(self, src_file, destdir, pkgname):
-    """A proxy for the pkgtrans command.
-
-    This requires custom-pkgtrans to be available.
-    """
-    if not os.path.isdir(destdir):
-      raise PackageError("%s doesn't exist or is not a directory" % destdir)
-    args = [os.path.join(os.path.dirname(__file__), "custom-pkgtrans"),
-            src_file,
-            destdir,
-            pkgname ]
-    pkgtrans_proc = subprocess.Popen(args,
-                                     stdout=subprocess.PIPE,
-                                     stderr=subprocess.PIPE)
-    stdout, stderr = pkgtrans_proc.communicate()
-    ret = pkgtrans_proc.wait()
-    if ret:
-      logging.error(stdout)
-      logging.error(stderr)
-      logging.error("% has failed" % args)
-
-  def GetPkgname(self):
-    """It's necessary to figure out the pkgname from the .pkg file.
-    # nawk 'NR == 2 {print $1; exit;} $f
-    """
-    if not self.pkgname:
-      gunzipped_path = self.GetGunzippedPath()
-      args = ["nawk", "NR == 2 {print $1; exit;}", gunzipped_path]
-      nawk_proc = subprocess.Popen(args, stdout=subprocess.PIPE)
-      stdout, stderr = nawk_proc.communicate()
-      ret_code = nawk_proc.wait()
-      self.pkgname = stdout.strip()
-      logging.debug("GetPkgname(): %s", repr(self.pkgname))
-    return self.pkgname
-
-  def GetMtime(self):
-    if not self.mtime:
-      # This fails if the file is not there.
-      s = os.stat(self.pkg_path)
-      t = time.gmtime(s.st_mtime)
-      self.mtime = datetime.datetime(*t[:6])
-    return self.mtime
-
-  def TransformToDir(self):
-    """Transforms the file to the directory format.
-
-    This uses the Pkgtrans function at the top, because pkgtrans behaves
-    differently on Solaris 8 and 10.  Having our own implementation helps
-    achieve consistent behavior.
-    """
-    if not self.transformed:
-      gunzipped_path = self.GetGunzippedPath()
-      pkgname = self.GetPkgname()
-      args = [os.path.join(os.path.dirname(__file__),
-                           "..", "..", "bin", "custom-pkgtrans"),
-              gunzipped_path, self.GetWorkDir(), pkgname]
-      logging.debug("transforming: %s", args)
-      unused_retcode = self.ShellCommand(args, quiet=(not self.debug))
-      dirs = self.GetDirs()
-      if len(dirs) != 1:
-        raise Error("Need exactly one package in the package stream: "
-                    "%s." % (dirs))
-      self.dir_format_pkg = DirectoryFormatPackage(dirs[0])
-      self.transformed = True
-
-  def GetDirFormatPkg(self):
-    self.TransformToDir()
-    return self.dir_format_pkg
-
-  def GetDirs(self):
-    paths = os.listdir(self.GetWorkDir())
-    dirs = []
-    for p in paths:
-      abspath = os.path.join(self.GetWorkDir(), p)
-      if os.path.isdir(abspath):
-        dirs.append(abspath)
-    return dirs
-
-  def GetPkgmap(self, analyze_permissions, strip=None):
-    dir_format_pkg = self.GetDirFormatPkg()
-    return dir_format_pkg.GetPkgmap(analyze_permissions, strip)
-
-  def GetMd5sum(self):
-    if not self.md5sum:
-      logging.debug("GetMd5sum() reading file %s", repr(self.pkg_path))
-      fp = open(self.pkg_path)
-      hash = hashlib.md5()
-      hash.update(fp.read())
-      fp.close()
-      self.md5sum = hash.hexdigest()
-    return self.md5sum
-
-  def GetPkgchkOutput(self):
-    """Returns: (exit code, stdout, stderr)."""
-    args = ["pkgchk", "-d", self.GetGunzippedPath(), "all"]
-    pkgchk_proc = subprocess.Popen(args, stdout=subprocess.PIPE, stderr=subprocess.PIPE)
-    stdout, stderr = pkgchk_proc.communicate()
-    ret = pkgchk_proc.wait()
-    return ret, stdout, stderr
-
-  def GetFileMtime(self):
-    if not self.mtime:
-      self.mtime = os.stat(self.pkg_path).st_mtime
-    return self.mtime
-
-  def __del__(self):
-    if self.workdir:
-      logging.debug("Removing %s", repr(self.workdir))
-      shutil.rmtree(self.workdir)
-
-
 def ParsePkginfo(lines):
   """Parses a pkginfo data."""
   d = {}
@@ -659,12 +475,13 @@
   catalogname_list = copy.copy(catalogname_list)
   if len(catalogname_list) == 1:
     return catalogname_list[0]
-  current_substring = catalogname_list.pop()
-  while catalogname_list and current_substring:
-    substring_set = LongestCommonSubstring(current_substring,
-                                           catalogname_list.pop())
-    if substring_set:
-      current_substring = list(substring_set)[0]
+  #current_substring = catalogname_list.pop()
+  #while catalogname_list and current_substring:
+  #  substring_set = su.LongestCommonSubstring(current_substring,
+  #                                            catalogname_list.pop())
+  #  if substring_set:
+  #    current_substring = list(substring_set)[0]
+  current_substring = su.CollectionLongestCommonSubstring(catalogname_list)
   # If it's something like foo_, make it foo.
   while current_substring and not current_substring[-1].isalnum():
     current_substring = current_substring[:-1]
@@ -673,27 +490,6 @@
   return "various packages"
 
 
-def LongestCommonSubstring(S, T):
-  """Stolen from Wikibooks
-
-  http://en.wikibooks.org/wiki/Algorithm_Implementation/Strings/Longest_common_substring#Python"""
-  m = len(S); n = len(T)
-  L = [[0] * (n+1) for i in xrange(m+1)]
-  LCS = set()
-  longest = 0
-  for i in xrange(m):
-    for j in xrange(n):
-      if S[i] == T[j]:
-        v = L[i][j] + 1
-        L[i+1][j+1] = v
-        if v > longest:
-          longest = v
-          LCS = set()
-        if v == longest:
-          LCS.add(S[i-v+1:i+1])
-  return LCS
-
-
 def PkginfoToSrv4Name(pkginfo_dict):
   SRV4_FN_TMPL = "%(catalog_name)s-%(version)s-%(osver)s-%(arch)s-%(tag)s.pkg"
   fn_data = {}
@@ -708,304 +504,6 @@
   return SRV4_FN_TMPL % fn_data
 
 
-class DirectoryFormatPackage(ShellMixin, object):
-  """Represents a package in the directory format.
-
-  Allows some read-write operations.
-  """
-  def __init__(self, directory):
-    self.directory = directory
-    self.pkgname = os.path.basename(directory)
-    self.pkgpath = self.directory
-    self.pkginfo_dict = None
-    self.binaries = None
-    self.file_paths = None
-    self.files_metadata = None
-
-  def GetCatalogname(self):
-    """Returns the catalog name of the package.
-
-    A bit hacky.  Looks for the first word of the NAME field in the package.
-    """
-    pkginfo = self.GetParsedPkginfo()
-    words = re.split(c.WS_RE, pkginfo["NAME"])
-    return words[0]
-
-  def GetParsedPkginfo(self):
-    if not self.pkginfo_dict:
-      pkginfo_fd = open(self.GetPkginfoFilename(), "r")
-      self.pkginfo_dict = ParsePkginfo(pkginfo_fd)
-      pkginfo_fd.close()
-    return self.pkginfo_dict
-
-  def GetSrv4FileName(self):
-    """Guesses the Srv4FileName based on the package directory contents."""
-    return PkginfoToSrv4Name(self.GetParsedPkginfo())
-
-  def ToSrv4(self, target_dir):
-    target_file_name = self.GetSrv4FileName()
-    target_path = os.path.join(target_dir, target_file_name)
-    if os.path.exists(target_path):
-      return target_path
-    pkg_container_dir, pkg_dir = os.path.split(self.directory)
-    if not os.path.isdir(target_dir):
-      os.makedirs(target_dir)
-    args = ["pkgtrans", "-s", pkg_container_dir, target_path, pkg_dir]
-    self.ShellCommand(args, quiet=True)
-    args = ["gzip", "-f", target_path]
-    self.ShellCommand(args, quiet=True)
-    return target_path
-
-  def GetPkgmap(self, analyze_permissions=False, strip=None):
-    fd = open(os.path.join(self.directory, "pkgmap"), "r")
-    return Pkgmap(fd, analyze_permissions, strip)
-
-  def SetPkginfoEntry(self, key, value):
-    pkginfo = self.GetParsedPkginfo()
-    logging.debug("Setting %s to %s", repr(key), repr(value))
-    pkginfo[key] = value
-    self.WritePkginfo(pkginfo)
-    pkgmap_path = os.path.join(self.directory, "pkgmap")
-    pkgmap_fd = open(pkgmap_path, "r")
-    new_pkgmap_lines = []
-    pkginfo_re = re.compile("1 i pkginfo")
-    ws_re = re.compile(r"\s+")
-    for line in pkgmap_fd:
-      if pkginfo_re.search(line):
-        fields = ws_re.split(line)
-        # 3: size
-        # 4: sum
-        pkginfo_path = os.path.join(self.directory, "pkginfo")
-        args = ["cksum", pkginfo_path]
-        cksum_process = subprocess.Popen(args, stdout=subprocess.PIPE)
-        stdout, stderr = cksum_process.communicate()
-        cksum_process.wait()
-        size = ws_re.split(stdout)[1]
-        args = ["sum", pkginfo_path]
-        sum_process = subprocess.Popen(args, stdout=subprocess.PIPE)
-        stdout, stderr = sum_process.communicate()
-        sum_process.wait()
-        sum_value = ws_re.split(stdout)[0]
-        fields[3] = size
-        fields[4] = sum_value
-        logging.debug("New pkgmap line: %s", fields)
-        line = " ".join(fields)
-      new_pkgmap_lines.append(line.strip())
-    pkgmap_fd.close()
-    # Write that back
-    pkgmap_path_new = pkgmap_path + ".new"
-    logging.debug("Writing back to %s", pkgmap_path_new)
-    pkgmap_fd = open(pkgmap_path_new, "w")
-    pkgmap_fd.write("\n".join(new_pkgmap_lines))
-    pkgmap_fd.close()
-    shutil.move(pkgmap_path_new, pkgmap_path)
-
-    # TODO(maciej): Need to update the relevant line on pkgmap too
-
-  def GetPkginfoFilename(self):
-    return os.path.join(self.directory, "pkginfo")
-
-  def WritePkginfo(self, pkginfo_dict):
-    # Some packages extract read-only. To be sure, change them to be
-    # user-writable.
-    args = ["chmod", "-R", "u+w", self.directory]
-    self.ShellCommand(args)
-    pkginfo_filename = self.GetPkginfoFilename()
-    os.chmod(pkginfo_filename, 0644)
-    pkginfo_fd = open(pkginfo_filename, "w")
-    pkginfo_dict = self.GetParsedPkginfo()
-    for k, v in pkginfo_dict.items():
-      pkginfo_fd.write("%s=%s\n" % (k, pkginfo_dict[k]))
-    pkginfo_fd.close()
-
-  def ResetNameProperty(self):
-    """Sometimes, NAME= contains useless data. This method resets them."""
-    pkginfo_dict = self.GetParsedPkginfo()
-    catalog_name = PkgnameToCatName(pkginfo_dict["PKG"])
-    description = pkginfo_dict["DESC"]
-    pkginfo_name = "%s - %s" % (catalog_name, description)
-    self.SetPkginfoEntry("NAME", pkginfo_name)
-
-  def GetDependencies(self):
-    depends = []
-    depend_file_path = os.path.join(self.directory, "install", "depend")
-    if not os.path.exists(depend_file_path):
-      return depends
-    fd = open(os.path.join(self.directory, "install", "depend"), "r")
-    # It needs to be a list because there might be duplicates and it's
-    # necessary to carry that information.
-    for line in fd:
-      fields = re.split(c.WS_RE, line)
-      if fields[0] == "P":
-        pkgname = fields[1]
-        pkg_desc = " ".join(fields[1:])
-        depends.append((pkgname, pkg_desc))
-    fd.close()
-    return depends
-
-  def CheckPkgpathExists(self):
-    if not os.path.isdir(self.directory):
-      raise PackageError("%s does not exist or is not a directory"
-                         % self.directory)
-
-  def GetFilesMetadata(self):
-    """Returns a data structure with all the files plus their metadata.
-
-    [
-      {
-        "path": ...,
-        "mime_type": ...,
-      },
-    ]
-    """
-    if not self.files_metadata:
-      self.CheckPkgpathExists()
-      self.files_metadata = []
-      files_root = os.path.join(self.directory, "root")
-      if not os.path.exists(files_root):
-        return self.files_metadata
-      all_files = self.GetAllFilePaths()
-      def StripRe(x, strip_re):
-        return re.sub(strip_re, "", x)
-      root_re = re.compile(r"^root/")
-      file_magic = FileMagic()
-      for file_path in all_files:
-        full_path = unicode(self.MakeAbsolutePath(file_path))
-        file_info = {
-            "path": StripRe(file_path, root_re),
-            "mime_type": file_magic.GetFileMimeType(full_path)
-        }
-        if not file_info["mime_type"]:
-          logging.error("Could not establish the mime type of %s",
-                        full_path)
-          # We really don't want that, as it misses binaries.
-          raise PackageError("Could not establish the mime type of %s"
-                             % full_path)
-        if IsBinary(file_info):
-          parser = hp.createParser(full_path)
-          if not parser:
-            logging.warning("Can't parse file %s", file_path)
-          else:
-            file_info["mime_type_by_hachoir"] = parser.mime_type
-            machine_id = parser["/header/machine"].value
-            file_info["machine_id"] = machine_id
-            file_info["endian"] = parser["/header/endian"].display
-        self.files_metadata.append(file_info)
-    return self.files_metadata
-
-  def ListBinaries(self):
-    """Lists all the binaries from a given package.
-
-    Original checkpkg code:
-
-    #########################################
-    # find all executables and dynamic libs,and list their filenames.
-    listbinaries() {
-      if [ ! -d $1 ] ; then
-        print errmsg $1 not a directory
-        rm -rf $EXTRACTDIR
-        exit 1
-      fi
-      find $1 -print | xargs file |grep ELF |nawk -F: '{print $1}'
-    }
-
-    Returns a list of absolute paths.
-
-    Now that there are files_metadata, this function can safely go away, once
-    all its callers are modified to use files_metadata instead.
-    """
-    if self.binaries is None:
-      self.CheckPkgpathExists()
-      files_metadata = self.GetFilesMetadata()
-      self.binaries = []
-      # The nested for-loop looks inefficient.
-      for file_info in files_metadata:
-        if IsBinary(file_info):
-          self.binaries.append(file_info["path"])
-      self.binaries.sort()
-    return self.binaries
-
-  def GetAllFilePaths(self):
-    """Returns a list of all paths from the package."""
-    if not self.file_paths:
-      self.CheckPkgpathExists()
-      remove_prefix = "%s/" % self.pkgpath
-      self.file_paths = []
-      for root, dirs, files in os.walk(os.path.join(self.pkgpath, "root")):
-        full_paths = [os.path.join(root, f) for f in files]
-        self.file_paths.extend([f.replace(remove_prefix, "") for f in full_paths])
-    return self.file_paths
-
-  def _GetOverridesStream(self, file_path):
-    # This might potentially cause a file descriptor leak, but I'm not going to
-    # worry about that at this stage.
-    # NB, the whole catalog run doesn't seem to be suffering. (~2500 packages)
-    #
-    # There is a race condition here, but it's executing sequentially, I don't
-    # expect any concurrency problems.
-    if os.path.isfile(file_path):
-      logging.debug("Opening %s override file." % repr(file_path))
-      return open(file_path, "r")
-    else:
-      logging.debug("Override file %s not found." % repr(file_path))
-      return None
-
-  def _ParseOverridesStream(self, stream):
-    override_list = []
-    for line in stream:
-      if line.startswith("#"):
-        continue
-      override_list.append(overrides.ParseOverrideLine(line))
-    return override_list
-
-  def GetOverrides(self):
-    """Returns overrides, a list of overrides.Override instances."""
-    overrides = []
-    catalogname = self.GetCatalogname()
-    override_paths = (
-        [self.directory,
-         "root",
-         "opt/csw/share/checkpkg/overrides", catalogname],
-        [self.directory,
-         "install",
-         "checkpkg_override"],
-    )
-    for override_path in override_paths:
-      file_path = os.path.join(*override_path)
-      stream = self._GetOverridesStream(file_path)
-      if stream:
-        overrides.extend(self._ParseOverridesStream(stream))
-    return overrides
-
-  def GetFileContent(self, pkg_file_path):
-    if pkg_file_path.startswith("/"):
-      pkg_file_path = pkg_file_path[1:]
-    # TODO: Write a unit test for the right path
-    file_path = os.path.join(self.directory, "root", pkg_file_path)
-    try:
-      fd = open(file_path, "r")
-      content = fd.read()
-      fd.close()
-      return content
-    except IOError, e:
-      raise PackageError(e)
-
-  def GetFilesContaining(self, regex_list):
-    full_paths = self.GetAllFilePaths()
-    files_by_pattern = {}
-    for full_path in full_paths:
-      content = open(self.MakeAbsolutePath(full_path), "rb").read()
-      for regex in regex_list:
-        if re.search(regex, content):
-          if regex not in files_by_pattern:
-            files_by_pattern[regex] = []
-          files_by_pattern[regex].append(full_path)
-    return files_by_pattern
-
-  def MakeAbsolutePath(self, p):
-    return os.path.join(self.pkgpath, p)
-
-
 class Pkgmap(object):
   """Represents the pkgmap of the package.
 
@@ -1091,168 +589,6 @@
     return self.classes
 
 
-class PackageComparator(object):
-
-  def __init__(self, file_name_a, file_name_b,
-               permissions=False,
-               strip_a=None,
-               strip_b=None):
-    self.analyze_permissions = permissions
-    self.pkg_a = CswSrv4File(file_name_a)
-    self.pkg_b = CswSrv4File(file_name_b)
-    self.strip_a = strip_a
-    self.strip_b = strip_b
-
-  def Run(self):
-    pkgmap_a = self.pkg_a.GetPkgmap(self.analyze_permissions, strip=self.strip_a)
-    pkgmap_b = self.pkg_b.GetPkgmap(self.analyze_permissions, strip=self.strip_b)
-    diff_ab = difflib.unified_diff(sorted(pkgmap_a.paths),
-                                   sorted(pkgmap_b.paths),
-                                   fromfile=self.pkg_a.pkg_path,
-                                   tofile=self.pkg_b.pkg_path)
-    diff_text = "\n".join(diff_ab)
-    if diff_text:
-      less_proc = subprocess.Popen(["less"], stdin=subprocess.PIPE)
-      less_stdout, less_stderr = less_proc.communicate(input=diff_text)
-      less_proc.wait()
-    else:
-      print "No differences found."
-
-
-class OpencswCatalogBuilder(object):
-
-  def __init__(self, product_dir, catalog_dir):
-    self.product_dir = product_dir
-    self.catalog_dir = catalog_dir
-
-  def Run(self):
-    pkg_dirs = os.listdir(self.product_dir)
-    for pkg_dir in pkg_dirs:
-      pkg_path = os.path.join(self.product_dir, pkg_dir)
-      pkginfo_path = os.path.join(pkg_path, "pkginfo")
-      if (os.path.isdir(pkg_path)
-            and
-          os.path.exists(pkginfo_path)):
-        if not self.Srv4Exists(pkg_path):
-          pkg = None
-          tmpdir = None
-          try:
-            tmpdir = tempfile.mkdtemp(prefix="sunw-pkg-")
-            logging.debug("Copying %s to %s", repr(pkg_path), repr(tmpdir))
-            tmp_pkg_dir = os.path.join(tmpdir, pkg_dir)
-            shutil.copytree(pkg_path, tmp_pkg_dir, symlinks=True)
-            pkg = DirectoryFormatPackage(tmp_pkg_dir)
-            # Replacing NAME= in the pkginfo, setting it to the catalog name.
-            pkg.ResetNameProperty()
-            pkg.ToSrv4(self.catalog_dir)
-          except IOError, e:
-            logging.warn("%s has failed: %s", pkg_path, e)
-          finally:
-            if pkg:
-              del(pkg)
-            if os.path.exists(tmpdir):
-              shutil.rmtree(tmpdir)
-        else:
-          logging.warn("srv4 file for %s already exists, skipping", pkg_path)
-      else:
-        logging.warn("%s is not a directory.", pkg_path)
-
-
-  def Srv4Exists(self, pkg_dir):
-    pkg = DirectoryFormatPackage(pkg_dir)
-    srv4_name = pkg.GetSrv4FileName()
-    srv4_name += ".gz"
-    srv4_path = os.path.join(self.catalog_dir, srv4_name)
-    result = os.path.exists(srv4_path)
-    logging.debug("Srv4Exists(%s) => %s, %s", pkg_dir, repr(srv4_path), result)
-    return result
-
-
-class OpencswCatalog(object):
-  """Represents a catalog file."""
-
-  def __init__(self, file_name):
-    self.file_name = file_name
-    self.by_basename = None
-    self.catalog_data = None
-
-  def _ParseCatalogLine(self, line):
-    cline_re_str_list = [
-        (
-            r"^"
-            # tmux
-            r"(?P<catalogname>\S+)"
-            r"\s+"
-            # 1.2,REV=2010.05.17
-            r"(?P<version>\S+)"
-            r"\s+"
-            # CSWtmux
-            r"(?P<pkgname>\S+)"
-            r"\s+"
-            # tmux-1.2,REV=2010.05.17-SunOS5.9-sparc-CSW.pkg.gz
-            r"(?P<file_basename>\S+)"
-            r"\s+"
-            # 145351cf6186fdcadcd169b66387f72f
-            r"(?P<md5sum>\S+)"
-            r"\s+"
-            # 214091
-            r"(?P<size>\S+)"
-            r"\s+"
-            # CSWcommon|CSWlibevent
-            r"(?P<deps>\S+)"
-            r"\s+"
-            # none
-            r"(?P<none_thing_1>\S+)"
-            # An optional empty field.
-            r"("
-              r"\s+"
-              # none\n'
-              r"(?P<none_thing_2>\S+)"
-            r")?"
-            r"$"
-        ),
-    ]
-    cline_re_list = [re.compile(x) for x in cline_re_str_list]
-    matched = False
-    d = None
-    for cline_re in cline_re_list:
-      m = cline_re.match(line)
-      if m:
-        d = m.groupdict()
-        matched = True
-        if not d:
-          raise CatalogLineParseError("Parsed %s data is empty" % repr(line))
-    if not matched:
-      raise CatalogLineParseError("No regexes matched %s" % repr(line))
-    return d
-
-  def _GetCatalogData(self, fd):
-    catalog_data = []
-    for line in fd:
-      try:
-        parsed = self._ParseCatalogLine(line)
-        catalog_data.append(parsed)
-      except CatalogLineParseError, e:
-        logging.debug("Could not parse %s, %s", repr(line), e)
-    return catalog_data
-
-  def GetCatalogData(self):
-    if not self.catalog_data:
-      fd = open(self.file_name, "r")
-      self.catalog_data = self._GetCatalogData(fd)
-    return self.catalog_data
-
-  def GetDataByBasename(self):
-    if not self.by_basename:
-      self.by_basename = {}
-      cd = self.GetCatalogData()
-      for d in cd:
-        if "file_basename" not in d:
-          logging.error("%s is missing the file_basename field", d)
-        self.by_basename[d["file_basename"]] = d
-    return self.by_basename
-
-
 def IsBinary(file_info):
   """Returns True or False depending on file metadata."""
   is_a_binary = False
@@ -1268,39 +604,3 @@
       is_a_binary = True
       break
   return is_a_binary
-
-
-class FileMagic(object):
-  """Libmagic sometimes returns None, which I think is a bug.
-  Trying to come up with a way to work around that.
-  """
-
-  def __init__(self):
-    self.cookie_count = 0
-    self.magic_cookie = None
-
-  def _GetCookie(self):
-    magic_cookie = magic.open(self.cookie_count)
-    self.cookie_count += 1
-    magic_cookie.load()
-    magic_cookie.setflags(magic.MAGIC_MIME)
-    return magic_cookie
-
-  def _LazyInit(self):
-    if not self.magic_cookie:
-      self.magic_cookie = self._GetCookie()
-
-  def GetFileMimeType(self, full_path):
-    """Trying to run magic.file() a few times, not accepting None."""
-    self._LazyInit()
-    mime = None
-    for i in xrange(10):
-      mime = self.magic_cookie.file(full_path)
-      if mime:
-        break;
-      else:
-        # Returned mime is null. Re-initializing the cookie and trying again.
-        logging.error("magic_cookie.file(%s) returned None. Retrying.",
-                      full_path)
-        self.magic_cookie = self._GetCookie()
-    return mime

Modified: csw/mgar/gar/v2/lib/python/opencsw_test.py
===================================================================
--- csw/mgar/gar/v2/lib/python/opencsw_test.py	2010-10-10 20:35:47 UTC (rev 11219)
+++ csw/mgar/gar/v2/lib/python/opencsw_test.py	2010-10-10 20:36:33 UTC (rev 11220)
@@ -352,19 +352,7 @@
                                                repr(expected_name),
                                                repr(result)))
 
-  def testLongestCommonSubstring_1(self):
-    self.assertEqual(set(["foo"]), opencsw.LongestCommonSubstring("foo", "foo"))
 
-  def testLongestCommonSubstring_2(self):
-    self.assertEqual(set([]), opencsw.LongestCommonSubstring("foo", "bar"))
-
-  def testLongestCommonSubstring_3(self):
-    self.assertEqual(set(["bar"]), opencsw.LongestCommonSubstring("barfoobar", "bar"))
-
-  def testLongestCommonSubstring_4(self):
-    self.assertEqual(set(['bcd', 'hij']), opencsw.LongestCommonSubstring("abcdefghijk", "bcdhij"))
-
-
 class PkgmapUnitTest(unittest.TestCase):
 
   def test_1(self):
@@ -447,28 +435,6 @@
                           searchList=[submitpkg_data])
     self.assertTrue(re.search(r"new package", unicode(t)), unicode(t))
 
-class OpencswCatalogUnitTest(unittest.TestCase):
 
-  def test_ParseCatalogLine_1(self):
-    line = (
-        'tmux 1.2,REV=2010.05.17 CSWtmux '
-        'tmux-1.2,REV=2010.05.17-SunOS5.9-sparc-CSW.pkg.gz '
-        '145351cf6186fdcadcd169b66387f72f 214091 '
-        'CSWcommon|CSWlibevent none none\n')
-    oc = opencsw.OpencswCatalog(None)
-    parsed = oc._ParseCatalogLine(line)
-    expected = {'catalogname': 'tmux',
-                'deps': 'CSWcommon|CSWlibevent',
-                'file_basename': 'tmux-1.2,REV=2010.05.17-SunOS5.9-sparc-CSW.pkg.gz',
-                'md5sum': '145351cf6186fdcadcd169b66387f72f',
-                'none_thing_1': 'none',
-                'none_thing_2': 'none',
-                'pkgname': 'CSWtmux',
-                'size': '214091',
-                'version': '1.2,REV=2010.05.17'}
-    self.assertEquals(expected, parsed)
-
-
-
 if __name__ == '__main__':
   unittest.main()

Added: csw/mgar/gar/v2/lib/python/package.py
===================================================================
--- csw/mgar/gar/v2/lib/python/package.py	                        (rev 0)
+++ csw/mgar/gar/v2/lib/python/package.py	2010-10-10 20:36:33 UTC (rev 11220)
@@ -0,0 +1,550 @@
+#!/usr/bin/env python2.6
+
+import magic
+import subprocess
+import hachoir_parser as hp
+
+# Suppress unhelpful warnings
+# http://bitbucket.org/haypo/hachoir/issue/23
+import hachoir_core.config
+hachoir_core.config.quiet = True
+
+
+class ShellMixin(object):
+
+  def ShellCommand(self, args, quiet=False):
+    logging.debug("Calling: %s", repr(args))
+    if quiet:
+      process = subprocess.Popen(args,
+                                 stdout=subprocess.PIPE,
+                                 stderr=subprocess.PIPE)
+      stdout, stderr = process.communicate()
+      retcode = process.wait()
+    else:
+      retcode = subprocess.call(args)
+    if retcode:
+      raise Error("Running %s has failed." % repr(args))
+    return retcode
+
+
+class CswSrv4File(ShellMixin, object):
+  """Represents a package in the srv4 format (pkg)."""
+
+  def __init__(self, pkg_path, debug=False):
+    self.pkg_path = pkg_path
+    self.workdir = None
+    self.gunzipped_path = None
+    self.transformed = False
+    self.dir_format_pkg = None
+    self.debug = debug
+    self.pkgname = None
+    self.md5sum = None
+    self.mtime = None
+
+  def __repr__(self):
+    return u"CswSrv4File(%s)" % repr(self.pkg_path)
+
+  def GetWorkDir(self):
+    if not self.workdir:
+      self.workdir = tempfile.mkdtemp(prefix="pkg_")
+      fd = open(os.path.join(self.workdir, "admin"), "w")
+      fd.write(ADMIN_FILE_CONTENT)
+      fd.close()
+    return self.workdir
+
+  def GetAdminFilePath(self):
+    return os.path.join(self.GetWorkDir(), "admin")
+
+  def GetGunzippedPath(self):
+    if not self.gunzipped_path:
+      gzip_suffix = ".gz"
+      pkg_suffix = ".pkg"
+      if self.pkg_path.endswith("%s%s" % (pkg_suffix, gzip_suffix)):
+        # Causing the class to stat the .gz file.  This call throws away the
+        # result, but the result will be cached as a class instance member.
+        self.GetMtime()
+        base_name_gz = os.path.split(self.pkg_path)[1]
+        shutil.copy(self.pkg_path, self.GetWorkDir())
+        self.pkg_path = os.path.join(self.GetWorkDir(), base_name_gz)
+        args = ["gunzip", "-f", self.pkg_path]
+        unused_retcode = self.ShellCommand(args)
+        self.gunzipped_path = self.pkg_path[:(-len(gzip_suffix))]
+      elif self.pkg_path.endswith(pkg_suffix):
+        self.gunzipped_path = self.pkg_path
+      else:
+        raise Error("The file name should end in either "
+                    "%s or %s, but it's %s."
+                    % (gzip_suffix, pkg_suffix, repr(self.pkg_path)))
+    return self.gunzipped_path
+
+  def Pkgtrans(self, src_file, destdir, pkgname):
+    """Runs the custom-pkgtrans script with src_file, destdir and pkgname.
+
+    Raises PackageError if destdir is not a directory.  A non-zero exit status
+    of the command is only logged (along with its output), not raised.
+    """
+    if not os.path.isdir(destdir):
+      raise PackageError("%s doesn't exist or is not a directory" % destdir)
+    args = [os.path.join(os.path.dirname(__file__), "custom-pkgtrans"),
+            src_file, destdir, pkgname]
+    pkgtrans_proc = subprocess.Popen(args,
+                                     stdout=subprocess.PIPE,
+                                     stderr=subprocess.PIPE)
+    stdout, stderr = pkgtrans_proc.communicate()
+    ret = pkgtrans_proc.wait()
+    if ret:
+      logging.error(stdout)
+      logging.error(stderr)
+      # "% has failed" was a malformed format string and raised ValueError.
+      logging.error("%s has failed", repr(args))
+
+  def GetPkgname(self):
+    """It's necessary to figure out the pkgname from the .pkg file.
+    # nawk 'NR == 2 {print $1; exit;} $f
+    """
+    if not self.pkgname:
+      gunzipped_path = self.GetGunzippedPath()
+      args = ["nawk", "NR == 2 {print $1; exit;}", gunzipped_path]
+      nawk_proc = subprocess.Popen(args, stdout=subprocess.PIPE)
+      stdout, stderr = nawk_proc.communicate()
+      ret_code = nawk_proc.wait()
+      self.pkgname = stdout.strip()
+      logging.debug("GetPkgname(): %s", repr(self.pkgname))
+    return self.pkgname
+
+  def GetMtime(self):
+    if not self.mtime:
+      # This fails if the file is not there.
+      s = os.stat(self.pkg_path)
+      t = time.gmtime(s.st_mtime)
+      self.mtime = datetime.datetime(*t[:6])
+    return self.mtime
+
+  def TransformToDir(self):
+    """Transforms the file to the directory format.
+
+    This uses the Pkgtrans function at the top, because pkgtrans behaves
+    differently on Solaris 8 and 10.  Having our own implementation helps
+    achieve consistent behavior.
+    """
+    if not self.transformed:
+      gunzipped_path = self.GetGunzippedPath()
+      pkgname = self.GetPkgname()
+      args = [os.path.join(os.path.dirname(__file__),
+                           "..", "..", "bin", "custom-pkgtrans"),
+              gunzipped_path, self.GetWorkDir(), pkgname]
+      logging.debug("transforming: %s", args)
+      unused_retcode = self.ShellCommand(args, quiet=(not self.debug))
+      dirs = self.GetDirs()
+      if len(dirs) != 1:
+        raise Error("Need exactly one package in the package stream: "
+                    "%s." % (dirs))
+      self.dir_format_pkg = DirectoryFormatPackage(dirs[0])
+      self.transformed = True
+
+  def GetDirFormatPkg(self):
+    self.TransformToDir()
+    return self.dir_format_pkg
+
+  def GetDirs(self):
+    paths = os.listdir(self.GetWorkDir())
+    dirs = []
+    for p in paths:
+      abspath = os.path.join(self.GetWorkDir(), p)
+      if os.path.isdir(abspath):
+        dirs.append(abspath)
+    return dirs
+
+  def GetPkgmap(self, analyze_permissions, strip=None):
+    dir_format_pkg = self.GetDirFormatPkg()
+    return dir_format_pkg.GetPkgmap(analyze_permissions, strip)
+
+  def GetMd5sum(self):
+    if not self.md5sum:
+      logging.debug("GetMd5sum() reading file %s", repr(self.pkg_path))
+      fp = open(self.pkg_path)
+      hash = hashlib.md5()
+      hash.update(fp.read())
+      fp.close()
+      self.md5sum = hash.hexdigest()
+    return self.md5sum
+
+  def GetPkgchkOutput(self):
+    """Returns: (exit code, stdout, stderr)."""
+    args = ["pkgchk", "-d", self.GetGunzippedPath(), "all"]
+    pkgchk_proc = subprocess.Popen(args, stdout=subprocess.PIPE, stderr=subprocess.PIPE)
+    stdout, stderr = pkgchk_proc.communicate()
+    ret = pkgchk_proc.wait()
+    return ret, stdout, stderr
+
+  def GetFileMtime(self):
+    if not self.mtime:
+      self.mtime = os.stat(self.pkg_path).st_mtime
+    return self.mtime
+
+  def __del__(self):
+    if self.workdir:
+      logging.debug("Removing %s", repr(self.workdir))
+      shutil.rmtree(self.workdir)
+
+
+class DirectoryFormatPackage(ShellMixin, object):
+  """Represents a package in the directory format.
+
+  Allows some read-write operations.
+  """
+  def __init__(self, directory):
+    self.directory = directory
+    self.pkgname = os.path.basename(directory)
+    self.pkgpath = self.directory
+    self.pkginfo_dict = None
+    self.binaries = None
+    self.file_paths = None
+    self.files_metadata = None
+
+  def GetCatalogname(self):
+    """Returns the catalog name of the package.
+
+    A bit hacky.  Looks for the first word of the NAME field in the package.
+    """
+    pkginfo = self.GetParsedPkginfo()
+    words = re.split(c.WS_RE, pkginfo["NAME"])
+    return words[0]
+
+  def GetParsedPkginfo(self):
+    if not self.pkginfo_dict:
+      pkginfo_fd = open(self.GetPkginfoFilename(), "r")
+      self.pkginfo_dict = ParsePkginfo(pkginfo_fd)
+      pkginfo_fd.close()
+    return self.pkginfo_dict
+
+  def GetSrv4FileName(self):
+    """Guesses the Srv4FileName based on the package directory contents."""
+    return PkginfoToSrv4Name(self.GetParsedPkginfo())
+
+  def ToSrv4(self, target_dir):
+    target_file_name = self.GetSrv4FileName()
+    target_path = os.path.join(target_dir, target_file_name)
+    if os.path.exists(target_path):
+      return target_path
+    pkg_container_dir, pkg_dir = os.path.split(self.directory)
+    if not os.path.isdir(target_dir):
+      os.makedirs(target_dir)
+    args = ["pkgtrans", "-s", pkg_container_dir, target_path, pkg_dir]
+    self.ShellCommand(args, quiet=True)
+    args = ["gzip", "-f", target_path]
+    self.ShellCommand(args, quiet=True)
+    return target_path
+
+  def GetPkgmap(self, analyze_permissions=False, strip=None):
+    fd = open(os.path.join(self.directory, "pkgmap"), "r")
+    return Pkgmap(fd, analyze_permissions, strip)
+
+  def SetPkginfoEntry(self, key, value):
+    pkginfo = self.GetParsedPkginfo()
+    logging.debug("Setting %s to %s", repr(key), repr(value))
+    pkginfo[key] = value
+    self.WritePkginfo(pkginfo)
+    pkgmap_path = os.path.join(self.directory, "pkgmap")
+    pkgmap_fd = open(pkgmap_path, "r")
+    new_pkgmap_lines = []
+    pkginfo_re = re.compile("1 i pkginfo")
+    ws_re = re.compile(r"\s+")
+    for line in pkgmap_fd:
+      if pkginfo_re.search(line):
+        fields = ws_re.split(line)
+        # 3: size
+        # 4: sum
+        pkginfo_path = os.path.join(self.directory, "pkginfo")
+        args = ["cksum", pkginfo_path]
+        cksum_process = subprocess.Popen(args, stdout=subprocess.PIPE)
+        stdout, stderr = cksum_process.communicate()
+        cksum_process.wait()
+        size = ws_re.split(stdout)[1]
+        args = ["sum", pkginfo_path]
+        sum_process = subprocess.Popen(args, stdout=subprocess.PIPE)
+        stdout, stderr = sum_process.communicate()
+        sum_process.wait()
+        sum_value = ws_re.split(stdout)[0]
+        fields[3] = size
+        fields[4] = sum_value
+        logging.debug("New pkgmap line: %s", fields)
+        line = " ".join(fields)
+      new_pkgmap_lines.append(line.strip())
+    pkgmap_fd.close()
+    # Write that back
+    pkgmap_path_new = pkgmap_path + ".new"
+    logging.debug("Writing back to %s", pkgmap_path_new)
+    pkgmap_fd = open(pkgmap_path_new, "w")
+    pkgmap_fd.write("\n".join(new_pkgmap_lines))
+    pkgmap_fd.close()
+    shutil.move(pkgmap_path_new, pkgmap_path)
+
+    # TODO(maciej): Need to update the relevant line on pkgmap too
+
+  def GetPkginfoFilename(self):
+    return os.path.join(self.directory, "pkginfo")
+
+  def WritePkginfo(self, pkginfo_dict):
+    """Writes pkginfo_dict to the pkginfo file, one KEY=value per line."""
+    # Some packages extract read-only; make the tree user-writable first.
+    args = ["chmod", "-R", "u+w", self.directory]
+    self.ShellCommand(args)
+    pkginfo_filename = self.GetPkginfoFilename()
+    os.chmod(pkginfo_filename, 0644)
+    pkginfo_fd = open(pkginfo_filename, "w")
+    # Write the caller's dict; re-parsing here would read the truncated file.
+    for k, v in pkginfo_dict.items():
+      pkginfo_fd.write("%s=%s\n" % (k, v))
+    pkginfo_fd.close()
+
+  def ResetNameProperty(self):
+    """Sometimes, NAME= contains useless data. This method resets them."""
+    pkginfo_dict = self.GetParsedPkginfo()
+    catalog_name = PkgnameToCatName(pkginfo_dict["PKG"])
+    description = pkginfo_dict["DESC"]
+    pkginfo_name = "%s - %s" % (catalog_name, description)
+    self.SetPkginfoEntry("NAME", pkginfo_name)
+
+  def GetDependencies(self):
+    depends = []
+    depend_file_path = os.path.join(self.directory, "install", "depend")
+    if not os.path.exists(depend_file_path):
+      return depends
+    fd = open(os.path.join(self.directory, "install", "depend"), "r")
+    # It needs to be a list because there might be duplicates and it's
+    # necessary to carry that information.
+    for line in fd:
+      fields = re.split(c.WS_RE, line)
+      if fields[0] == "P":
+        pkgname = fields[1]
+        pkg_desc = " ".join(fields[1:])
+        depends.append((pkgname, pkg_desc))
+    fd.close()
+    return depends
+
+  def CheckPkgpathExists(self):
+    if not os.path.isdir(self.directory):
+      raise PackageError("%s does not exist or is not a directory"
+                         % self.directory)
+
+  def GetFilesMetadata(self):
+    """Returns a data structure with all the files plus their metadata.
+
+    [
+      {
+        "path": ...,
+        "mime_type": ...,
+      },
+    ]
+    """
+    if not self.files_metadata:
+      self.CheckPkgpathExists()
+      self.files_metadata = []
+      files_root = os.path.join(self.directory, "root")
+      if not os.path.exists(files_root):
+        return self.files_metadata
+      all_files = self.GetAllFilePaths()
+      def StripRe(x, strip_re):
+        return re.sub(strip_re, "", x)
+      root_re = re.compile(r"^root/")
+      file_magic = FileMagic()
+      for file_path in all_files:
+        full_path = unicode(self.MakeAbsolutePath(file_path))
+        file_info = {
+            "path": StripRe(file_path, root_re),
+            "mime_type": file_magic.GetFileMimeType(full_path)
+        }
+        if not file_info["mime_type"]:
+          logging.error("Could not establish the mime type of %s",
+                        full_path)
+          # We really don't want that, as it misses binaries.
+          raise PackageError("Could not establish the mime type of %s"
+                             % full_path)
+        if IsBinary(file_info):
+          parser = hp.createParser(full_path)
+          if not parser:
+            logging.warning("Can't parse file %s", file_path)
+          else:
+            file_info["mime_type_by_hachoir"] = parser.mime_type
+            machine_id = parser["/header/machine"].value
+            file_info["machine_id"] = machine_id
+            file_info["endian"] = parser["/header/endian"].display
+        self.files_metadata.append(file_info)
+    return self.files_metadata
+
+  def ListBinaries(self):
+    """Lists all the binaries from a given package.
+
+    Original checkpkg code:
+
+    #########################################
+    # find all executables and dynamic libs,and list their filenames.
+    listbinaries() {
+      if [ ! -d $1 ] ; then
+        print errmsg $1 not a directory
+        rm -rf $EXTRACTDIR
+        exit 1
+      fi
+      find $1 -print | xargs file |grep ELF |nawk -F: '{print $1}'
+    }
+
+    Returns a list of absolute paths.
+
+    Now that there are files_metadata, this function can safely go away, once
+    all its callers are modified to use files_metadata instead.
+    """
+    if self.binaries is None:
+      self.CheckPkgpathExists()
+      files_metadata = self.GetFilesMetadata()
+      self.binaries = []
+      # The nested for-loop looks inefficient.
+      for file_info in files_metadata:
+        if IsBinary(file_info):
+          self.binaries.append(file_info["path"])
+      self.binaries.sort()
+    return self.binaries
+
+  def GetAllFilePaths(self):
+    """Returns a list of all paths from the package."""
+    if not self.file_paths:
+      self.CheckPkgpathExists()
+      remove_prefix = "%s/" % self.pkgpath
+      self.file_paths = []
+      for root, dirs, files in os.walk(os.path.join(self.pkgpath, "root")):
+        full_paths = [os.path.join(root, f) for f in files]
+        self.file_paths.extend([f.replace(remove_prefix, "") for f in full_paths])
+    return self.file_paths
+
+  def _GetOverridesStream(self, file_path):
+    # This might potentially cause a file descriptor leak, but I'm not going to
+    # worry about that at this stage.
+    # NB, the whole catalog run doesn't seem to be suffering. (~2500 packages)
+    #
+    # There is a race condition here, but it's executing sequentially, I don't
+    # expect any concurrency problems.
+    if os.path.isfile(file_path):
+      logging.debug("Opening %s override file." % repr(file_path))
+      return open(file_path, "r")
+    else:
+      logging.debug("Override file %s not found." % repr(file_path))
+      return None
+
+  def _ParseOverridesStream(self, stream):
+    override_list = []
+    for line in stream:
+      if line.startswith("#"):
+        continue
+      override_list.append(overrides.ParseOverrideLine(line))
+    return override_list
+
+  def GetOverrides(self):
+    """Returns overrides, a list of overrides.Override instances."""
+    overrides = []
+    catalogname = self.GetCatalogname()
+    override_paths = (
+        [self.directory,
+         "root",
+         "opt/csw/share/checkpkg/overrides", catalogname],
+        [self.directory,
+         "install",
+         "checkpkg_override"],
+    )
+    for override_path in override_paths:
+      file_path = os.path.join(*override_path)
+      stream = self._GetOverridesStream(file_path)
+      if stream:
+        overrides.extend(self._ParseOverridesStream(stream))
+    return overrides
+
+  def GetFileContent(self, pkg_file_path):
+    if pkg_file_path.startswith("/"):
+      pkg_file_path = pkg_file_path[1:]
+    # TODO: Write a unit test for the right path
+    file_path = os.path.join(self.directory, "root", pkg_file_path)
+    try:
+      fd = open(file_path, "r")
+      content = fd.read()
+      fd.close()
+      return content
+    except IOError, e:
+      raise PackageError(e)
+
+  def GetFilesContaining(self, regex_list):
+    """Returns {regex: [paths from GetAllFilePaths() whose content matches]}."""
+    files_by_pattern = {}
+    for full_path in self.GetAllFilePaths():
+      fd = open(self.MakeAbsolutePath(full_path), "rb")
+      content = fd.read()
+      fd.close()  # Close explicitly instead of leaving it to the GC.
+      for regex in regex_list:
+        if re.search(regex, content):
+          files_by_pattern.setdefault(regex, []).append(full_path)
+    return files_by_pattern
+
+  def MakeAbsolutePath(self, p):
+    return os.path.join(self.pkgpath, p)
+
+
+class FileMagic(object):
+  """Libmagic sometimes returns None, which I think is a bug.
+  Trying to come up with a way to work around that.
+  """
+
+  def __init__(self):
+    self.cookie_count = 0
+    self.magic_cookie = None
+
+  def _GetCookie(self):
+    magic_cookie = magic.open(self.cookie_count)
+    self.cookie_count += 1
+    magic_cookie.load()
+    magic_cookie.setflags(magic.MAGIC_MIME)
+    return magic_cookie
+
+  def _LazyInit(self):
+    if not self.magic_cookie:
+      self.magic_cookie = self._GetCookie()
+
+  def GetFileMimeType(self, full_path):
+    """Trying to run magic.file() a few times, not accepting None."""
+    self._LazyInit()
+    mime = None
+    for i in xrange(10):
+      mime = self.magic_cookie.file(full_path)
+      if mime:
+        break;
+      else:
+        # Returned mime is null. Re-initializing the cookie and trying again.
+        logging.error("magic_cookie.file(%s) returned None. Retrying.",
+                      full_path)
+        self.magic_cookie = self._GetCookie()
+    return mime
+
+
+class PackageComparator(object):
+
+  def __init__(self, file_name_a, file_name_b,
+               permissions=False,
+               strip_a=None,
+               strip_b=None):
+    self.analyze_permissions = permissions
+    self.pkg_a = CswSrv4File(file_name_a)
+    self.pkg_b = CswSrv4File(file_name_b)
+    self.strip_a = strip_a
+    self.strip_b = strip_b
+
+  def Run(self):
+    pkgmap_a = self.pkg_a.GetPkgmap(self.analyze_permissions, strip=self.strip_a)
+    pkgmap_b = self.pkg_b.GetPkgmap(self.analyze_permissions, strip=self.strip_b)
+    diff_ab = difflib.unified_diff(sorted(pkgmap_a.paths),
+                                   sorted(pkgmap_b.paths),
+                                   fromfile=self.pkg_a.pkg_path,
+                                   tofile=self.pkg_b.pkg_path)
+    diff_text = "\n".join(diff_ab)
+    if diff_text:
+      less_proc = subprocess.Popen(["less"], stdin=subprocess.PIPE)
+      less_stdout, less_stderr = less_proc.communicate(input=diff_text)
+      less_proc.wait()
+    else:
+      print "No differences found."

Modified: csw/mgar/gar/v2/lib/python/package_checks.py
===================================================================
--- csw/mgar/gar/v2/lib/python/package_checks.py	2010-10-10 20:35:47 UTC (rev 11219)
+++ csw/mgar/gar/v2/lib/python/package_checks.py	2010-10-10 20:36:33 UTC (rev 11220)
@@ -1000,8 +1000,11 @@
         if pkgname not in policy_pkgname_list:
           error_mgr.ReportError(
               "shared-lib-pkgname-mismatch",
-              "file=%s pkgname=%s expected=%s"
-              % (binary_info["path"], pkgname, policy_pkgname_list))
+              "file=%s "
+              "soname=%s "
+              "pkgname=%s "
+              "expected=%s"
+              % (binary_info["path"], soname, pkgname, policy_pkgname_list))
           messenger.OneTimeMessage(
               soname,
               "This shared library (%s) is in a directory indicating that it "

Modified: csw/mgar/gar/v2/lib/python/package_checks_test.py
===================================================================
--- csw/mgar/gar/v2/lib/python/package_checks_test.py	2010-10-10 20:35:47 UTC (rev 11219)
+++ csw/mgar/gar/v2/lib/python/package_checks_test.py	2010-10-10 20:36:33 UTC (rev 11220)
@@ -1320,19 +1320,27 @@
     self.pkg_data = neon_stats[0]
     self.error_mgr_mock.ReportError(
         'shared-lib-pkgname-mismatch',
-        "file=opt/csw/lib/libneon.so.26.0.4 pkgname=CSWneon "
+        "file=opt/csw/lib/libneon.so.26.0.4 "
+        "soname=libneon.so.26 "
+        "pkgname=CSWneon "
         "expected=['CSWlibneon26', 'CSWlibneon-26']")
     self.error_mgr_mock.ReportError(
         'shared-lib-pkgname-mismatch',
-        "file=opt/csw/lib/libneon.so.27.2.0 pkgname=CSWneon "
+        "file=opt/csw/lib/libneon.so.27.2.0 "
+        "soname=libneon.so.27 "
+        "pkgname=CSWneon "
         "expected=['CSWlibneon27', 'CSWlibneon-27']")
     self.error_mgr_mock.ReportError(
         'shared-lib-pkgname-mismatch',
-        "file=opt/csw/lib/sparcv9/libneon.so.26.0.4 pkgname=CSWneon "
+        "file=opt/csw/lib/sparcv9/libneon.so.26.0.4 "
+        "soname=libneon.so.26 "
+        "pkgname=CSWneon "
         "expected=['CSWlibneon26', 'CSWlibneon-26']")
     self.error_mgr_mock.ReportError(
         'shared-lib-pkgname-mismatch',
-        "file=opt/csw/lib/sparcv9/libneon.so.27.2.0 pkgname=CSWneon "
+        "file=opt/csw/lib/sparcv9/libneon.so.27.2.0 "
+        "soname=libneon.so.27 "
+        "pkgname=CSWneon "
         "expected=['CSWlibneon27', 'CSWlibneon-27']")
 
 

Added: csw/mgar/gar/v2/lib/python/package_test.py
===================================================================
--- csw/mgar/gar/v2/lib/python/package_test.py	                        (rev 0)
+++ csw/mgar/gar/v2/lib/python/package_test.py	2010-10-10 20:36:33 UTC (rev 11220)
@@ -0,0 +1,7 @@
+#!/usr/bin/env python2.6
+
+import unittest
+
+if __name__ == '__main__':
+  unittest.main()
+

Modified: csw/mgar/gar/v2/lib/python/sharedlib_utils.py
===================================================================
--- csw/mgar/gar/v2/lib/python/sharedlib_utils.py	2010-10-10 20:35:47 UTC (rev 11219)
+++ csw/mgar/gar/v2/lib/python/sharedlib_utils.py	2010-10-10 20:36:33 UTC (rev 11220)
@@ -12,6 +12,7 @@
                    'i486', 'i386', 'i86')
 AMD64_PATHS = ('amd64',)
 LEGIT_CHAR_RE = re.compile(r"[a-zA-Z0-9\+]+")
+SONAME_VERSION_RE = re.compile("^(?P<name>.*)\.so\.(?P<version>[\d\.]+)$")
 
 class SonameParsingException(Exception):
   pass
@@ -51,7 +52,7 @@
   """
   soname_re = re.compile(r"(?P<basename>[\w\+]+([\.\-]+[\w\+]+)*)"
                          r"\.so"
-                         r"(\.(?P<version>\d+)(\..*)?)?"
+                         r"(\.(?P<version>[\d\.]+))?"
                          r"$")
   m = soname_re.match(soname)
   if not m:
@@ -97,3 +98,74 @@
       if "sharedlib" in metadata["mime_type"]:
         shared_libs.append(metadata["path"])
   return shared_libs
+
+
+def GetCommonVersion(sonames):
+  versions = []
+  for soname in sonames:
+    m = SONAME_VERSION_RE.search(soname)
+    if m:
+      versions.append(m.groupdict()["version"])
+  versions_set = set(versions)
+  if len(versions_set) > 1 or not versions_set:
+    return None
+  else:
+    return versions_set.pop()
+
+
+def MakePackageNameBySonameCollection(sonames):
+  """Finds a name for a collection of sonames.
+
+  Try to find the largest common prefix in the sonames, and establish
+  whether there is a common version to them.
+  """
+  common_version = GetCommonVersion(sonames)
+  if not common_version:
+    # If the sonames don't have a common version, they shouldn't be together
+    # in one package.
+    return None
+  common_substring_candidates = []
+  for soname in sonames:
+    candidate = soname
+    # We always want such package to start with the prefix "lib".  Therefore,
+    # we're stripping the prefix "lib" if it exists, and we're adding it back
+    # to the pkgname and soname at the end of the function.
+    if candidate.startswith("lib"):
+      candidate = candidate[3:]
+    m = SONAME_VERSION_RE.search(candidate)
+    common_substring_candidates.append(candidate)
+  lcs = CollectionLongestCommonSubstring(common_substring_candidates)
+  pkgname = "CSWlib%s.%s" % (SanitizeWithChar(lcs, "-"), common_version)
+  catalogname = "lib%s.%s" % (SanitizeWithChar(lcs, "_"), common_version)
+  return pkgname, catalogname
+
+
+def LongestCommonSubstring(S, T):
+  """Stolen from Wikibooks
+
+  http://en.wikibooks.org/wiki/Algorithm_Implementation/Strings/Longest_common_substring#Python"""
+  m = len(S); n = len(T)
+  L = [[0] * (n+1) for i in xrange(m+1)]
+  LCS = set()
+  longest = 0
+  for i in xrange(m):
+    for j in xrange(n):
+      if S[i] == T[j]:
+        v = L[i][j] + 1
+        L[i+1][j+1] = v
+        if v > longest:
+          longest = v
+          LCS = set()
+        if v == longest:
+          LCS.add(S[i-v+1:i+1])
+  return LCS
+
+
+def CollectionLongestCommonSubstring(collection):
+  """Folds LongestCommonSubstring over collection; "" if nothing is shared."""
+  remaining = list(collection)  # A copy; the argument is not modified.
+  current_substring = remaining.pop() if remaining else ""
+  while remaining and current_substring:
+    common = LongestCommonSubstring(current_substring, remaining.pop())
+    current_substring = min(common) if common else ""  # min(): stable ties.
+  return current_substring

Modified: csw/mgar/gar/v2/lib/python/sharedlib_utils_test.py
===================================================================
--- csw/mgar/gar/v2/lib/python/sharedlib_utils_test.py	2010-10-10 20:35:47 UTC (rev 11219)
+++ csw/mgar/gar/v2/lib/python/sharedlib_utils_test.py	2010-10-10 20:36:33 UTC (rev 11220)
@@ -1,4 +1,4 @@
-#!/opt/csw/bin/python2.6
+#!/usr/bin/env python2.6
 # $Id$
 
 import re
@@ -60,6 +60,14 @@
     )
     self.assertEqual(expected, su.MakePackageNameBySoname(soname))
 
+  def testMakePackageNameBySonameMinorVersion(self):
+    soname = "libfoo.so.0.1"
+    expected = (
+        ["CSWlibfoo0-1", "CSWlibfoo-0-1"],
+        ["libfoo0_1", "libfoo_0_1"],
+    )
+    self.assertEqual(expected, su.MakePackageNameBySoname(soname))
+
   def testMakePackageNameBySonameApr(self):
     soname = "libapr-1.so.0"
     expected = (
@@ -88,7 +96,7 @@
                      su.MakePackageNameBySoname(soname))
 
   def testMakePackageNameBySonameComplexApr(self):
-    soname = "libapr-1.so.10.0.0"
+    soname = "libapr-1.so.10"
     expected = (
        ['CSWlibapr-110', 'CSWlibapr-1-10'],
        ['libapr_110', 'libapr_1_10']
@@ -127,5 +135,58 @@
     self.assertEqual("foo_0", su.SanitizeWithChar("foo-0", "_"))
 
 
+class GetCommonVersionUnitTest(unittest.TestCase):
+
+  def testGetCommonVersionSimple(self):
+    sonames = ["libfoo.so.0", "libfoo_util.so.0"]
+    self.assertEqual("0", su.GetCommonVersion(sonames))
+
+  def testGetCommonVersionMore(self):
+    sonames = ["libfoo.so.0.2.1", "libfoo_util.so.0.2.1"]
+    self.assertEqual("0.2.1", su.GetCommonVersion(sonames))
+
+  def testGetCommonVersionInvalid(self):
+    sonames = ["libfoo.so.0.2.1", "libfoo_util.so.0.2.3"]
+    self.assertEqual(None, su.GetCommonVersion(sonames))
+
+
+class MakePackageNameBySonameCollectionUnitTest(unittest.TestCase):
+
+  def testMakePackageNameBySonameCollectionTwo(self):
+    sonames = ["libfoo.so.0", "libfoo_util.so.0"]
+    expected = (
+        ["CSWlibfoo0", "CSWlibfoo-0"],
+        ["libfoo0", "libfoo_0"],
+    )
+    self.assertEqual(expected, su.MakePackageNameBySonameCollection(sonames))
+
+  def testMakePackageNameBySonameCollectionBdb(self):
+    sonames = ["libfoo.so.0", "libfoo_util.so.0"]
+    expected = (
+        ["CSWlibfoo0", "CSWlibfoo-0"],
+        ["libfoo0", "libfoo_0"],
+    )
+    self.assertEqual(expected, su.MakePackageNameBySonameCollection(sonames))
+
+  def testMakePackageNameBySonameCollectionNoCommonVersion(self):
+    sonames = ["libfoo.so.0", "libfoo_util.so.1"]
+    self.assertEqual(None, su.MakePackageNameBySonameCollection(sonames))
+
+
+class CommomSubstringTest(unittest.TestCase):
+
+  def testLongestCommonSubstring_1(self):
+    self.assertEqual(set(["foo"]), su.LongestCommonSubstring("foo", "foo"))
+
+  def testLongestCommonSubstring_2(self):
+    self.assertEqual(set([]), su.LongestCommonSubstring("foo", "bar"))
+
+  def testLongestCommonSubstring_3(self):
+    self.assertEqual(set(["bar"]), su.LongestCommonSubstring("barfoobar", "bar"))
+
+  def testLongestCommonSubstring_4(self):
+    self.assertEqual(set(['bcd', 'hij']), su.LongestCommonSubstring("abcdefghijk", "bcdhij"))
+
+
 if __name__ == '__main__':
   unittest.main()

Modified: csw/mgar/gar/v2/tests/run_tests.py
===================================================================
--- csw/mgar/gar/v2/tests/run_tests.py	2010-10-10 20:35:47 UTC (rev 11219)
+++ csw/mgar/gar/v2/tests/run_tests.py	2010-10-10 20:36:33 UTC (rev 11220)
@@ -1,4 +1,4 @@
-#!/opt/csw/bin/python2.6
+#!/usr/bin/env python2.6
 # $Id$
 
 import unittest
@@ -16,6 +16,8 @@
 from package_checks_test     import *
 from dependency_checks_test  import *
 from sharedlib_utils_test    import *
+from catalog_test            import *
+from package_test            import *
 
 # These are very slow GAR tests, which I'm disabling for now.
 # from example_test            import *


This was sent by the SourceForge.net collaborative development platform, the world's largest Open Source development site.


More information about the devel mailing list