[csw-devel] SF.net SVN: gar:[11220] csw/mgar/gar/v2
wahwah at users.sourceforge.net
wahwah at users.sourceforge.net
Sun Oct 10 22:36:33 CEST 2010
Revision: 11220
http://gar.svn.sourceforge.net/gar/?rev=11220&view=rev
Author: wahwah
Date: 2010-10-10 20:36:33 +0000 (Sun, 10 Oct 2010)
Log Message:
-----------
mGAR v2: checkpkg, refactoring: Split the hachoir-dependent bits off into
a separate module.
Modified Paths:
--------------
csw/mgar/gar/v2/lib/python/opencsw.py
csw/mgar/gar/v2/lib/python/opencsw_test.py
csw/mgar/gar/v2/lib/python/package_checks.py
csw/mgar/gar/v2/lib/python/package_checks_test.py
csw/mgar/gar/v2/lib/python/sharedlib_utils.py
csw/mgar/gar/v2/lib/python/sharedlib_utils_test.py
csw/mgar/gar/v2/tests/run_tests.py
Added Paths:
-----------
csw/mgar/gar/v2/lib/python/catalog.py
csw/mgar/gar/v2/lib/python/catalog_test.py
csw/mgar/gar/v2/lib/python/package.py
csw/mgar/gar/v2/lib/python/package_test.py
Added: csw/mgar/gar/v2/lib/python/catalog.py
===================================================================
--- csw/mgar/gar/v2/lib/python/catalog.py (rev 0)
+++ csw/mgar/gar/v2/lib/python/catalog.py 2010-10-10 20:36:33 UTC (rev 11220)
@@ -0,0 +1,139 @@
+#!/usr/bin/env python2.6
+
+import re
+
+class OpencswCatalogBuilder(object):
+
+ def __init__(self, product_dir, catalog_dir):
+ self.product_dir = product_dir
+ self.catalog_dir = catalog_dir
+
+ def Run(self):
+ pkg_dirs = os.listdir(self.product_dir)
+ for pkg_dir in pkg_dirs:
+ pkg_path = os.path.join(self.product_dir, pkg_dir)
+ pkginfo_path = os.path.join(pkg_path, "pkginfo")
+ if (os.path.isdir(pkg_path)
+ and
+ os.path.exists(pkginfo_path)):
+ if not self.Srv4Exists(pkg_path):
+ pkg = None
+ tmpdir = None
+ try:
+ tmpdir = tempfile.mkdtemp(prefix="sunw-pkg-")
+ logging.debug("Copying %s to %s", repr(pkg_path), repr(tmpdir))
+ tmp_pkg_dir = os.path.join(tmpdir, pkg_dir)
+ shutil.copytree(pkg_path, tmp_pkg_dir, symlinks=True)
+ pkg = DirectoryFormatPackage(tmp_pkg_dir)
+ # Replacing NAME= in the pkginfo, setting it to the catalog name.
+ pkg.ResetNameProperty()
+ pkg.ToSrv4(self.catalog_dir)
+ except IOError, e:
+ logging.warn("%s has failed: %s", pkg_path, e)
+ finally:
+ if pkg:
+ del(pkg)
+ if os.path.exists(tmpdir):
+ shutil.rmtree(tmpdir)
+ else:
+ logging.warn("srv4 file for %s already exists, skipping", pkg_path)
+ else:
+ logging.warn("%s is not a directory.", pkg_path)
+
+
+ def Srv4Exists(self, pkg_dir):
+ pkg = DirectoryFormatPackage(pkg_dir)
+ srv4_name = pkg.GetSrv4FileName()
+ srv4_name += ".gz"
+ srv4_path = os.path.join(self.catalog_dir, srv4_name)
+ result = os.path.exists(srv4_path)
+ logging.debug("Srv4Exists(%s) => %s, %s", pkg_dir, repr(srv4_path), result)
+ return result
+
+
+class OpencswCatalog(object):
+ """Represents a catalog file."""
+
+ def __init__(self, file_name):
+ self.file_name = file_name
+ self.by_basename = None
+ self.catalog_data = None
+
+ def _ParseCatalogLine(self, line):
+ cline_re_str_list = [
+ (
+ r"^"
+ # tmux
+ r"(?P<catalogname>\S+)"
+ r"\s+"
+ # 1.2,REV=2010.05.17
+ r"(?P<version>\S+)"
+ r"\s+"
+ # CSWtmux
+ r"(?P<pkgname>\S+)"
+ r"\s+"
+ # tmux-1.2,REV=2010.05.17-SunOS5.9-sparc-CSW.pkg.gz
+ r"(?P<file_basename>\S+)"
+ r"\s+"
+ # 145351cf6186fdcadcd169b66387f72f
+ r"(?P<md5sum>\S+)"
+ r"\s+"
+ # 214091
+ r"(?P<size>\S+)"
+ r"\s+"
+ # CSWcommon|CSWlibevent
+ r"(?P<deps>\S+)"
+ r"\s+"
+ # none
+ r"(?P<none_thing_1>\S+)"
+ # An optional empty field.
+ r"("
+ r"\s+"
+ # none\n'
+ r"(?P<none_thing_2>\S+)"
+ r")?"
+ r"$"
+ ),
+ ]
+ cline_re_list = [re.compile(x) for x in cline_re_str_list]
+ matched = False
+ d = None
+ for cline_re in cline_re_list:
+ m = cline_re.match(line)
+ if m:
+ d = m.groupdict()
+ matched = True
+ if not d:
+ raise CatalogLineParseError("Parsed %s data is empty" % repr(line))
+ if not matched:
+ raise CatalogLineParseError("No regexes matched %s" % repr(line))
+ return d
+
+ def _GetCatalogData(self, fd):
+ catalog_data = []
+ for line in fd:
+ try:
+ parsed = self._ParseCatalogLine(line)
+ catalog_data.append(parsed)
+ except CatalogLineParseError, e:
+ logging.debug("Could not parse %s, %s", repr(line), e)
+ return catalog_data
+
+ def GetCatalogData(self):
+ if not self.catalog_data:
+ fd = open(self.file_name, "r")
+ self.catalog_data = self._GetCatalogData(fd)
+ return self.catalog_data
+
+ def GetDataByBasename(self):
+ if not self.by_basename:
+ self.by_basename = {}
+ cd = self.GetCatalogData()
+ for d in cd:
+ if "file_basename" not in d:
+ logging.error("%s is missing the file_basename field", d)
+ self.by_basename[d["file_basename"]] = d
+ return self.by_basename
+
+
+
Added: csw/mgar/gar/v2/lib/python/catalog_test.py
===================================================================
--- csw/mgar/gar/v2/lib/python/catalog_test.py (rev 0)
+++ csw/mgar/gar/v2/lib/python/catalog_test.py 2010-10-10 20:36:33 UTC (rev 11220)
@@ -0,0 +1,29 @@
+#!/usr/bin/env python2.6
+
+import unittest
+import catalog
+
+class OpencswCatalogUnitTest(unittest.TestCase):
+
+ def test_ParseCatalogLine_1(self):
+ line = (
+ 'tmux 1.2,REV=2010.05.17 CSWtmux '
+ 'tmux-1.2,REV=2010.05.17-SunOS5.9-sparc-CSW.pkg.gz '
+ '145351cf6186fdcadcd169b66387f72f 214091 '
+ 'CSWcommon|CSWlibevent none none\n')
+ oc = catalog.OpencswCatalog(None)
+ parsed = oc._ParseCatalogLine(line)
+ expected = {'catalogname': 'tmux',
+ 'deps': 'CSWcommon|CSWlibevent',
+ 'file_basename': 'tmux-1.2,REV=2010.05.17-SunOS5.9-sparc-CSW.pkg.gz',
+ 'md5sum': '145351cf6186fdcadcd169b66387f72f',
+ 'none_thing_1': 'none',
+ 'none_thing_2': 'none',
+ 'pkgname': 'CSWtmux',
+ 'size': '214091',
+ 'version': '1.2,REV=2010.05.17'}
+ self.assertEquals(expected, parsed)
+
+
+if __name__ == '__main__':
+ unittest.main()
Modified: csw/mgar/gar/v2/lib/python/opencsw.py
===================================================================
--- csw/mgar/gar/v2/lib/python/opencsw.py 2010-10-10 20:35:47 UTC (rev 11219)
+++ csw/mgar/gar/v2/lib/python/opencsw.py 2010-10-10 20:36:33 UTC (rev 11220)
@@ -15,9 +15,7 @@
import copy
import datetime
import difflib
-import hachoir_parser as hp
import hashlib
-import magic
import logging
import os
import os.path
@@ -30,12 +28,8 @@
import overrides
import configuration as c
from Cheetah import Template
+import sharedlib_utils as su
-# Suppress unhelpful warnings
-# http://bitbucket.org/haypo/hachoir/issue/23
-import hachoir_core.config
-hachoir_core.config.quiet = True
-
ARCH_SPARC = "sparc"
ARCH_i386 = "i386"
ARCH_ALL = "all"
@@ -425,184 +419,6 @@
return editor
-class ShellMixin(object):
-
- def ShellCommand(self, args, quiet=False):
- logging.debug("Calling: %s", repr(args))
- if quiet:
- process = subprocess.Popen(args,
- stdout=subprocess.PIPE,
- stderr=subprocess.PIPE)
- stdout, stderr = process.communicate()
- retcode = process.wait()
- else:
- retcode = subprocess.call(args)
- if retcode:
- raise Error("Running %s has failed." % repr(args))
- return retcode
-
-
-class CswSrv4File(ShellMixin, object):
- """Represents a package in the srv4 format (pkg)."""
-
- def __init__(self, pkg_path, debug=False):
- self.pkg_path = pkg_path
- self.workdir = None
- self.gunzipped_path = None
- self.transformed = False
- self.dir_format_pkg = None
- self.debug = debug
- self.pkgname = None
- self.md5sum = None
- self.mtime = None
-
- def __repr__(self):
- return u"CswSrv4File(%s)" % repr(self.pkg_path)
-
- def GetWorkDir(self):
- if not self.workdir:
- self.workdir = tempfile.mkdtemp(prefix="pkg_")
- fd = open(os.path.join(self.workdir, "admin"), "w")
- fd.write(ADMIN_FILE_CONTENT)
- fd.close()
- return self.workdir
-
- def GetAdminFilePath(self):
- return os.path.join(self.GetWorkDir(), "admin")
-
- def GetGunzippedPath(self):
- if not self.gunzipped_path:
- gzip_suffix = ".gz"
- pkg_suffix = ".pkg"
- if self.pkg_path.endswith("%s%s" % (pkg_suffix, gzip_suffix)):
- # Causing the class to stat the .gz file. This call throws away the
- # result, but the result will be cached as a class instance member.
- self.GetMtime()
- base_name_gz = os.path.split(self.pkg_path)[1]
- shutil.copy(self.pkg_path, self.GetWorkDir())
- self.pkg_path = os.path.join(self.GetWorkDir(), base_name_gz)
- args = ["gunzip", "-f", self.pkg_path]
- unused_retcode = self.ShellCommand(args)
- self.gunzipped_path = self.pkg_path[:(-len(gzip_suffix))]
- elif self.pkg_path.endswith(pkg_suffix):
- self.gunzipped_path = self.pkg_path
- else:
- raise Error("The file name should end in either "
- "%s or %s, but it's %s."
- % (gzip_suffix, pkg_suffix, repr(self.pkg_path)))
- return self.gunzipped_path
-
- def Pkgtrans(self, src_file, destdir, pkgname):
- """A proxy for the pkgtrans command.
-
- This requires custom-pkgtrans to be available.
- """
- if not os.path.isdir(destdir):
- raise PackageError("%s doesn't exist or is not a directory" % destdir)
- args = [os.path.join(os.path.dirname(__file__), "custom-pkgtrans"),
- src_file,
- destdir,
- pkgname ]
- pkgtrans_proc = subprocess.Popen(args,
- stdout=subprocess.PIPE,
- stderr=subprocess.PIPE)
- stdout, stderr = pkgtrans_proc.communicate()
- ret = pkgtrans_proc.wait()
- if ret:
- logging.error(stdout)
- logging.error(stderr)
- logging.error("% has failed" % args)
-
- def GetPkgname(self):
- """It's necessary to figure out the pkgname from the .pkg file.
- # nawk 'NR == 2 {print $1; exit;} $f
- """
- if not self.pkgname:
- gunzipped_path = self.GetGunzippedPath()
- args = ["nawk", "NR == 2 {print $1; exit;}", gunzipped_path]
- nawk_proc = subprocess.Popen(args, stdout=subprocess.PIPE)
- stdout, stderr = nawk_proc.communicate()
- ret_code = nawk_proc.wait()
- self.pkgname = stdout.strip()
- logging.debug("GetPkgname(): %s", repr(self.pkgname))
- return self.pkgname
-
- def GetMtime(self):
- if not self.mtime:
- # This fails if the file is not there.
- s = os.stat(self.pkg_path)
- t = time.gmtime(s.st_mtime)
- self.mtime = datetime.datetime(*t[:6])
- return self.mtime
-
- def TransformToDir(self):
- """Transforms the file to the directory format.
-
- This uses the Pkgtrans function at the top, because pkgtrans behaves
- differently on Solaris 8 and 10. Having our own implementation helps
- achieve consistent behavior.
- """
- if not self.transformed:
- gunzipped_path = self.GetGunzippedPath()
- pkgname = self.GetPkgname()
- args = [os.path.join(os.path.dirname(__file__),
- "..", "..", "bin", "custom-pkgtrans"),
- gunzipped_path, self.GetWorkDir(), pkgname]
- logging.debug("transforming: %s", args)
- unused_retcode = self.ShellCommand(args, quiet=(not self.debug))
- dirs = self.GetDirs()
- if len(dirs) != 1:
- raise Error("Need exactly one package in the package stream: "
- "%s." % (dirs))
- self.dir_format_pkg = DirectoryFormatPackage(dirs[0])
- self.transformed = True
-
- def GetDirFormatPkg(self):
- self.TransformToDir()
- return self.dir_format_pkg
-
- def GetDirs(self):
- paths = os.listdir(self.GetWorkDir())
- dirs = []
- for p in paths:
- abspath = os.path.join(self.GetWorkDir(), p)
- if os.path.isdir(abspath):
- dirs.append(abspath)
- return dirs
-
- def GetPkgmap(self, analyze_permissions, strip=None):
- dir_format_pkg = self.GetDirFormatPkg()
- return dir_format_pkg.GetPkgmap(analyze_permissions, strip)
-
- def GetMd5sum(self):
- if not self.md5sum:
- logging.debug("GetMd5sum() reading file %s", repr(self.pkg_path))
- fp = open(self.pkg_path)
- hash = hashlib.md5()
- hash.update(fp.read())
- fp.close()
- self.md5sum = hash.hexdigest()
- return self.md5sum
-
- def GetPkgchkOutput(self):
- """Returns: (exit code, stdout, stderr)."""
- args = ["pkgchk", "-d", self.GetGunzippedPath(), "all"]
- pkgchk_proc = subprocess.Popen(args, stdout=subprocess.PIPE, stderr=subprocess.PIPE)
- stdout, stderr = pkgchk_proc.communicate()
- ret = pkgchk_proc.wait()
- return ret, stdout, stderr
-
- def GetFileMtime(self):
- if not self.mtime:
- self.mtime = os.stat(self.pkg_path).st_mtime
- return self.mtime
-
- def __del__(self):
- if self.workdir:
- logging.debug("Removing %s", repr(self.workdir))
- shutil.rmtree(self.workdir)
-
-
def ParsePkginfo(lines):
"""Parses a pkginfo data."""
d = {}
@@ -659,12 +475,13 @@
catalogname_list = copy.copy(catalogname_list)
if len(catalogname_list) == 1:
return catalogname_list[0]
- current_substring = catalogname_list.pop()
- while catalogname_list and current_substring:
- substring_set = LongestCommonSubstring(current_substring,
- catalogname_list.pop())
- if substring_set:
- current_substring = list(substring_set)[0]
+ #current_substring = catalogname_list.pop()
+ #while catalogname_list and current_substring:
+ # substring_set = su.LongestCommonSubstring(current_substring,
+ # catalogname_list.pop())
+ # if substring_set:
+ # current_substring = list(substring_set)[0]
+ current_substring = su.CollectionLongestCommonSubstring(catalogname_list)
# If it's something like foo_, make it foo.
while current_substring and not current_substring[-1].isalnum():
current_substring = current_substring[:-1]
@@ -673,27 +490,6 @@
return "various packages"
-def LongestCommonSubstring(S, T):
- """Stolen from Wikibooks
-
- http://en.wikibooks.org/wiki/Algorithm_Implementation/Strings/Longest_common_substring#Python"""
- m = len(S); n = len(T)
- L = [[0] * (n+1) for i in xrange(m+1)]
- LCS = set()
- longest = 0
- for i in xrange(m):
- for j in xrange(n):
- if S[i] == T[j]:
- v = L[i][j] + 1
- L[i+1][j+1] = v
- if v > longest:
- longest = v
- LCS = set()
- if v == longest:
- LCS.add(S[i-v+1:i+1])
- return LCS
-
-
def PkginfoToSrv4Name(pkginfo_dict):
SRV4_FN_TMPL = "%(catalog_name)s-%(version)s-%(osver)s-%(arch)s-%(tag)s.pkg"
fn_data = {}
@@ -708,304 +504,6 @@
return SRV4_FN_TMPL % fn_data
-class DirectoryFormatPackage(ShellMixin, object):
- """Represents a package in the directory format.
-
- Allows some read-write operations.
- """
- def __init__(self, directory):
- self.directory = directory
- self.pkgname = os.path.basename(directory)
- self.pkgpath = self.directory
- self.pkginfo_dict = None
- self.binaries = None
- self.file_paths = None
- self.files_metadata = None
-
- def GetCatalogname(self):
- """Returns the catalog name of the package.
-
- A bit hacky. Looks for the first word of the NAME field in the package.
- """
- pkginfo = self.GetParsedPkginfo()
- words = re.split(c.WS_RE, pkginfo["NAME"])
- return words[0]
-
- def GetParsedPkginfo(self):
- if not self.pkginfo_dict:
- pkginfo_fd = open(self.GetPkginfoFilename(), "r")
- self.pkginfo_dict = ParsePkginfo(pkginfo_fd)
- pkginfo_fd.close()
- return self.pkginfo_dict
-
- def GetSrv4FileName(self):
- """Guesses the Srv4FileName based on the package directory contents."""
- return PkginfoToSrv4Name(self.GetParsedPkginfo())
-
- def ToSrv4(self, target_dir):
- target_file_name = self.GetSrv4FileName()
- target_path = os.path.join(target_dir, target_file_name)
- if os.path.exists(target_path):
- return target_path
- pkg_container_dir, pkg_dir = os.path.split(self.directory)
- if not os.path.isdir(target_dir):
- os.makedirs(target_dir)
- args = ["pkgtrans", "-s", pkg_container_dir, target_path, pkg_dir]
- self.ShellCommand(args, quiet=True)
- args = ["gzip", "-f", target_path]
- self.ShellCommand(args, quiet=True)
- return target_path
-
- def GetPkgmap(self, analyze_permissions=False, strip=None):
- fd = open(os.path.join(self.directory, "pkgmap"), "r")
- return Pkgmap(fd, analyze_permissions, strip)
-
- def SetPkginfoEntry(self, key, value):
- pkginfo = self.GetParsedPkginfo()
- logging.debug("Setting %s to %s", repr(key), repr(value))
- pkginfo[key] = value
- self.WritePkginfo(pkginfo)
- pkgmap_path = os.path.join(self.directory, "pkgmap")
- pkgmap_fd = open(pkgmap_path, "r")
- new_pkgmap_lines = []
- pkginfo_re = re.compile("1 i pkginfo")
- ws_re = re.compile(r"\s+")
- for line in pkgmap_fd:
- if pkginfo_re.search(line):
- fields = ws_re.split(line)
- # 3: size
- # 4: sum
- pkginfo_path = os.path.join(self.directory, "pkginfo")
- args = ["cksum", pkginfo_path]
- cksum_process = subprocess.Popen(args, stdout=subprocess.PIPE)
- stdout, stderr = cksum_process.communicate()
- cksum_process.wait()
- size = ws_re.split(stdout)[1]
- args = ["sum", pkginfo_path]
- sum_process = subprocess.Popen(args, stdout=subprocess.PIPE)
- stdout, stderr = sum_process.communicate()
- sum_process.wait()
- sum_value = ws_re.split(stdout)[0]
- fields[3] = size
- fields[4] = sum_value
- logging.debug("New pkgmap line: %s", fields)
- line = " ".join(fields)
- new_pkgmap_lines.append(line.strip())
- pkgmap_fd.close()
- # Write that back
- pkgmap_path_new = pkgmap_path + ".new"
- logging.debug("Writing back to %s", pkgmap_path_new)
- pkgmap_fd = open(pkgmap_path_new, "w")
- pkgmap_fd.write("\n".join(new_pkgmap_lines))
- pkgmap_fd.close()
- shutil.move(pkgmap_path_new, pkgmap_path)
-
- # TODO(maciej): Need to update the relevant line on pkgmap too
-
- def GetPkginfoFilename(self):
- return os.path.join(self.directory, "pkginfo")
-
- def WritePkginfo(self, pkginfo_dict):
- # Some packages extract read-only. To be sure, change them to be
- # user-writable.
- args = ["chmod", "-R", "u+w", self.directory]
- self.ShellCommand(args)
- pkginfo_filename = self.GetPkginfoFilename()
- os.chmod(pkginfo_filename, 0644)
- pkginfo_fd = open(pkginfo_filename, "w")
- pkginfo_dict = self.GetParsedPkginfo()
- for k, v in pkginfo_dict.items():
- pkginfo_fd.write("%s=%s\n" % (k, pkginfo_dict[k]))
- pkginfo_fd.close()
-
- def ResetNameProperty(self):
- """Sometimes, NAME= contains useless data. This method resets them."""
- pkginfo_dict = self.GetParsedPkginfo()
- catalog_name = PkgnameToCatName(pkginfo_dict["PKG"])
- description = pkginfo_dict["DESC"]
- pkginfo_name = "%s - %s" % (catalog_name, description)
- self.SetPkginfoEntry("NAME", pkginfo_name)
-
- def GetDependencies(self):
- depends = []
- depend_file_path = os.path.join(self.directory, "install", "depend")
- if not os.path.exists(depend_file_path):
- return depends
- fd = open(os.path.join(self.directory, "install", "depend"), "r")
- # It needs to be a list because there might be duplicates and it's
- # necessary to carry that information.
- for line in fd:
- fields = re.split(c.WS_RE, line)
- if fields[0] == "P":
- pkgname = fields[1]
- pkg_desc = " ".join(fields[1:])
- depends.append((pkgname, pkg_desc))
- fd.close()
- return depends
-
- def CheckPkgpathExists(self):
- if not os.path.isdir(self.directory):
- raise PackageError("%s does not exist or is not a directory"
- % self.directory)
-
- def GetFilesMetadata(self):
- """Returns a data structure with all the files plus their metadata.
-
- [
- {
- "path": ...,
- "mime_type": ...,
- },
- ]
- """
- if not self.files_metadata:
- self.CheckPkgpathExists()
- self.files_metadata = []
- files_root = os.path.join(self.directory, "root")
- if not os.path.exists(files_root):
- return self.files_metadata
- all_files = self.GetAllFilePaths()
- def StripRe(x, strip_re):
- return re.sub(strip_re, "", x)
- root_re = re.compile(r"^root/")
- file_magic = FileMagic()
- for file_path in all_files:
- full_path = unicode(self.MakeAbsolutePath(file_path))
- file_info = {
- "path": StripRe(file_path, root_re),
- "mime_type": file_magic.GetFileMimeType(full_path)
- }
- if not file_info["mime_type"]:
- logging.error("Could not establish the mime type of %s",
- full_path)
- # We really don't want that, as it misses binaries.
- raise PackageError("Could not establish the mime type of %s"
- % full_path)
- if IsBinary(file_info):
- parser = hp.createParser(full_path)
- if not parser:
- logging.warning("Can't parse file %s", file_path)
- else:
- file_info["mime_type_by_hachoir"] = parser.mime_type
- machine_id = parser["/header/machine"].value
- file_info["machine_id"] = machine_id
- file_info["endian"] = parser["/header/endian"].display
- self.files_metadata.append(file_info)
- return self.files_metadata
-
- def ListBinaries(self):
- """Lists all the binaries from a given package.
-
- Original checkpkg code:
-
- #########################################
- # find all executables and dynamic libs,and list their filenames.
- listbinaries() {
- if [ ! -d $1 ] ; then
- print errmsg $1 not a directory
- rm -rf $EXTRACTDIR
- exit 1
- fi
- find $1 -print | xargs file |grep ELF |nawk -F: '{print $1}'
- }
-
- Returns a list of absolute paths.
-
- Now that there are files_metadata, this function can safely go away, once
- all its callers are modified to use files_metadata instead.
- """
- if self.binaries is None:
- self.CheckPkgpathExists()
- files_metadata = self.GetFilesMetadata()
- self.binaries = []
- # The nested for-loop looks inefficient.
- for file_info in files_metadata:
- if IsBinary(file_info):
- self.binaries.append(file_info["path"])
- self.binaries.sort()
- return self.binaries
-
- def GetAllFilePaths(self):
- """Returns a list of all paths from the package."""
- if not self.file_paths:
- self.CheckPkgpathExists()
- remove_prefix = "%s/" % self.pkgpath
- self.file_paths = []
- for root, dirs, files in os.walk(os.path.join(self.pkgpath, "root")):
- full_paths = [os.path.join(root, f) for f in files]
- self.file_paths.extend([f.replace(remove_prefix, "") for f in full_paths])
- return self.file_paths
-
- def _GetOverridesStream(self, file_path):
- # This might potentially cause a file descriptor leak, but I'm not going to
- # worry about that at this stage.
- # NB, the whole catalog run doesn't seem to be suffering. (~2500 packages)
- #
- # There is a race condition here, but it's executing sequentially, I don't
- # expect any concurrency problems.
- if os.path.isfile(file_path):
- logging.debug("Opening %s override file." % repr(file_path))
- return open(file_path, "r")
- else:
- logging.debug("Override file %s not found." % repr(file_path))
- return None
-
- def _ParseOverridesStream(self, stream):
- override_list = []
- for line in stream:
- if line.startswith("#"):
- continue
- override_list.append(overrides.ParseOverrideLine(line))
- return override_list
-
- def GetOverrides(self):
- """Returns overrides, a list of overrides.Override instances."""
- overrides = []
- catalogname = self.GetCatalogname()
- override_paths = (
- [self.directory,
- "root",
- "opt/csw/share/checkpkg/overrides", catalogname],
- [self.directory,
- "install",
- "checkpkg_override"],
- )
- for override_path in override_paths:
- file_path = os.path.join(*override_path)
- stream = self._GetOverridesStream(file_path)
- if stream:
- overrides.extend(self._ParseOverridesStream(stream))
- return overrides
-
- def GetFileContent(self, pkg_file_path):
- if pkg_file_path.startswith("/"):
- pkg_file_path = pkg_file_path[1:]
- # TODO: Write a unit test for the right path
- file_path = os.path.join(self.directory, "root", pkg_file_path)
- try:
- fd = open(file_path, "r")
- content = fd.read()
- fd.close()
- return content
- except IOError, e:
- raise PackageError(e)
-
- def GetFilesContaining(self, regex_list):
- full_paths = self.GetAllFilePaths()
- files_by_pattern = {}
- for full_path in full_paths:
- content = open(self.MakeAbsolutePath(full_path), "rb").read()
- for regex in regex_list:
- if re.search(regex, content):
- if regex not in files_by_pattern:
- files_by_pattern[regex] = []
- files_by_pattern[regex].append(full_path)
- return files_by_pattern
-
- def MakeAbsolutePath(self, p):
- return os.path.join(self.pkgpath, p)
-
-
class Pkgmap(object):
"""Represents the pkgmap of the package.
@@ -1091,168 +589,6 @@
return self.classes
-class PackageComparator(object):
-
- def __init__(self, file_name_a, file_name_b,
- permissions=False,
- strip_a=None,
- strip_b=None):
- self.analyze_permissions = permissions
- self.pkg_a = CswSrv4File(file_name_a)
- self.pkg_b = CswSrv4File(file_name_b)
- self.strip_a = strip_a
- self.strip_b = strip_b
-
- def Run(self):
- pkgmap_a = self.pkg_a.GetPkgmap(self.analyze_permissions, strip=self.strip_a)
- pkgmap_b = self.pkg_b.GetPkgmap(self.analyze_permissions, strip=self.strip_b)
- diff_ab = difflib.unified_diff(sorted(pkgmap_a.paths),
- sorted(pkgmap_b.paths),
- fromfile=self.pkg_a.pkg_path,
- tofile=self.pkg_b.pkg_path)
- diff_text = "\n".join(diff_ab)
- if diff_text:
- less_proc = subprocess.Popen(["less"], stdin=subprocess.PIPE)
- less_stdout, less_stderr = less_proc.communicate(input=diff_text)
- less_proc.wait()
- else:
- print "No differences found."
-
-
-class OpencswCatalogBuilder(object):
-
- def __init__(self, product_dir, catalog_dir):
- self.product_dir = product_dir
- self.catalog_dir = catalog_dir
-
- def Run(self):
- pkg_dirs = os.listdir(self.product_dir)
- for pkg_dir in pkg_dirs:
- pkg_path = os.path.join(self.product_dir, pkg_dir)
- pkginfo_path = os.path.join(pkg_path, "pkginfo")
- if (os.path.isdir(pkg_path)
- and
- os.path.exists(pkginfo_path)):
- if not self.Srv4Exists(pkg_path):
- pkg = None
- tmpdir = None
- try:
- tmpdir = tempfile.mkdtemp(prefix="sunw-pkg-")
- logging.debug("Copying %s to %s", repr(pkg_path), repr(tmpdir))
- tmp_pkg_dir = os.path.join(tmpdir, pkg_dir)
- shutil.copytree(pkg_path, tmp_pkg_dir, symlinks=True)
- pkg = DirectoryFormatPackage(tmp_pkg_dir)
- # Replacing NAME= in the pkginfo, setting it to the catalog name.
- pkg.ResetNameProperty()
- pkg.ToSrv4(self.catalog_dir)
- except IOError, e:
- logging.warn("%s has failed: %s", pkg_path, e)
- finally:
- if pkg:
- del(pkg)
- if os.path.exists(tmpdir):
- shutil.rmtree(tmpdir)
- else:
- logging.warn("srv4 file for %s already exists, skipping", pkg_path)
- else:
- logging.warn("%s is not a directory.", pkg_path)
-
-
- def Srv4Exists(self, pkg_dir):
- pkg = DirectoryFormatPackage(pkg_dir)
- srv4_name = pkg.GetSrv4FileName()
- srv4_name += ".gz"
- srv4_path = os.path.join(self.catalog_dir, srv4_name)
- result = os.path.exists(srv4_path)
- logging.debug("Srv4Exists(%s) => %s, %s", pkg_dir, repr(srv4_path), result)
- return result
-
-
-class OpencswCatalog(object):
- """Represents a catalog file."""
-
- def __init__(self, file_name):
- self.file_name = file_name
- self.by_basename = None
- self.catalog_data = None
-
- def _ParseCatalogLine(self, line):
- cline_re_str_list = [
- (
- r"^"
- # tmux
- r"(?P<catalogname>\S+)"
- r"\s+"
- # 1.2,REV=2010.05.17
- r"(?P<version>\S+)"
- r"\s+"
- # CSWtmux
- r"(?P<pkgname>\S+)"
- r"\s+"
- # tmux-1.2,REV=2010.05.17-SunOS5.9-sparc-CSW.pkg.gz
- r"(?P<file_basename>\S+)"
- r"\s+"
- # 145351cf6186fdcadcd169b66387f72f
- r"(?P<md5sum>\S+)"
- r"\s+"
- # 214091
- r"(?P<size>\S+)"
- r"\s+"
- # CSWcommon|CSWlibevent
- r"(?P<deps>\S+)"
- r"\s+"
- # none
- r"(?P<none_thing_1>\S+)"
- # An optional empty field.
- r"("
- r"\s+"
- # none\n'
- r"(?P<none_thing_2>\S+)"
- r")?"
- r"$"
- ),
- ]
- cline_re_list = [re.compile(x) for x in cline_re_str_list]
- matched = False
- d = None
- for cline_re in cline_re_list:
- m = cline_re.match(line)
- if m:
- d = m.groupdict()
- matched = True
- if not d:
- raise CatalogLineParseError("Parsed %s data is empty" % repr(line))
- if not matched:
- raise CatalogLineParseError("No regexes matched %s" % repr(line))
- return d
-
- def _GetCatalogData(self, fd):
- catalog_data = []
- for line in fd:
- try:
- parsed = self._ParseCatalogLine(line)
- catalog_data.append(parsed)
- except CatalogLineParseError, e:
- logging.debug("Could not parse %s, %s", repr(line), e)
- return catalog_data
-
- def GetCatalogData(self):
- if not self.catalog_data:
- fd = open(self.file_name, "r")
- self.catalog_data = self._GetCatalogData(fd)
- return self.catalog_data
-
- def GetDataByBasename(self):
- if not self.by_basename:
- self.by_basename = {}
- cd = self.GetCatalogData()
- for d in cd:
- if "file_basename" not in d:
- logging.error("%s is missing the file_basename field", d)
- self.by_basename[d["file_basename"]] = d
- return self.by_basename
-
-
def IsBinary(file_info):
"""Returns True or False depending on file metadata."""
is_a_binary = False
@@ -1268,39 +604,3 @@
is_a_binary = True
break
return is_a_binary
-
-
-class FileMagic(object):
- """Libmagic sometimes returns None, which I think is a bug.
- Trying to come up with a way to work around that.
- """
-
- def __init__(self):
- self.cookie_count = 0
- self.magic_cookie = None
-
- def _GetCookie(self):
- magic_cookie = magic.open(self.cookie_count)
- self.cookie_count += 1
- magic_cookie.load()
- magic_cookie.setflags(magic.MAGIC_MIME)
- return magic_cookie
-
- def _LazyInit(self):
- if not self.magic_cookie:
- self.magic_cookie = self._GetCookie()
-
- def GetFileMimeType(self, full_path):
- """Trying to run magic.file() a few times, not accepting None."""
- self._LazyInit()
- mime = None
- for i in xrange(10):
- mime = self.magic_cookie.file(full_path)
- if mime:
- break;
- else:
- # Returned mime is null. Re-initializing the cookie and trying again.
- logging.error("magic_cookie.file(%s) returned None. Retrying.",
- full_path)
- self.magic_cookie = self._GetCookie()
- return mime
Modified: csw/mgar/gar/v2/lib/python/opencsw_test.py
===================================================================
--- csw/mgar/gar/v2/lib/python/opencsw_test.py 2010-10-10 20:35:47 UTC (rev 11219)
+++ csw/mgar/gar/v2/lib/python/opencsw_test.py 2010-10-10 20:36:33 UTC (rev 11220)
@@ -352,19 +352,7 @@
repr(expected_name),
repr(result)))
- def testLongestCommonSubstring_1(self):
- self.assertEqual(set(["foo"]), opencsw.LongestCommonSubstring("foo", "foo"))
- def testLongestCommonSubstring_2(self):
- self.assertEqual(set([]), opencsw.LongestCommonSubstring("foo", "bar"))
-
- def testLongestCommonSubstring_3(self):
- self.assertEqual(set(["bar"]), opencsw.LongestCommonSubstring("barfoobar", "bar"))
-
- def testLongestCommonSubstring_4(self):
- self.assertEqual(set(['bcd', 'hij']), opencsw.LongestCommonSubstring("abcdefghijk", "bcdhij"))
-
-
class PkgmapUnitTest(unittest.TestCase):
def test_1(self):
@@ -447,28 +435,6 @@
searchList=[submitpkg_data])
self.assertTrue(re.search(r"new package", unicode(t)), unicode(t))
-class OpencswCatalogUnitTest(unittest.TestCase):
- def test_ParseCatalogLine_1(self):
- line = (
- 'tmux 1.2,REV=2010.05.17 CSWtmux '
- 'tmux-1.2,REV=2010.05.17-SunOS5.9-sparc-CSW.pkg.gz '
- '145351cf6186fdcadcd169b66387f72f 214091 '
- 'CSWcommon|CSWlibevent none none\n')
- oc = opencsw.OpencswCatalog(None)
- parsed = oc._ParseCatalogLine(line)
- expected = {'catalogname': 'tmux',
- 'deps': 'CSWcommon|CSWlibevent',
- 'file_basename': 'tmux-1.2,REV=2010.05.17-SunOS5.9-sparc-CSW.pkg.gz',
- 'md5sum': '145351cf6186fdcadcd169b66387f72f',
- 'none_thing_1': 'none',
- 'none_thing_2': 'none',
- 'pkgname': 'CSWtmux',
- 'size': '214091',
- 'version': '1.2,REV=2010.05.17'}
- self.assertEquals(expected, parsed)
-
-
-
if __name__ == '__main__':
unittest.main()
Added: csw/mgar/gar/v2/lib/python/package.py
===================================================================
--- csw/mgar/gar/v2/lib/python/package.py (rev 0)
+++ csw/mgar/gar/v2/lib/python/package.py 2010-10-10 20:36:33 UTC (rev 11220)
@@ -0,0 +1,550 @@
+#!/usr/bin/env python2.6
+
+import datetime
+import difflib
+import hashlib
+import logging
+import os
+import re
+import shutil
+import subprocess
+import tempfile
+import time
+
+import hachoir_parser as hp
+import magic
+
+# Suppress unhelpful warnings
+# http://bitbucket.org/haypo/hachoir/issue/23
+import hachoir_core.config
+hachoir_core.config.quiet = True
+
+
+class ShellMixin(object):
+  """Mixin providing a helper for running external commands."""
+
+  def ShellCommand(self, args, quiet=False):
+    """Runs a command and raises Error if it exits with a non-zero status.
+
+    Args:
+      args: The argument list, handed over to subprocess unchanged.
+      quiet: If true, stdout and stderr of the command are captured and
+        thrown away; otherwise the command writes to the caller's streams.
+
+    Returns:
+      The exit status of the command.  Any non-zero status raises, so a
+      returned value is always 0.
+
+    Raises:
+      Error: The command exited with a non-zero status.
+    """
+    logging.debug("Calling: %s", repr(args))
+    if not quiet:
+      exit_status = subprocess.call(args)
+    else:
+      child = subprocess.Popen(args,
+                               stdout=subprocess.PIPE,
+                               stderr=subprocess.PIPE)
+      # communicate() drains both pipes; the captured output is not used.
+      child.communicate()
+      exit_status = child.wait()
+    if exit_status:
+      raise Error("Running %s has failed." % repr(args))
+    return exit_status
+
+
+class CswSrv4File(ShellMixin, object):
+  """Represents a package in the srv4 format (pkg).
+
+  Accepts both plain (.pkg) and gzip-compressed (.pkg.gz) package streams.
+  Anything that needs the unpacked contents is done in a temporary working
+  directory, which is created on demand and removed in __del__().
+  """
+
+  def __init__(self, pkg_path, debug=False):
+    """Initializes the object; no files are touched at this point.
+
+    Args:
+      pkg_path: Path to the .pkg or .pkg.gz file.
+      debug: If true, TransformToDir() does not suppress the output of the
+        command it runs.
+    """
+    self.pkg_path = pkg_path
+    self.workdir = None
+    self.gunzipped_path = None
+    self.transformed = False
+    self.dir_format_pkg = None
+    self.debug = debug
+    self.pkgname = None
+    self.md5sum = None
+    # Cache for GetMtime(), a datetime.datetime.
+    self.mtime = None
+    # Cache for GetFileMtime(), a raw st_mtime number.  It is kept apart from
+    # self.mtime, because the two getters return different types.
+    self.file_mtime = None
+
+  def __repr__(self):
+    return u"CswSrv4File(%s)" % repr(self.pkg_path)
+
+  def GetWorkDir(self):
+    """Returns the working directory, creating it on first use.
+
+    On creation, a file named "admin" holding ADMIN_FILE_CONTENT is written
+    into the directory.
+    """
+    if not self.workdir:
+      self.workdir = tempfile.mkdtemp(prefix="pkg_")
+      with open(os.path.join(self.workdir, "admin"), "w") as fd:
+        fd.write(ADMIN_FILE_CONTENT)
+    return self.workdir
+
+  def GetAdminFilePath(self):
+    """Returns the path to the admin file inside the working directory."""
+    return os.path.join(self.GetWorkDir(), "admin")
+
+  def GetGunzippedPath(self):
+    """Returns the path to the uncompressed package stream.
+
+    A .pkg.gz file is copied to the working directory and gunzipped there;
+    a .pkg file is used in place.
+
+    Raises:
+      Error: The file name ends in neither .pkg nor .pkg.gz, or gunzip
+        has failed.
+    """
+    if not self.gunzipped_path:
+      gzip_suffix = ".gz"
+      pkg_suffix = ".pkg"
+      if self.pkg_path.endswith("%s%s" % (pkg_suffix, gzip_suffix)):
+        # Causing the class to stat the .gz file. This call throws away the
+        # result, but the result will be cached as a class instance member.
+        self.GetMtime()
+        base_name_gz = os.path.split(self.pkg_path)[1]
+        shutil.copy(self.pkg_path, self.GetWorkDir())
+        # Only the copy is gunzipped.  self.pkg_path keeps pointing at the
+        # original file: gunzip removes the .gz copy, so redirecting
+        # self.pkg_path to it would break GetMd5sum() and the mtime getters.
+        copied_path = os.path.join(self.GetWorkDir(), base_name_gz)
+        args = ["gunzip", "-f", copied_path]
+        unused_retcode = self.ShellCommand(args)
+        self.gunzipped_path = copied_path[:(-len(gzip_suffix))]
+      elif self.pkg_path.endswith(pkg_suffix):
+        self.gunzipped_path = self.pkg_path
+      else:
+        raise Error("The file name should end in either "
+                    "%s or %s, but it's %s."
+                    % (gzip_suffix, pkg_suffix, repr(self.pkg_path)))
+    return self.gunzipped_path
+
+  def Pkgtrans(self, src_file, destdir, pkgname):
+    """A proxy for the pkgtrans command.
+
+    This requires custom-pkgtrans to be available.
+
+    A failure of the command is only logged, together with its output; no
+    exception is raised for it.
+
+    Raises:
+      PackageError: destdir does not exist or is not a directory.
+    """
+    if not os.path.isdir(destdir):
+      raise PackageError("%s doesn't exist or is not a directory" % destdir)
+    args = [os.path.join(os.path.dirname(__file__), "custom-pkgtrans"),
+            src_file,
+            destdir,
+            pkgname]
+    pkgtrans_proc = subprocess.Popen(args,
+                                     stdout=subprocess.PIPE,
+                                     stderr=subprocess.PIPE)
+    stdout, stderr = pkgtrans_proc.communicate()
+    ret = pkgtrans_proc.wait()
+    if ret:
+      logging.error(stdout)
+      logging.error(stderr)
+      # The previous "% has failed" % args was not a valid format string and
+      # raised instead of logging.
+      logging.error("%s has failed", args)
+
+  def GetPkgname(self):
+    """It's necessary to figure out the pkgname from the .pkg file.
+    # nawk 'NR == 2 {print $1; exit;} $f
+
+    Returns:
+      The first field of the second line of the uncompressed package
+      stream, stripped of whitespace.  The value is cached.
+    """
+    if not self.pkgname:
+      gunzipped_path = self.GetGunzippedPath()
+      args = ["nawk", "NR == 2 {print $1; exit;}", gunzipped_path]
+      nawk_proc = subprocess.Popen(args, stdout=subprocess.PIPE)
+      stdout, unused_stderr = nawk_proc.communicate()
+      nawk_proc.wait()
+      self.pkgname = stdout.strip()
+      logging.debug("GetPkgname(): %s", repr(self.pkgname))
+    return self.pkgname
+
+  def GetMtime(self):
+    """Returns the modification time of the package file as a datetime (UTC).
+
+    The value is cached after the first call.
+    """
+    if not self.mtime:
+      # This fails if the file is not there.
+      s = os.stat(self.pkg_path)
+      t = time.gmtime(s.st_mtime)
+      self.mtime = datetime.datetime(*t[:6])
+    return self.mtime
+
+  def TransformToDir(self):
+    """Transforms the file to the directory format.
+
+    This runs the custom-pkgtrans script instead of pkgtrans, because
+    pkgtrans behaves differently on Solaris 8 and 10. Having our own
+    implementation helps achieve consistent behavior.
+
+    Raises:
+      Error: The command has failed, or the working directory does not
+        contain exactly one directory afterwards.
+    """
+    if not self.transformed:
+      gunzipped_path = self.GetGunzippedPath()
+      pkgname = self.GetPkgname()
+      args = [os.path.join(os.path.dirname(__file__),
+                           "..", "..", "bin", "custom-pkgtrans"),
+              gunzipped_path, self.GetWorkDir(), pkgname]
+      logging.debug("transforming: %s", args)
+      unused_retcode = self.ShellCommand(args, quiet=(not self.debug))
+      dirs = self.GetDirs()
+      if len(dirs) != 1:
+        raise Error("Need exactly one package in the package stream: "
+                    "%s." % (dirs))
+      self.dir_format_pkg = DirectoryFormatPackage(dirs[0])
+      self.transformed = True
+
+  def GetDirFormatPkg(self):
+    """Returns the DirectoryFormatPackage, transforming the file if needed."""
+    self.TransformToDir()
+    return self.dir_format_pkg
+
+  def GetDirs(self):
+    """Returns absolute paths of the directories found in the working dir."""
+    workdir = self.GetWorkDir()
+    dirs = []
+    for p in os.listdir(workdir):
+      abspath = os.path.join(workdir, p)
+      if os.path.isdir(abspath):
+        dirs.append(abspath)
+    return dirs
+
+  def GetPkgmap(self, analyze_permissions, strip=None):
+    """Returns the pkgmap of the package in the directory format."""
+    dir_format_pkg = self.GetDirFormatPkg()
+    return dir_format_pkg.GetPkgmap(analyze_permissions, strip)
+
+  def GetMd5sum(self):
+    """Returns the hex md5 digest of the package file; the value is cached."""
+    if not self.md5sum:
+      logging.debug("GetMd5sum() reading file %s", repr(self.pkg_path))
+      md5 = hashlib.md5()
+      # Binary mode: the digest must be computed over the raw bytes.
+      with open(self.pkg_path, "rb") as fp:
+        md5.update(fp.read())
+      self.md5sum = md5.hexdigest()
+    return self.md5sum
+
+  def GetPkgchkOutput(self):
+    """Returns: (exit code, stdout, stderr)."""
+    args = ["pkgchk", "-d", self.GetGunzippedPath(), "all"]
+    pkgchk_proc = subprocess.Popen(args,
+                                   stdout=subprocess.PIPE,
+                                   stderr=subprocess.PIPE)
+    stdout, stderr = pkgchk_proc.communicate()
+    ret = pkgchk_proc.wait()
+    return ret, stdout, stderr
+
+  def GetFileMtime(self):
+    """Returns the raw st_mtime of the package file; the value is cached.
+
+    Unlike GetMtime(), the value is not converted to a datetime, so it has
+    its own cache attribute.
+    """
+    if not self.file_mtime:
+      self.file_mtime = os.stat(self.pkg_path).st_mtime
+    return self.file_mtime
+
+  def __del__(self):
+    # Clean up the working directory, if one has been created.
+    if self.workdir:
+      logging.debug("Removing %s", repr(self.workdir))
+      shutil.rmtree(self.workdir)
+
+
+class DirectoryFormatPackage(ShellMixin, object):
+  """Represents a package in the directory format.
+
+  Allows some read-write operations.  The parsed pkginfo, the list of file
+  paths, the files metadata and the list of binaries are computed on first
+  use and cached on the instance.
+  """
+
+  def __init__(self, directory):
+    """Initializes the object; the directory is not accessed at this point.
+
+    Args:
+      directory: Path to the package directory.  Its last path component
+        is used as the pkgname.
+    """
+    self.directory = directory
+    self.pkgname = os.path.basename(directory)
+    self.pkgpath = self.directory
+    self.pkginfo_dict = None
+    self.binaries = None
+    self.file_paths = None
+    self.files_metadata = None
+
+  def GetCatalogname(self):
+    """Returns the catalog name of the package.
+
+    A bit hacky. Looks for the first word of the NAME field in the package.
+    """
+    pkginfo = self.GetParsedPkginfo()
+    words = re.split(c.WS_RE, pkginfo["NAME"])
+    return words[0]
+
+  def GetParsedPkginfo(self):
+    """Returns the parsed content of the pkginfo file; the value is cached."""
+    if self.pkginfo_dict is None:
+      with open(self.GetPkginfoFilename(), "r") as pkginfo_fd:
+        self.pkginfo_dict = ParsePkginfo(pkginfo_fd)
+    return self.pkginfo_dict
+
+  def GetSrv4FileName(self):
+    """Guesses the Srv4FileName based on the package directory contents."""
+    return PkginfoToSrv4Name(self.GetParsedPkginfo())
+
+  def ToSrv4(self, target_dir):
+    """Builds a gzipped srv4 package stream in target_dir.
+
+    If the target path already exists, nothing is done.  target_dir is
+    created if it is missing.
+
+    Returns:
+      target_dir joined with the name from GetSrv4FileName().
+    """
+    # NOTE(review): "gzip -f" is run on target_path, which presumably leaves
+    # the result under target_path + ".gz", while both the existence check
+    # and the return value use target_path -- confirm against the callers.
+    target_file_name = self.GetSrv4FileName()
+    target_path = os.path.join(target_dir, target_file_name)
+    if os.path.exists(target_path):
+      return target_path
+    pkg_container_dir, pkg_dir = os.path.split(self.directory)
+    if not os.path.isdir(target_dir):
+      os.makedirs(target_dir)
+    args = ["pkgtrans", "-s", pkg_container_dir, target_path, pkg_dir]
+    self.ShellCommand(args, quiet=True)
+    args = ["gzip", "-f", target_path]
+    self.ShellCommand(args, quiet=True)
+    return target_path
+
+  def GetPkgmap(self, analyze_permissions=False, strip=None):
+    """Returns a Pkgmap object built from the pkgmap file of the package."""
+    # The file object is handed over to Pkgmap and is not closed here.
+    # NOTE(review): close it here if Pkgmap reads the file eagerly -- verify.
+    fd = open(os.path.join(self.directory, "pkgmap"), "r")
+    return Pkgmap(fd, analyze_permissions, strip)
+
+  def SetPkginfoEntry(self, key, value):
+    """Sets one pkginfo entry and brings the pkgmap in line with it.
+
+    The pkginfo file is rewritten, and then the size and the sum of the
+    pkginfo line in the pkgmap are replaced with values obtained from the
+    cksum and sum commands.
+    """
+    pkginfo = self.GetParsedPkginfo()
+    logging.debug("Setting %s to %s", repr(key), repr(value))
+    pkginfo[key] = value
+    self.WritePkginfo(pkginfo)
+    pkgmap_path = os.path.join(self.directory, "pkgmap")
+    pkginfo_path = self.GetPkginfoFilename()
+    new_pkgmap_lines = []
+    pkginfo_re = re.compile("1 i pkginfo")
+    ws_re = re.compile(r"\s+")
+    with open(pkgmap_path, "r") as pkgmap_fd:
+      for line in pkgmap_fd:
+        if pkginfo_re.search(line):
+          fields = ws_re.split(line)
+          # 3: size
+          # 4: sum
+          args = ["cksum", pkginfo_path]
+          cksum_process = subprocess.Popen(args, stdout=subprocess.PIPE)
+          stdout, unused_stderr = cksum_process.communicate()
+          cksum_process.wait()
+          # The size is taken from the second field of the cksum output.
+          size = ws_re.split(stdout)[1]
+          args = ["sum", pkginfo_path]
+          sum_process = subprocess.Popen(args, stdout=subprocess.PIPE)
+          stdout, unused_stderr = sum_process.communicate()
+          sum_process.wait()
+          sum_value = ws_re.split(stdout)[0]
+          fields[3] = size
+          fields[4] = sum_value
+          logging.debug("New pkgmap line: %s", fields)
+          line = " ".join(fields)
+        new_pkgmap_lines.append(line.strip())
+    # Write that back
+    pkgmap_path_new = pkgmap_path + ".new"
+    logging.debug("Writing back to %s", pkgmap_path_new)
+    with open(pkgmap_path_new, "w") as pkgmap_fd:
+      pkgmap_fd.write("\n".join(new_pkgmap_lines))
+    shutil.move(pkgmap_path_new, pkgmap_path)
+
+  # TODO(maciej): Need to update the relevant line on pkgmap too
+  # NOTE(review): SetPkginfoEntry() appears to do this already -- confirm
+  # and drop the TODO.
+
+  def GetPkginfoFilename(self):
+    """Returns the path to the pkginfo file of the package."""
+    return os.path.join(self.directory, "pkginfo")
+
+  def WritePkginfo(self, pkginfo_dict):
+    """Writes pkginfo_dict to the pkginfo file, one KEY=value per line.
+
+    Args:
+      pkginfo_dict: The entries to write.
+    """
+    # Some packages extract read-only. To be sure, change them to be
+    # user-writable.
+    args = ["chmod", "-R", "u+w", self.directory]
+    self.ShellCommand(args)
+    pkginfo_filename = self.GetPkginfoFilename()
+    os.chmod(pkginfo_filename, 0o644)
+    # The argument is what gets written.  Re-reading the pkginfo at this
+    # point, as the code used to, would parse the file after it has been
+    # truncated whenever the parsed pkginfo was not cached yet.
+    with open(pkginfo_filename, "w") as pkginfo_fd:
+      for k, v in pkginfo_dict.items():
+        pkginfo_fd.write("%s=%s\n" % (k, v))
+
+  def ResetNameProperty(self):
+    """Sometimes, NAME= contains useless data. This method resets them."""
+    pkginfo_dict = self.GetParsedPkginfo()
+    catalog_name = PkgnameToCatName(pkginfo_dict["PKG"])
+    description = pkginfo_dict["DESC"]
+    pkginfo_name = "%s - %s" % (catalog_name, description)
+    self.SetPkginfoEntry("NAME", pkginfo_name)
+
+  def GetDependencies(self):
+    """Returns the "P" entries of install/depend.
+
+    Returns:
+      A list of (pkgname, description) tuples, where the description is
+      the pkgname and the rest of the fields joined with spaces.  The list
+      is empty if the package has no depend file.
+    """
+    depends = []
+    depend_file_path = os.path.join(self.directory, "install", "depend")
+    if not os.path.exists(depend_file_path):
+      return depends
+    # It needs to be a list because there might be duplicates and it's
+    # necessary to carry that information.
+    with open(depend_file_path, "r") as fd:
+      for line in fd:
+        fields = re.split(c.WS_RE, line)
+        if fields[0] == "P":
+          pkgname = fields[1]
+          pkg_desc = " ".join(fields[1:])
+          depends.append((pkgname, pkg_desc))
+    return depends
+
+  def CheckPkgpathExists(self):
+    """Raises PackageError if the package directory is not a directory."""
+    if not os.path.isdir(self.directory):
+      raise PackageError("%s does not exist or is not a directory"
+                         % self.directory)
+
+  def GetFilesMetadata(self):
+    """Returns a data structure with all the files plus their metadata.
+
+    [
+      {
+        "path": ...,
+        "mime_type": ...,
+      },
+    ]
+
+    For files recognized by IsBinary() and parsed by hachoir, the keys
+    "mime_type_by_hachoir", "machine_id" and "endian" are added.
+
+    Raises:
+      PackageError: The package directory is missing, or the mime type of
+        a file could not be established.
+    """
+    if self.files_metadata is None:
+      self.CheckPkgpathExists()
+      self.files_metadata = []
+      files_root = os.path.join(self.directory, "root")
+      if not os.path.exists(files_root):
+        return self.files_metadata
+      all_files = self.GetAllFilePaths()
+      root_re = re.compile(r"^root/")
+      file_magic = FileMagic()
+      for file_path in all_files:
+        full_path = unicode(self.MakeAbsolutePath(file_path))
+        file_info = {
+            "path": root_re.sub("", file_path),
+            "mime_type": file_magic.GetFileMimeType(full_path),
+        }
+        if not file_info["mime_type"]:
+          logging.error("Could not establish the mime type of %s",
+                        full_path)
+          # We really don't want that, as it misses binaries.
+          raise PackageError("Could not establish the mime type of %s"
+                             % full_path)
+        if IsBinary(file_info):
+          parser = hp.createParser(full_path)
+          if not parser:
+            logging.warning("Can't parse file %s", file_path)
+          else:
+            file_info["mime_type_by_hachoir"] = parser.mime_type
+            machine_id = parser["/header/machine"].value
+            file_info["machine_id"] = machine_id
+            file_info["endian"] = parser["/header/endian"].display
+        self.files_metadata.append(file_info)
+    return self.files_metadata
+
+  def ListBinaries(self):
+    """Lists all the binaries from a given package.
+
+    Original checkpkg code:
+
+    #########################################
+    # find all executables and dynamic libs,and list their filenames.
+    listbinaries() {
+      if [ ! -d $1 ] ; then
+        print errmsg $1 not a directory
+        rm -rf $EXTRACTDIR
+        exit 1
+      fi
+      find $1 -print | xargs file |grep ELF |nawk -F: '{print $1}'
+    }
+
+    Returns a sorted list of the "path" values of the files metadata
+    entries for which IsBinary() is true.
+
+    Now that there are files_metadata, this function can safely go away, once
+    all its callers are modified to use files_metadata instead.
+    """
+    if self.binaries is None:
+      self.CheckPkgpathExists()
+      files_metadata = self.GetFilesMetadata()
+      self.binaries = []
+      for file_info in files_metadata:
+        if IsBinary(file_info):
+          self.binaries.append(file_info["path"])
+      self.binaries.sort()
+    return self.binaries
+
+  def GetAllFilePaths(self):
+    """Returns a list of all paths from the package.
+
+    The paths are those of the files under the "root" subdirectory, given
+    relative to the package directory.
+    """
+    if self.file_paths is None:
+      self.CheckPkgpathExists()
+      remove_prefix = "%s/" % self.pkgpath
+      self.file_paths = []
+      for root, unused_dirs, files in os.walk(
+          os.path.join(self.pkgpath, "root")):
+        for f in files:
+          full_path = os.path.join(root, f)
+          # Strip the prefix at the beginning only; str.replace() would also
+          # remove any later occurrence of the same string in the path.
+          if full_path.startswith(remove_prefix):
+            full_path = full_path[len(remove_prefix):]
+          self.file_paths.append(full_path)
+    return self.file_paths
+
+  def _GetOverridesStream(self, file_path):
+    """Returns an open file object for file_path, or None if there is no file.
+
+    Closing the returned stream is up to the caller.
+    """
+    # There is a race condition here, but it's executing sequentially, I don't
+    # expect any concurrency problems.
+    if os.path.isfile(file_path):
+      logging.debug("Opening %s override file." % repr(file_path))
+      return open(file_path, "r")
+    else:
+      logging.debug("Override file %s not found." % repr(file_path))
+      return None
+
+  def _ParseOverridesStream(self, stream):
+    """Parses all the lines of the stream, skipping the ones starting with #."""
+    override_list = []
+    for line in stream:
+      if line.startswith("#"):
+        continue
+      override_list.append(overrides.ParseOverrideLine(line))
+    return override_list
+
+  def GetOverrides(self):
+    """Returns overrides, a list of overrides.Override instances."""
+    # Not named "overrides", to keep the overrides module visible here.
+    override_list = []
+    catalogname = self.GetCatalogname()
+    override_paths = (
+        [self.directory,
+         "root",
+         "opt/csw/share/checkpkg/overrides", catalogname],
+        [self.directory,
+         "install",
+         "checkpkg_override"],
+    )
+    for override_path in override_paths:
+      file_path = os.path.join(*override_path)
+      stream = self._GetOverridesStream(file_path)
+      if stream:
+        # The stream is read completely while parsing, so it can be closed
+        # right away instead of leaking the file descriptor.
+        try:
+          override_list.extend(self._ParseOverridesStream(stream))
+        finally:
+          stream.close()
+    return override_list
+
+  def GetFileContent(self, pkg_file_path):
+    """Returns the content of a file from under the root directory.
+
+    Args:
+      pkg_file_path: Path of the file within the package; one leading
+        slash, if present, is removed.
+
+    Raises:
+      PackageError: The file could not be opened or read.
+    """
+    if pkg_file_path.startswith("/"):
+      pkg_file_path = pkg_file_path[1:]
+    # TODO: Write a unit test for the right path
+    file_path = os.path.join(self.directory, "root", pkg_file_path)
+    try:
+      with open(file_path, "r") as fd:
+        return fd.read()
+    except IOError as e:
+      raise PackageError(e)
+
+  def GetFilesContaining(self, regex_list):
+    """Finds the files whose content matches the given regular expressions.
+
+    Args:
+      regex_list: Regular expressions to search the file contents for.
+
+    Returns:
+      A dictionary mapping each regex that matched at least once to the
+      list of matching paths, as returned by GetAllFilePaths().
+    """
+    full_paths = self.GetAllFilePaths()
+    files_by_pattern = {}
+    for full_path in full_paths:
+      with open(self.MakeAbsolutePath(full_path), "rb") as fd:
+        content = fd.read()
+      for regex in regex_list:
+        if re.search(regex, content):
+          files_by_pattern.setdefault(regex, []).append(full_path)
+    return files_by_pattern
+
+  def MakeAbsolutePath(self, p):
+    """Joins the package path with p."""
+    return os.path.join(self.pkgpath, p)
+
+
+class FileMagic(object):
+  """Establishes mime types of files using libmagic.
+
+  Libmagic sometimes returns None, which is believed to be a bug.  As a
+  workaround, the lookup is retried with a freshly created magic cookie.
+  """
+
+  def __init__(self):
+    self.magic_cookie = None
+    # Number of cookies created so far; also passed to magic.open().
+    self.cookie_count = 0
+
+  def _GetCookie(self):
+    """Creates, loads and returns a new magic cookie set up for mime types."""
+    new_cookie = magic.open(self.cookie_count)
+    self.cookie_count += 1
+    new_cookie.load()
+    new_cookie.setflags(magic.MAGIC_MIME)
+    return new_cookie
+
+  def _LazyInit(self):
+    """Creates the magic cookie, unless there already is one."""
+    if self.magic_cookie:
+      return
+    self.magic_cookie = self._GetCookie()
+
+  def GetFileMimeType(self, full_path):
+    """Returns the mime type of a file, trying up to 10 times.
+
+    Returns:
+      The first non-empty result of magic's file(), or the result of the
+      last attempt if every attempt came back empty.
+    """
+    self._LazyInit()
+    mime = None
+    attempts_left = 10
+    while attempts_left > 0:
+      attempts_left -= 1
+      mime = self.magic_cookie.file(full_path)
+      if mime:
+        break
+      # Returned mime is null. Re-initializing the cookie and trying again.
+      logging.error("magic_cookie.file(%s) returned None. Retrying.",
+                    full_path)
+      self.magic_cookie = self._GetCookie()
+    return mime
+
+
+class PackageComparator(object):
+  """Compares the pkgmap paths of two srv4 packages."""
+
+  def __init__(self, file_name_a, file_name_b,
+               permissions=False,
+               strip_a=None,
+               strip_b=None):
+    """Wraps both files in CswSrv4File objects.
+
+    Args:
+      file_name_a: Path to the first package file.
+      file_name_b: Path to the second package file.
+      permissions: Passed to GetPkgmap() as analyze_permissions.
+      strip_a: Passed to GetPkgmap() of the first package as strip.
+      strip_b: Passed to GetPkgmap() of the second package as strip.
+    """
+    self.analyze_permissions = permissions
+    self.pkg_a = CswSrv4File(file_name_a)
+    self.pkg_b = CswSrv4File(file_name_b)
+    self.strip_a = strip_a
+    self.strip_b = strip_b
+
+  def Run(self):
+    """Shows a unified diff of the sorted paths of both packages in less.
+
+    If there are no differences, a message is printed instead.
+    """
+    pkgmap_a = self.pkg_a.GetPkgmap(self.analyze_permissions,
+                                    strip=self.strip_a)
+    pkgmap_b = self.pkg_b.GetPkgmap(self.analyze_permissions,
+                                    strip=self.strip_b)
+    paths_a = sorted(pkgmap_a.paths)
+    paths_b = sorted(pkgmap_b.paths)
+    diff_text = "\n".join(difflib.unified_diff(paths_a,
+                                               paths_b,
+                                               fromfile=self.pkg_a.pkg_path,
+                                               tofile=self.pkg_b.pkg_path))
+    if not diff_text:
+      print "No differences found."
+      return
+    pager = subprocess.Popen(["less"], stdin=subprocess.PIPE)
+    pager.communicate(input=diff_text)
+    pager.wait()
Modified: csw/mgar/gar/v2/lib/python/package_checks.py
===================================================================
--- csw/mgar/gar/v2/lib/python/package_checks.py 2010-10-10 20:35:47 UTC (rev 11219)
+++ csw/mgar/gar/v2/lib/python/package_checks.py 2010-10-10 20:36:33 UTC (rev 11220)
@@ -1000,8 +1000,11 @@
if pkgname not in policy_pkgname_list:
error_mgr.ReportError(
"shared-lib-pkgname-mismatch",
- "file=%s pkgname=%s expected=%s"
- % (binary_info["path"], pkgname, policy_pkgname_list))
+ "file=%s "
+ "soname=%s "
+ "pkgname=%s "
+ "expected=%s"
+ % (binary_info["path"], soname, pkgname, policy_pkgname_list))
messenger.OneTimeMessage(
soname,
"This shared library (%s) is in a directory indicating that it "
Modified: csw/mgar/gar/v2/lib/python/package_checks_test.py
===================================================================
--- csw/mgar/gar/v2/lib/python/package_checks_test.py 2010-10-10 20:35:47 UTC (rev 11219)
+++ csw/mgar/gar/v2/lib/python/package_checks_test.py 2010-10-10 20:36:33 UTC (rev 11220)
@@ -1320,19 +1320,27 @@
self.pkg_data = neon_stats[0]
self.error_mgr_mock.ReportError(
'shared-lib-pkgname-mismatch',
- "file=opt/csw/lib/libneon.so.26.0.4 pkgname=CSWneon "
+ "file=opt/csw/lib/libneon.so.26.0.4 "
+ "soname=libneon.so.26 "
+ "pkgname=CSWneon "
"expected=['CSWlibneon26', 'CSWlibneon-26']")
self.error_mgr_mock.ReportError(
'shared-lib-pkgname-mismatch',
- "file=opt/csw/lib/libneon.so.27.2.0 pkgname=CSWneon "
+ "file=opt/csw/lib/libneon.so.27.2.0 "
+ "soname=libneon.so.27 "
+ "pkgname=CSWneon "
"expected=['CSWlibneon27', 'CSWlibneon-27']")
self.error_mgr_mock.ReportError(
'shared-lib-pkgname-mismatch',
- "file=opt/csw/lib/sparcv9/libneon.so.26.0.4 pkgname=CSWneon "
+ "file=opt/csw/lib/sparcv9/libneon.so.26.0.4 "
+ "soname=libneon.so.26 "
+ "pkgname=CSWneon "
"expected=['CSWlibneon26', 'CSWlibneon-26']")
self.error_mgr_mock.ReportError(
'shared-lib-pkgname-mismatch',
- "file=opt/csw/lib/sparcv9/libneon.so.27.2.0 pkgname=CSWneon "
+ "file=opt/csw/lib/sparcv9/libneon.so.27.2.0 "
+ "soname=libneon.so.27 "
+ "pkgname=CSWneon "
"expected=['CSWlibneon27', 'CSWlibneon-27']")
Added: csw/mgar/gar/v2/lib/python/package_test.py
===================================================================
--- csw/mgar/gar/v2/lib/python/package_test.py (rev 0)
+++ csw/mgar/gar/v2/lib/python/package_test.py 2010-10-10 20:36:33 UTC (rev 11220)
@@ -0,0 +1,7 @@
+#!/usr/bin/env python2.6
+
+import unittest
+
+if __name__ == '__main__':
+ unittest.main()
+
Modified: csw/mgar/gar/v2/lib/python/sharedlib_utils.py
===================================================================
--- csw/mgar/gar/v2/lib/python/sharedlib_utils.py 2010-10-10 20:35:47 UTC (rev 11219)
+++ csw/mgar/gar/v2/lib/python/sharedlib_utils.py 2010-10-10 20:36:33 UTC (rev 11220)
@@ -12,6 +12,7 @@
'i486', 'i386', 'i86')
AMD64_PATHS = ('amd64',)
LEGIT_CHAR_RE = re.compile(r"[a-zA-Z0-9\+]+")
+# Splits a soname into the part before ".so." ("name") and the dotted
+# numeric version after it ("version").  A raw string, like LEGIT_CHAR_RE
+# above, so that the backslashes reach the regex engine untouched.
+SONAME_VERSION_RE = re.compile(r"^(?P<name>.*)\.so\.(?P<version>[\d\.]+)$")
class SonameParsingException(Exception):
pass
@@ -51,7 +52,7 @@
"""
soname_re = re.compile(r"(?P<basename>[\w\+]+([\.\-]+[\w\+]+)*)"
r"\.so"
- r"(\.(?P<version>\d+)(\..*)?)?"
+ r"(\.(?P<version>[\d\.]+))?"
r"$")
m = soname_re.match(soname)
if not m:
@@ -97,3 +98,74 @@
if "sharedlib" in metadata["mime_type"]:
shared_libs.append(metadata["path"])
return shared_libs
+
+
+def GetCommonVersion(sonames):
+  """Returns the version shared by the sonames, or None.
+
+  Sonames which do not match SONAME_VERSION_RE are not taken into account.
+
+  Args:
+    sonames: An iterable of soname strings.
+
+  Returns:
+    The version string if the matching sonames carry exactly one distinct
+    version; None if there are several versions or none at all.
+  """
+  matches = [SONAME_VERSION_RE.search(soname) for soname in sonames]
+  distinct_versions = set(m.group("version") for m in matches if m)
+  if len(distinct_versions) != 1:
+    return None
+  return distinct_versions.pop()
+
+
+def MakePackageNameBySonameCollection(sonames):
+  """Finds a name for a collection of sonames.
+
+  Try to find the largest common substring in the library names, and
+  establish whether there is a common version to them.
+
+  Args:
+    sonames: A collection of soname strings.
+
+  Returns:
+    A (pkgname, catalogname) tuple of strings, or None if the sonames do
+    not have a single common version.
+  """
+  common_version = GetCommonVersion(sonames)
+  if not common_version:
+    # If the sonames don't have a common version, they shouldn't be together
+    # in one package.
+    return None
+  common_substring_candidates = []
+  for soname in sonames:
+    candidate = soname
+    # We always want such package to start with the prefix "lib". Therefore,
+    # we're stripping the prefix "lib" if it exists, and we're adding it back
+    # to the pkgname and soname at the end of the function.
+    if candidate.startswith("lib"):
+      candidate = candidate[3:]
+    m = SONAME_VERSION_RE.search(candidate)
+    if m:
+      # Only the library name takes part in the comparison.  The match used
+      # to be thrown away, which let the ".so.<version>" suffix, shared by
+      # all the sonames, win as the longest common substring.
+      candidate = m.group("name")
+    common_substring_candidates.append(candidate)
+  lcs = CollectionLongestCommonSubstring(common_substring_candidates)
+  pkgname = "CSWlib%s.%s" % (SanitizeWithChar(lcs, "-"), common_version)
+  catalogname = "lib%s.%s" % (SanitizeWithChar(lcs, "_"), common_version)
+  return pkgname, catalogname
+
+
+def LongestCommonSubstring(S, T):
+  """Returns the set of the longest substrings common to S and T.
+
+  Dynamic programming over the lengths of the common suffixes, keeping
+  only the previous row of the table.  Based on the Wikibooks code:
+
+  http://en.wikibooks.org/wiki/Algorithm_Implementation/Strings/Longest_common_substring#Python
+
+  Returns:
+    A set holding every common substring of the maximum length; an empty
+    set if S and T have no character in common.
+  """
+  width = len(T) + 1
+  best_length = 0
+  found = set()
+  # previous_row[j + 1]: length of the common suffix of S[:i] and T[:j + 1].
+  previous_row = [0] * width
+  for i, s_char in enumerate(S):
+    current_row = [0] * width
+    for j, t_char in enumerate(T):
+      if s_char != t_char:
+        continue
+      run = previous_row[j] + 1
+      current_row[j + 1] = run
+      if run > best_length:
+        # A longer substring makes the shorter ones irrelevant.
+        best_length = run
+        found = set([S[i - run + 1:i + 1]])
+      elif run == best_length:
+        found.add(S[i - run + 1:i + 1])
+    previous_row = current_row
+  return found
+
+
+def CollectionLongestCommonSubstring(collection):
+  """Returns a substring common to all the strings in the collection.
+
+  The strings are folded pairwise with LongestCommonSubstring(), starting
+  from the end of the collection.  When a step yields several candidates
+  of the same length, the lexicographically smallest one is carried over,
+  so the result is deterministic; it is not guaranteed to be the longest
+  substring common to the whole collection.
+
+  Args:
+    collection: A sequence of strings.  It is not modified.
+
+  Returns:
+    The common substring, or an empty string if there is none or if the
+    collection is empty.
+  """
+  # Work on a copy: popping from the argument would empty the caller's list.
+  remaining = list(collection)
+  if not remaining:
+    return ""
+  current_substring = remaining.pop()
+  while remaining and current_substring:
+    substring_set = LongestCommonSubstring(current_substring,
+                                           remaining.pop())
+    # Nothing in common with this string means nothing is common to the
+    # whole collection; holding on to the previous candidate would return
+    # a string which is not present in every element.
+    current_substring = min(substring_set) if substring_set else ""
+  return current_substring
Modified: csw/mgar/gar/v2/lib/python/sharedlib_utils_test.py
===================================================================
--- csw/mgar/gar/v2/lib/python/sharedlib_utils_test.py 2010-10-10 20:35:47 UTC (rev 11219)
+++ csw/mgar/gar/v2/lib/python/sharedlib_utils_test.py 2010-10-10 20:36:33 UTC (rev 11220)
@@ -1,4 +1,4 @@
-#!/opt/csw/bin/python2.6
+#!/usr/bin/env python2.6
# $Id$
import re
@@ -60,6 +60,14 @@
)
self.assertEqual(expected, su.MakePackageNameBySoname(soname))
+ def testMakePackageNameBySonameMinorVersion(self):
+   """A soname version with a minor part shows up whole in the names."""
+   expected_pkgnames = ["CSWlibfoo0-1", "CSWlibfoo-0-1"]
+   expected_catalognames = ["libfoo0_1", "libfoo_0_1"]
+   self.assertEqual((expected_pkgnames, expected_catalognames),
+                    su.MakePackageNameBySoname("libfoo.so.0.1"))
+
def testMakePackageNameBySonameApr(self):
soname = "libapr-1.so.0"
expected = (
@@ -88,7 +96,7 @@
su.MakePackageNameBySoname(soname))
def testMakePackageNameBySonameComplexApr(self):
- soname = "libapr-1.so.10.0.0"
+ soname = "libapr-1.so.10"
expected = (
['CSWlibapr-110', 'CSWlibapr-1-10'],
['libapr_110', 'libapr_1_10']
@@ -127,5 +135,58 @@
self.assertEqual("foo_0", su.SanitizeWithChar("foo-0", "_"))
+class GetCommonVersionUnitTest(unittest.TestCase):
+  """Tests for su.GetCommonVersion()."""
+
+  def testGetCommonVersionSimple(self):
+    # Both sonames carry the single-component version "0".
+    self.assertEqual(
+        "0", su.GetCommonVersion(["libfoo.so.0", "libfoo_util.so.0"]))
+
+  def testGetCommonVersionMore(self):
+    # A multi-component version is returned whole.
+    self.assertEqual(
+        "0.2.1",
+        su.GetCommonVersion(["libfoo.so.0.2.1", "libfoo_util.so.0.2.1"]))
+
+  def testGetCommonVersionInvalid(self):
+    # Differing versions mean there is no common version.
+    self.assertEqual(
+        None,
+        su.GetCommonVersion(["libfoo.so.0.2.1", "libfoo_util.so.0.2.3"]))
+
+
+class MakePackageNameBySonameCollectionUnitTest(unittest.TestCase):
+  """Tests for su.MakePackageNameBySonameCollection()."""
+
+  def testMakePackageNameBySonameCollectionTwo(self):
+    expected_pkgnames = ["CSWlibfoo0", "CSWlibfoo-0"]
+    expected_catalognames = ["libfoo0", "libfoo_0"]
+    self.assertEqual(
+        (expected_pkgnames, expected_catalognames),
+        su.MakePackageNameBySonameCollection(
+            ["libfoo.so.0", "libfoo_util.so.0"]))
+
+  def testMakePackageNameBySonameCollectionBdb(self):
+    # NOTE(review): the data is the same as in the "Two" test above;
+    # presumably Berkeley DB sonames were meant to go here -- confirm.
+    expected_pkgnames = ["CSWlibfoo0", "CSWlibfoo-0"]
+    expected_catalognames = ["libfoo0", "libfoo_0"]
+    self.assertEqual(
+        (expected_pkgnames, expected_catalognames),
+        su.MakePackageNameBySonameCollection(
+            ["libfoo.so.0", "libfoo_util.so.0"]))
+
+  def testMakePackageNameBySonameCollectionNoCommonVersion(self):
+    # Sonames with differing versions do not get a common package name.
+    self.assertEqual(
+        None,
+        su.MakePackageNameBySonameCollection(
+            ["libfoo.so.0", "libfoo_util.so.1"]))
+
+
+class CommomSubstringTest(unittest.TestCase):
+  """Tests for su.LongestCommonSubstring()."""
+
+  def testLongestCommonSubstring_1(self):
+    # Identical strings: the whole string is the result.
+    result = su.LongestCommonSubstring("foo", "foo")
+    self.assertEqual(set(["foo"]), result)
+
+  def testLongestCommonSubstring_2(self):
+    # No characters in common: an empty set.
+    result = su.LongestCommonSubstring("foo", "bar")
+    self.assertEqual(set([]), result)
+
+  def testLongestCommonSubstring_3(self):
+    # The same substring found twice is reported once.
+    result = su.LongestCommonSubstring("barfoobar", "bar")
+    self.assertEqual(set(["bar"]), result)
+
+  def testLongestCommonSubstring_4(self):
+    # Two different substrings of the maximum length are both reported.
+    result = su.LongestCommonSubstring("abcdefghijk", "bcdhij")
+    self.assertEqual(set(['bcd', 'hij']), result)
+
+
if __name__ == '__main__':
unittest.main()
Modified: csw/mgar/gar/v2/tests/run_tests.py
===================================================================
--- csw/mgar/gar/v2/tests/run_tests.py 2010-10-10 20:35:47 UTC (rev 11219)
+++ csw/mgar/gar/v2/tests/run_tests.py 2010-10-10 20:36:33 UTC (rev 11220)
@@ -1,4 +1,4 @@
-#!/opt/csw/bin/python2.6
+#!/usr/bin/env python2.6
# $Id$
import unittest
@@ -16,6 +16,8 @@
from package_checks_test import *
from dependency_checks_test import *
from sharedlib_utils_test import *
+from catalog_test import *
+from package_test import *
# These are very slow GAR tests, which I'm disabling for now.
# from example_test import *
This was sent by the SourceForge.net collaborative development platform, the world's largest Open Source development site.
More information about the devel
mailing list