[csw-devel] SF.net SVN: gar:[8541] csw/mgar/gar/v2-checkpkg-stats

wahwah at users.sourceforge.net wahwah at users.sourceforge.net
Sun Feb 14 21:32:12 CET 2010


Revision: 8541
          http://gar.svn.sourceforge.net/gar/?rev=8541&view=rev
Author:   wahwah
Date:     2010-02-14 20:32:11 +0000 (Sun, 14 Feb 2010)

Log Message:
-----------
mGAR v2-checkpkg-stats: First commit, new feature not yet functional.  Data collection program works, but is not integrated well into gar/bin/checkpkg.

Modified Paths:
--------------
    csw/mgar/gar/v2-checkpkg-stats/bin/checkpkg
    csw/mgar/gar/v2-checkpkg-stats/bin/update_contents_cache.py
    csw/mgar/gar/v2-checkpkg-stats/lib/python/opencsw.py

Added Paths:
-----------
    csw/mgar/gar/v2-checkpkg-stats/bin/checkpkg_collect_stats.py
    csw/mgar/gar/v2-checkpkg-stats/bin/checkpkg_collect_stats_test.py
    csw/mgar/gar/v2-checkpkg-stats/bin/custom-pkgtrans
    csw/mgar/gar/v2-checkpkg-stats/lib/sh/
    csw/mgar/gar/v2-checkpkg-stats/lib/sh/libcheckpkg.sh

Modified: csw/mgar/gar/v2-checkpkg-stats/bin/checkpkg
===================================================================
--- csw/mgar/gar/v2-checkpkg-stats/bin/checkpkg	2010-02-14 19:59:12 UTC (rev 8540)
+++ csw/mgar/gar/v2-checkpkg-stats/bin/checkpkg	2010-02-14 20:32:11 UTC (rev 8541)
@@ -21,10 +21,15 @@
 # you know you are tracking the most current version.
 # 
 
-
 PATH=$PATH:/usr/sbin
 readonly NAME_MAX_LENGTH=${NAME_MAX_LENGTH:-20}
 
+command_basename=`basename $0`
+command_basedir="${0%/${command_basename}}"
+libshdir="${command_basedir}/../lib/sh"
+readonly command_basename command_basedir libshdir
+. "${libshdir}/libcheckpkg.sh"
+
 LOCAL_ARCH=`uname -p`
 if [[ -z "${CHECKPKG_TMPDIR}" ]]; then
   readonly CHECKPKG_TMPDIR="/var/tmp"
@@ -44,9 +49,10 @@
 	BOLD=""
 	COLOR_RESET=""
 fi
+readonly GREEN RED BOLD COLOR_RESET
 
 readonly selfpath="$0"
-readonly selfargs="$*"
+readonly selfargs="$@"
 
 # always print out a warning message. (to stderr)
 # exit script, if quit_on_warn set
@@ -337,38 +343,9 @@
 #	exit 0
 #fi
 
-
-# This function exists, because pkgtrans is BROKEN!!
-# It leaves a directory in /var/tmp/aaXXXXXXX, even after clean quit
-# SO, emulate pkgtrans behaviour, for "pkgtrans src destdir pkgname"
-#   Except that we ignore pkgname arg, and just do first one we find.
-#  and we are a bit hacky about how we do things.
-pkgtrans(){
-	if [[ ! -d $2 ]] ; then
-		print ERROR: $2 is not a directory >/dev/fd/2
-		return 1
-	fi
-	hdrblks=`(dd if=$1 skip=1 2>/dev/null| cpio -i -t  >/dev/null) 2>&1 |
-		nawk '{print $1; exit;}'`
-
-	## print initial hdrblks=$hdrblks
-
-	hdrblks=$(($hdrblks + 1))
-	mkdir $2/$3 || return 1
-
-	dd if=$1 skip=$hdrblks 2>/dev/null | (cd $2/$3 ; cpio -ivdm)
-	# on fail, SOMETIMES cpio returns 1, but sometimes it returns 0!!
-	if [[ ! -d $2/$3/install ]] ; then
-		print retrying extract with different archive offset...
-		# no, I cant tell in advance why/when the prev fails
-		hdrblks=$(($hdrblks + 1))
-		dd if=$1 skip=$hdrblks 2>/dev/null| (cd $2/$3 ; cpio -ivdm)
-	fi
-}
-
 print ""
 print Extracing pkg for examination of files...
-pkgtrans $f $EXTRACTDIR $pkgname
+custom_pkgtrans $f $EXTRACTDIR $pkgname
 
 #############################################################
 # We now have the package expanded, in "directory" form, in
@@ -566,9 +543,7 @@
 set_variables_for_individual_package_check "$f"
 
 test_suite_ok=1
-checkpkg_scriptname=`basename $0`
-checkpkg_basedir=${0%/${checkpkg_scriptname}}
-plugindir=${checkpkg_basedir}/checkpkg.d
+plugindir=${command_basedir}/checkpkg.d
 checkpkg_module_tag="checkpkg-"
 
 # Cleaning up old *.pyc files which can cause grief.  This is because of the
@@ -582,7 +557,9 @@
 done
 
 # /var/sadm/install/contents cache update
-${checkpkg_basedir}/update_contents_cache.py
+${command_basedir}/update_contents_cache.py
+# Collects package stats to be later analyzed
+${command_basedir}/checkpkg_collect_stats.py "$@"
 
 if [[ "${DEBUG}" != "" ]]; then
 	extra_options="--debug"
@@ -651,7 +628,7 @@
 done
 
 # Collecting errors and applying the overrides.
-${checkpkg_basedir}/analyze_module_results.py \
+${command_basedir}/analyze_module_results.py \
 	-e "${EXTRACTDIR}" \
 	${pkgnames}
 if [[ "$?" -ne 0 ]]; then

Added: csw/mgar/gar/v2-checkpkg-stats/bin/checkpkg_collect_stats.py
===================================================================
--- csw/mgar/gar/v2-checkpkg-stats/bin/checkpkg_collect_stats.py	                        (rev 0)
+++ csw/mgar/gar/v2-checkpkg-stats/bin/checkpkg_collect_stats.py	2010-02-14 20:32:11 UTC (rev 8541)
@@ -0,0 +1,138 @@
+#!/opt/csw/bin/python2.6
+#
+# $Id$
+#
+# Collects statistics about a package and saves to a directory, for later use
+# by checkpkg modules.
+
+import errno
+import os
+import os.path
+import sys
+import logging
+import optparse
+import yaml
+
+# The following bit of code sets the correct path to Python libraries
+# distributed with GAR.
+path_list = [os.path.dirname(__file__),
+             "..", "lib", "python"]
+sys.path.append(os.path.join(*path_list))
+import checkpkg
+import opencsw
+
+
+STATS_VERSION = 1L
+
+
+class PackageStats(object):
+  """Collects stats about a package and saves them as YAML files.
+
+  Files written to the stats directory: pkginfo.yml, pkgmap.yml, binaries.yml,
+  depends.yml, all_filenames.yml, overrides.yml and basic_stats.yml.
+  """
+
+  def __init__(self, srv4_pkg):
+    self.srv4_pkg = srv4_pkg
+    self.md5sum = None
+    self.dir_format_pkg = None
+    self.stats_path = None
+
+  def GetMd5sum(self):
+    """Returns the md5 sum of the srv4 file; cached after the first call."""
+    if not self.md5sum:
+      self.md5sum = self.srv4_pkg.GetMd5sum()
+    return self.md5sum
+
+  def GetStatsPath(self, home=None):
+    """Returns <home>/.checkpkg/stats/<md5[:2]>/<md5>; home defaults to $HOME.
+
+    The result is cached, so home only matters on the first call.
+    """
+    if not self.stats_path:
+      if not home:
+        home = os.environ["HOME"]
+      md5sum = self.GetMd5sum()
+      self.stats_path = os.path.join(
+          home, ".checkpkg", "stats", md5sum[0:2], md5sum)
+    return self.stats_path
+
+  def StatsExist(self):
+    """Checks if statistics of a package exist.
+
+    Returns:
+      bool
+    """
+    # More checks can be added in the future.
+    return self.StatsDirExists()
+
+  def StatsDirExists(self):
+    """Returns True if the stats directory is already on disk."""
+    return os.path.isdir(self.GetStatsPath())
+
+  def GetDirFormatPkg(self):
+    """Returns srv4_pkg.GetDirFormatPkg(); cached after the first call."""
+    if not self.dir_format_pkg:
+      self.dir_format_pkg = self.srv4_pkg.GetDirFormatPkg()
+    return self.dir_format_pkg
+
+  def MakeStatsDir(self):
+    """mkdir -p equivalent.
+
+    http://stackoverflow.com/questions/600268/mkdir-p-functionality-in-python
+    """
+    try:
+      os.makedirs(self.GetStatsPath())
+    except OSError as e:
+      # An already existing directory is fine; anything else is an error.
+      if e.errno != errno.EEXIST:
+        raise
+
+  def CollectStats(self):
+    """Dumps all the statistics of the package into the stats directory."""
+    self.MakeStatsDir()
+    dir_pkg = self.GetDirFormatPkg()
+    self.DumpObject(dir_pkg.GetParsedPkginfo(), "pkginfo")
+    self.DumpObject(dir_pkg.GetPkgmap().entries, "pkgmap")
+    self.DumpObject(dir_pkg.ListBinaries(), "binaries")
+    self.DumpObject(dir_pkg.GetDependencies(), "depends")
+    self.DumpObject(dir_pkg.GetAllFilenames(), "all_filenames")
+    # Override objects are reduced to plain dicts before being dumped.
+    overrides_simple = [
+        {"pkgname": x.pkgname, "tag_name": x.tag_name, "tag_info": x.tag_info}
+        for x in dir_pkg.GetOverrides()]
+    self.DumpObject(overrides_simple, "overrides")
+    pkg_basename = os.path.basename(self.srv4_pkg.pkg_path)
+    basic_stats = {
+        "stats_version": STATS_VERSION,
+        "pkg_path": self.srv4_pkg.pkg_path,
+        "pkg_basename": pkg_basename,
+        "parsed_basename": opencsw.ParsePackageFileName(pkg_basename),
+        "pkgname": dir_pkg.pkgname,
+        "catalogname": dir_pkg.GetCatalogname(),
+    }
+    self.DumpObject(basic_stats, "basic_stats")
+
+  def DumpObject(self, obj, name):
+    """Serializes obj with yaml.safe_dump() to <stats dir>/<name>.yml."""
+    out_file_name = os.path.join(self.GetStatsPath(), "%s.yml" % name)
+    logging.debug("DumpObject(): writing %s", repr(out_file_name))
+    # The with statement closes the file even when serialization fails.
+    with open(out_file_name, "w") as f:
+      f.write(yaml.safe_dump(obj))
+
+
+def main():
+  """Collects stats for every package file named on the command line."""
+  # basicConfig() only takes effect on its first call, so call it once.
+  logging.basicConfig(level=logging.DEBUG)
+  parser = optparse.OptionParser()
+  options, args = parser.parse_args()
+  logging.info("Collecting statistics about given package files.")
+  logging.debug("args: %s", args)
+  packages = [opencsw.CswSrv4File(x) for x in args]
+  for pkg_stats in [PackageStats(pkg) for pkg in packages]:
+    pkg_stats.CollectStats()
+
+if __name__ == '__main__':
+	main()


Property changes on: csw/mgar/gar/v2-checkpkg-stats/bin/checkpkg_collect_stats.py
___________________________________________________________________
Added: svn:executable
   + *
Added: svn:keywords
   + Id

Added: csw/mgar/gar/v2-checkpkg-stats/bin/checkpkg_collect_stats_test.py
===================================================================
--- csw/mgar/gar/v2-checkpkg-stats/bin/checkpkg_collect_stats_test.py	                        (rev 0)
+++ csw/mgar/gar/v2-checkpkg-stats/bin/checkpkg_collect_stats_test.py	2010-02-14 20:32:11 UTC (rev 8541)
@@ -0,0 +1,34 @@
+#!/opt/csw/bin/python2.6
+
+import os
+import sys
+import unittest
+import mox
+import checkpkg_collect_stats as ccs
+
+# The following bit of code sets the correct path to Python libraries
+# distributed with GAR.
+path_list = [os.path.dirname(__file__),
+             "..", "lib", "python"]
+sys.path.append(os.path.join(*path_list))
+import checkpkg
+import opencsw
+
+
+class PackageStatsUnitTest(unittest.TestCase):
+
+  def setUp(self):
+    self.mocker = mox.Mox()
+
+  def testGetStatsPath(self):
+    # The stats path is <home>/.checkpkg/stats/<md5[:2]>/<md5>.
+    srv4_mock = self.mocker.CreateMock(opencsw.CswSrv4File)
+    srv4_mock.GetMd5sum().AndReturn("abcdef")
+    self.mocker.ReplayAll()
+    actual = ccs.PackageStats(srv4_mock).GetStatsPath("/home/joe")
+    self.assertEqual("/home/joe/.checkpkg/stats/ab/abcdef", actual)
+    self.mocker.VerifyAll()
+
+
+if __name__ == '__main__':
+	unittest.main()


Property changes on: csw/mgar/gar/v2-checkpkg-stats/bin/checkpkg_collect_stats_test.py
___________________________________________________________________
Added: svn:executable
   + *
Added: svn:keywords
   + Id

Added: csw/mgar/gar/v2-checkpkg-stats/bin/custom-pkgtrans
===================================================================
--- csw/mgar/gar/v2-checkpkg-stats/bin/custom-pkgtrans	                        (rev 0)
+++ csw/mgar/gar/v2-checkpkg-stats/bin/custom-pkgtrans	2010-02-14 20:32:11 UTC (rev 8541)
@@ -0,0 +1,18 @@
+#!/bin/ksh -p
+# Command-line wrapper around custom_pkgtrans() from lib/sh/libcheckpkg.sh.
+# $Id$
+#
+# This file exists in order to avoid implementing pipelines in Python.  It
+# could be integrated into the package stats collection program.
+
+command_basename=`basename "$0"`
+command_basedir=`dirname "$0"`
+libshdir="${command_basedir}/../lib/sh"
+readonly command_basename command_basedir libshdir
+. "${libshdir}/libcheckpkg.sh"
+
+if [[ -z "$1" || -z "$2" || -z "$3" ]]; then
+	print "usage: $0 <file.pkg> <targetdir> <pkgname>" >&2
+	exit 1
+fi
+custom_pkgtrans "$1" "$2" "$3"


Property changes on: csw/mgar/gar/v2-checkpkg-stats/bin/custom-pkgtrans
___________________________________________________________________
Added: svn:executable
   + *
Added: svn:keywords
   + Id

Modified: csw/mgar/gar/v2-checkpkg-stats/bin/update_contents_cache.py
===================================================================
--- csw/mgar/gar/v2-checkpkg-stats/bin/update_contents_cache.py	2010-02-14 19:59:12 UTC (rev 8540)
+++ csw/mgar/gar/v2-checkpkg-stats/bin/update_contents_cache.py	2010-02-14 20:32:11 UTC (rev 8541)
@@ -12,8 +12,7 @@
 
 # The following bit of code sets the correct path to Python libraries
 # distributed with GAR.
-path_list = [os.getcwd(),
-             os.path.split(sys.argv[0])[0],
+path_list = [os.path.dirname(__file__),
              "..", "lib", "python"]
 sys.path.append(os.path.join(*path_list))
 import checkpkg

Modified: csw/mgar/gar/v2-checkpkg-stats/lib/python/opencsw.py
===================================================================
--- csw/mgar/gar/v2-checkpkg-stats/lib/python/opencsw.py	2010-02-14 19:59:12 UTC (rev 8540)
+++ csw/mgar/gar/v2-checkpkg-stats/lib/python/opencsw.py	2010-02-14 20:32:11 UTC (rev 8541)
@@ -15,6 +15,7 @@
 import copy
 import datetime
 import difflib
+import hashlib
 import logging
 import os
 import os.path
@@ -366,6 +367,9 @@
     self.transformed = False
     self.dir_format_pkg = None
 
+  def __repr__(self):  # Debug representation including the package path.
+    return u"CswSrv4File(%s)" % (repr(self.pkg_path),)
+
   def GetWorkDir(self):
     if not self.workdir:
       self.workdir = tempfile.mkdtemp(prefix="pkg_")
@@ -395,7 +399,27 @@
                     "%s or %s." % (gzip_suffix, pkg_suffix))
     return self.gunzipped_path
 
+  def Pkgtrans(self, src_file, destdir, pkgname):
+    """A proxy for the pkgtrans command; runs bin/custom-pkgtrans.
+
+    Raises PackageError if destdir is not a directory; logs a failed run.
+    """
+    if not os.path.isdir(destdir):
+      raise PackageError("%s doesn't exist or is not a directory" % destdir)
+    # custom-pkgtrans lives in bin/, two levels up from lib/python/.
+    args = [os.path.join(os.path.dirname(__file__), "..", "..", "bin",
+                         "custom-pkgtrans"), src_file, destdir, pkgname]
+    pkgtrans_proc = subprocess.Popen(args)
+    ret = pkgtrans_proc.wait()
+    if ret:
+      logging.error("%s has failed", args)
+
   def TransformToDir(self):
+    """Transforms the file to the directory format.
+
+    This could use the Pkgtrans command at the top, because pkgtrans supposedly
+    leaves temporary files behind.
+    """
     if not self.transformed:
       args = ["pkgtrans", "-a", self.GetAdminFilePath(),
               self.GetGunzippedPath(), self.GetWorkDir(), "all"]
@@ -424,6 +448,13 @@
     dir_format_pkg = self.GetDirFormatPkg()
     return dir_format_pkg.GetPkgmap(analyze_permissions, strip)
 
+  def GetMd5sum(self):
+    """Returns the hex MD5 digest of the raw bytes of self.pkg_path."""
+    md5 = hashlib.md5()
+    with open(self.pkg_path, "rb") as fp:  # closed even if read() fails
+      md5.update(fp.read())
+    return md5.hexdigest()
+
   def __del__(self):
     if self.workdir:
       logging.debug("Removing %s", repr(self.workdir))
@@ -789,7 +820,7 @@
       if line_to_add:
         self.paths.add(line_to_add)
       entry = {
-          "line": line,
+          "line": line.strip(),
           "type": line_type,
       }
       entry["path"] = installed_path
@@ -879,6 +910,7 @@
       else:
         logging.warn("%s is not a directory.", pkg_path)
 
+
   def Srv4Exists(self, pkg_dir):
     pkg = DirectoryFormatPackage(pkg_dir)
     srv4_name = pkg.GetSrv4FileName()

Added: csw/mgar/gar/v2-checkpkg-stats/lib/sh/libcheckpkg.sh
===================================================================
--- csw/mgar/gar/v2-checkpkg-stats/lib/sh/libcheckpkg.sh	                        (rev 0)
+++ csw/mgar/gar/v2-checkpkg-stats/lib/sh/libcheckpkg.sh	2010-02-14 20:32:11 UTC (rev 8541)
@@ -0,0 +1,30 @@
+#!/bin/ksh -p
+# 
+# $Id$
+
+# pkgtrans leaves a directory in /var/tmp/aaXXXXXXX even after clean quit.
+# Emulating pkgtrans behaviour, for "pkgtrans src destdir pkgname".  Except
+# that pkgname just names the output dir, and only the first pkg is processed.
+
+custom_pkgtrans(){
+	# usage: custom_pkgtrans <src.pkg> <destdir> <pkgname>; unpacks into
+	# <destdir>/<pkgname>.  Returns 1 if destdir is missing or mkdir fails.
+	if [[ ! -d "$2" ]] ; then
+		print "ERROR: $2 is not a directory" >/dev/fd/2
+		return 1
+	fi
+	# hdrblks: first word cpio prints on stderr (presumably its block count).
+	hdrblks=`(dd if="$1" skip=1 2>/dev/null| cpio -i -t  >/dev/null) 2>&1 |
+		nawk '{print $1; exit;}'`
+	hdrblks=$(($hdrblks + 1))
+	mkdir "$2/$3" || return 1
+	# "cd ... &&": never let cpio unpack into the current directory.
+	dd if="$1" skip=$hdrblks 2>/dev/null | (cd "$2/$3" && cpio -ivdm)
+	# on fail, SOMETIMES cpio returns 1, but sometimes it returns 0!!
+	if [[ ! -d "$2/$3/install" ]] ; then
+		print retrying extract with different archive offset...
+		# no, I can't tell in advance why/when the prev fails
+		hdrblks=$(($hdrblks + 1))
+		dd if="$1" skip=$hdrblks 2>/dev/null| (cd "$2/$3" && cpio -ivdm)
+	fi
+}


Property changes on: csw/mgar/gar/v2-checkpkg-stats/lib/sh/libcheckpkg.sh
___________________________________________________________________
Added: svn:keywords
   + Id


This was sent by the SourceForge.net collaborative development platform, the world's largest Open Source development site.


More information about the devel mailing list