[csw-devel] SF.net SVN: gar:[8541] csw/mgar/gar/v2-checkpkg-stats
wahwah at users.sourceforge.net
wahwah at users.sourceforge.net
Sun Feb 14 21:32:12 CET 2010
Revision: 8541
http://gar.svn.sourceforge.net/gar/?rev=8541&view=rev
Author: wahwah
Date: 2010-02-14 20:32:11 +0000 (Sun, 14 Feb 2010)
Log Message:
-----------
mGAR v2-checkpkg-stats: First commit, new feature not yet functional. Data collection program works, but is not integrated well into gar/bin/checkpkg.
Modified Paths:
--------------
csw/mgar/gar/v2-checkpkg-stats/bin/checkpkg
csw/mgar/gar/v2-checkpkg-stats/bin/update_contents_cache.py
csw/mgar/gar/v2-checkpkg-stats/lib/python/opencsw.py
Added Paths:
-----------
csw/mgar/gar/v2-checkpkg-stats/bin/checkpkg_collect_stats.py
csw/mgar/gar/v2-checkpkg-stats/bin/checkpkg_collect_stats_test.py
csw/mgar/gar/v2-checkpkg-stats/bin/custom-pkgtrans
csw/mgar/gar/v2-checkpkg-stats/lib/sh/
csw/mgar/gar/v2-checkpkg-stats/lib/sh/libcheckpkg.sh
Modified: csw/mgar/gar/v2-checkpkg-stats/bin/checkpkg
===================================================================
--- csw/mgar/gar/v2-checkpkg-stats/bin/checkpkg 2010-02-14 19:59:12 UTC (rev 8540)
+++ csw/mgar/gar/v2-checkpkg-stats/bin/checkpkg 2010-02-14 20:32:11 UTC (rev 8541)
@@ -21,10 +21,15 @@
# you know you are tracking the most current version.
#
-
PATH=$PATH:/usr/sbin
readonly NAME_MAX_LENGTH=${NAME_MAX_LENGTH:-20}
+command_basename=`basename $0`
+command_basedir="${0%/${command_basename}}"
+libshdir="${command_basedir}/../lib/sh"
+readonly command_basename command_basedir libshdir
+. "${libshdir}/libcheckpkg.sh"
+
LOCAL_ARCH=`uname -p`
if [[ -z "${CHECKPKG_TMPDIR}" ]]; then
readonly CHECKPKG_TMPDIR="/var/tmp"
@@ -44,9 +49,10 @@
BOLD=""
COLOR_RESET=""
fi
+readonly GREEN RED BOLD COLOR_RESET
readonly selfpath="$0"
-readonly selfargs="$*"
+readonly selfargs="$@"
# always print out a warning message. (to stderr)
# exit script, if quit_on_warn set
@@ -337,38 +343,9 @@
# exit 0
#fi
-
-# This function exists, because pkgtrans is BROKEN!!
-# It leaves a directory in /var/tmp/aaXXXXXXX, even after clean quit
-# SO, emulate pkgtrans behaviour, for "pkgtrans src destdir pkgname"
-# Except that we ignore pkgname arg, and just do first one we find.
-# and we are a bit hacky about how we do things.
-pkgtrans(){
- if [[ ! -d $2 ]] ; then
- print ERROR: $2 is not a directory >/dev/fd/2
- return 1
- fi
- hdrblks=`(dd if=$1 skip=1 2>/dev/null| cpio -i -t >/dev/null) 2>&1 |
- nawk '{print $1; exit;}'`
-
- ## print initial hdrblks=$hdrblks
-
- hdrblks=$(($hdrblks + 1))
- mkdir $2/$3 || return 1
-
- dd if=$1 skip=$hdrblks 2>/dev/null | (cd $2/$3 ; cpio -ivdm)
- # on fail, SOMETIMES cpio returns 1, but sometimes it returns 0!!
- if [[ ! -d $2/$3/install ]] ; then
- print retrying extract with different archive offset...
- # no, I cant tell in advance why/when the prev fails
- hdrblks=$(($hdrblks + 1))
- dd if=$1 skip=$hdrblks 2>/dev/null| (cd $2/$3 ; cpio -ivdm)
- fi
-}
-
print ""
print Extracing pkg for examination of files...
-pkgtrans $f $EXTRACTDIR $pkgname
+custom_pkgtrans $f $EXTRACTDIR $pkgname
#############################################################
# We now have the package expanded, in "directory" form, in
@@ -566,9 +543,7 @@
set_variables_for_individual_package_check "$f"
test_suite_ok=1
-checkpkg_scriptname=`basename $0`
-checkpkg_basedir=${0%/${checkpkg_scriptname}}
-plugindir=${checkpkg_basedir}/checkpkg.d
+plugindir=${command_basedir}/checkpkg.d
checkpkg_module_tag="checkpkg-"
# Cleaning up old *.pyc files which can cause grief. This is because of the
@@ -582,7 +557,9 @@
done
# /var/sadm/install/contents cache update
-${checkpkg_basedir}/update_contents_cache.py
+${command_basedir}/update_contents_cache.py
+# Collects package stats to be later analyzed
+${command_basedir}/checkpkg_collect_stats.py "$@"
if [[ "${DEBUG}" != "" ]]; then
extra_options="--debug"
@@ -651,7 +628,7 @@
done
# Collecting errors and applying the overrides.
-${checkpkg_basedir}/analyze_module_results.py \
+${command_basedir}/analyze_module_results.py \
-e "${EXTRACTDIR}" \
${pkgnames}
if [[ "$?" -ne 0 ]]; then
Added: csw/mgar/gar/v2-checkpkg-stats/bin/checkpkg_collect_stats.py
===================================================================
--- csw/mgar/gar/v2-checkpkg-stats/bin/checkpkg_collect_stats.py (rev 0)
+++ csw/mgar/gar/v2-checkpkg-stats/bin/checkpkg_collect_stats.py 2010-02-14 20:32:11 UTC (rev 8541)
@@ -0,0 +1,138 @@
+#!/opt/csw/bin/python2.6
+#
+# $Id$
+#
+# Collects statistics about a package and saves to a directory, for later use
+# by checkpkg modules.
+
+import errno
+import os
+import os.path
+import sys
+import logging
+import optparse
+import yaml
+
+# The following bit of code sets the correct path to Python libraries
+# distributed with GAR.
+path_list = [os.path.dirname(__file__),
+ "..", "lib", "python"]
+sys.path.append(os.path.join(*path_list))
+import checkpkg
+import opencsw
+
+
+STATS_VERSION = 1L
+
+
+class PackageStats(object):
+    """Collects statistics about one srv4 package and saves them as YAML.
+
+    Every statistic is written to its own file in a per-package directory:
+
+      <home>/.checkpkg/stats/<first 2 chars of md5sum>/<md5sum>/<name>.yml
+
+    CollectStats() writes the following names: pkginfo, pkgmap, binaries,
+    depends, all_filenames, overrides and basic_stats.
+    """
+
+    def __init__(self, srv4_pkg):
+        """Initializes the collector.
+
+        Args:
+          srv4_pkg: The package object to examine.  This class calls its
+            GetMd5sum() and GetDirFormatPkg() methods and reads its pkg_path
+            attribute.
+        """
+        self.srv4_pkg = srv4_pkg
+        # The values below are computed lazily and cached by the getters.
+        self.md5sum = None
+        self.dir_format_pkg = None
+        self.stats_path = None
+
+    def GetMd5sum(self):
+        """Returns the md5 sum of the package, computing it on first use."""
+        if not self.md5sum:
+            self.md5sum = self.srv4_pkg.GetMd5sum()
+        return self.md5sum
+
+    def GetStatsPath(self, home=None):
+        """Returns the directory in which the stats of this package live.
+
+        The path is computed once and cached; the home argument is only
+        honored by the call that computes it.
+
+        Args:
+          home: Base directory; defaults to the home directory of the
+            current user.
+
+        Returns:
+          A path of the form <home>/.checkpkg/stats/<md5[0:2]>/<md5>.
+        """
+        if not self.stats_path:
+            if not home:
+                # expanduser() reads $HOME when it is set, and falls back to
+                # the password database instead of raising KeyError.
+                home = os.path.expanduser("~")
+            md5sum = self.GetMd5sum()
+            self.stats_path = os.path.join(
+                home, ".checkpkg", "stats", md5sum[0:2], md5sum)
+        return self.stats_path
+
+    def StatsExist(self):
+        """Checks if statistics of a package exist.
+
+        Returns:
+          bool
+        """
+        if not self.StatsDirExists():
+            return False
+        # More checks can be added in the future.
+        return True
+
+    def StatsDirExists(self):
+        """Returns True if the stats directory exists."""
+        return os.path.isdir(self.GetStatsPath())
+
+    def GetDirFormatPkg(self):
+        """Returns the directory format package, fetching it on first use."""
+        if not self.dir_format_pkg:
+            self.dir_format_pkg = self.srv4_pkg.GetDirFormatPkg()
+        return self.dir_format_pkg
+
+    def MakeStatsDir(self):
+        """mkdir -p equivalent.
+
+        http://stackoverflow.com/questions/600268/mkdir-p-functionality-in-python
+
+        Raises:
+          OSError: if the directory cannot be created, including the case
+            where something which is not a directory occupies the path.
+        """
+        stats_path = self.GetStatsPath()
+        try:
+            os.makedirs(stats_path)
+        except OSError as e:
+            # An already existing directory is fine.  EEXIST is also reported
+            # when a regular file is in the way, which must not be ignored.
+            if e.errno != errno.EEXIST or not os.path.isdir(stats_path):
+                raise
+
+    def CollectStats(self):
+        """Extracts the statistics from the package and writes them to disk."""
+        self.MakeStatsDir()
+        dir_pkg = self.GetDirFormatPkg()
+        self.DumpObject(dir_pkg.GetParsedPkginfo(), "pkginfo")
+        self.DumpObject(dir_pkg.GetPkgmap().entries, "pkgmap")
+        self.DumpObject(dir_pkg.ListBinaries(), "binaries")
+        self.DumpObject(dir_pkg.GetDependencies(), "depends")
+        self.DumpObject(dir_pkg.GetAllFilenames(), "all_filenames")
+        # Override objects are reduced to plain dictionaries, because
+        # yaml.safe_dump() only serializes basic Python types.
+        overrides_simple = [
+            {"pkgname": override.pkgname,
+             "tag_name": override.tag_name,
+             "tag_info": override.tag_info}
+            for override in dir_pkg.GetOverrides()]
+        self.DumpObject(overrides_simple, "overrides")
+        pkg_basename = os.path.basename(self.srv4_pkg.pkg_path)
+        basic_stats = {
+            "stats_version": STATS_VERSION,
+            "pkg_path": self.srv4_pkg.pkg_path,
+            "pkg_basename": pkg_basename,
+            "parsed_basename": opencsw.ParsePackageFileName(pkg_basename),
+            "pkgname": dir_pkg.pkgname,
+            "catalogname": dir_pkg.GetCatalogname(),
+        }
+        self.DumpObject(basic_stats, "basic_stats")
+
+    def DumpObject(self, obj, name):
+        """Serializes obj to <stats path>/<name>.yml.
+
+        Args:
+          obj: An object built from basic types, accepted by yaml.safe_dump().
+          name: The file name, without the ".yml" extension.
+        """
+        out_file_name = os.path.join(self.GetStatsPath(), "%s.yml" % name)
+        logging.debug("DumpObject(): writing %s", repr(out_file_name))
+        # The with statement closes the file also when dumping fails.
+        with open(out_file_name, "w") as f:
+            f.write(yaml.safe_dump(obj))
+
+
+def main():
+    """Collects and saves statistics of every package file given as argument."""
+    # logging.basicConfig() does nothing once the root logger has a handler,
+    # so only one call is made.  DEBUG is the level that was effectively in
+    # use when a second call with INFO followed this one.
+    logging.basicConfig(level=logging.DEBUG)
+    parser = optparse.OptionParser()
+    unused_options, args = parser.parse_args()
+    logging.info("Collecting statistics about given package files.")
+    logging.debug("args: %s", args)
+    # All package objects are created before any statistics are collected.
+    packages = [opencsw.CswSrv4File(x) for x in args]
+    for pkg in packages:
+        PackageStats(pkg).CollectStats()
+
+if __name__ == '__main__':
+ main()
Property changes on: csw/mgar/gar/v2-checkpkg-stats/bin/checkpkg_collect_stats.py
___________________________________________________________________
Added: svn:executable
+ *
Added: svn:keywords
+ Id
Added: csw/mgar/gar/v2-checkpkg-stats/bin/checkpkg_collect_stats_test.py
===================================================================
--- csw/mgar/gar/v2-checkpkg-stats/bin/checkpkg_collect_stats_test.py (rev 0)
+++ csw/mgar/gar/v2-checkpkg-stats/bin/checkpkg_collect_stats_test.py 2010-02-14 20:32:11 UTC (rev 8541)
@@ -0,0 +1,34 @@
+#!/opt/csw/bin/python2.6
+
+import os
+import sys
+import unittest
+import mox
+import checkpkg_collect_stats as ccs
+
+# The following bit of code sets the correct path to Python libraries
+# distributed with GAR.
+path_list = [os.path.dirname(__file__),
+ "..", "lib", "python"]
+sys.path.append(os.path.join(*path_list))
+import checkpkg
+import opencsw
+
+
+class PackageStatsUnitTest(unittest.TestCase):
+    """Unit tests of ccs.PackageStats, using a mocked srv4 package."""
+
+    def setUp(self):
+        self.mocker = mox.Mox()
+
+    def testGetStatsPath(self):
+        """The stats path is built from the home directory and the md5 sum."""
+        srv4_mock = self.mocker.CreateMock(opencsw.CswSrv4File)
+        srv4_mock.GetMd5sum().AndReturn("abcdef")
+        self.mocker.ReplayAll()
+        stats = ccs.PackageStats(srv4_mock)
+        actual = stats.GetStatsPath("/home/joe")
+        self.assertEqual("/home/joe/.checkpkg/stats/ab/abcdef", actual)
+        self.mocker.VerifyAll()
+
+
+if __name__ == '__main__':
+ unittest.main()
Property changes on: csw/mgar/gar/v2-checkpkg-stats/bin/checkpkg_collect_stats_test.py
___________________________________________________________________
Added: svn:executable
+ *
Added: svn:keywords
+ Id
Added: csw/mgar/gar/v2-checkpkg-stats/bin/custom-pkgtrans
===================================================================
--- csw/mgar/gar/v2-checkpkg-stats/bin/custom-pkgtrans (rev 0)
+++ csw/mgar/gar/v2-checkpkg-stats/bin/custom-pkgtrans 2010-02-14 20:32:11 UTC (rev 8541)
@@ -0,0 +1,18 @@
+#!/bin/ksh -p
+#
+# $Id$
+#
+# This file exists in order to avoid implementing pipelines in Python. It
+# could be integrated into the package stats collection program.
+
+# Locate lib/sh relative to this script.  $0 is quoted, and dirname is used,
+# so that paths containing spaces and invocations without a slash in $0
+# resolve correctly.
+command_basename=`basename "$0"`
+command_basedir=`dirname "$0"`
+libshdir="${command_basedir}/../lib/sh"
+readonly command_basename command_basedir libshdir
+. "${libshdir}/libcheckpkg.sh"
+
+# All three arguments are required; the usage message is a diagnostic, so it
+# goes to stderr.
+if [[ -z "$1" || -z "$2" || -z "$3" ]]; then
+  print -u2 "usage: $0 <file.pkg> <targetdir> <pkgname>"
+  exit 1
+fi
+custom_pkgtrans "$1" "$2" "$3"
Property changes on: csw/mgar/gar/v2-checkpkg-stats/bin/custom-pkgtrans
___________________________________________________________________
Added: svn:executable
+ *
Added: svn:keywords
+ Id
Modified: csw/mgar/gar/v2-checkpkg-stats/bin/update_contents_cache.py
===================================================================
--- csw/mgar/gar/v2-checkpkg-stats/bin/update_contents_cache.py 2010-02-14 19:59:12 UTC (rev 8540)
+++ csw/mgar/gar/v2-checkpkg-stats/bin/update_contents_cache.py 2010-02-14 20:32:11 UTC (rev 8541)
@@ -12,8 +12,7 @@
# The following bit of code sets the correct path to Python libraries
# distributed with GAR.
-path_list = [os.getcwd(),
- os.path.split(sys.argv[0])[0],
+path_list = [os.path.dirname(__file__),
"..", "lib", "python"]
sys.path.append(os.path.join(*path_list))
import checkpkg
Modified: csw/mgar/gar/v2-checkpkg-stats/lib/python/opencsw.py
===================================================================
--- csw/mgar/gar/v2-checkpkg-stats/lib/python/opencsw.py 2010-02-14 19:59:12 UTC (rev 8540)
+++ csw/mgar/gar/v2-checkpkg-stats/lib/python/opencsw.py 2010-02-14 20:32:11 UTC (rev 8541)
@@ -15,6 +15,7 @@
import copy
import datetime
import difflib
+import hashlib
import logging
import os
import os.path
@@ -366,6 +367,9 @@
self.transformed = False
self.dir_format_pkg = None
+    def __repr__(self):
+        """Returns a representation of the form CswSrv4File('<pkg_path>')."""
+        return u"CswSrv4File(%r)" % (self.pkg_path,)
+
def GetWorkDir(self):
if not self.workdir:
self.workdir = tempfile.mkdtemp(prefix="pkg_")
@@ -395,7 +399,27 @@
"%s or %s." % (gzip_suffix, pkg_suffix))
return self.gunzipped_path
+    def Pkgtrans(self, src_file, destdir, pkgname):
+        """A proxy for the pkgtrans command.
+
+        This requires custom-pkgtrans to be available.  A failure of the
+        script is logged as an error; no exception is raised for it.
+
+        Args:
+          src_file: Path to the package stream file.
+          destdir: An existing directory to unpack into.
+          pkgname: Third argument of custom-pkgtrans.
+
+        Raises:
+          PackageError: if destdir does not exist or is not a directory.
+        """
+        if not os.path.isdir(destdir):
+            raise PackageError("%s doesn't exist or is not a directory" % destdir)
+        # This module is in lib/python, while custom-pkgtrans is in bin, two
+        # levels up from here.
+        script_path = os.path.join(os.path.dirname(__file__),
+                                   "..", "..", "bin", "custom-pkgtrans")
+        args = [script_path, src_file, destdir, pkgname]
+        # No pipes are attached, so waiting for the exit status is enough.
+        ret = subprocess.Popen(args).wait()
+        if ret:
+            logging.error("%s has failed", args)
+
def TransformToDir(self):
+ """Transforms the file to the directory format.
+
+ This could use the Pkgtrans command at the top, because pkgtrans supposedly
+ leaves temporary files behind.
+ """
if not self.transformed:
args = ["pkgtrans", "-a", self.GetAdminFilePath(),
self.GetGunzippedPath(), self.GetWorkDir(), "all"]
@@ -424,6 +448,13 @@
dir_format_pkg = self.GetDirFormatPkg()
return dir_format_pkg.GetPkgmap(analyze_permissions, strip)
+    def GetMd5sum(self):
+        """Returns the hexadecimal md5 sum of the file at self.pkg_path."""
+        md5 = hashlib.md5()
+        # Binary mode: the digest must be computed over the exact bytes.  The
+        # file is read in chunks so that a big package is never held in memory
+        # as a whole, and the with statement closes it also on errors.
+        with open(self.pkg_path, "rb") as fp:
+            for chunk in iter(lambda: fp.read(1024 * 1024), b""):
+                md5.update(chunk)
+        return md5.hexdigest()
+
def __del__(self):
if self.workdir:
logging.debug("Removing %s", repr(self.workdir))
@@ -789,7 +820,7 @@
if line_to_add:
self.paths.add(line_to_add)
entry = {
- "line": line,
+ "line": line.strip(),
"type": line_type,
}
entry["path"] = installed_path
@@ -879,6 +910,7 @@
else:
logging.warn("%s is not a directory.", pkg_path)
+
def Srv4Exists(self, pkg_dir):
pkg = DirectoryFormatPackage(pkg_dir)
srv4_name = pkg.GetSrv4FileName()
Added: csw/mgar/gar/v2-checkpkg-stats/lib/sh/libcheckpkg.sh
===================================================================
--- csw/mgar/gar/v2-checkpkg-stats/lib/sh/libcheckpkg.sh (rev 0)
+++ csw/mgar/gar/v2-checkpkg-stats/lib/sh/libcheckpkg.sh 2010-02-14 20:32:11 UTC (rev 8541)
@@ -0,0 +1,30 @@
+#!/bin/ksh -p
+#
+# $Id$
+
+# pkgtrans leaves a directory in /var/tmp/aaXXXXXXX even after a clean exit,
+# so this function emulates "pkgtrans src destdir pkgname" with dd and cpio.
+# Unlike the real pkgtrans, it does not select a package by name: only the
+# first package of the stream is processed, and the pkgname argument is used
+# solely as the name of the directory created under destdir.
+#
+# Arguments:
+#   $1  package stream file
+#   $2  existing target directory
+#   $3  name of the directory to create inside $2
+# Returns 1 if $2 is not a directory or if $2/$3 cannot be created.
+
+custom_pkgtrans(){
+  if [[ ! -d "$2" ]] ; then
+    print "ERROR: $2 is not a directory" >&2
+    return 1
+  fi
+  # Skip the first 512-byte block and let cpio list the rest.  The listing
+  # itself is discarded; the first word of what cpio prints to stderr
+  # (presumably its "N blocks" summary -- TODO confirm) is taken as the size
+  # of the stream header.
+  hdrblks=`(dd if="$1" skip=1 2>/dev/null| cpio -i -t >/dev/null) 2>&1 |
+           nawk '{print $1; exit;}'`
+
+  ## print initial hdrblks=$hdrblks
+
+  # Account for the block which was skipped above.
+  hdrblks=$(($hdrblks + 1))
+  mkdir "$2/$3" || return 1
+
+  # "cd && cpio" rather than "cd ; cpio": never extract into the current
+  # directory when entering the target directory fails.
+  dd if="$1" skip=$hdrblks 2>/dev/null | (cd "$2/$3" && cpio -ivdm)
+  # on fail, SOMETIMES cpio returns 1, but sometimes it returns 0!!
+  # Therefore the presence of the install directory is used as the success
+  # indicator instead of the exit status.
+  if [[ ! -d "$2/$3/install" ]] ; then
+    print "retrying extract with different archive offset..."
+    # no, I can't tell in advance why/when the prev fails
+    hdrblks=$(($hdrblks + 1))
+    dd if="$1" skip=$hdrblks 2>/dev/null| (cd "$2/$3" && cpio -ivdm)
+  fi
+}
Property changes on: csw/mgar/gar/v2-checkpkg-stats/lib/sh/libcheckpkg.sh
___________________________________________________________________
Added: svn:keywords
+ Id
This was sent by the SourceForge.net collaborative development platform, the world's largest Open Source development site.
More information about the devel
mailing list