[csw-devel] SF.net SVN: gar:[10173] csw/mgar/gar/v2

wahwah at users.sourceforge.net wahwah at users.sourceforge.net
Thu Jun 10 15:28:43 CEST 2010


Revision: 10173
          http://gar.svn.sourceforge.net/gar/?rev=10173&view=rev
Author:   wahwah
Date:     2010-06-10 13:28:43 +0000 (Thu, 10 Jun 2010)

Log Message:
-----------
mGAR v2: checkpkg, adding libmagic support, no more forking / execing to get the file type\!

Modified Paths:
--------------
    csw/mgar/gar/v2/gar.conf.mk
    csw/mgar/gar/v2/lib/python/opencsw.py

Modified: csw/mgar/gar/v2/gar.conf.mk
===================================================================
--- csw/mgar/gar/v2/gar.conf.mk	2010-06-10 13:27:21 UTC (rev 10172)
+++ csw/mgar/gar/v2/gar.conf.mk	2010-06-10 13:28:43 UTC (rev 10173)
@@ -184,6 +184,9 @@
 DEF_BASE_PKGS += CSWgsed
 DEF_BASE_PKGS += CSWgtar
 DEF_BASE_PKGS += CSWpy-cheetah
+DEF_BASE_PKGS += CSWpy-hachoir-core
+DEF_BASE_PKGS += CSWpy-hachoir-parser
+DEF_BASE_PKGS += CSWpy-libmagic
 DEF_BASE_PKGS += CSWpy-progressbar
 DEF_BASE_PKGS += CSWpy-yaml
 DEF_BASE_PKGS += CSWpython

Modified: csw/mgar/gar/v2/lib/python/opencsw.py
===================================================================
--- csw/mgar/gar/v2/lib/python/opencsw.py	2010-06-10 13:27:21 UTC (rev 10172)
+++ csw/mgar/gar/v2/lib/python/opencsw.py	2010-06-10 13:28:43 UTC (rev 10173)
@@ -12,10 +12,16 @@
 # the terms of the GNU General Public License version 2 as published by the
 # Free Software Foundation.
 
+ENABLE_HACHOIR = False
+
 import copy
 import datetime
 import difflib
+if ENABLE_HACHOIR:
+  import hachoir_parser as hp
+  import hachoir_core as hc
 import hashlib
+import magic
 import logging
 import os
 import os.path
@@ -654,6 +660,7 @@
     self.pkginfo_dict = None
     self.binaries = None
     self.file_paths = None
+    self.files_metadata = None
 
   def GetCatalogname(self):
     """Returns the catalog name of the package.
@@ -781,8 +788,59 @@
       raise PackageError("%s does not exist or is not a directory"
                          % self.directory)
 
+  def GetFilesMetadata(self):
+    """Returns a data structure with all the files plus their metadata.
+
+    [
+      {
+        "path": ...,
+        "mime_type": ...,
+      },
+    ]
+    """
+    if not self.files_metadata:
+      self.CheckPkgpathExists()
+      self.files_metadata = []
+      files_root = os.path.join(self.directory, "root")
+      if not os.path.exists(files_root):
+        return self.files_metadata
+      all_files = self.GetAllFilePaths()
+      def StripRe(x, strip_re):
+        return re.sub(strip_re, "", x)
+      root_re = re.compile(r"^root/")
+      magic_cookie = magic.open(0)
+      magic_cookie.load()
+      magic_cookie.setflags(magic.MAGIC_MIME)
+      for file_path in all_files:
+        file_info = {
+            "path": StripRe(file_path, root_re),
+            "mime_type": None,
+        }
+        full_path = unicode(self.MakeAbsolutePath(file_path))
+        if ENABLE_HACHOIR:
+          parser = hp.createParser(full_path)
+          if not parser:
+            print "Can't parse file %s" % (file_path)
+          else:
+            print "found file: %s, it's a %s" % (file_path, parser.mime_type)
+            file_info["mime_type"] = parser.mime_type
+            f =  parser["/header/machine"]
+            print "/header/machine: ", (f, f.display, f.value)
+            i = 0
+            while True:
+              try:
+                f = parser["/header"].getField(i)
+                print "Field", i, ": ", (f, f.display, f.value)
+              except hc.field.field.MissingField:
+                print "No field number", i
+                break
+              i += 1
+        file_info["mime_type"] = magic_cookie.file(full_path)
+        self.files_metadata.append(file_info)
+    return self.files_metadata
+
   def ListBinaries(self):
-    """Shells out to list all the binaries from a given package.
+    """Lists all the binaries from a given package.
 
     Original checkpkg code:
 
@@ -798,31 +856,28 @@
     }
 
     Returns a list of absolute paths.
+
+    Now that there are files_metadata, this function can safely go away, once
+    all its callers are modified to use files_metadata instead.
     """
-    if not self.binaries:
+    bin_mimetypes = (
+        'application/x-executable',
+        'application/x-sharedlib',
+    )
+    if self.binaries is None:
       self.CheckPkgpathExists()
-      files_root = os.path.join(self.directory, "root")
-      if not os.path.exists(files_root):
-        return []
-      # FIXME: It thinks that ELFunctionMapper.html is a binary
-      find_tmpl = "find '%s' -print | xargs file | grep ELF | nawk -F: '{print $1}'"
-      find_proc = subprocess.Popen(find_tmpl % ".",
-                                   shell=True,
-                                   stdout=subprocess.PIPE,
-                                   cwd=files_root)
-      stdout, stderr = find_proc.communicate()
-      ret = find_proc.wait()
-      if ret:
-        logging.error("The %s command returned an error.", repr(find_tmpl))
-      dotslash_re = re.compile(r"^./")
-      def StripRe(x, strip_re):
-        return re.sub(strip_re, "", x)
-      self.binaries = [StripRe(x, dotslash_re) for x in stdout.splitlines()]
-      self.binaries = sorted(self.binaries)
+      files_metadata = self.GetFilesMetadata()
+      self.binaries = []
+      # The nested for-loop looks inefficient.
+      for file_info in files_metadata:
+        for mimetype in bin_mimetypes:
+          if mimetype in file_info["mime_type"]:
+            self.binaries.append(file_info["path"])
+      self.binaries.sort()
     return self.binaries
 
   def GetAllFilePaths(self):
-    """Similar to GetAllFilenames, but returns full paths."""
+    """Returns a list of all paths from the package."""
     if not self.file_paths:
       self.CheckPkgpathExists()
       remove_prefix = "%s/" % self.pkgpath
@@ -879,7 +934,7 @@
     full_paths = self.GetAllFilePaths()
     files_by_pattern = {}
     for full_path in full_paths:
-      content = open(os.path.join(self.pkgpath, full_path), "rb").read()
+      content = open(self.MakeAbsolutePath(full_path), "rb").read()
       for regex in regex_list:
         if re.search(regex, content):
           if regex not in files_by_pattern:
@@ -887,7 +942,10 @@
           files_by_pattern[regex].append(full_path)
     return files_by_pattern
 
+  def MakeAbsolutePath(self, p):
+    return os.path.join(self.pkgpath, p)
 
+
 class Pkgmap(object):
   """Represents the pkgmap of the package.
 


This was sent by the SourceForge.net collaborative development platform, the world's largest Open Source development site.


More information about the devel mailing list