[csw-devel] SF.net SVN: gar:[19975] csw/mgar/gar/v2/lib/python/catalog_gc.py

wahwah at users.sourceforge.net wahwah at users.sourceforge.net
Sat Dec 29 11:44:25 CET 2012


Revision: 19975
          http://gar.svn.sourceforge.net/gar/?rev=19975&view=rev
Author:   wahwah
Date:     2012-12-29 10:44:25 +0000 (Sat, 29 Dec 2012)
Log Message:
-----------
catalog-gc: Multiple fixes

- More logging
- Getting the catalog list via REST
- Moving files to another directory instead of unlinking

Modified Paths:
--------------
    csw/mgar/gar/v2/lib/python/catalog_gc.py

Modified: csw/mgar/gar/v2/lib/python/catalog_gc.py
===================================================================
--- csw/mgar/gar/v2/lib/python/catalog_gc.py	2012-12-29 10:44:09 UTC (rev 19974)
+++ csw/mgar/gar/v2/lib/python/catalog_gc.py	2012-12-29 10:44:25 UTC (rev 19975)
@@ -7,17 +7,21 @@
 
 import logging
 import optparse
-import os.path
+import os
 import pipes
 import re
 import common_constants
+import rest
 
-USAGE = """%prog --catalog-tree /home/mirror/opencsw-official > gc_01.sh
+USAGE = """%prog --catalog-tree /home/mirror/opencsw-official --dest_dir /home/mirror/gc > gc_01.sh
 less gc_01.sh
 
 # Looks good?
 
 bash gc_01.sh
+
+If everything is fine (catalog still generates, no files are missing that are
+necessary), you can remove files from /home/mirror/gc.
 """
 
 class Error(Exception):
@@ -31,9 +35,10 @@
 
   ADDITIONAL_CATALOGS = ("legacy",)
 
-  def __init__(self, d):
+  def __init__(self, d, dest_dir):
     logging.debug("CatalogGarbageCollector(%s)", repr(d))
     self.catalog_dir = d
+    self.dest_dir = dest_dir
 
   def GarbageCollect(self):
     allpkgs_path = os.path.join(self.catalog_dir, "allpkgs")
@@ -42,9 +47,15 @@
     catalogs_by_files = {}
     for p in os.listdir(allpkgs_path):
       allpkgs.add(p)
-    catalogs_to_check = (
-        tuple(common_constants.DEFAULT_CATALOG_RELEASES)
-        + self.ADDITIONAL_CATALOGS)
+    catalogs_to_check = tuple(common_constants.DEFAULT_CATALOG_RELEASES)
+    catalogs_to_check += self.ADDITIONAL_CATALOGS
+    rest_client = rest.RestClient()
+    catalog_triplet_list = rest_client.GetCatalogList()
+    catalogs_to_check += tuple(set([x[2] for x in catalog_triplet_list]))
+    catalogs_to_check = tuple(set(catalogs_to_check))
+    logging.info("Collecting packages from catalogs: %s",
+                 catalogs_to_check)
+    file_sizes = {}
     for catrel in catalogs_to_check:
       for arch in common_constants.PHYSICAL_ARCHITECTURES:
         for osrel_long in common_constants.OS_RELS:
@@ -58,11 +69,25 @@
           for p in os.listdir(catalog_path):
             if pkg_re.search(p):
               # It's a package
+              full_path = os.path.join(catalog_path, p)
               files_in_catalogs.add(p)
               l = catalogs_by_files.setdefault(p, [])
               l.append((catrel, arch, osrel_short))
-    for p in allpkgs.difference(files_in_catalogs):
-      print "rm", pipes.quote(os.path.join(allpkgs_path, p))
+              if full_path not in file_sizes:
+                s = os.stat(full_path)
+                file_sizes[full_path] = s.st_size
+      logging.info(
+          "Collected from %r, found references to %d files (out of %d in allpkgs)",
+          catrel, len(files_in_catalogs), len(allpkgs))
+    to_remove = allpkgs.difference(files_in_catalogs)
+    logging.debug("Collecting file sizes.")
+    total_size = sum(os.stat(os.path.join(allpkgs_path, x)).st_size
+                     for x in to_remove)
+    logging.info("Found %d packages to remove, total size: %.1fMB.",
+                 len(to_remove), float(total_size) / 1024 ** 2)
+    for p in to_remove:
+      full_path = os.path.join(allpkgs_path, p)
+      print "mv", pipes.quote(full_path), pipes.quote(self.dest_dir)
 
 
 def main():
@@ -71,12 +96,15 @@
       dest="catalog_tree",
       help=("Path to the catalog tree, that is the directory "
             "containing subdirectories unstable, testing, etc."))
+  parser.add_option("--dest-dir",
+      dest="dest_dir",
+      help=("Move files out to this catalog."))
   options, args = parser.parse_args()
   logging.basicConfig(level=logging.DEBUG)
-  if not options.catalog_tree:
+  if not options.catalog_tree or not options.dest_dir:
     parser.print_usage()
     raise UsageError("Missing the catalog tree option, see --help.")
-  gcg = CatalogGarbageCollector(options.catalog_tree)
+  gcg = CatalogGarbageCollector(options.catalog_tree, options.dest_dir)
   gcg.GarbageCollect()
 
 

This was sent by the SourceForge.net collaborative development platform, the world's largest Open Source development site.



More information about the devel mailing list