[csw-devel] SF.net SVN: gar:[19975] csw/mgar/gar/v2/lib/python/catalog_gc.py
wahwah at users.sourceforge.net
wahwah at users.sourceforge.net
Sat Dec 29 11:44:25 CET 2012
Revision: 19975
http://gar.svn.sourceforge.net/gar/?rev=19975&view=rev
Author: wahwah
Date: 2012-12-29 10:44:25 +0000 (Sat, 29 Dec 2012)
Log Message:
-----------
catalog-gc: Multiple fixes
- More logging
- Getting the catalog list via REST
- Moving files to another directory instead of unlinking
Modified Paths:
--------------
csw/mgar/gar/v2/lib/python/catalog_gc.py
Modified: csw/mgar/gar/v2/lib/python/catalog_gc.py
===================================================================
--- csw/mgar/gar/v2/lib/python/catalog_gc.py 2012-12-29 10:44:09 UTC (rev 19974)
+++ csw/mgar/gar/v2/lib/python/catalog_gc.py 2012-12-29 10:44:25 UTC (rev 19975)
@@ -7,17 +7,21 @@
import logging
import optparse
-import os.path
+import os
import pipes
import re
import common_constants
+import rest
-USAGE = """%prog --catalog-tree /home/mirror/opencsw-official > gc_01.sh
+USAGE = """%prog --catalog-tree /home/mirror/opencsw-official --dest_dir /home/mirror/gc > gc_01.sh
less gc_01.sh
# Looks good?
bash gc_01.sh
+
+If everything is fine (catalog still generates, no files are missing that are
+necessary), you can remove files from /home/mirror/gc.
"""
class Error(Exception):
@@ -31,9 +35,10 @@
ADDITIONAL_CATALOGS = ("legacy",)
- def __init__(self, d):
+ def __init__(self, d, dest_dir):
logging.debug("CatalogGarbageCollector(%s)", repr(d))
self.catalog_dir = d
+ self.dest_dir = dest_dir
def GarbageCollect(self):
allpkgs_path = os.path.join(self.catalog_dir, "allpkgs")
@@ -42,9 +47,15 @@
catalogs_by_files = {}
for p in os.listdir(allpkgs_path):
allpkgs.add(p)
- catalogs_to_check = (
- tuple(common_constants.DEFAULT_CATALOG_RELEASES)
- + self.ADDITIONAL_CATALOGS)
+ catalogs_to_check = tuple(common_constants.DEFAULT_CATALOG_RELEASES)
+ catalogs_to_check += self.ADDITIONAL_CATALOGS
+ rest_client = rest.RestClient()
+ catalog_triplet_list = rest_client.GetCatalogList()
+ catalogs_to_check += tuple(set([x[2] for x in catalog_triplet_list]))
+ catalogs_to_check = tuple(set(catalogs_to_check))
+ logging.info("Collecting packages from catalogs: %s",
+ catalogs_to_check)
+ file_sizes = {}
for catrel in catalogs_to_check:
for arch in common_constants.PHYSICAL_ARCHITECTURES:
for osrel_long in common_constants.OS_RELS:
@@ -58,11 +69,25 @@
for p in os.listdir(catalog_path):
if pkg_re.search(p):
# It's a package
+ full_path = os.path.join(catalog_path, p)
files_in_catalogs.add(p)
l = catalogs_by_files.setdefault(p, [])
l.append((catrel, arch, osrel_short))
- for p in allpkgs.difference(files_in_catalogs):
- print "rm", pipes.quote(os.path.join(allpkgs_path, p))
+ if full_path not in file_sizes:
+ s = os.stat(full_path)
+ file_sizes[full_path] = s.st_size
+ logging.info(
+ "Collected from %r, found references to %d files (out of %d in allpkgs)",
+ catrel, len(files_in_catalogs), len(allpkgs))
+ to_remove = allpkgs.difference(files_in_catalogs)
+ logging.debug("Collecting file sizes.")
+ total_size = sum(os.stat(os.path.join(allpkgs_path, x)).st_size
+ for x in to_remove)
+ logging.info("Found %d packages to remove, total size: %.1fMB.",
+ len(to_remove), float(total_size) / 1024 ** 2)
+ for p in to_remove:
+ full_path = os.path.join(allpkgs_path, p)
+ print "mv", pipes.quote(full_path), pipes.quote(self.dest_dir)
def main():
@@ -71,12 +96,15 @@
dest="catalog_tree",
help=("Path to the catalog tree, that is the directory "
"containing subdirectories unstable, testing, etc."))
+ parser.add_option("--dest-dir",
+ dest="dest_dir",
+ help=("Move files out to this catalog."))
options, args = parser.parse_args()
logging.basicConfig(level=logging.DEBUG)
- if not options.catalog_tree:
+ if not options.catalog_tree or not options.dest_dir:
parser.print_usage()
raise UsageError("Missing the catalog tree option, see --help.")
- gcg = CatalogGarbageCollector(options.catalog_tree)
+ gcg = CatalogGarbageCollector(options.catalog_tree, options.dest_dir)
gcg.GarbageCollect()
This was sent by the SourceForge.net collaborative development platform, the world's largest Open Source development site.
More information about the devel mailing list