SF.net SVN: gar:[22910] csw/mgar/gar/v2/lib/python/compare_catalog.py
Maciej (Matchek) Bliziński
maciej at opencsw.org
Tue Jan 28 17:36:26 CET 2014
Hi Carsten,
More comments! I hope you'll be able to reduce the size of this script.
2014-01-28 <cgrzemba at users.sourceforge.net>
> Revision: 22910
> http://sourceforge.net/p/gar/code/22910
> Author: cgrzemba
> Date: 2014-01-28 16:20:56 +0000 (Tue, 28 Jan 2014)
> Log Message:
> -----------
> use argparse, add out of order pkg compare
>
> Modified Paths:
> --------------
> csw/mgar/gar/v2/lib/python/compare_catalog.py
>
> Modified: csw/mgar/gar/v2/lib/python/compare_catalog.py
> ===================================================================
> --- csw/mgar/gar/v2/lib/python/compare_catalog.py 2014-01-28
> 12:36:44 UTC (rev 22909)
> +++ csw/mgar/gar/v2/lib/python/compare_catalog.py 2014-01-28
> 16:20:56 UTC (rev 22910)
> @@ -2,55 +2,108 @@
>
> import cjson
> import logging
> -import optparse
> +import argparse
> import urllib2
> import sys
> +import re
>
> logging.basicConfig(format='%(asctime)s %(levelname)s:%(message)s')
> logger = logging.getLogger(__name__)
>
> +remote_scheme = ['http','https']
> +local_scheme = ['file']
> +
> +def prepareCatListFromURI(uri):
> + catlst = []
> + if '://' in uri:
>
We can say that you must pass a valid URI that starts with one of
http://, https://, or file://
> + scheme = uri.split(':')[0]
> + if scheme in remote_scheme:
> + logger.info("fetch remote %s", uri)
> + data = urllib2.urlopen(uri).read()
>
Let's use the requests module. We have a package.
http://sourceforge.net/apps/trac/gar/browser/csw/mgar/gar/v2/lib/python/rest.py#L250
> + catlst = cjson.decode(data)
> + for e in catlst:
> + del e[9]
> + return catlst
> + elif scheme in local_scheme:
> + uri = re.sub('.*://','',uri)
> + else:
> + logger.error('unsupported URI format')
> + sys.exit(4)
> + with open(uri) as lcat:
> + logger.info("fetch local %s", uri)
> + for line in lcat: # skip 4 lines header '# CREATIONDATE'
>
We already have a parser, please use it.
http://sourceforge.net/apps/trac/gar/browser/csw/mgar/gar/v2/lib/python/catalog.py#L66
> + if line.startswith("# CREATIONDATE"):
> + break
> + for line in lcat:
> + if line.startswith("-----BEGIN PGP SIGNATURE"):
> + break
> + catlst.append(line.rstrip().split(' '))
> + return catlst
> +
> +def compareOutOfOrder(a_catlst, b_catlst, idx):
> + a_pkgName2Idx = {}
> + i = idx
> + for j in range(idx,len(a_catlst)):
> + a_pkgName2Idx[a_catlst[j][0]] = j
> + # import pdb; pdb.set_trace()
> + while i < len(b_catlst):
> + if b_catlst[i][0] in a_pkgName2Idx:
> + if b_catlst[i] != a_catlst[a_pkgName2Idx[b_catlst[i][0]]]:
> + logger.warning("pkgs different at {0},{1}: {2}
> {3}".format(i,a_pkgName2Idx[b_catlst[i][0]],a_catlst[a_pkgName2Idx[b_catlst[i][0]]],b_catlst[i]))
> + sys.exit(1)
> + else:
> + logger.warning("not in acat: %s", b_catlst[i])
> + sys.exit(1)
> + i += 1
> + b_pkgName2Idx = {}
> + for j in range(idx,len(b_catlst)):
> + b_pkgName2Idx[b_catlst[j][0]] = j
> + # import pdb; pdb.set_trace()
> + i = idx
> + while i < len(a_catlst):
> + if a_catlst[i][0] not in b_pkgName2Idx:
> + logger.warning("not in bcat: %s", a_catlst[i])
> + sys.exit(1)
> + i += 1
>
Why not convert both to a data structure consisting of basic types: nested
lists and dicts? Then you can just compare them using the == operator. If
you wanted some diagnostic output to display the difference, you can always
serialize them and display the textual diff - it will save you lots of
lines of code.
> def main():
> - parser = optparse.OptionParser()
> - parser.add_option("-v","--verbose", dest="verbose",
> action="store_true",default=False)
> - parser.add_option("-a","--existing-catalog", dest="oldcatalog",
> - help='set URI of existing catalog', metavar =
> 'catalog')
> - parser.add_option("-b","--new-catalog", dest="newcatalog",
> - help='set URI of catalog to generate', metavar =
> 'catalog')
> - options, args = parser.parse_args()
> + parser = argparse.ArgumentParser()
> + parser.add_argument("-v","--verbose", dest="verbose",
> action="store_true",default=False)
> + parser.add_argument("acat",help="catalog URI")
> + parser.add_argument("bcat",help="catalog URI")
> + args = parser.parse_args()
> opterror = False
> - if options.verbose:
> + if args.verbose:
> logger.setLevel(logging.INFO)
> - if options.debug:
> - logger.setLevel(logging.DEBUG)
> - if options.newcatalog is None or options.oldcatalog is None:
> - logger.error("mandatory option missing")
> + if args.acat is None or args.bcat is None:
> + logger.error("mandatory args 'acat' 'bcat' missing")
> sys.exit(2)
> - oldcat = options.oldcatalog
> - newcat = options.newcatalog
> - logger.info(" compare %s with %s", oldcat, newcat)
>
> - data = urllib2.urlopen(oldcat).read()
> - a_catlst = cjson.decode(data)
> - for e in a_catlst:
> - del e[9]
> - b_catlst = []
> - with open(newcat) as nc:
> - for i in range(4): # skip 4 lines header
> - nc.readline()
> - for cl in nc.readlines():
> - if "-----BEGIN" == cl.split(' ')[0]:
> - break
> - b_catlst.append(cl.rstrip().split(' '))
> + logger.info("fetch cat_a %s", args.acat)
> + a_catlst = prepareCatListFromURI(args.acat)
> +
> + logger.info("fetch cat_b %s", args.bcat)
> + b_catlst = prepareCatListFromURI(args.bcat)
> +
> + logger.info("compare ...")
> if len(a_catlst) != len(b_catlst):
> - logger.warning("a has %d, b has %d
> packges",len(a_catlst),len(b_catlst))
> - sys.exit(1)
> + logger.warning("a has %d, b has %d
> packages",len(a_catlst),len(b_catlst))
> + # sys.exit(1)
> for i in range(len(b_catlst)):
> - if b_catlst[i] != a_catlst[i] :
> - logger.warning("a is {0}, b is
> {1}".format(a_catlst[i],b_catlst[i]))
> - sys.exit(1)
> + try:
> + if b_catlst[i] != a_catlst[i] :
> + if b_catlst[i][0] != a_catlst[i][0]:
> + logger.warning("packages out of order: A: %s; B:
> %s",a_catlst[i][0], b_catlst[i][0])
>
Hm, what I meant is that out-of-order comparison:
1. should just work
2. should not be a special case
The code should use such data structures that the ordering doesn't matter.
For example, if you use a dict, then the ordering doesn't matter:
>>> a = dict([('a', 1), ('b', 2)])
>>> b = dict([('b', 2), ('a', 1)])
>>> a == b
True
> + compareOutOfOrder(a_catlst, b_catlst, i)
> + break
> + else:
> + logger.warning("pkgs different: {0}
> {1}".format(a_catlst[i],b_catlst[i]))
> + sys.exit(1)
> + except IndexError as e:
> + logger.info("package %s not in acat", b_catlst[i])
>
> # import pdb; pdb.set_trace()
> - logger.debug("catalogs are same")
> + logger.info("catalogs are same")
> sys.exit(0)
>
>
>
> This was sent by the SourceForge.net collaborative development platform,
> the world's largest Open Source development site.
>
>
-------------- next part --------------
An HTML attachment was scrubbed...
URL: <http://lists.opencsw.org/pipermail/devel/attachments/20140128/ca17a4d7/attachment-0001.html>
More information about the devel
mailing list