SF.net SVN: gar:[22910] csw/mgar/gar/v2/lib/python/compare_catalog.py
Maciej (Matchek) Bliziński
maciej at opencsw.org
Sat Feb 1 18:02:58 CET 2014
Sending again
Em 28/01/2014 16:36, "Maciej (Matchek) Bliziński" <maciej at opencsw.org>
escreveu:
> Hi Carsten,
>
> More comments! I hope you'll be able to reduce the size of this script.
>
> 2014-01-28 <cgrzemba at users.sourceforge.net>
>
>> Revision: 22910
>> http://sourceforge.net/p/gar/code/22910
>> Author: cgrzemba
>> Date: 2014-01-28 16:20:56 +0000 (Tue, 28 Jan 2014)
>> Log Message:
>> -----------
>> use argparse, add out of order pkg compare
>>
>> Modified Paths:
>> --------------
>> csw/mgar/gar/v2/lib/python/compare_catalog.py
>>
>> Modified: csw/mgar/gar/v2/lib/python/compare_catalog.py
>> ===================================================================
>> --- csw/mgar/gar/v2/lib/python/compare_catalog.py 2014-01-28
>> 12:36:44 UTC (rev 22909)
>> +++ csw/mgar/gar/v2/lib/python/compare_catalog.py 2014-01-28
>> 16:20:56 UTC (rev 22910)
>> @@ -2,55 +2,108 @@
>>
>> import cjson
>> import logging
>> -import optparse
>> +import argparse
>> import urllib2
>> import sys
>> +import re
>>
>> logging.basicConfig(format='%(asctime)s %(levelname)s:%(message)s')
>> logger = logging.getLogger(__name__)
>>
>> +remote_scheme = ['http','https']
>> +local_scheme = ['file']
>> +
>> +def prepareCatListFromURI(uri):
>> + catlst = []
>> + if '://' in uri:
>>
>
> We can simply require a valid URI that starts with http://, https://,
> or file://.
>
>
>> + scheme = uri.split(':')[0]
>> + if scheme in remote_scheme:
>> + logger.info("fetch remote %s", uri)
>> + data = urllib2.urlopen(uri).read()
>>
>
> Let's use the requests module; we already have a package for it.
>
>
> http://sourceforge.net/apps/trac/gar/browser/csw/mgar/gar/v2/lib/python/rest.py#L250
>
>
>> + catlst = cjson.decode(data)
>> + for e in catlst:
>> + del e[9]
>> + return catlst
>> + elif scheme in local_scheme:
>> + uri = re.sub('.*://','',uri)
>> + else:
>> + logger.error('unsupported URI format')
>> + sys.exit(4)
>> + with open(uri) as lcat:
>> + logger.info("fetch local %s", uri)
>> + for line in lcat: # skip 4 lines header '# CREATIONDATE'
>>
>
> We already have a parser; please use it.
>
> http://sourceforge.net/apps/trac/gar/browser/csw/mgar/gar/v2/lib/python/catalog.py#L66
>
>
>> + if line.startswith("# CREATIONDATE"):
>> + break
>> + for line in lcat:
>> + if line.startswith("-----BEGIN PGP SIGNATURE"):
>> + break
>> + catlst.append(line.rstrip().split(' '))
>> + return catlst
>> +
>> +def compareOutOfOrder(a_catlst, b_catlst, idx):
>> + a_pkgName2Idx = {}
>> + i = idx
>> + for j in range(idx,len(a_catlst)):
>> + a_pkgName2Idx[a_catlst[j][0]] = j
>> + # import pdb; pdb.set_trace()
>> + while i < len(b_catlst):
>> + if b_catlst[i][0] in a_pkgName2Idx:
>> + if b_catlst[i] != a_catlst[a_pkgName2Idx[b_catlst[i][0]]]:
>> + logger.warning("pkgs different at {0},{1}: {2}
>> {3}".format(i,a_pkgName2Idx[b_catlst[i][0]],a_catlst[a_pkgName2Idx[b_catlst[i][0]]],b_catlst[i]))
>> + sys.exit(1)
>> + else:
>> + logger.warning("not in acat: %s", b_catlst[i])
>> + sys.exit(1)
>> + i += 1
>> + b_pkgName2Idx = {}
>> + for j in range(idx,len(b_catlst)):
>> + b_pkgName2Idx[b_catlst[j][0]] = j
>> + # import pdb; pdb.set_trace()
>> + i = idx
>> + while i < len(a_catlst):
>> + if a_catlst[i][0] not in b_pkgName2Idx:
>> + logger.warning("not in bcat: %s", a_catlst[i])
>> + sys.exit(1)
>> + i += 1
>>
>
> Why not convert both to a data structure consisting of basic types: nested
> lists and dicts? Then you can just compare them using the == operator. If
> you wanted some diagnostic output to display the difference, you can always
> serialize them and display the textual diff - it will save you lots of
> lines of code.
>
>
>> def main():
>> - parser = optparse.OptionParser()
>> - parser.add_option("-v","--verbose", dest="verbose",
>> action="store_true",default=False)
>> - parser.add_option("-a","--existing-catalog", dest="oldcatalog",
>> - help='set URI of existing catalog', metavar =
>> 'catalog')
>> - parser.add_option("-b","--new-catalog", dest="newcatalog",
>> - help='set URI of catalog to generate', metavar =
>> 'catalog')
>> - options, args = parser.parse_args()
>> + parser = argparse.ArgumentParser()
>> + parser.add_argument("-v","--verbose", dest="verbose",
>> action="store_true",default=False)
>> + parser.add_argument("acat",help="catalog URI")
>> + parser.add_argument("bcat",help="catalog URI")
>> + args = parser.parse_args()
>> opterror = False
>> - if options.verbose:
>> + if args.verbose:
>> logger.setLevel(logging.INFO)
>> - if options.debug:
>> - logger.setLevel(logging.DEBUG)
>> - if options.newcatalog is None or options.oldcatalog is None:
>> - logger.error("mandatory option missing")
>> + if args.acat is None or args.bcat is None:
>> + logger.error("mandatory args 'acat' 'bcat' missing")
>> sys.exit(2)
>> - oldcat = options.oldcatalog
>> - newcat = options.newcatalog
>> - logger.info(" compare %s with %s", oldcat, newcat)
>>
>> - data = urllib2.urlopen(oldcat).read()
>> - a_catlst = cjson.decode(data)
>> - for e in a_catlst:
>> - del e[9]
>> - b_catlst = []
>> - with open(newcat) as nc:
>> - for i in range(4): # skip 4 lines header
>> - nc.readline()
>> - for cl in nc.readlines():
>> - if "-----BEGIN" == cl.split(' ')[0]:
>> - break
>> - b_catlst.append(cl.rstrip().split(' '))
>> + logger.info("fetch cat_a %s", args.acat)
>> + a_catlst = prepareCatListFromURI(args.acat)
>> +
>> + logger.info("fetch cat_b %s", args.bcat)
>> + b_catlst = prepareCatListFromURI(args.bcat)
>> +
>> + logger.info("compare ...")
>> if len(a_catlst) != len(b_catlst):
>> - logger.warning("a has %d, b has %d
>> packges",len(a_catlst),len(b_catlst))
>> - sys.exit(1)
>> + logger.warning("a has %d, b has %d
>> packages",len(a_catlst),len(b_catlst))
>> + # sys.exit(1)
>> for i in range(len(b_catlst)):
>> - if b_catlst[i] != a_catlst[i] :
>> - logger.warning("a is {0}, b is
>> {1}".format(a_catlst[i],b_catlst[i]))
>> - sys.exit(1)
>> + try:
>> + if b_catlst[i] != a_catlst[i] :
>> + if b_catlst[i][0] != a_catlst[i][0]:
>> + logger.warning("packages out of order: A: %s; B:
>> %s",a_catlst[i][0], b_catlst[i][0])
>>
>
> Hm, what I meant is that out-of-order comparison:
>
> 1. should just work
> 2. should not be a special case
>
> The code should use data structures for which the ordering doesn't matter.
> For example, if you use a dict, then the ordering doesn't matter:
>
> >>> a = dict([('a', 1), ('b', 2)])
> >>> b = dict([('b', 2), ('a', 1)])
> >>> a == b
> True
>
>
>
>> + compareOutOfOrder(a_catlst, b_catlst, i)
>> + break
>> + else:
>> + logger.warning("pkgs different: {0}
>> {1}".format(a_catlst[i],b_catlst[i]))
>> + sys.exit(1)
>> + except IndexError as e:
>> + logger.info("package %s not in acat", b_catlst[i])
>>
>> # import pdb; pdb.set_trace()
>> - logger.debug("catalogs are same")
>> + logger.info("catalogs are same")
>> sys.exit(0)
>>
>>
>>
>> This was sent by the SourceForge.net collaborative development platform,
>> the world's largest Open Source development site.
>>
>>
>
-------------- next part --------------
An HTML attachment was scrubbed...
URL: <http://lists.opencsw.org/pipermail/devel/attachments/20140201/35337105/attachment.html>
More information about the devel
mailing list