SF.net SVN: gar:[22910] csw/mgar/gar/v2/lib/python/compare_catalog.py

Maciej (Matchek) Bliziński maciej at opencsw.org
Sat Feb 1 18:02:58 CET 2014


Sending again
Em 28/01/2014 16:36, "Maciej (Matchek) Bliziński" <maciej at opencsw.org>
escreveu:

> Hi Carsten,
>
> More comments! I hope you'll be able to reduce the size of this script.
>
> 2014-01-28 <cgrzemba at users.sourceforge.net>
>
>> Revision: 22910
>>           http://sourceforge.net/p/gar/code/22910
>> Author:   cgrzemba
>> Date:     2014-01-28 16:20:56 +0000 (Tue, 28 Jan 2014)
>> Log Message:
>> -----------
>> use argparse, add out of order pkg compare
>>
>> Modified Paths:
>> --------------
>>     csw/mgar/gar/v2/lib/python/compare_catalog.py
>>
>> Modified: csw/mgar/gar/v2/lib/python/compare_catalog.py
>> ===================================================================
>> --- csw/mgar/gar/v2/lib/python/compare_catalog.py       2014-01-28
>> 12:36:44 UTC (rev 22909)
>> +++ csw/mgar/gar/v2/lib/python/compare_catalog.py       2014-01-28
>> 16:20:56 UTC (rev 22910)
>> @@ -2,55 +2,108 @@
>>
>>  import cjson
>>  import logging
>> -import optparse
>> +import argparse
>>  import urllib2
>>  import sys
>> +import re
>>
>>  logging.basicConfig(format='%(asctime)s %(levelname)s:%(message)s')
>>  logger = logging.getLogger(__name__)
>>
>> +remote_scheme = ['http','https']
>> +local_scheme = ['file']
>> +
>> +def prepareCatListFromURI(uri):
>> +    catlst = []
>> +    if '://' in uri:
>>
>
> We can simply require a valid URI that starts with http://, https:// or
> file://.
>
>
>> +        scheme = uri.split(':')[0]
>> +        if scheme in remote_scheme:
>> +            logger.info("fetch remote %s", uri)
>> +            data = urllib2.urlopen(uri).read()
>>
>
> Let's use the requests module. We have a package.
>
>
> http://sourceforge.net/apps/trac/gar/browser/csw/mgar/gar/v2/lib/python/rest.py#L250
>
>
>> +            catlst = cjson.decode(data)
>> +            for e in catlst:
>> +                del e[9]
>> +            return catlst
>> +        elif scheme in local_scheme:
>> +            uri = re.sub('.*://','',uri)
>> +        else:
>> +            logger.error('unsupported URI format')
>> +            sys.exit(4)
>> +    with open(uri) as lcat:
>> +        logger.info("fetch local %s", uri)
>> +        for line in lcat: # skip 4 lines header '# CREATIONDATE'
>>
>
> We already have a parser; please use it.
>
> http://sourceforge.net/apps/trac/gar/browser/csw/mgar/gar/v2/lib/python/catalog.py#L66
>
>
>> +            if line.startswith("# CREATIONDATE"):
>> +                break
>> +        for line in lcat:
>> +            if line.startswith("-----BEGIN PGP SIGNATURE"):
>> +                break
>> +            catlst.append(line.rstrip().split(' '))
>> +    return catlst
>> +
>> +def compareOutOfOrder(a_catlst, b_catlst, idx):
>> +    a_pkgName2Idx = {}
>> +    i = idx
>> +    for j in range(idx,len(a_catlst)):
>> +        a_pkgName2Idx[a_catlst[j][0]] = j
>> +    # import pdb; pdb.set_trace()
>> +    while i < len(b_catlst):
>> +        if b_catlst[i][0] in a_pkgName2Idx:
>> +            if b_catlst[i] != a_catlst[a_pkgName2Idx[b_catlst[i][0]]]:
>> +                logger.warning("pkgs different at {0},{1}: {2}
>> {3}".format(i,a_pkgName2Idx[b_catlst[i][0]],a_catlst[a_pkgName2Idx[b_catlst[i][0]]],b_catlst[i]))
>> +                sys.exit(1)
>> +        else:
>> +            logger.warning("not in acat: %s", b_catlst[i])
>> +            sys.exit(1)
>> +        i += 1
>> +    b_pkgName2Idx = {}
>> +    for j in range(idx,len(b_catlst)):
>> +        b_pkgName2Idx[b_catlst[j][0]] = j
>> +    # import pdb; pdb.set_trace()
>> +    i = idx
>> +    while i < len(a_catlst):
>> +        if a_catlst[i][0] not in b_pkgName2Idx:
>> +            logger.warning("not in bcat: %s", a_catlst[i])
>> +            sys.exit(1)
>> +        i += 1
>>
>
> Why not convert both catalogs to a data structure consisting of basic
> types: nested lists and dicts? Then you can just compare them using the ==
> operator. If you want some diagnostic output to display the difference, you
> can always serialize them and display the textual diff - it will save you
> lots of lines of code.
>
>
>>  def main():
>> -    parser = optparse.OptionParser()
>> -    parser.add_option("-v","--verbose", dest="verbose",
>> action="store_true",default=False)
>> -    parser.add_option("-a","--existing-catalog", dest="oldcatalog",
>> -                    help='set URI of existing catalog', metavar =
>> 'catalog')
>> -    parser.add_option("-b","--new-catalog", dest="newcatalog",
>> -                    help='set URI of catalog to generate', metavar =
>> 'catalog')
>> -    options, args = parser.parse_args()
>> +    parser = argparse.ArgumentParser()
>> +    parser.add_argument("-v","--verbose", dest="verbose",
>> action="store_true",default=False)
>> +    parser.add_argument("acat",help="catalog URI")
>> +    parser.add_argument("bcat",help="catalog URI")
>> +    args = parser.parse_args()
>>      opterror = False
>> -    if options.verbose:
>> +    if args.verbose:
>>          logger.setLevel(logging.INFO)
>> -    if options.debug:
>> -        logger.setLevel(logging.DEBUG)
>> -    if options.newcatalog is None or options.oldcatalog is None:
>> -        logger.error("mandatory option missing")
>> +    if args.acat is None or args.bcat is None:
>> +        logger.error("mandatory args 'acat' 'bcat' missing")
>>          sys.exit(2)
>> -    oldcat = options.oldcatalog
>> -    newcat = options.newcatalog
>> -    logger.info(" compare %s with %s", oldcat, newcat)
>>
>> -    data = urllib2.urlopen(oldcat).read()
>> -    a_catlst = cjson.decode(data)
>> -    for e in a_catlst:
>> -        del e[9]
>> -    b_catlst = []
>> -    with open(newcat) as nc:
>> -        for i in range(4): # skip 4 lines header
>> -            nc.readline()
>> -        for cl in nc.readlines():
>> -            if "-----BEGIN" == cl.split(' ')[0]:
>> -                break
>> -            b_catlst.append(cl.rstrip().split(' '))
>> +    logger.info("fetch cat_a %s", args.acat)
>> +    a_catlst = prepareCatListFromURI(args.acat)
>> +
>> +    logger.info("fetch cat_b %s", args.bcat)
>> +    b_catlst = prepareCatListFromURI(args.bcat)
>> +
>> +    logger.info("compare ...")
>>      if len(a_catlst) != len(b_catlst):
>> -        logger.warning("a has %d, b has %d
>> packges",len(a_catlst),len(b_catlst))
>> -        sys.exit(1)
>> +        logger.warning("a has %d, b has %d
>> packages",len(a_catlst),len(b_catlst))
>> +        # sys.exit(1)
>>      for i in range(len(b_catlst)):
>> -        if b_catlst[i] != a_catlst[i] :
>> -            logger.warning("a is {0}, b is
>> {1}".format(a_catlst[i],b_catlst[i]))
>> -            sys.exit(1)
>> +        try:
>> +            if b_catlst[i] != a_catlst[i] :
>> +                if b_catlst[i][0] != a_catlst[i][0]:
>> +                    logger.warning("packages out of order: A: %s; B:
>> %s",a_catlst[i][0], b_catlst[i][0])
>>
>
> Hm, what I meant is that out-of-order comparison:
>
> 1. should just work
> 2. should not be a special case
>
> The code should use data structures for which the ordering doesn't matter.
> For example, if you use a dict, then the ordering doesn't matter:
>
> >>> a = dict([('a', 1), ('b', 2)])
> >>> b = dict([('b', 2), ('a', 1)])
> >>> a == b
> True
>
>
>
>> +                    compareOutOfOrder(a_catlst, b_catlst, i)
>> +                    break
>> +                else:
>> +                    logger.warning("pkgs different: {0}
>> {1}".format(a_catlst[i],b_catlst[i]))
>> +                    sys.exit(1)
>> +        except IndexError as e:
>> +            logger.info("package %s not in acat", b_catlst[i])
>>
>>      # import pdb; pdb.set_trace()
>> -    logger.debug("catalogs are same")
>> +    logger.info("catalogs are same")
>>      sys.exit(0)
>>
>>
>>
>> This was sent by the SourceForge.net collaborative development platform,
>> the world's largest Open Source development site.
>>
>>
>
-------------- next part --------------
An HTML attachment was scrubbed...
URL: <http://lists.opencsw.org/pipermail/devel/attachments/20140201/35337105/attachment.html>


More information about the devel mailing list