SF.net SVN: gar:[22910] csw/mgar/gar/v2/lib/python/compare_catalog.py

Maciej (Matchek) Bliziński maciej at opencsw.org
Tue Jan 28 17:36:26 CET 2014


Hi Carsten,

More comments! I hope you'll be able to reduce the size of this script.

2014-01-28 <cgrzemba at users.sourceforge.net>

> Revision: 22910
>           http://sourceforge.net/p/gar/code/22910
> Author:   cgrzemba
> Date:     2014-01-28 16:20:56 +0000 (Tue, 28 Jan 2014)
> Log Message:
> -----------
> use argparse, add out of order pkg compare
>
> Modified Paths:
> --------------
>     csw/mgar/gar/v2/lib/python/compare_catalog.py
>
> Modified: csw/mgar/gar/v2/lib/python/compare_catalog.py
> ===================================================================
> --- csw/mgar/gar/v2/lib/python/compare_catalog.py       2014-01-28
> 12:36:44 UTC (rev 22909)
> +++ csw/mgar/gar/v2/lib/python/compare_catalog.py       2014-01-28
> 16:20:56 UTC (rev 22910)
> @@ -2,55 +2,108 @@
>
>  import cjson
>  import logging
> -import optparse
> +import argparse
>  import urllib2
>  import sys
> +import re
>
>  logging.basicConfig(format='%(asctime)s %(levelname)s:%(message)s')
>  logger = logging.getLogger(__name__)
>
> +remote_scheme = ['http','https']
> +local_scheme = ['file']
> +
> +def prepareCatListFromURI(uri):
> +    catlst = []
> +    if '://' in uri:
>

We can simply require a valid URI that starts with one of
http://, https:// or file://


> +        scheme = uri.split(':')[0]
> +        if scheme in remote_scheme:
> +            logger.info("fetch remote %s", uri)
> +            data = urllib2.urlopen(uri).read()
>

Let's use the requests module. We have a package.

http://sourceforge.net/apps/trac/gar/browser/csw/mgar/gar/v2/lib/python/rest.py#L250


> +            catlst = cjson.decode(data)
> +            for e in catlst:
> +                del e[9]
> +            return catlst
> +        elif scheme in local_scheme:
> +            uri = re.sub('.*://','',uri)
> +        else:
> +            logger.error('unsupported URI format')
> +            sys.exit(4)
> +    with open(uri) as lcat:
> +        logger.info("fetch local %s", uri)
> +        for line in lcat: # skip 4 lines header '# CREATIONDATE'
>

We already have a parser, please use it.
http://sourceforge.net/apps/trac/gar/browser/csw/mgar/gar/v2/lib/python/catalog.py#L66


> +            if line.startswith("# CREATIONDATE"):
> +                break
> +        for line in lcat:
> +            if line.startswith("-----BEGIN PGP SIGNATURE"):
> +                break
> +            catlst.append(line.rstrip().split(' '))
> +    return catlst
> +
> +def compareOutOfOrder(a_catlst, b_catlst, idx):
> +    a_pkgName2Idx = {}
> +    i = idx
> +    for j in range(idx,len(a_catlst)):
> +        a_pkgName2Idx[a_catlst[j][0]] = j
> +    # import pdb; pdb.set_trace()
> +    while i < len(b_catlst):
> +        if b_catlst[i][0] in a_pkgName2Idx:
> +            if b_catlst[i] != a_catlst[a_pkgName2Idx[b_catlst[i][0]]]:
> +                logger.warning("pkgs different at {0},{1}: {2}
> {3}".format(i,a_pkgName2Idx[b_catlst[i][0]],a_catlst[a_pkgName2Idx[b_catlst[i][0]]],b_catlst[i]))
> +                sys.exit(1)
> +        else:
> +            logger.warning("not in acat: %s", b_catlst[i])
> +            sys.exit(1)
> +        i += 1
> +    b_pkgName2Idx = {}
> +    for j in range(idx,len(b_catlst)):
> +        b_pkgName2Idx[b_catlst[j][0]] = j
> +    # import pdb; pdb.set_trace()
> +    i = idx
> +    while i < len(a_catlst):
> +        if a_catlst[i][0] not in b_pkgName2Idx:
> +            logger.warning("not in bcat: %s", a_catlst[i])
> +            sys.exit(1)
> +        i += 1
>

Why not convert both catalogs to a data structure consisting of basic types:
nested lists and dicts? Then you can just compare them using the == operator.
If you want some diagnostic output to display the difference, you can always
serialize them and display the textual diff - it will save you lots of
lines of code.


>  def main():
> -    parser = optparse.OptionParser()
> -    parser.add_option("-v","--verbose", dest="verbose",
> action="store_true",default=False)
> -    parser.add_option("-a","--existing-catalog", dest="oldcatalog",
> -                    help='set URI of existing catalog', metavar =
> 'catalog')
> -    parser.add_option("-b","--new-catalog", dest="newcatalog",
> -                    help='set URI of catalog to generate', metavar =
> 'catalog')
> -    options, args = parser.parse_args()
> +    parser = argparse.ArgumentParser()
> +    parser.add_argument("-v","--verbose", dest="verbose",
> action="store_true",default=False)
> +    parser.add_argument("acat",help="catalog URI")
> +    parser.add_argument("bcat",help="catalog URI")
> +    args = parser.parse_args()
>      opterror = False
> -    if options.verbose:
> +    if args.verbose:
>          logger.setLevel(logging.INFO)
> -    if options.debug:
> -        logger.setLevel(logging.DEBUG)
> -    if options.newcatalog is None or options.oldcatalog is None:
> -        logger.error("mandatory option missing")
> +    if args.acat is None or args.bcat is None:
> +        logger.error("mandatory args 'acat' 'bcat' missing")
>          sys.exit(2)
> -    oldcat = options.oldcatalog
> -    newcat = options.newcatalog
> -    logger.info(" compare %s with %s", oldcat, newcat)
>
> -    data = urllib2.urlopen(oldcat).read()
> -    a_catlst = cjson.decode(data)
> -    for e in a_catlst:
> -        del e[9]
> -    b_catlst = []
> -    with open(newcat) as nc:
> -        for i in range(4): # skip 4 lines header
> -            nc.readline()
> -        for cl in nc.readlines():
> -            if "-----BEGIN" == cl.split(' ')[0]:
> -                break
> -            b_catlst.append(cl.rstrip().split(' '))
> +    logger.info("fetch cat_a %s", args.acat)
> +    a_catlst = prepareCatListFromURI(args.acat)
> +
> +    logger.info("fetch cat_b %s", args.bcat)
> +    b_catlst = prepareCatListFromURI(args.bcat)
> +
> +    logger.info("compare ...")
>      if len(a_catlst) != len(b_catlst):
> -        logger.warning("a has %d, b has %d
> packges",len(a_catlst),len(b_catlst))
> -        sys.exit(1)
> +        logger.warning("a has %d, b has %d
> packages",len(a_catlst),len(b_catlst))
> +        # sys.exit(1)
>      for i in range(len(b_catlst)):
> -        if b_catlst[i] != a_catlst[i] :
> -            logger.warning("a is {0}, b is
> {1}".format(a_catlst[i],b_catlst[i]))
> -            sys.exit(1)
> +        try:
> +            if b_catlst[i] != a_catlst[i] :
> +                if b_catlst[i][0] != a_catlst[i][0]:
> +                    logger.warning("packages out of order: A: %s; B:
> %s",a_catlst[i][0], b_catlst[i][0])
>

Hm, what I meant is that out-of-order comparison:

1. should just work
2. should not be a special case

The code should use such data structures that the ordering doesn't matter.
For example, if you use a dict, then the ordering doesn't matter:

>>> a = dict([('a', 1), ('b', 2)])
>>> b = dict([('b', 2), ('a', 1)])
>>> a == b
True



> +                    compareOutOfOrder(a_catlst, b_catlst, i)
> +                    break
> +                else:
> +                    logger.warning("pkgs different: {0}
> {1}".format(a_catlst[i],b_catlst[i]))
> +                    sys.exit(1)
> +        except IndexError as e:
> +            logger.info("package %s not in acat", b_catlst[i])
>
>      # import pdb; pdb.set_trace()
> -    logger.debug("catalogs are same")
> +    logger.info("catalogs are same")
>      sys.exit(0)
>
>
>
> This was sent by the SourceForge.net collaborative development platform,
> the world's largest Open Source development site.
>
>
-------------- next part --------------
An HTML attachment was scrubbed...
URL: <http://lists.opencsw.org/pipermail/devel/attachments/20140128/ca17a4d7/attachment-0001.html>


More information about the devel mailing list