[csw-devel] [csw-maintainers] Checkpkg test for direct binding

Wed Sep 19 08:23:14 CEST 2012

2012/9/6 Yann Rouillard <yann at pleiades.fr.eu.org>

> --- dependency_checks.py (revision 19130)
> +++ dependency_checks.py (working copy)
> @@ -39,6 +39,24 @@
>
>  PREFERRED_DIRECTORY_PROVIDERS = set([u"CSWcommon"])
>
> [...]
> +
> +ALLOWED_VERSION_DEPENDENCIES = { "libc.so.1": [ 'SYSVABI_1.3',
> 'SUNWprivate_1.1', 'SUNW_1.22.6', 'SUNW_1.22.5',
>

If you can, limit line width to 80 chars.

> +                                                'SUNW_1.22.4',
> 'SUNW_1.22.3', 'SUNW_1.22.2', 'SUNW_1.22.1',
> +                                                'SUNW_1.22',
> 'SUNW_1.21.3', 'SUNW_1.21.2', 'SUNW_1.21.1',
> +                                                'SUNW_1.21',
> 'SUNW_1.20.4', 'SUNW_1.20.1', 'SUNW_1.20',
> +                                                'SUNW_1.19',
> 'SUNW_1.18.1', 'SUNW_1.18', 'SUNW_1.17',
> +                                                'SUNW_1.16', 'SUNW_1.15',
> 'SUNW_1.14', 'SUNW_1.13',
> +                                                'SUNW_1.12', 'SUNW_1.11',
> 'SUNW_1.10', 'SUNW_1.9',
> +                                                'SUNW_1.8', 'SUNW_1.7',
> 'SUNW_1.6', 'SUNW_1.5',
> +                                                'SUNW_1.4', 'SUNW_1.3',
> 'SUNW_1.2', 'SUNW_1.1',
> +                                                'SUNW_0.9', 'SUNW_0.8',
> 'SUNW_0.7', 'SISCD_2.3' ] }
> +
> +
>
> @@ -147,6 +165,57 @@
>            error_mgr,
>            pkgname, messenger)
>        orphan_sonames.extend(orphan_sonames_tmp)
> +
> +
> +    ldd_info = pkg_data['ldd_info'][binary_info["path"]]
> +    for ldd_response in ldd_info:
> +      if ldd_response['state'] == 'soname-unused' and
> ldd_response['soname'] not in BASE_SOLARIS_LIBRARIES:
> +        messenger.Message(
> +          "Binary %s links to library %s but doesn't seem to use any of
> its symbols. "
> +          "It usually happens because superfluous libraries were added to
> the linker options, "
> +  "either because of the configure script itself or because of the
> \"pkg-config --libs\""
> +  " output of one the dependency."
> +          % ("/" + binary_info["path"], ldd_response['soname']))
> +        error_mgr.ReportError(
> +            pkgname, "soname-unused",
> +            "%s is needed by %s but never used" %
> (ldd_response['soname'], "/" + binary_info["path"]))
>  +
> +    # Even when direct binding is enabled, some symbols might not be
> +    # directly bound because the library explicitely requested the symbol
> +    # not to be drectly bound to.
> +    # For example, libc.so.1 does it for symbol sigaction, free, malloc
> and realloc
> +    # So we consider that direct binding is enabled if at least one
> symbol is directly
> +    # bound to because that definitely means that -B direct or -z direct
> was used.
> +    directly_binded = set()
>

'To bind' is an irregular verb, past tense for bind is bound.
http://forum.wordreference.com/showthread.php?t=600594

+    for syminfo in
> pkg_data["binaries_elf_info"][binary_info["path"]]['symbol table']:
> +      if syminfo['external'] and syminfo['flags'] and 'D' in
> syminfo['flags'] and 'B' in syminfo['flags']:
> +          directly_binded.add(syminfo['soname'])
> +    not_directly_binded =
> directly_binded.symmetric_difference(binary_info["needed sonames"])
> +
> +    if not_directly_binded:
> +      messenger.Message(
> + "No symbol of binary %s is directly binded against the following
> libraries: %s. "
> +        "Please make sure the binaries are compiled using the
> \"-Bdirect\" linker option."
> +        % ("/" + binary_info["path"], ", ".join(not_directly_binded)))
> +      for soname in not_directly_binded:
> +        error_mgr.ReportError(
> +          pkgname, "no-direct-binding",
> +          "%s is not directly binded to soname %s" % ("/" +
> binary_info["path"], soname))
> +
> +
> +    for version_dep in
> pkg_data["binaries_elf_info"][binary_info["path"]]['version needed']:
> +      if (version_dep['soname'] in ALLOWED_VERSION_DEPENDENCIES and
> +        not version_dep['version'] in
> ALLOWED_VERSION_DEPENDENCIES[version_dep['soname']]):
> +        messenger.Message(
> +          "Binary %s requires interface version %s in library %s which is
> only available "
> +  "in recent Solaris releases."
> +          % ("/" + binary_info["path"], version_dep['version'],
> version_dep['soname']))
> +        error_mgr.ReportError(
> +          pkgname, "forbidden-version-interface-dependencies",
> +          "%s requires forbidden interface version %s in library %s"
> +  % ("/" + binary_info["path"], version_dep['version'],
> version_dep['soname']))
> +
> +
>    orphan_sonames = set(orphan_sonames)
>    for soname, binary_path in orphan_sonames:
>      if soname not in ALLOWED_ORPHAN_SONAMES:
>
>
> --- inspective_package.py (revision 19130)
> +++ inspective_package.py (working copy)
> @@ -229,17 +229,132 @@
>
>      return defined_symbols
>
> +
> +  def GetBinaryElfInfo(self):
> +    """Returns various informations symbol and version present in elf
> header
> +
> +    To do this we parse output lines from elfdump -syv, it's the
> +    only command that will give us all informations we need on symbols
> and versions.
> +    We will analyse 3 sections:
> +     - version section: contains soname needed, version interface
> required for each soname,
> +                        and version definition
> +     - symbol table section: contains list of symbol and soname/version
> interface providing it
> +                             (the latter is an index in the version
> section)
> +     - syminfo section: contains special linking flags for each symbol
> +    """
> +    binaries = self.ListBinaries()
> +    binaries_elf_info = {}
> +
> +    for binary in binaries:
> +      binary_abspath = os.path.join(self.directory, "root", binary)
> +      # elfdump is the only tool that give us all informations
> +      args = ["/usr/ccs/bin/elfdump", "-svy", binary_abspath]
> +      elfdump_proc = subprocess.Popen(
> +          args,
> +          stdout=subprocess.PIPE,
>  +          stderr=subprocess.PIPE)
> +      stdout, stderr = elfdump_proc.communicate()
> +      retcode = elfdump_proc.wait()
> +      if retcode or stderr:
> +        logging.error("%s returned one or more errors: %s", args,
> stderr.splitlines()[0])
>  +        continue
> +      elfdump_out = stdout.splitlines()
>

It's weird that we don't have a function to wrap shelling out. Okay, we can
refactor it another time.

> +
> +      symbols = {}
> +      binary_info = { 'version definition': [],
> +      'version needed': [],
> +      'symbol table': [] }
> +      # we will merge syminfo and symbol table information in one list
> +      # so the syminfo list is the same as the symbol table one
> +      binary_info['syminfo'] = binary_info['symbol table']
> +
> +      # The list of fields we want to retrieve in the elfdump output by
> section
> +      # if the field is a tuple, it means we will map the original field
> name
> +      # to another name in the final data structure
> +      elf_fields = { 'version defined': [ 'version', 'dependency' ],
> +                     'version needed': [ ('file', 'soname'), 'version' ],
> +     'symbol table': [ ('name', 'symbol'), ('ver', 'version'),
> +               'bind', ('shndx', 'external') ],
> +     'syminfo': [ ('library', 'soname'), 'symbol', 'flags' ] }
> +
> +      cur_section = None
> +      for line in elfdump_out:
> +
> +        elfdump_data, cur_section = self._ParseElfdumpLine(line,
> cur_section)
> +
> + # header or blank line contains no information
> + if not elfdump_data:
>

Or, if you want to distinguish between an empty data structure (empty dict)
and None, you can write:

if elfdump_data is None:
   ...

It might not matter in this case, but it's generally a good thing to
distinguish between None and False.

> +          continue
> +
> +        elf_info = {}
> + for field in elf_fields[cur_section]:
> +          if type(field) == tuple:
> +            elf_info[field[1]] = elfdump_data[field[0]]
> +          else:
> +            elf_info[field] = elfdump_data[field]
>

This bit looks weird. Is there something wrong with the data?

> +
> +        # we merge symbol table and syminfo informations so we have to
> check
> + # if the symbol has not already been added
> + if cur_section in ('symbol table', 'syminfo'):
> +  if elf_info['symbol'] in symbols:
> +    symbols[elf_info['symbol']].update(elf_info)
> +    continue
>

Is the indentation good? I see "symbols" and "continue" at the same level.

+          else:
> +    symbols[elf_info['symbol']] = elf_info
> +
> +        binary_info[cur_section].append(elf_info)
> +
> +      # elfdump doesn't repeat the name of the soname in the version
> section
> +      # if it's the same on two contiguous line, so we have to make sure
> +      # the information is present in each entry
> +      for i, version in enumerate(binary_info['version needed'][1:]):
> +        if not version['soname']:
> +          version['soname'] = binary_info['version needed'][i]['soname']
> +
> +      # if it exists, the first "version definition" entry is the base
> soname
> +      # we don't need this information
> +      if binary_info['version definition']:
> + binary_info['version definition'].pop(0)
> +
> +      # To not rely of the section order output of elfdump, we resolve
> symbol version
> +      # informations here after having parsed all elfdump output
> +      nb_versions_definition = len(binary_info['version definition'])
> +      for sym_info in binary_info['symbol table']:
> +        version_index = int(sym_info['version']) - 2
> + if version_index > 1:
> +  if version_index < nb_versions_definition:
> +    version = binary_info['version definition'][version_index]
> +          else:
> +    version = binary_info['version needed'][version_index -
> nb_versions_definition]
> +          sym_info['version'] = version['version']
> +  sym_info['soname'] = version['soname']
> +        else:
> +          sym_info['version'] = None
> +
> +        if sym_info['external'] == 'UNDEF':
> +          sym_info['external'] = True
> +        else:
> +          sym_info['external'] = False
> +
> +        # we make sure the field are present even if the syminfo section
> is not
> +        sym_info.setdefault('soname')
> +        sym_info.setdefault('flags')
> +
> +      binaries_elf_info[binary] = binary_info
> +
> +    return binaries_elf_info
>

There's a lot of if-else logic in this section of code. Do you have unit
tests to make sure it does what it's supposed to? It would be helpful to
see the elfdump outputs and corresponding parsed data structures.

+
> +
>
> @@ -263,7 +390,38 @@
>      sym = { 'address': fields[0], 'type': fields[1], 'name': fields[2] }
>      return sym
>
> -  def _ParseLddDashRline(self, line):
> +
> +  def _ParseElfdumpLine(self, line, section = None):
> +
> +    headers_re = (r'(?P<section>Version Needed|Version Definition|Symbol
> Table|Syminfo) Section:\s+(?:\.SUNW_version|\.dynsym|\.SUNW_syminfo)\s*$|'
> +                   '\s*(?:index\s+)?version\s+dependency\s*$|'
> +                   '\s*(?:index\s+)?file\s+version\s*$|'
> +
> '\s*index\s*value\s+size\s+type\s+bind\s+oth\s+ver\s+shndx\s+name\s*$|'
> +                   '\s*index\s+flags\s+bound to\s+symbol\s*$|'
> +   '\s*$')
>

For these style regular expressions, it's a good idea to use the verbose
regex syntax, as described here:
http://www.diveintopython.net/regular_expressions/verbose.html

It allows you to split the regex into parts and provide inline comments
where appropriate.

> +
> +    re_by_section = { 'version definition':
> (r'\s*(?:\[(?P<index>\d+)\]\s+)?(?P<version>.*\S)\s+(?P<dependency>\S+)?\s*$'),
> +      'version needed':
> (r'\s*(?:\[(?P<index>\d+)\]\s+)?(?:(?P<file>\S+)\s+(?!\[ (?:INFO|WEAK)
> \]))?(?P<version>\S+)(?:\s+\[ (?:INFO|WEAK) \])?\s*$'),
> +                      'symbol table':
> (r'\s*\[\d+\]\s+0x[0-9a-f]+\s+0x[0-9a-f]+\s+\S+\s+(?P<bind>\S+)\s+\S+\s+(?P<ver>\S+)\s+(?P<shndx>\S+)\s+(?P<name>\S+)?\s*$'),
> +      'syminfo':
> (r'\s*\[\d+\]\s+(?P<flags>[ABCDFILNPS]+)\s+(?:(?:\[\d+\]\s+(?P<library>.*\S)|<self>)\s+)?(?P<symbol>.*\S)\s*')
> }
>

The same comment about verbose regex syntax.

> +
> +    if section:
> +      m = re.match(re_by_section[section], line)
> +      if m:
> +        elfdump_data = m.groupdict()
> +        return elfdump_data, section
> +
> +    m = re.match(headers_re, line)
> +    if not m:
> +      raise package.StdoutSyntaxError("Could not parse %s" % (repr(line)))
>

I like it that you split the matching into separate sections instead of
cramming everything into one regex.

> +
> +    if m.lastindex:
> +      section = m.group('section').lower()
> +
> +    return None, section
>

One overall comment is that some functions are really big. This means that
they are messy and hard to test. You could split out some of the operations
into separate functions and write simple unit tests for them. This way the
whole code will be more manageable. It'll also allow you to give operations
meaningful names.

Maciej
-------------- next part --------------
An HTML attachment was scrubbed...
URL: <http://lists.opencsw.org/pipermail/devel/attachments/20120919/af270c5b/attachment-0001.html>