Commit f853179c authored by Simon McVittie
Browse files

utils: Add functions to compare symbols and versions with libelf



When two libraries have the same numeric tail we were not able to reliably
determine which one was the newer. In particular, many distributions
install libgcc_s.so.1 as a regular file, rather than a symlink to a
versioned name, so library_cmp_by_name() can't work.

These functions let us also check the library's version-definitions and
the individual symbols, to make a more nuanced decision.

Implementation originally by Ludovico de Nittis, adapted by Simon
McVittie to fit the same signature as library_cmp_by_name() so that
we can call the comparison functions via function pointers, to set up
different comparison weights for each library if necessary. This version
also includes Simon's changes to ignore uninteresting symbols for the
purposes of library comparison, with a list of uninteresting symbols
that are part of various architectures' ABIs, taken from dpkg-gensymbols.
Co-authored-by: Ludovico de Nittis <ludovico.denittis@collabora.com>
Signed-off-by: Simon McVittie <smcv@collabora.com>
parent 6013fb6e
......@@ -467,7 +467,7 @@ tests_utils_t_SOURCES = tests/utils.c \
tests/test-helpers.h \
utils/library-cmp.c \
utils/library-cmp.h
tests_utils_t_LDADD = utils/libutils.la $(GLIB_LIBS)
tests_utils_t_LDADD = utils/libutils.la $(GLIB_LIBS) $(LIBELF_LIBS)
test_scripts = tests/capture-libs.pl \
tests/symbols.pl \
......
......@@ -17,12 +17,699 @@
#include "library-cmp.h"
#include <err.h>
#include <errno.h>
#include <fcntl.h>
#include <stdbool.h>
#include <stdio.h>
#include <string.h>
#include <sys/stat.h>
#include <sys/types.h>
#include <unistd.h>
#include <libelf.h>
#include <gelf.h>
#include "debug.h"
#include "utils.h"
// From binutils/include/elf/common.h (this doesn't appear to be documented
// anywhere else).
//
// /* This flag appears in a Versym structure. It means that the symbol
// is hidden, and is only visible with an explicit version number.
// This is a GNU extension. */
// #define VERSYM_HIDDEN 0x8000
//
// /* This is the mask for the rest of the Versym information. */
// #define VERSYM_VERSION 0x7fff
#define VERSYM_HIDDEN 0x8000
#define VERSYM_VERSION 0x7fff
/*
 * bsearch_strcmp_cb:
 * @n: the key being searched for, a NUL-terminated string
 * @ip: pointer to one element of the searched array, i.e. a pointer
 *  to a string pointer
 *
 * Comparison callback with the asymmetric signature that bsearch()
 * uses: the key is passed directly, while the array element is passed
 * by address and has to be dereferenced once.
 *
 * Returns: the result of strcmp() between the key and the element.
 */
static int
bsearch_strcmp_cb( const void *n, const void *ip )
{
    const char * const *element = (const char * const *) ip;

    return strcmp( (const char *) n, *element );
}
/*
 * qsort_strcmp_cb:
 * @s1: pointer to the first array element (a pointer to a string pointer)
 * @s2: pointer to the second array element (a pointer to a string pointer)
 *
 * Comparison callback for qsort() over an array of strings: both
 * arguments are element addresses, so each is dereferenced once before
 * being handed to strcmp().
 *
 * Returns: the result of strcmp() between the two strings.
 */
static int
qsort_strcmp_cb( const void* s1, const void* s2 )
{
    const char *lhs = *(const char * const *) s1;
    const char *rhs = *(const char * const *) s2;

    return strcmp( lhs, rhs );
}
/*
 * string_set_diff_flags:
 * @STRING_SET_DIFF_NONE: Neither set has an element that the other lacks
 * @STRING_SET_DIFF_ONLY_IN_FIRST: At least one element is in the first
 *  set but not in the second
 * @STRING_SET_DIFF_ONLY_IN_SECOND: At least one element is in the second
 *  set but not in the first
 *
 * Bit flags describing the outcome of comparing two sets of strings.
 * The two non-zero flags are independent bits: when each set contains
 * elements that the other does not, both @STRING_SET_DIFF_ONLY_IN_FIRST
 * and @STRING_SET_DIFF_ONLY_IN_SECOND are set at the same time.
 */
typedef enum
{
    STRING_SET_DIFF_NONE = 0,
    STRING_SET_DIFF_ONLY_IN_FIRST = 0x1,
    STRING_SET_DIFF_ONLY_IN_SECOND = 0x2
} string_set_diff_flags;
/*
 * string_set_has_unmatched:
 * @set: the sorted set whose elements are looked up
 * @set_length: number of elements in @set
 * @other: the sorted set that is searched
 * @other_length: number of elements in @other
 *
 * Returns: %true if @set is larger than @other, or if at least one
 *  element of @set cannot be found in @other by binary search.
 */
static bool
string_set_has_unmatched( char **set, size_t set_length,
                          char **other, size_t other_length )
{
    /* A strictly larger set necessarily holds something the other one
     * lacks, so the element-by-element search can be skipped. */
    if( set_length > other_length )
        return true;

    for( size_t idx = 0; idx < set_length; idx++ )
    {
        if( bsearch( set[idx], other, other_length,
                     sizeof(char *), bsearch_strcmp_cb ) == NULL )
            return true;
    }

    return false;
}

/*
 * compare_string_sets:
 * @first: the first set to compare
 * @first_length: number of elements in the first set
 * @second: the second set to compare
 * @second_length: number of elements in the second set
 *
 * Work out in which direction(s) two sets of strings differ.
 *
 * Both sets need to be sorted in strcmp() order, because membership is
 * tested with a binary search. When one set has more elements than the
 * other, it is reported as having extra elements without searching,
 * which is only accurate if neither array contains duplicates.
 *
 * Returns: a combination of #string_set_diff_flags, or
 *  %STRING_SET_DIFF_NONE if neither set has an element the other lacks.
 */
static string_set_diff_flags
compare_string_sets ( char **first, size_t first_length,
                      char **second, size_t second_length )
{
    string_set_diff_flags result = STRING_SET_DIFF_NONE;

    assert( first != NULL );
    assert( second != NULL );

    if( string_set_has_unmatched( first, first_length,
                                  second, second_length ) )
        result |= STRING_SET_DIFF_ONLY_IN_FIRST;

    if( string_set_has_unmatched( second, second_length,
                                  first, first_length ) )
        result |= STRING_SET_DIFF_ONLY_IN_SECOND;

    return result;
}
/*
 * close_elf:
 * @elfp: (inout) (optional): location of an Elf handle, or %NULL
 * @fdp: (inout) (optional): location of a file descriptor, or %NULL
 *
 * Release whichever of the two resources is currently held and reset
 * its slot, so that calling this function again on the same locations
 * does nothing. A %NULL handle and a negative descriptor are treated
 * as "not held". The Elf handle is ended before the descriptor is
 * closed.
 */
static void
close_elf (Elf **elfp, int *fdp)
{
    if( elfp != NULL )
    {
        Elf *handle = *elfp;

        if( handle != NULL )
        {
            elf_end( handle );
            *elfp = NULL;
        }
    }

    if( fdp != NULL )
    {
        int descriptor = *fdp;

        if( descriptor >= 0 )
        {
            close( descriptor );
            *fdp = -1;
        }
    }
}
/*
 * open_elf_library:
 * @path: (type filename): The path where the library is located
 * @fd: (out) (not optional): Used to return a file descriptor of the
 *  opened library; set to -1 on failure
 * @elf: (out) (not optional): Used to return an initialized elf of the
 *  library; must point to %NULL on entry, and is %NULL again on failure
 * @code: (out) (optional): Used to return an error code on failure
 * @message: (out) (optional) (nullable): Used to return an error message
 *  on failure
 *
 * Open @path read-only and check that it is an ELF shared object
 * (%ET_DYN). On success the caller owns both *@fd and *@elf and is
 * expected to release them, for example with close_elf(). On failure
 * anything that was opened here has already been released.
 *
 * Returns: %TRUE if the elf opening succeeded, %FALSE otherwise.
 */
static bool
open_elf_library ( const char *path, int *fd, Elf **elf,
                   int *code, char **message)
{
    GElf_Ehdr ehdr;
    bool result = true;

    assert( fd != NULL );
    assert( elf != NULL );
    assert( *elf == NULL );

    /* @fd is purely an output. Reset it before anything can fail, so
     * that the cleanup at "out" never closes whatever value the caller
     * happened to leave in it. */
    *fd = -1;

    if( elf_version(EV_CURRENT) == EV_NONE )
    {
        _capsule_set_error( code, message, EINVAL,
                            "elf_version(EV_CURRENT): %s",
                            elf_errmsg( elf_errno() ) );
        result = false;
        goto out;
    }

    if( ( *fd = open( path, O_RDONLY | O_CLOEXEC, 0 ) ) < 0 )
    {
        /* Save errno straight away: later calls may overwrite it. */
        int saved_errno = errno;

        _capsule_set_error( code, message, EINVAL,
                            "failed to open %s: %s",
                            path, strerror( saved_errno ) );
        result = false;
        goto out;
    }

    if( ( *elf = elf_begin( *fd, ELF_C_READ, NULL ) ) == NULL )
    {
        _capsule_set_error( code, message, EINVAL,
                            "elf_begin() failed: %s",
                            elf_errmsg( elf_errno() ) );
        result = false;
        goto out;
    }

    if( elf_kind( *elf ) != ELF_K_ELF )
    {
        _capsule_set_error( code, message, EINVAL,
                            "%s is not in ELF format", path );
        result = false;
        goto out;
    }

    if( gelf_getehdr( *elf, &ehdr ) == NULL )
    {
        _capsule_set_error( code, message, EINVAL,
                            "gelf_getehdr() failed: %s",
                            elf_errmsg( elf_errno() ) );
        result = false;
        goto out;
    }

    if( ehdr.e_type != ET_DYN )
    {
        _capsule_set_error( code, message, EINVAL,
                            "%s is not a shared library, elf type is %d",
                            path, ehdr.e_type );
        result = false;
        goto out;
    }

out:
    /* On any failure, hand back neither a descriptor nor an Elf. */
    if( !result )
        close_elf( elf, fd );

    return result;
}
/*
 * print_debug_string_list:
 * @list: (nullable): a %NULL-terminated array of strings
 * @begin_message: (nullable): a heading to emit before the list
 *
 * Emit @begin_message (if given) followed by each string of @list as
 * separate DEBUG_ELF debug messages. Nothing at all is emitted, not
 * even the heading, when @list is %NULL or has no elements.
 */
static void
print_debug_string_list( char **list, const char *begin_message )
{
    char **cursor;

    if( list == NULL || *list == NULL )
    {
        return;
    }

    if( begin_message != NULL )
    {
        DEBUG( DEBUG_ELF, "%s", begin_message );
    }

    for( cursor = list; *cursor != NULL; cursor++ )
    {
        DEBUG( DEBUG_ELF, "%s", *cursor );
    }
}
/*
 * get_versions:
 * @elf: The object's elf of which we want to get the versions
 * @versions_number: (out) (not optional): The number of versions found
 * @code: (out) (optional): Used to return an error code on
 *  failure
 * @message: (out) (optional) (nullable): Used to return an error message
 *  on failure
 *
 * List the version definitions of a shared object, by locating
 * DT_VERDEF through the PT_DYNAMIC segment and walking the chain of
 * Verdef records. The base version (%VER_FLG_BASE) is left out.
 * The result is sorted in strcmp() order.
 *
 * If the object has no version definition table, or its first record
 * cannot be read, this is not treated as an error: the result is an
 * array containing only %NULL and @versions_number is 0.
 *
 * Returns: (transfer full): The list of versions that the
 *  shared object has, on failure %NULL.
 */
static char **
get_versions( Elf *elf, size_t *versions_number, int *code, char **message )
{
    char **versions = NULL;
    Elf_Scn *scn = NULL;
    Elf_Data *data;
    GElf_Shdr shdr_mem;
    GElf_Shdr *shdr = NULL;
    GElf_Verdef def_mem;
    GElf_Verdef *def;
    bool found_verdef = false;
    uintptr_t verdef_ptr = 0;
    size_t offset = 0;
    size_t phnum;
    size_t sh_entsize;
    ptr_list *versions_list = NULL;

    assert( versions_number != NULL );
    *versions_number = 0;

    if( elf_getphdrnum( elf, &phnum ) < 0 )
    {
        _capsule_set_error( code, message, EINVAL,
                            "Unable to determine the number of program headers: %s",
                            elf_errmsg( elf_errno() ) );
        return NULL;
    }

    /* Get the dynamic section */
    for( size_t i = 0; i < phnum; i++ )
    {
        GElf_Phdr phdr_mem;
        GElf_Phdr *phdr = gelf_getphdr( elf, i, &phdr_mem );

        if( phdr != NULL && phdr->p_type == PT_DYNAMIC )
        {
            scn = gelf_offscn( elf, phdr->p_offset );
            shdr = gelf_getshdr( scn, &shdr_mem );
            break;
        }
    }

    if( shdr == NULL )
    {
        int err = elf_errno();

        if( err == 0 )
        {
            _capsule_set_error( code, message, EINVAL,
                                "Unable to find the section header from the dynamic section" );
        }
        else
        {
            _capsule_set_error( code, message, EINVAL,
                                "Unable to get the section header: %s",
                                elf_errmsg( err ) );
        }

        return NULL;
    }

    data = elf_getdata( scn, NULL );

    if( data == NULL )
    {
        _capsule_set_error( code, message, EINVAL,
                            "Unable to get the dynamic section data: %s",
                            elf_errmsg( elf_errno() ) );
        return NULL;
    }

    sh_entsize = gelf_fsize( elf, ELF_T_DYN, 1, EV_CURRENT );

    /* gelf_fsize() reports failure as 0, which must not be used as a
     * divisor below. */
    if( sh_entsize == 0 )
    {
        _capsule_set_error( code, message, EINVAL,
                            "Unable to determine the size of a dynamic section entry: %s",
                            elf_errmsg( elf_errno() ) );
        return NULL;
    }

    for( size_t i = 0; i < shdr->sh_size / sh_entsize; i++ )
    {
        GElf_Dyn dyn_mem;
        GElf_Dyn *dyn = gelf_getdyn( data, i, &dyn_mem );

        if( dyn == NULL )
            break;

        if( dyn->d_tag == DT_VERDEF )
        {
            verdef_ptr = dyn->d_un.d_ptr;
            found_verdef = true;
            break;
        }
    }

    if( !found_verdef )
    {
        DEBUG( DEBUG_ELF, "The version definition table is not available" );
        versions = calloc( 1, sizeof *versions );

        if( versions == NULL )
        {
            _capsule_set_error( code, message, ENOMEM,
                                "Unable to allocate an empty list of versions" );
            return NULL;
        }

        versions[0] = NULL;
        return versions;
    }

    /* NOTE(review): d_ptr is an address whereas gelf_offscn() takes a
     * file offset; this presumably relies on the two being equal for
     * the segment that holds the version definitions - confirm. */
    scn = gelf_offscn( elf, verdef_ptr );
    data = elf_getdata( scn, NULL );

    if( data == NULL )
    {
        _capsule_set_error( code, message, EINVAL,
                            "Unable to get symbols data: %s", elf_errmsg( elf_errno() ) );
        return NULL;
    }

    def = gelf_getverdef( data, 0, &def_mem );

    if( def == NULL )
    {
        DEBUG( DEBUG_ELF, "Verdef is not available: %s", elf_errmsg( elf_errno() ) );
        versions = calloc( 1, sizeof *versions );

        if( versions == NULL )
        {
            _capsule_set_error( code, message, ENOMEM,
                                "Unable to allocate an empty list of versions" );
            return NULL;
        }

        versions[0] = NULL;
        return versions;
    }

    /* Arbitrarily start the list with 8 elements */
    versions_list = ptr_list_alloc( 8 );

    while( def != NULL )
    {
        GElf_Verdaux aux_mem;
        GElf_Verdaux *aux;
        size_t auxoffset = offset + def->vd_aux;

        /* The first Verdaux array must exist and it points to the version
         * definition string that Verdef defines. Every possible additional
         * Verdaux arrays are the dependencies of said version definition.
         * In our case we don't need to list the dependencies, so we just
         * get the first Verdaux of every Verdef. */
        aux = gelf_getverdaux( data, auxoffset, &aux_mem );

        if( aux != NULL )
        {
            const char *version = elf_strptr( elf, shdr->sh_link, aux->vda_name );

            /* NOTE(review): the result of strdup() is not checked
             * before it is stored in the list. */
            if( version != NULL && ( def->vd_flags & VER_FLG_BASE ) == 0 )
                ptr_list_push_ptr( versions_list, strdup( version ) );
        }

        /* Always step to the next record, including when this one was
         * unreadable: skipping this step would leave def unchanged and
         * the loop would never terminate. */
        if( def->vd_next == 0 )
        {
            def = NULL;
        }
        else
        {
            offset += def->vd_next;
            def = gelf_getverdef( data, offset, &def_mem );
        }
    }

    versions = (char **) ptr_list_free_to_array ( versions_list, versions_number );

    /* Nothing to sort with fewer than two entries, and qsort() should
     * not be handed a NULL array. */
    if( versions != NULL && *versions_number > 1 )
        qsort( versions, *versions_number, sizeof(char *), qsort_strcmp_cb );

    return versions;
}
/*
 * ignore_symbols:
 *
 * Names of symbols that are considered uninteresting for the purposes
 * of library comparison. The code that consults this table is outside
 * this part of the file, so how it is searched is not shown here; do
 * not reorder the entries without checking that code first.
 */
static const char * const ignore_symbols[] =
{
/* Libraries on at least SteamOS 2 'brewmaster' sometimes have
 * symbols that appear to have an empty name. */
"",
/* These symbols can appear in libraries without actually being part
 * of anyone's ABI. List taken from dpkg-gensymbols. */
"__bss_end__",
"__bss_end",
"_bss_end__",
"__bss_start",
"__bss_start__",
"__data_start",
"__do_global_ctors_aux",
"__do_global_dtors_aux",
"__do_jv_register_classes",
"_DYNAMIC",
"_edata",
"_end",
"__end__",
"__exidx_end",
"__exidx_start",
"_fbss",
"_fdata",
"_fini",
"_ftext",
"_GLOBAL_OFFSET_TABLE_",
"__gmon_start__",
"__gnu_local_gp",
"_gp",
"_init",
"_PROCEDURE_LINKAGE_TABLE_",
"_SDA2_BASE_",
"_SDA_BASE_",
};
/*
* get_symbols:
* @elf: The object's elf of which we want to get the symbols
* @symbols_number: (out) (not optional): The number of symbols found
* @code: (out) (optional): Used to return an error code on
* failure
* @message: (out) (optional) (nullable): Used to return an error message
* on failure
*
* Returns: (transfer full): The list of symbols that the
* shared object has, on failure %NULL.
*/
static char **
get_symbols ( Elf *elf, size_t *symbols_number, int *code, char **message )
{
char **symbols = NULL;
Elf_Scn *scn = NULL;
Elf_Scn *scn_sym = NULL;
Elf_Scn *scn_ver = NULL;
Elf_Scn *scn_verdef = NULL;
Elf_Data *data;
Elf_Data *sym_data;
Elf_Data *versym_data = NULL;
Elf_Data *verdef_data = NULL;
GElf_Ehdr ehdr;
GElf_Shdr shdr_mem;
GElf_Shdr *shdr = NULL;
bool found_symtab = false;
bool found_versym = false;
bool found_verdef = false;
uintptr_t symtab_ptr = 0;
uintptr_t versym_ptr = 0;
uintptr_t verdef_ptr = 0;
size_t elsize = 0;
size_t phnum;
size_t sh_entsize;
ptr_list *symbols_list = NULL;
assert( symbols_number != NULL );
*symbols_number = 0;
if( elf_getphdrnum( elf, &phnum ) < 0 )
{
_capsule_set_error( code, message, EINVAL,
"Unable to determine the number of program headers: %s",
elf_errmsg( elf_errno() ) );
return symbols;
}
/* Get the dynamic section */
for( size_t i = 0; i < phnum; i++ )
{
GElf_Phdr phdr_mem;
GElf_Phdr *phdr = gelf_getphdr( elf, i, &phdr_mem );
if( phdr != NULL && phdr->p_type == PT_DYNAMIC )
{
scn = gelf_offscn( elf, phdr->p_offset );
shdr = gelf_getshdr( scn, &shdr_mem );
break;
}
}
if( shdr == NULL )
{
int err = elf_errno();
if( err == 0 )
{
_capsule_set_error( code, message, EINVAL,
"Unable to find the section header from the dynamic section" );
}
else
{
_capsule_set_error( code, message, EINVAL,
"Unable to get the section header: %s",
elf_errmsg( err ) );
}
return symbols;
}
data = elf_getdata( scn, NULL );
if( data == NULL )
{
_capsule_set_error( code, message, EINVAL,
"Unable to get dynamic section data: %s", elf_errmsg( elf_errno() ) );
return symbols;
}
sh_entsize = gelf_fsize( elf, ELF_T_DYN, 1, EV_CURRENT );
for( size_t i = 0; i < shdr->sh_size / sh_entsize; i++ )
{
GElf_Dyn dyn_mem;
GElf_Dyn *dyn = gelf_getdyn (data, i, &dyn_mem);
if( dyn == NULL )
break;
switch( dyn->d_tag )
{
case DT_SYMTAB:
symtab_ptr = dyn->d_un.d_ptr;
found_symtab = true;
break;
case DT_VERSYM:
versym_ptr = dyn->d_un.d_ptr;
found_versym = true;
break;
case DT_VERDEF:
verdef_ptr = dyn->d_un.d_ptr;
found_verdef = true;
break;
default:
break;
}
}
if( !found_symtab )
{
_capsule_set_error( code, message, EINVAL, "Unable to find the symbols table" );
return symbols;
}
scn_sym = gelf_offscn( elf, symtab_ptr );
sym_data = elf_getdata( scn_sym, NULL );
if( sym_data == NULL )
{
_capsule_set_error( code, message, EINVAL,
"Unable to get symbols table data: %s", elf_errmsg( elf_errno() ) );
return symbols;
}
if( found_versym )
{
scn_ver = gelf_offscn( elf, versym_ptr );
versym_data = elf_getdata( scn_ver, NULL );
if( versym_data == NULL )
{
_capsule_set_error( code, message, EINVAL,
"Unable to get symbols version information data: %s",
elf_errmsg( elf_errno() ) );
return symbols;
}
}
if( found_verdef )
{
scn_verdef = gelf_offscn( elf, verdef_ptr );
verdef_data = elf_getdata( scn_verdef, NULL );
if( verdef_data == NULL )
{
_capsule_set_error( code, message, EINVAL,
"Unable to get symbols version definition data: %s",
elf_errmsg( elf_errno() ) );
return symbols;
}
}
if( gelf_getehdr( elf, &ehdr ) == NULL )
{
_capsule_set_error( code, message, EINVAL,
"Unable to retrieve Ehdr header: %s",
elf_errmsg( elf_errno() ) );
return symbols;
}
elsize = gelf_fsize( elf, ELF_T_SYM, 1, ehdr.e_version );
if( elsize == 0 )
{
_capsule_set_error( code, message, EINVAL,
"Size of symbols in Ehdr array is zero: %s",
elf_errmsg( elf_errno() ) );
return symbols;
}
/* Arbitrarily start the list with 8 elements */
symbols_list = ptr_list_alloc( 8 );
for( size_t index = 0; index < sym_data->d_size / elsize; index++ )
{
GElf_Sym *sym;
GElf_Sym sym_mem;
const char *symbol;
GElf_Versym versym_mem;
GElf_Versym *versym;
GElf_Verdaux aux_mem;
GElf_Verdaux *aux = NULL;
GElf_Verdef def_mem;
GElf_Verdef *def = NULL;
bool interesting = true;