From 35cf45d805b3c8e220f20c0b366c68e6cdb92bf0 Mon Sep 17 00:00:00 2001 From: Mark Wielaard Date: Mon, 21 Aug 2023 14:19:00 +0200 Subject: [PATCH] 3.21.0-10 lazy debuginfo loading - Add valgrind-3.21.0-lazy-debuginfo.patch - Add valgrind-3.21.0-cleanup-read_elf_object.patch --- valgrind-3.21.0-cleanup-read_elf_object.patch | 58 ++ valgrind-3.21.0-lazy-debuginfo.patch | 850 ++++++++++++++++++ valgrind.spec | 14 +- 3 files changed, 921 insertions(+), 1 deletion(-) create mode 100644 valgrind-3.21.0-cleanup-read_elf_object.patch create mode 100644 valgrind-3.21.0-lazy-debuginfo.patch diff --git a/valgrind-3.21.0-cleanup-read_elf_object.patch b/valgrind-3.21.0-cleanup-read_elf_object.patch new file mode 100644 index 0000000..c81ca94 --- /dev/null +++ b/valgrind-3.21.0-cleanup-read_elf_object.patch @@ -0,0 +1,58 @@ +From a0d555a0dfe078ef04ea49d991a8090ab14bd4a5 Mon Sep 17 00:00:00 2001 +From: Paul Floyd +Date: Sat, 19 Aug 2023 21:37:33 +0200 +Subject: [PATCH] Always cleanup on exit from ML_(read_elf_object) + +I'm still a bit baffled as to why this wasn't seen earlier. +A FreeBSD testcase started failing with kernel 13.2 patch 2, +which is quite a minor change. The testcase gets an fd from +pdfork and the parent does a printf with the fd then zaps the +process with pdkill. Standalone the fd is 3, and that's what +the expected contains. However, when it started failing I saw +with lsof that fds 3 and 4 were associated with the guest exe +and ld-elf.so.1. 
+--- + coregrind/m_debuginfo/readelf.c | 16 +++++++--------- + 1 file changed, 7 insertions(+), 9 deletions(-) + +diff --git a/coregrind/m_debuginfo/readelf.c b/coregrind/m_debuginfo/readelf.c +index f99d3dfd2..ac72f98fb 100644 +--- a/coregrind/m_debuginfo/readelf.c ++++ b/coregrind/m_debuginfo/readelf.c +@@ -1916,6 +1916,7 @@ Bool ML_(read_elf_object) ( struct _DebugInfo* di ) + Word i, j; + Bool dynbss_present = False; + Bool sdynbss_present = False; ++ Bool retval = False; + + /* Image for the main ELF file we're working with. */ + DiImage* mimg = NULL; +@@ -2944,19 +2945,16 @@ Bool ML_(read_elf_object) ( struct _DebugInfo* di ) + } + } + +- return True; ++ retval = True; + +- out: +- { +- /* Last, but not least, detach from the image. */ +- if (mimg) ML_(img_done)(mimg); ++ out: + +- if (svma_ranges) VG_(deleteXA)(svma_ranges); ++ /* Last, but not least, detach from the image. */ ++ if (mimg) ML_(img_done)(mimg); + +- return False; +- } /* out: */ ++ if (svma_ranges) VG_(deleteXA)(svma_ranges); + +- /* NOTREACHED */ ++ return retval; + } + + Bool ML_(read_elf_debug) ( struct _DebugInfo* di ) +-- +2.41.0 + diff --git a/valgrind-3.21.0-lazy-debuginfo.patch b/valgrind-3.21.0-lazy-debuginfo.patch new file mode 100644 index 0000000..6500abc --- /dev/null +++ b/valgrind-3.21.0-lazy-debuginfo.patch @@ -0,0 +1,850 @@ +From 60f7e89ba32b54d73b9e36d49e28d0f559ade0b9 Mon Sep 17 00:00:00 2001 +From: Aaron Merey +Date: Fri, 30 Jun 2023 18:31:42 -0400 +Subject: [PATCH] Support lazy reading and downloading of DWARF debuginfo + +Currently valgrind attempts to read DWARF .debug_* sections as well +as separate debuginfo files for ELF binaries as soon as a shared library +is loaded. This might also result in the downloading of separate debuginfo +files via debuginfod. + +This is inefficient when some of this debuginfo never ends up being used +by valgrind while running the client process. + +This patch adds support for lazy reading and downloading of DWARF +debuginfo. 
When an ELF shared library is loaded, the reading of .debug_* +sections as well as separate or alternate debuginfo is deferred until +valgrind handles an instruction pointer corresponding to a text segment +of the shared library. At this point the deferred sections and separate +debug files are loaded. + +This feature is only supported on ELF platforms. + +https://bugs.kde.org/show_bug.cgi?id=471807 + +ChangeLog + * debuginfo.c (di_notify_ACHIEVE_ACCEPT_STATE): Replace + read_elf_debug_info with read_elf_object. + (addr_load_di): New function. Attempts to load deferred debuginfo + associated with a given address. + (load_di): New function. Attempts to load a given deferred + debuginfo associated with a given address. + (describe_IP): Add calls to load_di and addr_load_di. + (find_DiCfSI): Add call to load_di. + + * priv_readelf.h (read_elf_object): New declaration. + (read_elf_debug): Ditto. + + * priv_storage.h (struct _DebugInfo): New field 'bool deferred'. + + * readelf.c (read_elf_debug_info): Split into read_elf_object and + read_elf_debug. + (read_elf_object): Read non .debug_* section from an ELF binary. + (read_elf_debug): Read .debug_* sections from an ELF binary as + well as any separate/alternate debuginfo files. + + * storage.c (canonicaliseSymtab): Remove assert in order to support + canonicalization of deferred _DebugInfo. + (finish_CFSI_arrays): Add early return if _DebugInfo is + deferred in order to avoid freeing memory that will be needed + when reading debuginfo at a later time. + (canonicaliseTables): Ditto. + + * pub_core_debuginfo.h (addr_load_di): New declaration. + (load_di): New declaration. 
+--- + NEWS | 1 + + coregrind/m_debuginfo/debuginfo.c | 57 +++- + coregrind/m_debuginfo/priv_readelf.h | 24 +- + coregrind/m_debuginfo/priv_storage.h | 7 + + coregrind/m_debuginfo/readelf.c | 437 ++++++++++++++++++--------- + coregrind/m_debuginfo/storage.c | 13 +- + coregrind/pub_core_debuginfo.h | 4 + + 7 files changed, 379 insertions(+), 164 deletions(-) + +diff --git a/coregrind/m_debuginfo/debuginfo.c b/coregrind/m_debuginfo/debuginfo.c +index 22b41def2..8d1fdc696 100644 +--- a/coregrind/m_debuginfo/debuginfo.c ++++ b/coregrind/m_debuginfo/debuginfo.c +@@ -959,14 +959,16 @@ static ULong di_notify_ACHIEVE_ACCEPT_STATE ( struct _DebugInfo* di ) + discard_DebugInfos_which_overlap_with( di ); + + /* The DebugInfoMappings that now exist in the FSM may involve +- overlaps. This confuses ML_(read_elf_debug_info), and may cause ++ overlaps. This confuses ML_(read_elf_*), and may cause + it to compute wrong biases. So de-overlap them now. + See http://bugzilla.mozilla.org/show_bug.cgi?id=788974 */ + truncate_DebugInfoMapping_overlaps( di, di->fsm.maps ); + + /* And acquire new info. */ + # if defined(VGO_linux) || defined(VGO_solaris) || defined(VGO_freebsd) +- ok = ML_(read_elf_debug_info)( di ); ++ ok = ML_(read_elf_object)( di ); ++ if (ok) ++ di->deferred = True; + # elif defined(VGO_darwin) + ok = ML_(read_macho_debug_info)( di ); + # else +@@ -1443,6 +1445,50 @@ ULong VG_(di_notify_mmap)( Addr a, Bool allow_SkFileV, Int use_fd ) + } + } + ++/* Load DI if it has a text segment containing A and DI hasn't already ++ been loaded. */ ++ ++void VG_(load_di)( DebugInfo *di, Addr a) ++{ ++ if (!di->deferred ++ || !di->text_present ++ || di->text_size <= 0 ++ || di->text_avma > a ++ || a >= di->text_avma + di->text_size) ++ return; ++ ++ di->deferred = False; ++ ML_(read_elf_debug) (di); ++ ML_(canonicaliseTables)( di ); ++ ++ /* Check invariants listed in ++ Comment_on_IMPORTANT_REPRESENTATIONAL_INVARIANTS in ++ priv_storage.h. 
*/ ++ check_CFSI_related_invariants(di); ++ ML_(finish_CFSI_arrays)(di); ++} ++ ++/* Attempt to load DebugInfo with a text segment containing A, ++ if such a debuginfo hasn't already been loaded. */ ++ ++void VG_(addr_load_di)( Addr a ) ++{ ++ DebugInfo *di; ++ ++ di = VG_(find_DebugInfo)(VG_(current_DiEpoch)(), a); ++ if (di != NULL) ++ if (di->deferred) { ++ di->deferred = False; ++ ML_(read_elf_debug) (di); ++ ML_(canonicaliseTables)( di ); ++ ++ /* Check invariants listed in ++ Comment_on_IMPORTANT_REPRESENTATIONAL_INVARIANTS in ++ priv_storage.h. */ ++ check_CFSI_related_invariants(di); ++ ML_(finish_CFSI_arrays)(di); ++ } ++} + + /* Unmap is simpler - throw away any SegInfos intersecting + [a, a+len). */ +@@ -2746,6 +2792,11 @@ const HChar* VG_(describe_IP)(DiEpoch ep, Addr eip, const InlIPCursor *iipc) + Bool know_objname; + Bool know_srcloc; + ++ if (iipc && iipc->di) ++ VG_(load_di) (iipc->di, eip); ++ else ++ VG_(addr_load_di) (eip); ++ + if (is_bottom(iipc)) { + // At the bottom (towards main), we describe the fn at eip. + know_fnname = VG_(clo_sym_offsets) +@@ -3090,6 +3141,8 @@ static void find_DiCfSI ( /*OUT*/DebugInfo** diP, + if (!is_DI_valid_for_epoch(di, curr_epoch)) + continue; + ++ VG_(load_di)(di, ip); ++ + /* Use the per-DebugInfo summary address ranges to skip + inapplicable DebugInfos quickly. */ + if (di->cfsi_used == 0) +diff --git a/coregrind/m_debuginfo/priv_readelf.h b/coregrind/m_debuginfo/priv_readelf.h +index 57aa0cc3f..7e0fa17c9 100644 +--- a/coregrind/m_debuginfo/priv_readelf.h ++++ b/coregrind/m_debuginfo/priv_readelf.h +@@ -44,13 +44,23 @@ + extern Bool ML_(is_elf_object_file)( const void* image, SizeT n_image, + Bool rel_ok ); + +-/* The central function for reading ELF debug info. For the +- object/exe specified by the SegInfo, find ELF sections, then read +- the symbols, line number info, file name info, CFA (stack-unwind +- info) and anything else we want, into the tables within the +- supplied SegInfo. 
+-*/ +-extern Bool ML_(read_elf_debug_info) ( DebugInfo* di ); ++/* Read the ELF binary specified by DI. For the object/exe specified ++ by the SegInfo, find ELF sections, then read the symbols, line number ++ info, file name info, CFA (stack-unwind info) and anything else we ++ want, into the tables within the supplied SegInfo. ++ ++ .debug_* sections as well as any separate debuginfo files are not ++ loaded by this function but instead by ML_(read_elf_debug). This ++ separation facilitates lazy loading of debuginfo. */ ++extern Bool ML_(read_elf_object) ( DebugInfo* di ); ++ ++/* Read .debug_* sections from the ELF binary specified by DI. Also ++ attempt to load any separate debuginfo files associated with the ++ object. ++ ++ ML_(read_elf_object) should be called on DI before calling this ++ function. */ ++extern Bool ML_(read_elf_debug) ( DebugInfo* di ); + +extern Bool ML_(check_elf_and_get_rw_loads) ( Int fd, const HChar* filename, Int * rw_load_count ); + +diff --git a/coregrind/m_debuginfo/priv_storage.h b/coregrind/m_debuginfo/priv_storage.h +index a4b90d36b..b959873ab 100644 +--- a/coregrind/m_debuginfo/priv_storage.h ++++ b/coregrind/m_debuginfo/priv_storage.h +@@ -678,6 +678,13 @@ struct _DebugInfo { + invalid and should not be consulted. */ + Bool have_dinfo; /* initially False */ + ++ /* If true then the reading of .debug_* section has been deferred ++ until this information is required (such as when printing ++ a stacktrace). Additionally, if true then the reading of any ++ separate debuginfo files associated with this object has also ++ been deferred. */ ++ Bool deferred; ++ + /* All the rest of the fields in this structure are filled in once + we have committed to reading the symbols and debug info (that + is, at the point where .have_dinfo is set to True). 
*/ +diff --git a/coregrind/m_debuginfo/readelf.c b/coregrind/m_debuginfo/readelf.c +index ce7b7998d..f99d3dfd2 100644 +--- a/coregrind/m_debuginfo/readelf.c ++++ b/coregrind/m_debuginfo/readelf.c +@@ -1836,6 +1836,44 @@ static HChar* readlink_path (const HChar *path) + return buf; + } + ++#define FINDX_MIMG(_sec_name, _sec_escn, _post_fx) \ ++ do { \ ++ ElfXX_Shdr a_shdr; \ ++ ML_(img_get)(&a_shdr, mimg, \ ++ INDEX_BIS(shdr_mioff, i, shdr_ment_szB), \ ++ sizeof(a_shdr)); \ ++ if (0 == ML_(img_strcmp_c)(mimg, shdr_strtab_mioff \ ++ + a_shdr.sh_name, _sec_name)) { \ ++ Bool nobits; \ ++ _sec_escn.img = mimg; \ ++ _sec_escn.ioff = (DiOffT)a_shdr.sh_offset; \ ++ _sec_escn.szB = a_shdr.sh_size; \ ++ if (!check_compression(&a_shdr, &_sec_escn)) { \ ++ ML_(symerr)(di, True, " Compression type is unsupported"); \ ++ goto out; \ ++ } \ ++ nobits = a_shdr.sh_type == SHT_NOBITS; \ ++ vg_assert(_sec_escn.img != NULL); \ ++ vg_assert(_sec_escn.ioff != DiOffT_INVALID); \ ++ TRACE_SYMTAB( "%-18s: ioff %llu .. %llu\n", \ ++ _sec_name, (ULong)a_shdr.sh_offset, \ ++ ((ULong)a_shdr.sh_offset) + a_shdr.sh_size - 1); \ ++ /* SHT_NOBITS sections have zero size in the file. */ \ ++ if (!nobits && \ ++ a_shdr.sh_offset + \ ++ a_shdr.sh_size > ML_(img_real_size)(mimg)) { \ ++ ML_(symerr)(di, True, \ ++ " section beyond image end?!"); \ ++ goto out; \ ++ } \ ++ _post_fx; \ ++ } \ ++ } while (0); ++ ++/* Version with no post-effects */ ++#define FIND_MIMG(_sec_name, _sec_escn) \ ++ FINDX_MIMG(_sec_name, _sec_escn, /**/) ++ + /* The central function for reading ELF debug info. For the + object/exe specified by the DebugInfo, find ELF sections, then read + the symbols, line number info, file name info, CFA (stack-unwind +@@ -1843,7 +1881,7 @@ static HChar* readlink_path (const HChar *path) + supplied DebugInfo. + */ + +-Bool ML_(read_elf_debug_info) ( struct _DebugInfo* di ) ++Bool ML_(read_elf_object) ( struct _DebugInfo* di ) + { + /* This function is long and complex. 
That, and the presence of + nested scopes, means it's not always easy to see which parts are +@@ -1874,7 +1912,7 @@ Bool ML_(read_elf_debug_info) ( struct _DebugInfo* di ) + + + /* TOPLEVEL */ +- Bool res, ok; ++ Bool ok; + Word i, j; + Bool dynbss_present = False; + Bool sdynbss_present = False; +@@ -1882,12 +1920,6 @@ Bool ML_(read_elf_debug_info) ( struct _DebugInfo* di ) + /* Image for the main ELF file we're working with. */ + DiImage* mimg = NULL; + +- /* Ditto for any ELF debuginfo file that we might happen to load. */ +- DiImage* dimg = NULL; +- +- /* Ditto for alternate ELF debuginfo file that we might happen to load. */ +- DiImage* aimg = NULL; +- + /* ELF header offset for the main file. Should be zero since the + ELF header is at start of file. */ + DiOffT ehdr_mioff = 0; +@@ -1970,8 +2002,6 @@ Bool ML_(read_elf_debug_info) ( struct _DebugInfo* di ) + got, plt, and toc. + ---------------------------------------------------------- */ + +- res = False; +- + if (VG_(clo_verbosity) > 1 || VG_(clo_trace_redir)) + VG_(message)(Vg_DebugMsg, "Reading syms from %s\n", + di->fsm.filename ); +@@ -2056,7 +2086,7 @@ Bool ML_(read_elf_debug_info) ( struct _DebugInfo* di ) + shdr_strtab_mioff + = ehdr_mioff /* isn't this always zero? */ + a_shdr.sh_offset; + +- if (!ML_(img_valid)(mimg, shdr_strtab_mioff, ++ if (!ML_(img_valid)(mimg, shdr_strtab_mioff, + 1/*bogus, but we don't know the real size*/ )) { + ML_(symerr)(di, True, "Invalid ELF Section Header String Table"); + goto out; +@@ -2798,10 +2828,6 @@ Bool ML_(read_elf_debug_info) ( struct _DebugInfo* di ) + di->text_avma - di->text_bias, + di->text_avma ); + +- TRACE_SYMTAB("\n"); +- TRACE_SYMTAB("------ Finding image addresses " +- "for debug-info sections ------\n"); +- + /* TOPLEVEL */ + /* Find interesting sections, read the symbol table(s), read any + debug information. 
Each section is located either in the main, +@@ -2821,27 +2847,6 @@ Bool ML_(read_elf_debug_info) ( struct _DebugInfo* di ) + # if defined(VGO_solaris) + DiSlice ldynsym_escn = DiSlice_INVALID; // .SUNW_ldynsym + # endif +- DiSlice debuglink_escn = DiSlice_INVALID; // .gnu_debuglink +- DiSlice debugaltlink_escn = DiSlice_INVALID; // .gnu_debugaltlink +- DiSlice debug_line_escn = DiSlice_INVALID; // .debug_line (dwarf2) +- DiSlice debug_info_escn = DiSlice_INVALID; // .debug_info (dwarf2) +- DiSlice debug_types_escn = DiSlice_INVALID; // .debug_types (dwarf4) +- DiSlice debug_abbv_escn = DiSlice_INVALID; // .debug_abbrev (dwarf2) +- DiSlice debug_str_escn = DiSlice_INVALID; // .debug_str (dwarf2) +- DiSlice debug_line_str_escn = DiSlice_INVALID; // .debug_line_str(dwarf5) +- DiSlice debug_ranges_escn = DiSlice_INVALID; // .debug_ranges (dwarf2) +- DiSlice debug_rnglists_escn = DiSlice_INVALID; // .debug_rnglists(dwarf5) +- DiSlice debug_loclists_escn = DiSlice_INVALID; // .debug_loclists(dwarf5) +- DiSlice debug_addr_escn = DiSlice_INVALID; // .debug_addr (dwarf5) +- DiSlice debug_str_offsets_escn = DiSlice_INVALID; // .debug_str_offsets (dwarf5) +- DiSlice debug_loc_escn = DiSlice_INVALID; // .debug_loc (dwarf2) +- DiSlice debug_frame_escn = DiSlice_INVALID; // .debug_frame (dwarf2) +- DiSlice debug_line_alt_escn = DiSlice_INVALID; // .debug_line (alt) +- DiSlice debug_info_alt_escn = DiSlice_INVALID; // .debug_info (alt) +- DiSlice debug_abbv_alt_escn = DiSlice_INVALID; // .debug_abbrev (alt) +- DiSlice debug_str_alt_escn = DiSlice_INVALID; // .debug_str (alt) +- DiSlice dwarf1d_escn = DiSlice_INVALID; // .debug (dwarf1) +- DiSlice dwarf1l_escn = DiSlice_INVALID; // .line (dwarf1) + DiSlice opd_escn = DiSlice_INVALID; // .opd (dwarf2, + // ppc64be-linux) + DiSlice ehframe_escn[N_EHFRAME_SECTS]; // .eh_frame (dwarf2) +@@ -2868,118 +2873,282 @@ Bool ML_(read_elf_debug_info) ( struct _DebugInfo* di ) + /* TOPLEVEL */ + /* Iterate over section headers (again) */ + for 
(i = 0; i < ehdr_m.e_shnum; i++) { ++ /* NAME ElfSec */ ++ FIND_MIMG( ".dynsym", dynsym_escn) ++ FIND_MIMG( ".dynstr", dynstr_escn) ++ FIND_MIMG( ".symtab", symtab_escn) ++ FIND_MIMG( ".strtab", strtab_escn) ++# if defined(VGO_solaris) ++ FIND_MIMG( ".SUNW_ldynsym", ldynsym_escn) ++# endif + +-# define FINDX(_sec_name, _sec_escn, _post_fx) \ +- do { \ +- ElfXX_Shdr a_shdr; \ +- ML_(img_get)(&a_shdr, mimg, \ +- INDEX_BIS(shdr_mioff, i, shdr_ment_szB), \ +- sizeof(a_shdr)); \ +- if (0 == ML_(img_strcmp_c)(mimg, shdr_strtab_mioff \ +- + a_shdr.sh_name, _sec_name)) { \ +- Bool nobits; \ +- _sec_escn.img = mimg; \ +- _sec_escn.ioff = (DiOffT)a_shdr.sh_offset; \ +- _sec_escn.szB = a_shdr.sh_size; \ +- if (!check_compression(&a_shdr, &_sec_escn)) { \ +- ML_(symerr)(di, True, " Compression type is unsupported"); \ +- goto out; \ +- } \ +- nobits = a_shdr.sh_type == SHT_NOBITS; \ +- vg_assert(_sec_escn.img != NULL); \ +- vg_assert(_sec_escn.ioff != DiOffT_INVALID); \ +- TRACE_SYMTAB( "%-18s: ioff %llu .. %llu\n", \ +- _sec_name, (ULong)a_shdr.sh_offset, \ +- ((ULong)a_shdr.sh_offset) + a_shdr.sh_size - 1); \ +- /* SHT_NOBITS sections have zero size in the file. */ \ +- if (!nobits && \ +- a_shdr.sh_offset + \ +- a_shdr.sh_size > ML_(img_real_size)(mimg)) { \ +- ML_(symerr)(di, True, \ +- " section beyond image end?!"); \ +- goto out; \ +- } \ +- _post_fx; \ +- } \ +- } while (0); ++ FINDX_MIMG( ".eh_frame", ehframe_escn[ehframe_mix], ++ do { ehframe_mix++; vg_assert(ehframe_mix <= N_EHFRAME_SECTS); ++ } while (0) ++ ) ++ /* Comment_on_EH_FRAME_MULTIPLE_INSTANCES: w.r.t. .eh_frame ++ multi-instance kludgery, how are we assured that the order ++ in which we fill in ehframe_escn[] is consistent with the ++ order in which we previously filled in di->ehframe_avma[] ++ and di->ehframe_size[] ? By the fact that in both cases, ++ these arrays were filled in by iterating over the section ++ headers top-to-bottom. 
So both loops (this one and the ++ previous one) encounter the .eh_frame entries in the same ++ order and so fill in these arrays in a consistent order. ++ */ ++ } /* Iterate over section headers (again) */ + +- /* Version with no post-effects */ +-# define FIND(_sec_name, _sec_escn) \ +- FINDX(_sec_name, _sec_escn, /**/) ++ /* Check some sizes */ ++ vg_assert((dynsym_escn.szB % sizeof(ElfXX_Sym)) == 0); ++ vg_assert((symtab_escn.szB % sizeof(ElfXX_Sym)) == 0); ++# if defined(VGO_solaris) ++ vg_assert((ldynsym_escn.szB % sizeof(ElfXX_Sym)) == 0); ++# endif + +- /* NAME ElfSec */ +- FIND( ".dynsym", dynsym_escn) +- FIND( ".dynstr", dynstr_escn) +- FIND( ".symtab", symtab_escn) +- FIND( ".strtab", strtab_escn) ++ /* Read symbols */ ++ { ++ void (*read_elf_symtab)(struct _DebugInfo*, const HChar*, ++ DiSlice*, DiSlice*, DiSlice*, Bool); ++# if defined(VGP_ppc64be_linux) ++ read_elf_symtab = read_elf_symtab__ppc64be_linux; ++# else ++ read_elf_symtab = read_elf_symtab__normal; ++# endif ++ if (symtab_escn.img != NULL) ++ read_elf_symtab(di, "symbol table", ++ &symtab_escn, &strtab_escn, &opd_escn, ++ False); ++ read_elf_symtab(di, "dynamic symbol table", ++ &dynsym_escn, &dynstr_escn, &opd_escn, ++ False); + # if defined(VGO_solaris) +- FIND( ".SUNW_ldynsym", ldynsym_escn) ++ read_elf_symtab(di, "local dynamic symbol table", ++ &ldynsym_escn, &dynstr_escn, &opd_escn, ++ False); + # endif ++ } + +- FIND( ".gnu_debuglink", debuglink_escn) +- FIND( ".gnu_debugaltlink", debugaltlink_escn) ++ /* TOPLEVEL */ ++ /* Read .eh_frame and .debug_frame (call-frame-info) if any. Do ++ the .eh_frame section(s) first. */ ++ vg_assert(di->n_ehframe >= 0 && di->n_ehframe <= N_EHFRAME_SECTS); ++ for (i = 0; i < di->n_ehframe; i++) { ++ /* see Comment_on_EH_FRAME_MULTIPLE_INSTANCES above for why ++ this next assertion should hold. 
*/ ++ vg_assert(ML_(sli_is_valid)(ehframe_escn[i])); ++ vg_assert(ehframe_escn[i].szB == di->ehframe_size[i]); ++ ML_(read_callframe_info_dwarf3)( di, ++ ehframe_escn[i], ++ di->ehframe_avma[i], ++ True/*is_ehframe*/ ); ++ } ++ } ++ ++ return True; ++ ++ out: ++ { ++ /* Last, but not least, detach from the image. */ ++ if (mimg) ML_(img_done)(mimg); ++ ++ if (svma_ranges) VG_(deleteXA)(svma_ranges); + +- FIND( ".debug_line", debug_line_escn) ++ return False; ++ } /* out: */ ++ ++ /* NOTREACHED */ ++} ++ ++Bool ML_(read_elf_debug) ( struct _DebugInfo* di ) ++{ ++ Word i, j; ++ Bool res = True; ++ Bool ok; ++ ++ /* Image for the main ELF file we're working with. */ ++ DiImage* mimg = NULL; ++ ++ /* Ditto for any ELF debuginfo file that we might happen to load. */ ++ DiImage* dimg = NULL; ++ ++ /* Ditto for alternate ELF debuginfo file that we might happen to load. */ ++ DiImage* aimg = NULL; ++ ++ /* Section header image addr, # entries, entry size. Also the ++ associated string table. */ ++ DiOffT shdr_mioff = 0; ++ UWord shdr_mnent = 0; ++ UWord shdr_ment_szB = 0; ++ DiOffT shdr_strtab_mioff = 0; ++ ++ DiOffT ehdr_mioff = 0; ++ ++ /* Connect to the primary object image, so that we can read symbols ++ and line number info out of it. It will be disconnected ++ immediately thereafter; it is only connected transiently. */ ++ mimg = ML_(img_from_local_file)(di->fsm.filename); ++ if (mimg == NULL) { ++ VG_(message)(Vg_UserMsg, "warning: connection to image %s failed\n", ++ di->fsm.filename ); ++ VG_(message)(Vg_UserMsg, " no debug info loaded\n" ); ++ return False; ++ } ++ ++ /* Ok, the object image is available. Now verify that it is a ++ valid ELF .so or executable image. */ ++ ok = is_elf_object_file_by_DiImage(mimg, False); ++ if (!ok) { ++ ML_(symerr)(di, True, "Invalid ELF Header"); ++ goto out; ++ } ++ ++ /* Find where the program and section header tables are, and give ++ up if either is missing or outside the image (bogus). 
*/ ++ ElfXX_Ehdr ehdr_m; ++ vg_assert(ehdr_mioff == 0); // ensured by its initialisation ++ ok = ML_(img_valid)(mimg, ehdr_mioff, sizeof(ehdr_m)); ++ vg_assert(ok); // ML_(is_elf_object_file) should ensure this ++ ML_(img_get)(&ehdr_m, mimg, ehdr_mioff, sizeof(ehdr_m)); ++ ++ shdr_mioff = ehdr_mioff + ehdr_m.e_shoff; ++ shdr_mnent = ehdr_m.e_shnum; ++ shdr_ment_szB = ehdr_m.e_shentsize; ++ ++ if (shdr_mnent == 0 ++ || !ML_(img_valid)(mimg, shdr_mioff, shdr_mnent * shdr_ment_szB)) { ++ ML_(symerr)(di, True, "Missing or invalid ELF Section Header Table"); ++ goto out; ++ } ++ ++ /* Also find the section header's string table, and validate. */ ++ /* checked previously by is_elf_object_file: */ ++ vg_assert(ehdr_m.e_shstrndx != SHN_UNDEF); ++ ++ // shdr_mioff is the offset of the section header table ++ // and we need the ehdr_m.e_shstrndx'th entry ++ { ElfXX_Shdr a_shdr; ++ ML_(img_get)(&a_shdr, mimg, ++ INDEX_BIS(shdr_mioff, ehdr_m.e_shstrndx, shdr_ment_szB), ++ sizeof(a_shdr)); ++ shdr_strtab_mioff ++ = ehdr_mioff /* isn't this always zero? */ + a_shdr.sh_offset; ++ ++ if (!ML_(img_valid)(mimg, shdr_strtab_mioff, ++ 1/*bogus, but we don't know the real size*/ )) { ++ ML_(symerr)(di, True, "Invalid ELF Section Header String Table"); ++ goto out; ++ } ++ } ++ ++ TRACE_SYMTAB("\n"); ++ TRACE_SYMTAB("------ Finding image addresses " ++ "for debug-info sections ------\n"); ++ /* TOPLEVEL */ ++ /* Find interesting sections, read the symbol table(s), read any ++ debug information. Each section is located either in the main, ++ debug or alt-debug files, but only in one. For each section, ++ |section_escn| records which of |mimg|, |dimg| or |aimg| we ++ found it in, along with the section's image offset and its size. ++ The triples (section_img, section_ioff, section_szB) are ++ consistent, in that they are always either (NULL, ++ DiOffT_INVALID, 0), or refer to the same image, and are all ++ assigned together. 
*/ ++ ++ { ++ /* TOPLEVEL */ ++ DiSlice strtab_escn = DiSlice_INVALID; // .strtab ++ DiSlice symtab_escn = DiSlice_INVALID; // .symtab ++ DiSlice debuglink_escn = DiSlice_INVALID; // .gnu_debuglink ++ DiSlice debugaltlink_escn = DiSlice_INVALID; // .gnu_debugaltlink ++ DiSlice debug_line_escn = DiSlice_INVALID; // .debug_line (dwarf2) ++ DiSlice debug_info_escn = DiSlice_INVALID; // .debug_info (dwarf2) ++ DiSlice debug_types_escn = DiSlice_INVALID; // .debug_types (dwarf4) ++ DiSlice debug_abbv_escn = DiSlice_INVALID; // .debug_abbrev (dwarf2) ++ DiSlice debug_str_escn = DiSlice_INVALID; // .debug_str (dwarf2) ++ DiSlice debug_line_str_escn = DiSlice_INVALID; // .debug_line_str(dwarf5) ++ DiSlice debug_ranges_escn = DiSlice_INVALID; // .debug_ranges (dwarf2) ++ DiSlice debug_rnglists_escn = DiSlice_INVALID; // .debug_rnglists(dwarf5) ++ DiSlice debug_loclists_escn = DiSlice_INVALID; // .debug_loclists(dwarf5) ++ DiSlice debug_addr_escn = DiSlice_INVALID; // .debug_addr (dwarf5) ++ DiSlice debug_str_offsets_escn = DiSlice_INVALID; // .debug_str_offsets (dwarf5) ++ DiSlice debug_loc_escn = DiSlice_INVALID; // .debug_loc (dwarf2) ++ DiSlice debug_frame_escn = DiSlice_INVALID; // .debug_frame (dwarf2) ++ DiSlice debug_line_alt_escn = DiSlice_INVALID; // .debug_line (alt) ++ DiSlice debug_info_alt_escn = DiSlice_INVALID; // .debug_info (alt) ++ DiSlice debug_abbv_alt_escn = DiSlice_INVALID; // .debug_abbrev (alt) ++ DiSlice debug_str_alt_escn = DiSlice_INVALID; // .debug_str (alt) ++ DiSlice dwarf1d_escn = DiSlice_INVALID; // .debug (dwarf1) ++ DiSlice dwarf1l_escn = DiSlice_INVALID; // .line (dwarf1) ++ DiSlice opd_escn = DiSlice_INVALID; // .opd (dwarf2, ++ // ppc64be-linux) ++ ++ /* TOPLEVEL */ ++ /* Iterate over section headers (again) */ ++ for (i = 0; i < ehdr_m.e_shnum; i++) { ++ ++ /* NAME ElfSec */ ++ FIND_MIMG( ".symtab", symtab_escn) ++ FIND_MIMG( ".strtab", strtab_escn) ++ FIND_MIMG( ".gnu_debuglink", debuglink_escn) ++ FIND_MIMG( ".gnu_debugaltlink", 
debugaltlink_escn) ++ ++ FIND_MIMG( ".debug_line", debug_line_escn) + if (!ML_(sli_is_valid)(debug_line_escn)) +- FIND(".zdebug_line", debug_line_escn) ++ FIND_MIMG(".zdebug_line", debug_line_escn) + +- FIND( ".debug_info", debug_info_escn) ++ FIND_MIMG( ".debug_info", debug_info_escn) + if (!ML_(sli_is_valid)(debug_info_escn)) +- FIND(".zdebug_info", debug_info_escn) ++ FIND_MIMG(".zdebug_info", debug_info_escn) + +- FIND( ".debug_types", debug_types_escn) ++ FIND_MIMG( ".debug_types", debug_types_escn) + if (!ML_(sli_is_valid)(debug_types_escn)) +- FIND(".zdebug_types", debug_types_escn) ++ FIND_MIMG(".zdebug_types", debug_types_escn) + +- FIND( ".debug_abbrev", debug_abbv_escn) ++ FIND_MIMG( ".debug_abbrev", debug_abbv_escn) + if (!ML_(sli_is_valid)(debug_abbv_escn)) +- FIND(".zdebug_abbrev", debug_abbv_escn) ++ FIND_MIMG(".zdebug_abbrev", debug_abbv_escn) + +- FIND( ".debug_str", debug_str_escn) ++ FIND_MIMG( ".debug_str", debug_str_escn) + if (!ML_(sli_is_valid)(debug_str_escn)) +- FIND(".zdebug_str", debug_str_escn) ++ FIND_MIMG(".zdebug_str", debug_str_escn) + +- FIND( ".debug_line_str", debug_line_str_escn) ++ FIND_MIMG( ".debug_line_str", debug_line_str_escn) + if (!ML_(sli_is_valid)(debug_line_str_escn)) +- FIND(".zdebug_str", debug_line_str_escn) ++ FIND_MIMG(".zdebug_str", debug_line_str_escn) + +- FIND( ".debug_ranges", debug_ranges_escn) ++ FIND_MIMG( ".debug_ranges", debug_ranges_escn) + if (!ML_(sli_is_valid)(debug_ranges_escn)) +- FIND(".zdebug_ranges", debug_ranges_escn) ++ FIND_MIMG(".zdebug_ranges", debug_ranges_escn) + +- FIND( ".debug_rnglists", debug_rnglists_escn) ++ FIND_MIMG( ".debug_rnglists", debug_rnglists_escn) + if (!ML_(sli_is_valid)(debug_rnglists_escn)) +- FIND(".zdebug_rnglists", debug_rnglists_escn) ++ FIND_MIMG(".zdebug_rnglists", debug_rnglists_escn) + +- FIND( ".debug_loclists", debug_loclists_escn) ++ FIND_MIMG( ".debug_loclists", debug_loclists_escn) + if (!ML_(sli_is_valid)(debug_loclists_escn)) +- FIND(".zdebug_loclists", 
debug_loclists_escn) ++ FIND_MIMG(".zdebug_loclists", debug_loclists_escn) + +- FIND( ".debug_loc", debug_loc_escn) ++ FIND_MIMG( ".debug_loc", debug_loc_escn) + if (!ML_(sli_is_valid)(debug_loc_escn)) +- FIND(".zdebug_loc", debug_loc_escn) ++ FIND_MIMG(".zdebug_loc", debug_loc_escn) + +- FIND( ".debug_frame", debug_frame_escn) ++ FIND_MIMG( ".debug_frame", debug_frame_escn) + if (!ML_(sli_is_valid)(debug_frame_escn)) +- FIND(".zdebug_frame", debug_frame_escn) ++ FIND_MIMG(".zdebug_frame", debug_frame_escn) + +- FIND( ".debug_addr", debug_addr_escn) ++ FIND_MIMG( ".debug_addr", debug_addr_escn) + if (!ML_(sli_is_valid)(debug_addr_escn)) +- FIND(".zdebug_addr", debug_addr_escn) ++ FIND_MIMG(".zdebug_addr", debug_addr_escn) + +- FIND( ".debug_str_offsets", debug_str_offsets_escn) ++ FIND_MIMG( ".debug_str_offsets", debug_str_offsets_escn) + if (!ML_(sli_is_valid)(debug_str_offsets_escn)) +- FIND(".zdebug_str_offsets", debug_str_offsets_escn) ++ FIND_MIMG(".zdebug_str_offsets", debug_str_offsets_escn) + +- FIND( ".debug", dwarf1d_escn) +- FIND( ".line", dwarf1l_escn) ++ FIND_MIMG( ".debug", dwarf1d_escn) ++ FIND_MIMG( ".line", dwarf1l_escn) + +- FIND( ".opd", opd_escn) ++ FIND_MIMG( ".opd", opd_escn) + +- FINDX( ".eh_frame", ehframe_escn[ehframe_mix], +- do { ehframe_mix++; vg_assert(ehframe_mix <= N_EHFRAME_SECTS); +- } while (0) +- ) + /* Comment_on_EH_FRAME_MULTIPLE_INSTANCES: w.r.t. .eh_frame + multi-instance kludgery, how are we assured that the order + in which we fill in ehframe_escn[] is consistent with the +@@ -2991,8 +3160,6 @@ Bool ML_(read_elf_debug_info) ( struct _DebugInfo* di ) + order and so fill in these arrays in a consistent order. + */ + +-# undef FINDX +-# undef FIND + } /* Iterate over section headers (again) */ + + /* TOPLEVEL */ +@@ -3465,53 +3632,23 @@ Bool ML_(read_elf_debug_info) ( struct _DebugInfo* di ) + } /* Find all interesting sections */ + } /* do we have a debug image? 
*/ + +- + /* TOPLEVEL */ +- /* Check some sizes */ +- vg_assert((dynsym_escn.szB % sizeof(ElfXX_Sym)) == 0); + vg_assert((symtab_escn.szB % sizeof(ElfXX_Sym)) == 0); +-# if defined(VGO_solaris) +- vg_assert((ldynsym_escn.szB % sizeof(ElfXX_Sym)) == 0); +-# endif + + /* TOPLEVEL */ + /* Read symbols */ + { + void (*read_elf_symtab)(struct _DebugInfo*, const HChar*, + DiSlice*, DiSlice*, DiSlice*, Bool); +- Bool symtab_in_debug; + # if defined(VGP_ppc64be_linux) + read_elf_symtab = read_elf_symtab__ppc64be_linux; + # else + read_elf_symtab = read_elf_symtab__normal; + # endif +- symtab_in_debug = symtab_escn.img == dimg; +- read_elf_symtab(di, "symbol table", +- &symtab_escn, &strtab_escn, &opd_escn, +- symtab_in_debug); +- read_elf_symtab(di, "dynamic symbol table", +- &dynsym_escn, &dynstr_escn, &opd_escn, +- False); +-# if defined(VGO_solaris) +- read_elf_symtab(di, "local dynamic symbol table", +- &ldynsym_escn, &dynstr_escn, &opd_escn, +- False); +-# endif +- } +- +- /* TOPLEVEL */ +- /* Read .eh_frame and .debug_frame (call-frame-info) if any. Do +- the .eh_frame section(s) first. */ +- vg_assert(di->n_ehframe >= 0 && di->n_ehframe <= N_EHFRAME_SECTS); +- for (i = 0; i < di->n_ehframe; i++) { +- /* see Comment_on_EH_FRAME_MULTIPLE_INSTANCES above for why +- this next assertion should hold. 
*/ +- vg_assert(ML_(sli_is_valid)(ehframe_escn[i])); +- vg_assert(ehframe_escn[i].szB == di->ehframe_size[i]); +- ML_(read_callframe_info_dwarf3)( di, +- ehframe_escn[i], +- di->ehframe_avma[i], +- True/*is_ehframe*/ ); ++ if (symtab_escn.img != NULL) ++ read_elf_symtab(di, "symbol table", ++ &symtab_escn, &strtab_escn, &opd_escn, ++ True); + } + if (ML_(sli_is_valid)(debug_frame_escn)) { + ML_(read_callframe_info_dwarf3)( di, +@@ -3643,8 +3780,6 @@ Bool ML_(read_elf_debug_info) ( struct _DebugInfo* di ) + if (dimg) ML_(img_done)(dimg); + if (aimg) ML_(img_done)(aimg); + +- if (svma_ranges) VG_(deleteXA)(svma_ranges); +- + return res; + } /* out: */ + +diff --git a/coregrind/m_debuginfo/storage.c b/coregrind/m_debuginfo/storage.c +index c3fa62e96..3ad114607 100644 +--- a/coregrind/m_debuginfo/storage.c ++++ b/coregrind/m_debuginfo/storage.c +@@ -1297,8 +1297,7 @@ void ML_(addVar)( struct _DebugInfo* di, + that those extra sections have the same bias as .text, but that + seems a reasonable assumption to me. */ + /* This is assured us by top level steering logic in debuginfo.c, +- and it is re-checked at the start of +- ML_(read_elf_debug_info). */ ++ and it is re-checked at the start of ML_(read_elf_object). */ + vg_assert(di->fsm.have_rx_map && di->fsm.rw_map_count); + if (level > 0 && ML_(find_rx_mapping)(di, aMin, aMax) == NULL) { + if (VG_(clo_verbosity) > 1) { +@@ -1725,7 +1724,6 @@ static void canonicaliseSymtab ( struct _DebugInfo* di ) + for (i = 0; i < di->symtab_used; i++) { + DiSym* sym = &di->symtab[i]; + vg_assert(sym->pri_name); +- vg_assert(!sym->sec_names); + } + + /* Sort by address. 
*/ +@@ -2383,6 +2381,9 @@ void ML_(finish_CFSI_arrays) ( struct _DebugInfo* di ) + vg_assert (f_holes == n_holes); + vg_assert (pos == new_used); + ++ if (di->deferred) ++ return; ++ + di->cfsi_used = new_used; + di->cfsi_size = new_used; + ML_(dinfo_free) (di->cfsi_rd); +@@ -2398,9 +2399,13 @@ void ML_(canonicaliseTables) ( struct _DebugInfo* di ) + canonicaliseLoctab ( di ); + canonicaliseInltab ( di ); + ML_(canonicaliseCFI) ( di ); ++ canonicaliseVarInfo ( di ); ++ ++ if (di->deferred) ++ return; ++ + if (di->cfsi_m_pool) + VG_(freezeDedupPA) (di->cfsi_m_pool, ML_(dinfo_shrink_block)); +- canonicaliseVarInfo ( di ); + if (di->strpool) + VG_(freezeDedupPA) (di->strpool, ML_(dinfo_shrink_block)); + if (di->fndnpool) +diff --git a/coregrind/pub_core_debuginfo.h b/coregrind/pub_core_debuginfo.h +index 938ed00cc..ce7246217 100644 +--- a/coregrind/pub_core_debuginfo.h ++++ b/coregrind/pub_core_debuginfo.h +@@ -76,6 +76,10 @@ extern void VG_(di_notify_pdb_debuginfo)( Int fd, Addr avma, + extern void VG_(di_notify_vm_protect)( Addr a, SizeT len, UInt prot ); + #endif + ++extern void VG_(addr_load_di)( Addr a ); ++ ++extern void VG_(load_di)( DebugInfo *di, Addr a ); ++ + extern void VG_(di_discard_ALL_debuginfo)( void ); + + /* Like VG_(get_fnname), but it does not do C++ demangling nor Z-demangling +-- +2.41.0 + diff --git a/valgrind.spec b/valgrind.spec index c9b719f..d630003 100644 --- a/valgrind.spec +++ b/valgrind.spec @@ -3,7 +3,7 @@ Summary: Dynamic analysis tools to detect memory or thread bugs and profile Name: %{?scl_prefix}valgrind Version: 3.21.0 -Release: 9%{?dist} +Release: 10%{?dist} Epoch: 1 License: GPLv2+ URL: https://www.valgrind.org/ @@ -116,6 +116,12 @@ Patch12: valgrind-3.21.0-pgste.patch # https://bugs.kde.org/show_bug.cgi?id=471311 Patch13: valgrind-3.21.0-gdb-multi-mode-stdout-redirecting-to-stderr.patch +# Add support for lazy reading and downloading of DWARF debuginfo +# https://bugs.kde.org/show_bug.cgi?id=471807 +# Plus fixup commit 
a0d555a0dfe078ef04ea49d991a8090ab14bd4a5 +Patch14: valgrind-3.21.0-lazy-debuginfo.patch +Patch15: valgrind-3.21.0-cleanup-read_elf_object.patch + BuildRequires: make BuildRequires: glibc-devel @@ -262,6 +268,8 @@ Valgrind User Manual for details. %patch -P11 -p1 %patch -P12 -p1 %patch -P13 -p1 +%patch -P14 -p1 +%patch -P15 -p1 %build @@ -495,6 +503,10 @@ fi %endif %changelog +* Mon Aug 21 2023 Mark Wielaard - 3.21.0-10 +- Add valgrind-3.21.0-lazy-debuginfo.patch +- Add valgrind-3.21.0-cleanup-read_elf_object.patch + * Thu Aug 17 2023 Mark Wielaard - 3.21.0-9 - Add valgrind-3.21.0-gdb-multi-mode-stdout-redirecting-to-stderr.patch - Use %%patch -Pn instead of deprecated %%patchn