diff --git a/kexec-tools-2.0.29-makedumpfile-Fix-a-data-race-in-multi-threading-mode-num.patch b/kexec-tools-2.0.29-makedumpfile-Fix-a-data-race-in-multi-threading-mode-num.patch new file mode 100644 index 0000000..9a2b900 --- /dev/null +++ b/kexec-tools-2.0.29-makedumpfile-Fix-a-data-race-in-multi-threading-mode-num.patch @@ -0,0 +1,120 @@ +From 65bf4c9ef0fd0cbf2fb99b60e15b00d984b391b8 Mon Sep 17 00:00:00 2001 +From: Tao Liu +Date: Wed, 25 Jun 2025 14:23:44 +1200 +Subject: [PATCH] [PATCH v2] Fix a data race in multi-threading mode + (--num-threads=N) + +A vmcore corruption issue has been noticed on the powerpc arch [1]. It can be +reproduced with upstream makedumpfile. + +When analyzing the corrupt vmcore using crash, the following error +messages are output: + + crash: compressed kdump: uncompress failed: 0 + crash: read error: kernel virtual address: c0001e2d2fe48000 type: + "hardirq thread_union" + crash: cannot read hardirq_ctx[930] at c0001e2d2fe48000 + crash: compressed kdump: uncompress failed: 0 + +If the vmcore is generated without the num-threads option, then no such +errors are noticed. + +With --num-threads=N enabled, there will be N sub-threads created. All +sub-threads are producers which are responsible for mm page processing, e.g. +compression. The main thread is the consumer which is responsible for +writing the compressed data into the file. page_flag_buf->ready is used to +sync the main and sub-threads. When a sub-thread finishes page processing, +it will set the ready flag to FLAG_READY. In the meantime, the main thread +repeatedly checks the ready flags of all threads, and breaks the loop when +it finds FLAG_READY. + +page_flag_buf->ready is read and written by the main/sub-threads simultaneously, +but it is unprotected and unsafe. I have tested that both mutex and atomic_rw +can fix this issue. This patch takes atomic_rw for its simplicity.
+ +[1]: https://github.com/makedumpfile/makedumpfile/issues/15 + +Resolves: https://github.com/makedumpfile/makedumpfile/issues/15 +Tested-by: Sourabh Jain +Signed-off-by: Tao Liu +--- + makedumpfile.c | 21 ++++++++++++++------- + 1 file changed, 14 insertions(+), 7 deletions(-) + +diff --git a/makedumpfile.c b/makedumpfile.c +index 4e087ee..12fb0d8 100644 +--- a/makedumpfile-1.7.6/makedumpfile.c ++++ b/makedumpfile-1.7.6/makedumpfile.c +@@ -8635,7 +8635,8 @@ kdump_thread_function_cyclic(void *arg) { + + while (buf_ready == FALSE) { + pthread_testcancel(); +- if (page_flag_buf->ready == FLAG_READY) ++ if (__atomic_load_n(&page_flag_buf->ready, ++ __ATOMIC_SEQ_CST) == FLAG_READY) + continue; + + /* get next dumpable pfn */ +@@ -8651,7 +8652,8 @@ kdump_thread_function_cyclic(void *arg) { + info->current_pfn = pfn + 1; + + page_flag_buf->pfn = pfn; +- page_flag_buf->ready = FLAG_FILLING; ++ __atomic_store_n(&page_flag_buf->ready, FLAG_FILLING, ++ __ATOMIC_SEQ_CST); + pthread_mutex_unlock(&info->current_pfn_mutex); + sem_post(&info->page_flag_buf_sem); + +@@ -8740,7 +8742,8 @@ kdump_thread_function_cyclic(void *arg) { + page_flag_buf->index = index; + buf_ready = TRUE; + next: +- page_flag_buf->ready = FLAG_READY; ++ __atomic_store_n(&page_flag_buf->ready, FLAG_READY, ++ __ATOMIC_SEQ_CST); + page_flag_buf = page_flag_buf->next; + + } +@@ -8869,7 +8872,8 @@ write_kdump_pages_parallel_cyclic(struct cache_data *cd_header, + * current_pfn is used for recording the value of pfn when checking the pfn. + */ + for (i = 0; i < info->num_threads; i++) { +- if (info->page_flag_buf[i]->ready == FLAG_UNUSED) ++ if (__atomic_load_n(&info->page_flag_buf[i]->ready, ++ __ATOMIC_SEQ_CST) == FLAG_UNUSED) + continue; + temp_pfn = info->page_flag_buf[i]->pfn; + +@@ -8877,7 +8881,8 @@ write_kdump_pages_parallel_cyclic(struct cache_data *cd_header, + * count how many threads have reached the end. 
+ */ + if (temp_pfn >= end_pfn) { +- info->page_flag_buf[i]->ready = FLAG_UNUSED; ++ __atomic_store_n(&info->page_flag_buf[i]->ready, ++ FLAG_UNUSED, __ATOMIC_SEQ_CST); + end_count++; + continue; + } +@@ -8899,7 +8904,8 @@ write_kdump_pages_parallel_cyclic(struct cache_data *cd_header, + * If the page_flag_buf is not ready, the pfn recorded may be changed. + * So we should recheck. + */ +- if (info->page_flag_buf[consuming]->ready != FLAG_READY) { ++ if (__atomic_load_n(&info->page_flag_buf[consuming]->ready, ++ __ATOMIC_SEQ_CST) != FLAG_READY) { + clock_gettime(CLOCK_MONOTONIC, &new); + if (new.tv_sec - last.tv_sec > WAIT_TIME) { + ERRMSG("Can't get data of pfn.\n"); +@@ -8941,7 +8947,8 @@ write_kdump_pages_parallel_cyclic(struct cache_data *cd_header, + goto out; + page_data_buf[index].used = FALSE; + } +- info->page_flag_buf[consuming]->ready = FLAG_UNUSED; ++ __atomic_store_n(&info->page_flag_buf[consuming]->ready, ++ FLAG_UNUSED, __ATOMIC_SEQ_CST); + info->page_flag_buf[consuming] = info->page_flag_buf[consuming]->next; + } + finish: +-- +2.49.0 + diff --git a/kexec-tools.spec b/kexec-tools.spec index 4d771e4..ed7f905 100644 --- a/kexec-tools.spec +++ b/kexec-tools.spec @@ -115,6 +115,7 @@ Requires: systemd-udev%{?_isa} # # Patches 601 onward are generic patches # +Patch601: kexec-tools-2.0.29-makedumpfile-Fix-a-data-race-in-multi-threading-mode-num.patch %description kexec-tools provides /sbin/kexec binary that facilitates a new @@ -130,6 +131,8 @@ mkdir -p -m755 kcp tar -z -x -v -f %{SOURCE9} tar -z -x -v -f %{SOURCE19} +%patch601 -p1 + %ifarch ppc %define archdef ARCH=ppc %endif