227 lines
8.8 KiB
Diff
227 lines
8.8 KiB
Diff
|
From 0000000000000000000000000000000000000000 Mon Sep 17 00:00:00 2001
|
||
|
From: Stefan Berger <stefanb@linux.ibm.com>
|
||
|
Date: Tue, 25 Jul 2023 13:23:10 -0400
|
||
|
Subject: [PATCH] kern/ieee1275/init: ppc64: Restrict high memory in presence
|
||
|
of fadump
|
||
|
|
||
|
When a kernel dump is present then restrict the high memory regions to
|
||
|
avoid allocating memory where the kernel dump resides. Use the
|
||
|
ibm,kernel-dump node under /rtas to determine whether a kernel dump exists
|
||
|
and up to which limit grub can use available memory. Set the
|
||
|
upper_mem_limit to the size of the kernel dump section of type
|
||
|
'REAL_MODE_REGION' and therefore only allow grub's memory usage for high
|
||
|
addresses from RMO_ADDR_MAX to 'upper_mem_limit'. This means that grub can
|
||
|
use high memory in the range of RMO_ADDR_MAX (768MB) to upper_mem_limit and
|
||
|
the kernel-dump memory regions above 'upper_mem_limit' remain untouched.
|
||
|
This change has no effect on memory allocations below 'linux_rmo_save'
|
||
|
(typically at 640MB).
|
||
|
|
||
|
Also, fall back to allocating below rmo_linux_save in case the chunk of
|
||
|
memory there would be larger than the chunk of memory above RMO_ADDR_MAX.
|
||
|
This can for example occur if a free memory area is found starting at 300MB
|
||
|
extending up to 1GB but a kernel dump is located at 768MB and therefore
|
||
|
does not allow the allocation of the high memory area but requiring to use
|
||
|
the chunk starting at 300MB to avoid an unnecessary out-of-memory
|
||
|
condition.
|
||
|
|
||
|
Signed-off-by: Stefan Berger <stefanb@linux.ibm.com>
|
||
|
Reviewed-by: Hari Bathini <hbathini@linux.ibm.com>
|
||
|
Cc: Pavithra Prakash <pavrampu@in.ibm.com>
|
||
|
Cc: Michael Ellerman <mpe@ellerman.id.au>
|
||
|
Cc: Carolyn Scherrer <cpscherr@us.ibm.com>
|
||
|
Cc: Mahesh Salgaonkar <mahesh@linux.ibm.com>
|
||
|
Cc: Sourabh Jain <sourabhjain@linux.ibm.com>
|
||
|
---
|
||
|
grub-core/kern/ieee1275/init.c | 144 ++++++++++++++++++++++++++++++++++++++++-
|
||
|
1 file changed, 142 insertions(+), 2 deletions(-)
|
||
|
|
||
|
diff --git a/grub-core/kern/ieee1275/init.c b/grub-core/kern/ieee1275/init.c
|
||
|
index 3d4ad9d1f162..8e7f742fad46 100644
|
||
|
--- a/grub-core/kern/ieee1275/init.c
|
||
|
+++ b/grub-core/kern/ieee1275/init.c
|
||
|
@@ -17,6 +17,8 @@
|
||
|
* along with GRUB. If not, see <http://www.gnu.org/licenses/>.
|
||
|
*/
|
||
|
|
||
|
+#include <stddef.h> /* offsetof() */
|
||
|
+
|
||
|
#include <grub/kernel.h>
|
||
|
#include <grub/dl.h>
|
||
|
#include <grub/disk.h>
|
||
|
@@ -198,6 +200,96 @@ grub_claim_heap (void)
|
||
|
#else
|
||
|
/* Helpers for mm on powerpc. */
|
||
|
|
||
|
+/* ibm,kernel-dump data structures */
|
||
|
+struct kd_section
|
||
|
+{
|
||
|
+ grub_uint32_t flags;
|
||
|
+ grub_uint16_t src_datatype;
|
||
|
+#define KD_SRC_DATATYPE_REAL_MODE_REGION 0x0011
|
||
|
+ grub_uint16_t error_flags;
|
||
|
+ grub_uint64_t src_address;
|
||
|
+ grub_uint64_t num_bytes;
|
||
|
+ grub_uint64_t act_bytes;
|
||
|
+ grub_uint64_t dst_address;
|
||
|
+} GRUB_PACKED;
|
||
|
+
|
||
|
+#define MAX_KD_SECTIONS 10
|
||
|
+
|
||
|
+struct kernel_dump
|
||
|
+{
|
||
|
+ grub_uint32_t format;
|
||
|
+ grub_uint16_t num_sections;
|
||
|
+ grub_uint16_t status_flags;
|
||
|
+ grub_uint32_t offset_1st_section;
|
||
|
+ grub_uint32_t num_blocks;
|
||
|
+ grub_uint64_t start_block;
|
||
|
+ grub_uint64_t num_blocks_avail;
|
||
|
+ grub_uint32_t offet_path_string;
|
||
|
+ grub_uint32_t max_time_allowed;
|
||
|
+ struct kd_section kds[MAX_KD_SECTIONS]; /* offset_1st_section should point to kds[0] */
|
||
|
+} GRUB_PACKED;
|
||
|
+
|
||
|
+/*
|
||
|
+ * Determine if a kernel dump exists and if it does, then determine the highest
|
||
|
+ * address that grub can use for memory allocations.
|
||
|
+ * The caller must have initialized *highest to rmo_top. *highest will not
|
||
|
+ * be modified if no kernel dump is found.
|
||
|
+ */
|
||
|
+static void
|
||
|
+check_kernel_dump (grub_uint64_t *highest)
|
||
|
+{
|
||
|
+ struct kernel_dump kernel_dump;
|
||
|
+ grub_ssize_t kernel_dump_size;
|
||
|
+ grub_ieee1275_phandle_t rtas;
|
||
|
+ struct kd_section *kds;
|
||
|
+ grub_size_t i;
|
||
|
+
|
||
|
+ /* If there's a kernel-dump it must have at least one section */
|
||
|
+ if (grub_ieee1275_finddevice ("/rtas", &rtas) ||
|
||
|
+ grub_ieee1275_get_property (rtas, "ibm,kernel-dump", &kernel_dump,
|
||
|
+ sizeof (kernel_dump), &kernel_dump_size) ||
|
||
|
+ kernel_dump_size <= (grub_ssize_t) offsetof (struct kernel_dump, kds[1]))
|
||
|
+ return;
|
||
|
+
|
||
|
+ kernel_dump_size = grub_min (kernel_dump_size, (grub_ssize_t) sizeof (kernel_dump));
|
||
|
+
|
||
|
+ if (grub_be_to_cpu32 (kernel_dump.format) != 1)
|
||
|
+ {
|
||
|
+ grub_printf (_("Error: ibm,kernel-dump has an unexpected format version '%u'\n"),
|
||
|
+ grub_be_to_cpu32 (kernel_dump.format));
|
||
|
+ return;
|
||
|
+ }
|
||
|
+
|
||
|
+ if (grub_be_to_cpu16 (kernel_dump.num_sections) > MAX_KD_SECTIONS)
|
||
|
+ {
|
||
|
+ grub_printf (_("Error: Too many kernel dump sections: %d\n"),
|
||
|
+ grub_be_to_cpu32 (kernel_dump.num_sections));
|
||
|
+ return;
|
||
|
+ }
|
||
|
+
|
||
|
+ for (i = 0; i < grub_be_to_cpu16 (kernel_dump.num_sections); i++)
|
||
|
+ {
|
||
|
+ kds = (struct kd_section *) ((grub_addr_t) &kernel_dump +
|
||
|
+ grub_be_to_cpu32 (kernel_dump.offset_1st_section) +
|
||
|
+ i * sizeof (struct kd_section));
|
||
|
+ /* sanity check the address is within the 'kernel_dump' struct */
|
||
|
+ if ((grub_addr_t) kds > (grub_addr_t) &kernel_dump + kernel_dump_size + sizeof (*kds))
|
||
|
+ {
|
||
|
+ grub_printf (_("Error: 'kds' address beyond last available section\n"));
|
||
|
+ return;
|
||
|
+ }
|
||
|
+
|
||
|
+ if ((grub_be_to_cpu16 (kds->src_datatype) == KD_SRC_DATATYPE_REAL_MODE_REGION) &&
|
||
|
+ (grub_be_to_cpu64 (kds->src_address) == 0))
|
||
|
+ {
|
||
|
+ *highest = grub_min (*highest, grub_be_to_cpu64 (kds->num_bytes));
|
||
|
+ break;
|
||
|
+ }
|
||
|
+ }
|
||
|
+
|
||
|
+ return;
|
||
|
+}
|
||
|
+
|
||
|
/*
|
||
|
* How much memory does OF believe exists in total?
|
||
|
*
|
||
|
@@ -277,10 +369,31 @@ regions_claim (grub_uint64_t addr, grub_uint64_t len, grub_memory_type_t type,
|
||
|
*
|
||
|
* Finally, we also want to make sure that when grub loads the kernel,
|
||
|
* it isn't going to use up all the memory we're trying to reserve! So
|
||
|
- * enforce our entire RUNTIME_MIN_SPACE here:
|
||
|
+ * enforce our entire RUNTIME_MIN_SPACE here (no fadump):
|
||
|
+ *
|
||
|
+ * | Top of memory == upper_mem_limit -|
|
||
|
+ * | |
|
||
|
+ * | available |
|
||
|
+ * | |
|
||
|
+ * |---------- 768 MB ----------|
|
||
|
+ * | |
|
||
|
+ * | reserved |
|
||
|
+ * | |
|
||
|
+ * |--- 768 MB - runtime min space ---|
|
||
|
+ * | |
|
||
|
+ * | available |
|
||
|
+ * | |
|
||
|
+ * |---------- 0 MB ----------|
|
||
|
+ *
|
||
|
+ * In case fadump is used, we allow the following:
|
||
|
*
|
||
|
* |---------- Top of memory ----------|
|
||
|
* | |
|
||
|
+ * | unavailable |
|
||
|
+ * | (kernel dump area) |
|
||
|
+ * | |
|
||
|
+ * |--------- upper_mem_limit ---------|
|
||
|
+ * | |
|
||
|
* | available |
|
||
|
* | |
|
||
|
* |---------- 768 MB ----------|
|
||
|
@@ -335,17 +448,44 @@ regions_claim (grub_uint64_t addr, grub_uint64_t len, grub_memory_type_t type,
|
||
|
}
|
||
|
else
|
||
|
{
|
||
|
+ grub_uint64_t upper_mem_limit = rmo_top;
|
||
|
+ grub_uint64_t orig_addr = addr;
|
||
|
+
|
||
|
+ check_kernel_dump (&upper_mem_limit);
|
||
|
+
|
||
|
/*
|
||
|
* we order these cases to prefer higher addresses and avoid some
|
||
|
* splitting issues
|
||
|
+ * The following shows the order of variables:
|
||
|
+ * no kernel dump: linux_rmo_save < RMO_ADDR_MAX <= upper_mem_limit == rmo_top
|
||
|
+ * with kernel dump: liuxx_rmo_save < RMO_ADDR_MAX <= upper_mem_limit <= rmo_top
|
||
|
*/
|
||
|
- if (addr < RMO_ADDR_MAX && (addr + len) > RMO_ADDR_MAX)
|
||
|
+ if (addr < RMO_ADDR_MAX && (addr + len) > RMO_ADDR_MAX && upper_mem_limit >= RMO_ADDR_MAX)
|
||
|
{
|
||
|
grub_dprintf ("ieee1275",
|
||
|
"adjusting region for RUNTIME_MIN_SPACE: (%llx -> %llx) -> (%llx -> %llx)\n",
|
||
|
addr, addr + len, RMO_ADDR_MAX, addr + len);
|
||
|
len = (addr + len) - RMO_ADDR_MAX;
|
||
|
addr = RMO_ADDR_MAX;
|
||
|
+
|
||
|
+ /* We must not exceed the upper_mem_limit (assuming it's >= RMO_ADDR_MAX) */
|
||
|
+ if (addr + len > upper_mem_limit)
|
||
|
+ {
|
||
|
+ /* take the bigger chunk from either below linux_rmo_save or above upper_mem_limit */
|
||
|
+ len = upper_mem_limit - addr;
|
||
|
+ if (orig_addr < linux_rmo_save && linux_rmo_save - orig_addr > len)
|
||
|
+ {
|
||
|
+ /* lower part is bigger */
|
||
|
+ addr = orig_addr;
|
||
|
+ len = linux_rmo_save - addr;
|
||
|
+ }
|
||
|
+
|
||
|
+ grub_dprintf ("ieee1275", "re-adjusted region to: (%llx -> %llx)\n",
|
||
|
+ addr, addr + len);
|
||
|
+
|
||
|
+ if (len == 0)
|
||
|
+ return 0;
|
||
|
+ }
|
||
|
}
|
||
|
else if ((addr < linux_rmo_save) && ((addr + len) > linux_rmo_save))
|
||
|
{
|