Enable working with more than 256 CPUs
Resolves: https://issues.redhat.com/browse/RHEL-106908
Signed-off-by: Pingfan Liu <piliu@redhat.com>
224 lines
5.7 KiB
Diff
224 lines
5.7 KiB
Diff
From 00128bb30fb1b88bf2c7421b2a5a759dcffacb32 Mon Sep 17 00:00:00 2001
|
|
From: Sandipan Das <sandipan.das@amd.com>
|
|
Date: Mon, 30 Jun 2025 18:42:33 +0530
|
|
Subject: [PATCH 5/6] common: Resolve max count of CPUs per node at runtime
|
|
|
|
Replace statically defined NCPUS_NODE_MAX with the previously introduced
|
|
ncpus_max. Technically, ncpus_max denotes the maximum possible number of
|
|
CPUs in a system, but it can also serve as the maximum possible number of
|
|
CPUs per NUMA node for the following reasons:
|
|
* CPUs may not be uniformly distributed across NUMA nodes.
|
|
* Some NUMA nodes may not have any CPUs associated with them.
|
|
|
|
Signed-off-by: Sandipan Das <sandipan.das@amd.com>
|
|
---
|
|
common/include/os/node.h | 2 +-
|
|
common/include/types.h | 1 -
|
|
common/os/node.c | 72 ++++++++++++++++++++++++++++------------
|
|
common/os/os_win.c | 2 +-
|
|
4 files changed, 53 insertions(+), 24 deletions(-)
|
|
|
|
diff --git a/common/include/os/node.h b/common/include/os/node.h
|
|
index 2c21556..0b3c362 100644
|
|
--- a/common/include/os/node.h
|
|
+++ b/common/include/os/node.h
|
|
@@ -90,7 +90,7 @@ typedef struct _node_imc {
|
|
typedef struct _node {
|
|
int nid;
|
|
int ncpus;
|
|
- perf_cpu_t cpus[NCPUS_NODE_MAX];
|
|
+ perf_cpu_t *cpus;
|
|
count_value_t countval;
|
|
node_meminfo_t meminfo;
|
|
node_qpi_t qpi;
|
|
diff --git a/common/include/types.h b/common/include/types.h
|
|
index eb64fb1..1d1545d 100644
|
|
--- a/common/include/types.h
|
|
+++ b/common/include/types.h
|
|
@@ -115,7 +115,6 @@ typedef enum {
|
|
|
|
#define UI_COUNT_NUM 5
|
|
|
|
-#define NCPUS_NODE_MAX 256
|
|
#define NPROCS_NAX 4096
|
|
#define LL_THRESH 128
|
|
#define LL_PERIOD 1000
|
|
diff --git a/common/os/node.c b/common/os/node.c
|
|
index f79bcdf..cb8c38a 100644
|
|
--- a/common/os/node.c
|
|
+++ b/common/os/node.c
|
|
@@ -50,20 +50,34 @@ int g_ncpus;
|
|
int nnodes_max;
|
|
int ncpus_max;
|
|
|
|
-static void
|
|
+static int
|
|
node_init(node_t *node, int nid, boolean_t hotadd)
|
|
{
|
|
memset(node, 0, sizeof (node_t));
|
|
- os_perf_cpuarr_init(node->cpus, NCPUS_NODE_MAX, hotadd);
|
|
node->nid = nid;
|
|
node->hotadd = hotadd;
|
|
+ if (!NODE_VALID(node)) {
|
|
+ return 0;
|
|
+ }
|
|
+
|
|
+ if ((node->cpus = zalloc(ncpus_max * sizeof(perf_cpu_t))) == NULL) {
|
|
+ return (-1);
|
|
+ }
|
|
+
|
|
+ os_perf_cpuarr_init(node->cpus, ncpus_max, hotadd);
|
|
+ return 0;
|
|
}
|
|
|
|
static void
|
|
node_fini(node_t *node)
|
|
{
|
|
- os_perf_cpuarr_fini(node->cpus, NCPUS_NODE_MAX, B_FALSE);
|
|
+ if (!NODE_VALID(node)) {
|
|
+ return;
|
|
+ }
|
|
+
|
|
+ os_perf_cpuarr_fini(node->cpus, ncpus_max, B_FALSE);
|
|
node->ncpus = 0;
|
|
+ free(node->cpus);
|
|
node->nid = INVALID_NID;
|
|
}
|
|
|
|
@@ -71,7 +85,7 @@ static void
|
|
node_hotremove(node_t *node)
|
|
{
|
|
node->hotremove = B_TRUE;
|
|
- os_perf_cpuarr_fini(node->cpus, NCPUS_NODE_MAX, B_TRUE);
|
|
+ os_perf_cpuarr_fini(node->cpus, ncpus_max, B_TRUE);
|
|
}
|
|
|
|
/*
|
|
@@ -101,10 +115,20 @@ node_group_init(void)
|
|
s_node_group.inited = B_TRUE;
|
|
for (i = 0; i < nnodes_max; i++) {
|
|
node = node_get(i);
|
|
- node_init(node, INVALID_NID, B_FALSE);
|
|
+ if (node_init(node, INVALID_NID, B_FALSE)) {
|
|
+ goto L_EXIT;
|
|
+ }
|
|
}
|
|
|
|
return (node_group_refresh(B_TRUE));
|
|
+
|
|
+L_EXIT:
|
|
+ for (i = i - 1; i >= 0; i--) {
|
|
+ node = node_get(i);
|
|
+ node_fini(node);
|
|
+ }
|
|
+
|
|
+ return (-1);
|
|
}
|
|
|
|
/*
|
|
@@ -172,23 +196,27 @@ cpuid_max_get(int *cpu_arr, int num)
|
|
static int
|
|
cpu_refresh(boolean_t init)
|
|
{
|
|
- int i, j, num, cpuid_max = -1;
|
|
- int cpu_arr[NCPUS_NODE_MAX];
|
|
+ int i, j, num, cpuid_max = -1, ret = -1;
|
|
+ int *cpu_arr;
|
|
node_t *node;
|
|
|
|
+ if ((cpu_arr = zalloc(ncpus_max * sizeof(int))) == NULL) {
|
|
+ return (-1);
|
|
+ }
|
|
+
|
|
for (i = 0; i < nnodes_max; i++) {
|
|
node = node_get(i);
|
|
if (NODE_VALID(node)) {
|
|
- if (!os_sysfs_cpu_enum(node->nid, cpu_arr, NCPUS_NODE_MAX, &num)) {
|
|
- return (-1);
|
|
+ if (!os_sysfs_cpu_enum(node->nid, cpu_arr, ncpus_max, &num)) {
|
|
+ goto L_EXIT;
|
|
}
|
|
- if (num < 0 || num > NCPUS_NODE_MAX) {
|
|
- return (-1);
|
|
+ if (num < 0 || num > ncpus_max) {
|
|
+ goto L_EXIT;
|
|
}
|
|
|
|
- if (os_perf_cpuarr_refresh(node->cpus, NCPUS_NODE_MAX, cpu_arr,
|
|
+ if (os_perf_cpuarr_refresh(node->cpus, ncpus_max, cpu_arr,
|
|
num, init) != 0) {
|
|
- return (-1);
|
|
+ goto L_EXIT;
|
|
}
|
|
|
|
node->ncpus = num;
|
|
@@ -205,7 +233,11 @@ cpu_refresh(boolean_t init)
|
|
|
|
/* Refresh the number of online CPUs */
|
|
g_ncpus = os_sysfs_online_ncpus();
|
|
- return (0);
|
|
+ ret = 0;
|
|
+
|
|
+L_EXIT:
|
|
+ free(cpu_arr);
|
|
+ return (ret);
|
|
}
|
|
|
|
static int
|
|
@@ -268,10 +300,8 @@ node_group_refresh(boolean_t init)
|
|
if (!NODE_VALID(node)) {
|
|
if ((j = nid_find(i, node_arr, num)) >= 0) {
|
|
ASSERT(node_arr[j] == i);
|
|
- if (init) {
|
|
- node_init(node, i, B_FALSE);
|
|
- } else {
|
|
- node_init(node, i, B_TRUE);
|
|
+ if (node_init(node, i, init ? B_FALSE : B_TRUE)) {
|
|
+ goto L_EXIT;
|
|
}
|
|
|
|
s_node_group.nnodes++;
|
|
@@ -339,7 +369,7 @@ node_by_cpu(int cpuid)
|
|
continue;
|
|
}
|
|
|
|
- for (j = 0; j < NCPUS_NODE_MAX; j++) {
|
|
+ for (j = 0; j < ncpus_max; j++) {
|
|
if (cpuid == node->cpus[j].cpuid) {
|
|
return (node);
|
|
}
|
|
@@ -412,7 +442,7 @@ node_cpu_traverse(pfn_perf_cpu_op_t func, void *arg, boolean_t err_ret,
|
|
continue;
|
|
}
|
|
|
|
- for (j = 0; j < NCPUS_NODE_MAX; j++) {
|
|
+ for (j = 0; j < ncpus_max; j++) {
|
|
cpu = &node->cpus[j];
|
|
if (cpu->hotremove) {
|
|
pf_resource_free(cpu);
|
|
@@ -455,7 +485,7 @@ countval_sum(count_value_t *countval_arr, int nid,
|
|
return (0);
|
|
}
|
|
|
|
- for (i = 0; i < NCPUS_NODE_MAX; i++) {
|
|
+ for (i = 0; i < ncpus_max; i++) {
|
|
if (num >= node->ncpus) {
|
|
break;
|
|
}
|
|
diff --git a/common/os/os_win.c b/common/os/os_win.c
|
|
index 9aaefae..bd34388 100644
|
|
--- a/common/os/os_win.c
|
|
+++ b/common/os/os_win.c
|
|
@@ -152,7 +152,7 @@ node_cpu_string(node_t *node, char *s1, int size)
|
|
}
|
|
|
|
j = 0;
|
|
- for (i = 0; (i < NCPUS_NODE_MAX) && (j < ncpus); i++) {
|
|
+ for (i = 0; (i < ncpus_max) && (j < ncpus); i++) {
|
|
if ((cpus[i].cpuid != INVALID_CPUID) && (!cpus[i].hotremove)) {
|
|
cpuid_arr[j++] = cpus[i].cpuid;
|
|
}
|
|
--
|
|
2.49.0
|
|
|