Resolves: RHEL-76285, Fix handling of non-contiguous CPU IDs

This commit is contained in:
Than Ngo 2025-02-01 10:56:59 +01:00
parent 7757efa247
commit 6bf4958b42
3 changed files with 918 additions and 1 deletions

View File

@ -0,0 +1,163 @@
commit 54cf30c7d274c8aab2a7ae589ab056f52dfffc62
Author: Aboorva Devarajan <aboorvad@linux.ibm.com>
Date: Sat Dec 7 21:54:44 2024 -0500
cpu_info_helpers: Add helper function to retrieve present CPU core list
Introduce get_present_core_list helper function to accurately parse
and retrieve the list of present CPU cores, addressing gaps in core
numbering caused by dynamic addition or removal of CPUs (via CPU DLPAR
operation)
Utilizes the present CPU list from `/sys/devices/system/cpu/present`
to handle non-contiguous CPU IDs. Accurately maps core IDs to CPUs
considering specified number of threads per CPU, addressing gaps in
core numbering.
Signed-off-by: Aboorva Devarajan <aboorvad@linux.ibm.com>
diff --git a/src/common/cpu_info_helpers.c b/src/common/cpu_info_helpers.c
index 8c57db8..756e792 100644
--- a/src/common/cpu_info_helpers.c
+++ b/src/common/cpu_info_helpers.c
@@ -203,6 +203,113 @@ int __get_one_smt_state(int core, int threads_per_cpu)
return smt_state;
}
+/*
+ * get_present_cpu_count - count the CPUs listed in CPU_PRESENT_PATH.
+ *
+ * The first line of the file is parsed as a comma-separated list whose
+ * entries are either a single CPU id ("5") or an inclusive range ("0-103").
+ * Entries that match neither form are ignored.
+ *
+ * Returns the number of CPUs described by the list, or -1 if the file
+ * cannot be opened or read.
+ */
+int get_present_cpu_count(void)
+{
+	int start, end, total_cpus = 0;
+	size_t len = 0;
+	char *line = NULL;
+	char *token, *saveptr = NULL;
+	FILE *fp;
+
+	fp = fopen(CPU_PRESENT_PATH, "r");
+	if (!fp) {
+		perror("Error opening CPU_PRESENT_PATH");
+		return -1;
+	}
+
+	if (getline(&line, &len, fp) == -1) {
+		perror("Error reading CPU_PRESENT_PATH");
+		fclose(fp);
+		free(line);
+		return -1;
+	}
+	fclose(fp);
+
+	/*
+	 * strtok_r keeps its position in saveptr rather than in hidden static
+	 * state, so this helper cannot disturb a tokenisation that one of its
+	 * callers has in progress.
+	 */
+	for (token = strtok_r(line, ",", &saveptr); token;
+	     token = strtok_r(NULL, ",", &saveptr)) {
+		if (sscanf(token, "%d-%d", &start, &end) == 2) {
+			/* A reversed range would subtract from the total; skip it. */
+			if (end >= start)
+				total_cpus += end - start + 1;
+		} else if (sscanf(token, "%d", &start) == 1) {
+			total_cpus++;
+		}
+	}
+
+	free(line);
+	return total_cpus;
+}
+
+/*
+ * get_present_core_list - build the list of present core ids.
+ *
+ * Parses the first line of CPU_PRESENT_PATH (comma-separated CPU ids and
+ * inclusive ranges) and records one core id, cpu / threads_per_cpu, for every
+ * listed CPU whose id is a multiple of threads_per_cpu.
+ *
+ * @present_cores:     on success, set to a heap-allocated array of core ids;
+ *                     the caller must free() it
+ * @num_present_cores: on success, set to the number of entries in the array
+ * @threads_per_cpu:   number of threads per core, must be >= 1
+ *
+ * Returns 0 on success and -1 on failure. The output arguments are left
+ * untouched on failure.
+ */
+int get_present_core_list(int **present_cores, int *num_present_cores, int threads_per_cpu)
+{
+	FILE *fp = NULL;
+	char *line = NULL;
+	char *token, *saveptr = NULL;
+	size_t len = 0;
+	int core_count = 0;
+	int cpu_count;
+	int *cores = NULL;
+	int start, end, i;
+
+	if (threads_per_cpu <= 0) {
+		fprintf(stderr, "Invalid threads_per_cpu value, got %d expected >= 1\n", threads_per_cpu);
+		return -1;
+	}
+
+	/* Also rejects the -1 error return of get_present_cpu_count(). */
+	cpu_count = get_present_cpu_count();
+	if (cpu_count / threads_per_cpu <= 0) {
+		fprintf(stderr, "Error while calculating core list size\n");
+		return -1;
+	}
+
+	/*
+	 * Each recorded core corresponds to a distinct present CPU, so
+	 * cpu_count entries is an upper bound even when the present ranges are
+	 * not aligned to threads_per_cpu (cpu_count / threads_per_cpu is not).
+	 */
+	cores = calloc(cpu_count, sizeof(*cores));
+	if (!cores) {
+		perror("Memory allocation failed");
+		goto cleanup;
+	}
+
+	fp = fopen(CPU_PRESENT_PATH, "r");
+	if (!fp) {
+		perror("Error opening file");
+		goto cleanup;
+	}
+
+	if (getline(&line, &len, fp) == -1) {
+		perror("Error reading file");
+		goto cleanup;
+	}
+
+	for (token = strtok_r(line, ",", &saveptr); token;
+	     token = strtok_r(NULL, ",", &saveptr)) {
+		if (sscanf(token, "%d-%d", &start, &end) != 2) {
+			if (sscanf(token, "%d", &start) != 1)
+				continue;
+			/* Single CPU id: treat it as a one-element range. */
+			end = start;
+		}
+
+		for (i = start; i <= end; i++) {
+			if (i % threads_per_cpu != 0)
+				continue;
+			/*
+			 * The file is read again after it was used for sizing;
+			 * if CPUs were added in between, fail rather than write
+			 * past the end of the array.
+			 */
+			if (core_count >= cpu_count) {
+				fprintf(stderr, "Present CPU list changed while reading\n");
+				goto cleanup;
+			}
+			cores[core_count++] = i / threads_per_cpu;
+		}
+	}
+
+	/* The stream must be closed on the success path as well. */
+	fclose(fp);
+	free(line);
+	*present_cores = cores;
+	*num_present_cores = core_count;
+	return 0;
+
+cleanup:
+	if (fp) {
+		fclose(fp);
+	}
+	free(line);
+	free(cores);
+	return -1;
+}
+
static void print_cpu_list(const cpu_set_t *cpuset, int cpuset_size,
int cpus_in_system)
{
diff --git a/src/common/cpu_info_helpers.h b/src/common/cpu_info_helpers.h
index c063fff..77e6ad7 100644
--- a/src/common/cpu_info_helpers.h
+++ b/src/common/cpu_info_helpers.h
@@ -24,9 +24,10 @@
#ifndef _CPU_INFO_HELPERS_H
#define _CPU_INFO_HELPERS_H
-#define SYSFS_CPUDIR "/sys/devices/system/cpu/cpu%d"
-#define SYSFS_SUBCORES "/sys/devices/system/cpu/subcores_per_core"
-#define INTSERV_PATH "/proc/device-tree/cpus/%s/ibm,ppc-interrupt-server#s"
+#define SYSFS_CPUDIR "/sys/devices/system/cpu/cpu%d"
+#define SYSFS_SUBCORES "/sys/devices/system/cpu/subcores_per_core"
+#define INTSERV_PATH "/proc/device-tree/cpus/%s/ibm,ppc-interrupt-server#s"
+#define CPU_PRESENT_PATH "/sys/devices/system/cpu/present"
#define SYSFS_PATH_MAX 128
@@ -39,6 +40,8 @@ extern int num_subcores(void);
extern int get_attribute(char *path, const char *fmt, int *value);
extern int get_cpu_info(int *threads_per_cpu, int *cpus_in_system,
int *threads_in_system);
+extern int get_present_core_list(int **present_cores, int *num_present_cores,
+ int threads_per_cpu);
extern int __is_smt_capable(int threads_in_system);
extern int __get_one_smt_state(int core, int threads_per_cpu);
extern int __do_smt(bool numeric, int cpus_in_system, int threads_per_cpu,

View File

@ -0,0 +1,749 @@
commit e5fd24a6e35c3be78c96d6887e3774852bbe4674
Author: Aboorva Devarajan <aboorvad@linux.ibm.com>
Date: Wed Jan 1 22:56:07 2025 -0500
ppc64_cpu: Fix handling of non-contiguous CPU IDs
In ppc64le environments, adding or removing CPUs dynamically through
DLPAR can create gaps in CPU IDs, such as `0-103,120-151`; in this
case, CPUs 104-119 are missing.
ppc64_cpu doesn't handle this scenario and always assumes CPU IDs
are contiguous, causing issues in core numbering, CPU info, and SMT
mode reporting.
To illustrate the issues this patch fixes, consider the following
system configuration:
$ lscpu
Architecture: ppc64le
Byte Order: Little Endian
CPU(s): 136
On-line CPU(s) list: 0-103,120-151
**Note: CPU IDs are non-contiguous**
-----------------------------------------------------------------
Before Patch:
-----------------------------------------------------------------
$ ppc64_cpu --info
Core 0: 0* 1* 2* 3* 4* 5* 6* 7*
Core 1: 8* 9* 10* 11* 12* 13* 14* 15*
Core 2: 16* 17* 18* 19* 20* 21* 22* 23*
Core 3: 24* 25* 26* 27* 28* 29* 30* 31*
Core 4: 32* 33* 34* 35* 36* 37* 38* 39*
Core 5: 40* 41* 42* 43* 44* 45* 46* 47*
Core 6: 48* 49* 50* 51* 52* 53* 54* 55*
Core 7: 56* 57* 58* 59* 60* 61* 62* 63*
Core 8: 64* 65* 66* 67* 68* 69* 70* 71*
Core 9: 72* 73* 74* 75* 76* 77* 78* 79*
Core 10: 80* 81* 82* 83* 84* 85* 86* 87*
Core 11: 88* 89* 90* 91* 92* 93* 94* 95*
Core 12: 96* 97* 98* 99* 100* 101* 102* 103*
........................................................... *gap*
Core 13: 120* 121* 122* 123* 124* 125* 126* 127*
Core 14: 128* 129* 130* 131* 132* 133* 134* 135*
Core 15: 136* 137* 138* 139* 140* 141* 142* 143*
Core 16: 144* 145* 146* 147* 148* 149* 150* 151*
**Although the CPU IDs are non-contiguous, the associated core IDs are
shown as contiguous, which makes the output hard to interpret
correctly.**
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
$ ppc64_cpu --cores-on
Number of cores online = 15
**Expected: Number of online cores = 17**
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
$ ppc64_cpu --offline-cores
Cores offline = 13, 14
**Even though no cores are actually offline, two cores (13, 14)
are displayed as offline.**
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
$ ppc64_cpu --online-cores
Cores online = 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 15, 16
**The list of online cores is missing two cores (13, 14).**
-----------------------------------------------------------------
To resolve this, use the present CPU list from sysfs to assign
numbers to CPUs and cores, so that the reported IDs match the CPUs
that are actually present.
$ cat /sys/devices/system/cpu/present
0-103,120-151
With this patch, the command output correctly reflects the
current CPU configuration, providing a more precise representation
of the system state.
-----------------------------------------------------------------
After Patch:
-----------------------------------------------------------------
$ ppc64_cpu --info
Core 0: 0* 1* 2* 3* 4* 5* 6* 7*
Core 1: 8* 9* 10* 11* 12* 13* 14* 15*
Core 2: 16* 17* 18* 19* 20* 21* 22* 23*
Core 3: 24* 25* 26* 27* 28* 29* 30* 31*
Core 4: 32* 33* 34* 35* 36* 37* 38* 39*
Core 5: 40* 41* 42* 43* 44* 45* 46* 47*
Core 6: 48* 49* 50* 51* 52* 53* 54* 55*
Core 7: 56* 57* 58* 59* 60* 61* 62* 63*
Core 8: 64* 65* 66* 67* 68* 69* 70* 71*
Core 9: 72* 73* 74* 75* 76* 77* 78* 79*
Core 10: 80* 81* 82* 83* 84* 85* 86* 87*
Core 11: 88* 89* 90* 91* 92* 93* 94* 95*
Core 12: 96* 97* 98* 99* 100* 101* 102* 103*
........................................................... *gap*
Core 15: 120* 121* 122* 123* 124* 125* 126* 127*
Core 16: 128* 129* 130* 131* 132* 133* 134* 135*
Core 17: 136* 137* 138* 139* 140* 141* 142* 143*
Core 18: 144* 145* 146* 147* 148* 149* 150* 151*
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
$ ppc64_cpu --cores-on
Number of cores online = 17
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
$ ppc64_cpu --offline-cores
Cores offline =
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
$ ppc64_cpu --online-cores
Cores online = 0,1,2,3,4,5,6,7,8,9,10,11,12,15,16,17,18
-----------------------------------------------------------------
Signed-off-by: Aboorva Devarajan <aboorvad@linux.ibm.com>
diff --git a/src/common/cpu_info_helpers.c b/src/common/cpu_info_helpers.c
index 756e792..e75cf6e 100644
--- a/src/common/cpu_info_helpers.c
+++ b/src/common/cpu_info_helpers.c
@@ -311,67 +311,94 @@ cleanup:
}
static void print_cpu_list(const cpu_set_t *cpuset, int cpuset_size,
- int cpus_in_system)
+ int threads_per_cpu)
{
- int core;
+ int *present_cores = NULL;
+ int num_present_cores;
+ int start, end, i = 0;
const char *comma = "";
- for (core = 0; core < cpus_in_system; core++) {
- int begin = core;
- if (CPU_ISSET_S(core, cpuset_size, cpuset)) {
- while (CPU_ISSET_S(core+1, cpuset_size, cpuset))
- core++;
+ if (get_present_core_list(&present_cores, &num_present_cores, threads_per_cpu) != 0) {
+ fprintf(stderr, "Failed to get present_cores list\n");
+ return;
+ }
- if (core > begin)
- printf("%s%d-%d", comma, begin, core);
- else
- printf("%s%d", comma, core);
+ while (i < num_present_cores) {
+ start = present_cores[i];
+ if (CPU_ISSET_S(start, cpuset_size, cpuset)) {
+ end = start;
+ while (i + 1 < num_present_cores &&
+ CPU_ISSET_S(present_cores[i + 1], cpuset_size, cpuset) &&
+ present_cores[i + 1] == end + 1) {
+ end = present_cores[++i];
+ }
+ if (start == end) {
+ printf("%s%d", comma, start);
+ } else {
+ printf("%s%d-%d", comma, start, end);
+ }
comma = ",";
}
+ i++;
}
+ free(present_cores);
}
-int __do_smt(bool numeric, int cpus_in_system, int threads_per_cpu,
- bool print_smt_state)
+int __do_smt(bool numeric, int cpus_in_system, int threads_per_cpu, bool print_smt_state)
{
- int thread, c, smt_state = 0;
cpu_set_t **cpu_states = NULL;
- int cpu_state_size = CPU_ALLOC_SIZE(cpus_in_system);
- int start_cpu = 0, stop_cpu = cpus_in_system;
+ int thread, smt_state = -1;
+ int cpu_state_size;
int rc = 0;
+ int i, core_id, threads_online;
+ int *present_cores = NULL;
+ int num_present_cores;
- cpu_states = (cpu_set_t **)calloc(threads_per_cpu, sizeof(cpu_set_t));
- if (!cpu_states)
+ if (get_present_core_list(&present_cores, &num_present_cores, threads_per_cpu) != 0) {
+ fprintf(stderr, "Failed to get present core list\n");
return -ENOMEM;
+ }
+ cpu_state_size = CPU_ALLOC_SIZE(num_present_cores);
+ cpu_states = (cpu_set_t **)calloc(threads_per_cpu, sizeof(cpu_set_t *));
+ if (!cpu_states) {
+ rc = -ENOMEM;
+ goto cleanup_present_cores;
+ }
for (thread = 0; thread < threads_per_cpu; thread++) {
- cpu_states[thread] = CPU_ALLOC(cpus_in_system);
+ cpu_states[thread] = CPU_ALLOC(num_present_cores);
+ if (!cpu_states[thread]) {
+ rc = -ENOMEM;
+ goto cleanup_cpu_states;
+ }
CPU_ZERO_S(cpu_state_size, cpu_states[thread]);
}
- for (c = start_cpu; c < stop_cpu; c++) {
- int threads_online = __get_one_smt_state(c, threads_per_cpu);
-
+ for (i = 0; i < num_present_cores; i++) {
+ core_id = present_cores[i];
+ threads_online = __get_one_smt_state(core_id, threads_per_cpu);
if (threads_online < 0) {
rc = threads_online;
- goto cleanup_get_smt;
+ goto cleanup_cpu_states;
+ }
+ if (threads_online) {
+ CPU_SET_S(core_id, cpu_state_size, cpu_states[threads_online - 1]);
}
- if (threads_online)
- CPU_SET_S(c, cpu_state_size,
- cpu_states[threads_online - 1]);
}
for (thread = 0; thread < threads_per_cpu; thread++) {
if (CPU_COUNT_S(cpu_state_size, cpu_states[thread])) {
- if (smt_state == 0)
+ if (smt_state == -1)
smt_state = thread + 1;
else if (smt_state > 0)
smt_state = 0; /* mix of SMT modes */
}
}
- if (!print_smt_state)
- return smt_state;
+ if (!print_smt_state) {
+ rc = smt_state;
+ goto cleanup_cpu_states;
+ }
if (smt_state == 1) {
if (numeric)
@@ -380,11 +407,9 @@ int __do_smt(bool numeric, int cpus_in_system, int threads_per_cpu,
printf("SMT is off\n");
} else if (smt_state == 0) {
for (thread = 0; thread < threads_per_cpu; thread++) {
- if (CPU_COUNT_S(cpu_state_size,
- cpu_states[thread])) {
+ if (CPU_COUNT_S(cpu_state_size, cpu_states[thread])) {
printf("SMT=%d: ", thread + 1);
- print_cpu_list(cpu_states[thread],
- cpu_state_size, cpus_in_system);
+ print_cpu_list(cpu_states[thread], cpu_state_size, threads_per_cpu);
printf("\n");
}
}
@@ -392,9 +417,12 @@ int __do_smt(bool numeric, int cpus_in_system, int threads_per_cpu,
printf("SMT=%d\n", smt_state);
}
-cleanup_get_smt:
+cleanup_cpu_states:
for (thread = 0; thread < threads_per_cpu; thread++)
CPU_FREE(cpu_states[thread]);
+ free(cpu_states);
+cleanup_present_cores:
+ free(present_cores);
return rc;
}
diff --git a/src/ppc64_cpu.c b/src/ppc64_cpu.c
index 4017240..0233d29 100644
--- a/src/ppc64_cpu.c
+++ b/src/ppc64_cpu.c
@@ -52,7 +52,6 @@
#define DSCR_DEFAULT_PATH "/sys/devices/system/cpu/dscr_default"
-#define MAX_NR_CPUS 1024
#define DIAGNOSTICS_RUN_MODE 42
#define CPU_OFFLINE -1
@@ -266,21 +265,31 @@ static int get_one_smt_state(int core)
static int get_smt_state(void)
{
int smt_state = -1;
- int i;
+ int i, rc;
+ int *present_cores;
+ int num_present_cores;
+
+ rc = get_present_core_list(&present_cores, &num_present_cores, threads_per_cpu);
+ if (rc != 0) {
+ return -1;
+ }
+
+ for (i = 0; i < num_present_cores; i++) {
+ int cpu_state = get_one_smt_state(present_cores[i]);
- for (i = 0; i < cpus_in_system; i++) {
- int cpu_state = get_one_smt_state(i);
if (cpu_state == 0)
continue;
if (smt_state == -1)
smt_state = cpu_state;
+
if (smt_state != cpu_state) {
smt_state = -1;
break;
}
}
+ free(present_cores);
return smt_state;
}
@@ -313,20 +322,36 @@ static int set_smt_state(int smt_state)
{
int i, j, rc = 0;
int error = 0;
+ int cpu_base, cpu_id, core_id;
+ int *present_cores = NULL;
+ int num_present_cores;
if (!sysattr_is_writeable("online")) {
perror("Cannot set smt state");
return -1;
}
- for (i = 0; i < threads_in_system; i += threads_per_cpu) {
+ rc = get_present_core_list(&present_cores, &num_present_cores, threads_per_cpu);
+
+ if (rc != 0) {
+ fprintf(stderr, "Failed to retrieve present core list\n");
+ return rc;
+ }
+
+ for (i = 0; i < num_present_cores; i++) {
+
+ core_id = present_cores[i];
+ cpu_base = core_id * threads_per_cpu;
+
/* Online means any thread on this core running, so check all
* threads in the core, not just the first. */
for (j = 0; j < threads_per_cpu; j++) {
- if (!cpu_online(i + j))
+ cpu_id = cpu_base + j;
+
+ if (!cpu_online(cpu_id))
continue;
- rc = set_one_smt_state(i, smt_state);
+ rc = set_one_smt_state(cpu_base, smt_state);
/* Record an error, but do not check result: if we
* have failed to set this core, keep trying
* subsequent ones. */
@@ -336,10 +361,13 @@ static int set_smt_state(int smt_state)
}
}
+ free(present_cores);
+
if (error) {
- fprintf(stderr, "One or more cpus could not be on/offlined\n");
+ fprintf(stderr, "One or more CPUs could not be on/offlined\n");
return -1;
}
+
return rc;
}
@@ -459,8 +487,8 @@ static int do_subcores_per_core(char *state)
}
printf("Subcores per core: %d\n", subcore_state);
} else {
- /* Kernel decides what values are valid, so no need to
- * check here. */
+ /* Kernel decides what values are valid, so no need to
+ * check here. */
subcore_state = strtol(state, NULL, 0);
rc = set_attribute(SYSFS_SUBCORES, "%d", subcore_state);
if (rc) {
@@ -1038,7 +1066,7 @@ static int set_all_threads_off(int cpu, int smt_state)
snprintf(path, SYSFS_PATH_MAX, SYSFS_CPUDIR"/%s", i, "online");
rc = offline_thread(path);
if (rc == -1)
- printf("Unable to take cpu%d offline", i);
+ printf("Unable to take CPU %d offline\n", i);
}
return rc;
@@ -1065,11 +1093,13 @@ static int set_one_core(int smt_state, int core, int state)
static int do_online_cores(char *cores, int state)
{
int smt_state;
- int *core_state, *desired_core_state;
+ int *core_state = NULL, *desired_core_state = NULL;
int i, rc = 0;
- int core;
+ int core, valid = 0, core_idx = 0;
char *str, *token, *end_token;
bool first_core = true;
+ int *present_cores = NULL;
+ int num_present_cores;
if (cores) {
if (!sysattr_is_writeable("online")) {
@@ -1083,49 +1113,62 @@ static int do_online_cores(char *cores, int state)
}
}
+ rc = get_present_core_list(&present_cores, &num_present_cores, threads_per_cpu);
+ if (rc != 0) {
+ fprintf(stderr, "Failed to retrieve present core list\n");
+ return rc;
+ }
+
smt_state = get_smt_state();
- core_state = calloc(cpus_in_system, sizeof(int));
- if (!core_state)
+ core_state = calloc(num_present_cores, sizeof(int));
+ if (!core_state) {
+ free(present_cores);
return -ENOMEM;
+ }
- for (i = 0; i < cpus_in_system ; i++)
- core_state[i] = (get_one_smt_state(i) > 0);
+ for (i = 0; i < num_present_cores; i++) {
+ core_state[i] = (get_one_smt_state(present_cores[i]) > 0);
+ }
if (!cores) {
printf("Cores %s = ", state == 0 ? "offline" : "online");
- for (i = 0; i < cpus_in_system; i++) {
+ for (i = 0; i < num_present_cores; i++) {
if (core_state[i] == state) {
if (first_core)
first_core = false;
else
printf(",");
- printf("%d", i);
+ printf("%d", present_cores[i]);
}
}
printf("\n");
free(core_state);
+ free(present_cores);
return 0;
}
if (smt_state == -1) {
printf("Bad or inconsistent SMT state: use ppc64_cpu --smt=on|off to set all\n"
- "cores to have the same number of online threads to continue.\n");
+ "cores to have the same number of online threads to continue.\n");
do_info();
+ free(present_cores);
return -1;
}
- desired_core_state = calloc(cpus_in_system, sizeof(int));
+ desired_core_state = calloc(num_present_cores, sizeof(int));
if (!desired_core_state) {
free(core_state);
+ free(present_cores);
return -ENOMEM;
}
- for (i = 0; i < cpus_in_system; i++)
+ for (i = 0; i < num_present_cores; i++) {
/*
* Not specified on command-line
*/
desired_core_state[i] = -1;
+ }
str = cores;
while (1) {
@@ -1141,42 +1184,57 @@ static int do_online_cores(char *cores, int state)
rc = -1;
continue;
}
- if (core >= cpus_in_system || core < 0) {
+
+ for (i = 0; i < num_present_cores; i++) {
+ if (core == present_cores[i]) {
+ valid = 1;
+ core_idx = i;
+ break;
+ }
+ }
+
+ if (!valid) {
printf("Invalid core to %s: %d\n", state == 0 ? "offline" : "online", core);
rc = -1;
continue;
}
- desired_core_state[core] = state;
+
+ desired_core_state[core_idx] = state;
}
if (rc) {
- free(core_state);
- free(desired_core_state);
- return rc;
+ goto cleanup;
}
- for (i = 0; i < cpus_in_system; i++) {
+ for (i = 0; i < num_present_cores; i++) {
if (desired_core_state[i] != -1) {
- rc = set_one_core(smt_state, i, state);
- if (rc)
+ rc = set_one_core(smt_state, present_cores[i], state);
+ if (rc) {
+ fprintf(stderr, "Failed to set core %d to %s\n", present_cores[i], state == 0 ? "offline" : "online");
break;
+ }
}
}
+cleanup:
free(core_state);
free(desired_core_state);
+ free(present_cores);
+
return rc;
}
static int do_cores_on(char *state)
{
int smt_state;
- int *core_state;
- int cores_now_online = 0;
- int i, rc;
+ int cores_now_online = 0, core_id = 0;
+ int i, rc = 0;
int number_to_have, number_to_change = 0, number_changed = 0;
+ int *core_state = NULL;
int new_state;
char *end_state;
+ int *present_cores = NULL;
+ int num_present_cores;
if (state) {
if (!sysattr_is_writeable("online")) {
@@ -1194,24 +1252,33 @@ static int do_cores_on(char *state)
if (!core_state)
return -ENOMEM;
- for (i = 0; i < cpus_in_system ; i++) {
- core_state[i] = (get_one_smt_state(i) > 0);
- if (core_state[i])
+ rc = get_present_core_list(&present_cores, &num_present_cores, threads_per_cpu);
+ if (rc != 0) {
+ fprintf(stderr, "Failed to retrieve present core list\n");
+ free(core_state);
+ return rc;
+ }
+ for (i = 0; i < num_present_cores; i++) {
+ int core = present_cores[i];
+ core_state[i] = (get_one_smt_state(core) > 0);
+ if (core_state[i]) {
cores_now_online++;
+ }
}
if (!state) {
printf("Number of cores online = %d\n", cores_now_online);
- free(core_state);
- return 0;
+ rc = 0;
+ goto cleanup;
}
smt_state = get_smt_state();
if (smt_state == -1) {
printf("Bad or inconsistent SMT state: use ppc64_cpu --smt=on|off to set all\n"
- "cores to have the same number of online threads to continue.\n");
+ "cores to have the same number of online threads to continue.\n");
do_info();
- return -1;
+ rc = -1;
+ goto cleanup;
}
if (!strcmp(state, "all")) {
@@ -1227,15 +1294,16 @@ static int do_cores_on(char *state)
}
if (number_to_have == cores_now_online) {
- free(core_state);
- return 0;
+ rc = 0;
+ goto cleanup;
}
- if (number_to_have > cpus_in_system) {
- printf("Cannot online more cores than are present.\n");
+ if (number_to_have <= 0 || number_to_have > cpus_in_system) {
+ printf("Error: Invalid number of cores requested: %d, possible values \
+ should be in range: (1-%d)\n", number_to_have, cpus_in_system);
do_cores_present();
- free(core_state);
- return -1;
+ rc = -1;
+ goto cleanup;
}
if (number_to_have > cores_now_online) {
@@ -1248,41 +1316,50 @@ static int do_cores_on(char *state)
if (new_state) {
for (i = 0; i < cpus_in_system; i++) {
+ core_id = present_cores[i];
if (!core_state[i]) {
- rc = set_one_core(smt_state, i, new_state);
- if (!rc)
+ rc = set_one_core(smt_state, core_id, new_state);
+ if (!rc) {
number_changed++;
- if (number_changed >= number_to_change)
+ }
+ if (number_changed >= number_to_change) {
break;
+ }
}
}
} else {
- for (i = cpus_in_system - 1; i > 0; i--) {
+ for (i = cpus_in_system - 1; i >= 0; i--) {
+ core_id = present_cores[i];
if (core_state[i]) {
- rc = set_one_core(smt_state, i, new_state);
- if (!rc)
+ rc = set_one_core(smt_state, core_id, new_state);
+ if (!rc) {
number_changed++;
- if (number_changed >= number_to_change)
+ }
+ if (number_changed >= number_to_change) {
break;
+ }
}
}
}
if (number_changed != number_to_change) {
cores_now_online = 0;
- for (i = 0; i < cpus_in_system ; i++) {
- if (cpu_online(i * threads_per_cpu))
+ for (i = 0; i < cpus_in_system; i++) {
+ core_id = present_cores[i];
+ if (cpu_online(core_id * threads_per_cpu)) {
cores_now_online++;
+ }
}
printf("Failed to set requested number of cores online.\n"
- "Requested: %d cores, Onlined: %d cores\n",
- number_to_have, cores_now_online);
- free(core_state);
- return -1;
+ "Requested: %d cores, Onlined: %d cores\n",
+ number_to_have, cores_now_online);
+ rc = -1;
}
+cleanup:
free(core_state);
- return 0;
+ free(present_cores);
+ return rc;
}
static bool core_is_online(int core)
@@ -1294,35 +1371,45 @@ static int do_info(void)
{
int i, j, thread_num;
char online;
- int core, subcores = 0;
+ int subcores = 0, core_id = 0;
+ int *present_cores = NULL;
+ int num_present_cores;
- if (is_subcore_capable())
+ if (is_subcore_capable()) {
subcores = num_subcores();
+ }
- for (i = 0, core = 0; core < cpus_in_system; i++) {
+ int rc = get_present_core_list(&present_cores, &num_present_cores, threads_per_cpu);
+ if (rc != 0) {
+ fprintf(stderr, "Failed to retrieve present core list\n");
+ return rc;
+ }
- if (!core_is_online(i))
+ for (i = 0; i < num_present_cores; i++) {
+ core_id = present_cores[i];
+ if (!core_is_online(core_id)) {
continue;
+ }
if (subcores > 1) {
- if (core % subcores == 0)
- printf("Core %3d:\n", core/subcores);
- printf(" Subcore %3d: ", core);
+ if (core_id % subcores == 0) {
+ printf("Core %3d:\n", core_id / subcores);
+ }
+ printf(" Subcore %3d: ", core_id);
} else {
- printf("Core %3d: ", core);
+ printf("Core %3d: ", core_id);
}
- thread_num = i * threads_per_cpu;
- for (j = 0; j < threads_per_cpu; j++, thread_num++) {
+ for (j = 0; j < threads_per_cpu; j++) {
+ thread_num = core_id * threads_per_cpu + j;
online = cpu_online(thread_num) ? '*' : ' ';
printf("%4d%c ", thread_num, online);
}
printf("\n");
- core++;
}
+ free(present_cores);
return 0;
}
-
static void usage(void)
{
printf(

View File

@ -1,6 +1,6 @@
Name: powerpc-utils
Version: 1.3.13
Release: 1%{?dist}
Release: 2%{?dist}
Summary: PERL-based scripts for maintaining and servicing PowerPC systems
License: GPL-2.0-only
@ -10,6 +10,8 @@ Source1: nx-gzip.udev
Patch0: powerpc-utils-1.3.11-manpages.patch
# upstream patches
Patch100: powerpc-utils-1.3.13-cpu_info_helpers.patch
Patch101: powerpc-utils-1.3.13-ppc64_cpu-Fix-handling-of-non-contiguous-CPU-IDs.patch
ExclusiveArch: ppc %{power64}
@ -211,6 +213,9 @@ systemctl enable hcn-init.service >/dev/null 2>&1 || :
%changelog
* Sat Feb 01 2025 Than Ngo <than@redhat.com> - 1.3.13-2
- Resolves: RHEL-76285, Fix handling of non-contiguous CPU IDs
* Wed Nov 20 2024 Than Ngo <than@redhat.com> - 1.3.13-1
- Update to 1.3.13
Resolves: RHEL-24535