Resolves: RHEL-76285, Fix handling of non-contiguous CPU IDs
This commit is contained in:
parent
7757efa247
commit
6bf4958b42
163
powerpc-utils-1.3.13-cpu_info_helpers.patch
Normal file
163
powerpc-utils-1.3.13-cpu_info_helpers.patch
Normal file
@ -0,0 +1,163 @@
|
||||
commit 54cf30c7d274c8aab2a7ae589ab056f52dfffc62
|
||||
Author: Aboorva Devarajan <aboorvad@linux.ibm.com>
|
||||
Date: Sat Dec 7 21:54:44 2024 -0500
|
||||
|
||||
cpu_info_helpers: Add helper function to retrieve present CPU core list
|
||||
|
||||
Introduce get_present_core_list helper function to accurately parse
|
||||
and retrieve the list of present CPU cores, addressing gaps in core
|
||||
numbering caused by dynamic addition or removal of CPUs (via CPU DLPAR
|
||||
operation)
|
||||
|
||||
Utilizes the present CPU list from `/sys/devices/system/cpu/present`
|
||||
to handle non-contiguous CPU IDs. Accurately maps core IDs to CPUs
|
||||
considering specified number of threads per CPU, addressing gaps in
|
||||
core numbering.
|
||||
|
||||
Signed-off-by: Aboorva Devarajan <aboorvad@linux.ibm.com>
|
||||
|
||||
diff --git a/src/common/cpu_info_helpers.c b/src/common/cpu_info_helpers.c
|
||||
index 8c57db8..756e792 100644
|
||||
--- a/src/common/cpu_info_helpers.c
|
||||
+++ b/src/common/cpu_info_helpers.c
|
||||
@@ -203,6 +203,113 @@ int __get_one_smt_state(int core, int threads_per_cpu)
|
||||
return smt_state;
|
||||
}
|
||||
|
||||
+int get_present_cpu_count(void)
|
||||
+{
|
||||
+ int start, end, total_cpus = 0;
|
||||
+ size_t len = 0;
|
||||
+ char *line = NULL;
|
||||
+ FILE *fp;
|
||||
+ char *token;
|
||||
+
|
||||
+ fp = fopen(CPU_PRESENT_PATH, "r");
|
||||
+ if (!fp) {
|
||||
+ perror("Error opening CPU_PRESENT_PATH");
|
||||
+ return -1;
|
||||
+ }
|
||||
+
|
||||
+ if (getline(&line, &len, fp) == -1) {
|
||||
+ perror("Error reading CPU_PRESENT_PATH");
|
||||
+ fclose(fp);
|
||||
+ free(line);
|
||||
+ return -1;
|
||||
+ }
|
||||
+ fclose(fp);
|
||||
+
|
||||
+ token = strtok(line, ",");
|
||||
+ while (token) {
|
||||
+ if (sscanf(token, "%d-%d", &start, &end) == 2) {
|
||||
+ total_cpus += (end - start + 1);
|
||||
+ } else if (sscanf(token, "%d", &start) == 1) {
|
||||
+ total_cpus++;
|
||||
+ }
|
||||
+ token = strtok(NULL, ",");
|
||||
+ }
|
||||
+
|
||||
+ free(line);
|
||||
+ return total_cpus;
|
||||
+}
|
||||
+
|
||||
+int get_present_core_list(int **present_cores, int *num_present_cores, int threads_per_cpu)
|
||||
+{
|
||||
+ FILE *fp = NULL;
|
||||
+ char *line = NULL;
|
||||
+ char *token = NULL;
|
||||
+ size_t len = 0;
|
||||
+ ssize_t read;
|
||||
+ int core_count = 0;
|
||||
+ int core_list_size;
|
||||
+ int *cores = NULL;
|
||||
+ int start, end, i;
|
||||
+
|
||||
+ if (threads_per_cpu <= 0) {
|
||||
+ fprintf(stderr, "Invalid threads_per_cpu value, got %d expected >= 1\n", threads_per_cpu);
|
||||
+ return -1;
|
||||
+ }
|
||||
+
|
||||
+ core_list_size = get_present_cpu_count() / threads_per_cpu;
|
||||
+ if (core_list_size <= 0) {
|
||||
+ fprintf(stderr, "Error while calculating core list size\n");
|
||||
+ return -1;
|
||||
+ }
|
||||
+
|
||||
+ cores = malloc(core_list_size * sizeof(int));
|
||||
+ if (!cores) {
|
||||
+ perror("Memory allocation failed");
|
||||
+ goto cleanup;
|
||||
+ }
|
||||
+
|
||||
+ fp = fopen(CPU_PRESENT_PATH, "r");
|
||||
+ if (!fp) {
|
||||
+ perror("Error opening file");
|
||||
+ goto cleanup;
|
||||
+ }
|
||||
+
|
||||
+ read = getline(&line, &len, fp);
|
||||
+ if (read == -1) {
|
||||
+ perror("Error reading file");
|
||||
+ goto cleanup;
|
||||
+ }
|
||||
+
|
||||
+ token = strtok(line, ",");
|
||||
+ while (token) {
|
||||
+ if (sscanf(token, "%d-%d", &start, &end) == 2) {
|
||||
+ for (i = start; i <= end; i++) {
|
||||
+ if (i % threads_per_cpu == 0) {
|
||||
+ cores[core_count++] = i / threads_per_cpu;
|
||||
+ }
|
||||
+ }
|
||||
+ } else if (sscanf(token, "%d", &start) == 1) {
|
||||
+ if (start % threads_per_cpu == 0) {
|
||||
+ cores[core_count++] = start / threads_per_cpu;
|
||||
+ }
|
||||
+ }
|
||||
+ token = strtok(NULL, ",");
|
||||
+ }
|
||||
+
|
||||
+ *present_cores = cores;
|
||||
+ *num_present_cores = core_count;
|
||||
+ free(line);
|
||||
+ return 0;
|
||||
+
|
||||
+cleanup:
|
||||
+ if (fp) {
|
||||
+ fclose(fp);
|
||||
+ }
|
||||
+ free(line);
|
||||
+ free(cores);
|
||||
+ return -1;
|
||||
+}
|
||||
+
|
||||
static void print_cpu_list(const cpu_set_t *cpuset, int cpuset_size,
|
||||
int cpus_in_system)
|
||||
{
|
||||
diff --git a/src/common/cpu_info_helpers.h b/src/common/cpu_info_helpers.h
|
||||
index c063fff..77e6ad7 100644
|
||||
--- a/src/common/cpu_info_helpers.h
|
||||
+++ b/src/common/cpu_info_helpers.h
|
||||
@@ -24,9 +24,10 @@
|
||||
#ifndef _CPU_INFO_HELPERS_H
|
||||
#define _CPU_INFO_HELPERS_H
|
||||
|
||||
-#define SYSFS_CPUDIR "/sys/devices/system/cpu/cpu%d"
|
||||
-#define SYSFS_SUBCORES "/sys/devices/system/cpu/subcores_per_core"
|
||||
-#define INTSERV_PATH "/proc/device-tree/cpus/%s/ibm,ppc-interrupt-server#s"
|
||||
+#define SYSFS_CPUDIR "/sys/devices/system/cpu/cpu%d"
|
||||
+#define SYSFS_SUBCORES "/sys/devices/system/cpu/subcores_per_core"
|
||||
+#define INTSERV_PATH "/proc/device-tree/cpus/%s/ibm,ppc-interrupt-server#s"
|
||||
+#define CPU_PRESENT_PATH "/sys/devices/system/cpu/present"
|
||||
|
||||
#define SYSFS_PATH_MAX 128
|
||||
|
||||
@@ -39,6 +40,8 @@ extern int num_subcores(void);
|
||||
extern int get_attribute(char *path, const char *fmt, int *value);
|
||||
extern int get_cpu_info(int *threads_per_cpu, int *cpus_in_system,
|
||||
int *threads_in_system);
|
||||
+extern int get_present_core_list(int **present_cores, int *num_present_cores,
|
||||
+ int threads_per_cpu);
|
||||
extern int __is_smt_capable(int threads_in_system);
|
||||
extern int __get_one_smt_state(int core, int threads_per_cpu);
|
||||
extern int __do_smt(bool numeric, int cpus_in_system, int threads_per_cpu,
|
@ -0,0 +1,749 @@
|
||||
commit e5fd24a6e35c3be78c96d6887e3774852bbe4674
|
||||
Author: Aboorva Devarajan <aboorvad@linux.ibm.com>
|
||||
Date: Wed Jan 1 22:56:07 2025 -0500
|
||||
|
||||
ppc64_cpu: Fix handling of non-contiguous CPU IDs
|
||||
|
||||
In ppc64le environments, adding or removing CPUs dynamically through
|
||||
DLPAR can create gaps in CPU IDs, such as `0-103,120-151`; in this
|
||||
case CPUs 104-119 are missing.
|
||||
|
||||
ppc64_cpu doesn't handle this scenario and always considers CPU IDs
|
||||
to be contiguous causing issues in core numbering, cpu info and SMT
|
||||
mode reporting.
|
||||
|
||||
To illustrate the issues this patch fixes, consider the following
|
||||
system configuration:
|
||||
|
||||
$ lscpu
|
||||
Architecture: ppc64le
|
||||
Byte Order: Little Endian
|
||||
CPU(s): 136
|
||||
On-line CPU(s) list: 0-103,120-151
|
||||
|
||||
**Note: CPU IDs are non-contiguous**
|
||||
|
||||
-----------------------------------------------------------------
|
||||
Before Patch:
|
||||
-----------------------------------------------------------------
|
||||
|
||||
$ ppc64_cpu --info
|
||||
Core 0: 0* 1* 2* 3* 4* 5* 6* 7*
|
||||
Core 1: 8* 9* 10* 11* 12* 13* 14* 15*
|
||||
Core 2: 16* 17* 18* 19* 20* 21* 22* 23*
|
||||
Core 3: 24* 25* 26* 27* 28* 29* 30* 31*
|
||||
Core 4: 32* 33* 34* 35* 36* 37* 38* 39*
|
||||
Core 5: 40* 41* 42* 43* 44* 45* 46* 47*
|
||||
Core 6: 48* 49* 50* 51* 52* 53* 54* 55*
|
||||
Core 7: 56* 57* 58* 59* 60* 61* 62* 63*
|
||||
Core 8: 64* 65* 66* 67* 68* 69* 70* 71*
|
||||
Core 9: 72* 73* 74* 75* 76* 77* 78* 79*
|
||||
Core 10: 80* 81* 82* 83* 84* 85* 86* 87*
|
||||
Core 11: 88* 89* 90* 91* 92* 93* 94* 95*
|
||||
Core 12: 96* 97* 98* 99* 100* 101* 102* 103*
|
||||
........................................................... *gap*
|
||||
Core 13: 120* 121* 122* 123* 124* 125* 126* 127*
|
||||
Core 14: 128* 129* 130* 131* 132* 133* 134* 135*
|
||||
Core 15: 136* 137* 138* 139* 140* 141* 142* 143*
|
||||
Core 16: 144* 145* 146* 147* 148* 149* 150* 151*
|
||||
|
||||
**Although the CPU IDs are non-contiguous, associated core IDs are
|
||||
represented in contiguous order, which makes it harder to interpret
|
||||
this clearly.**
|
||||
|
||||
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
|
||||
|
||||
$ ppc64_cpu --cores-on
|
||||
Number of cores online = 15
|
||||
|
||||
**Expected: Number of online cores = 17**
|
||||
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
|
||||
|
||||
$ ppc64_cpu --offline-cores
|
||||
Cores offline = 13, 14
|
||||
|
||||
**Even though no cores are actually offline, two cores (13, 14)
|
||||
are displayed as offline.**
|
||||
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
|
||||
|
||||
$ ppc64_cpu --online-cores
|
||||
Cores online = 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 15, 16
|
||||
|
||||
**The list of online cores is missing two cores (13, 14).**
|
||||
-----------------------------------------------------------------
|
||||
|
||||
To resolve this, use the present CPU list from sysfs to assign
|
||||
numbers to CPUs and cores, which will make this accurate.
|
||||
|
||||
$ cat /sys/devices/system/cpu/present
|
||||
0-103,120-151
|
||||
|
||||
With this patch, the command output correctly reflects the
|
||||
current CPU configuration, providing a more precise representation
|
||||
of the system state.
|
||||
|
||||
-----------------------------------------------------------------
|
||||
After Patch:
|
||||
-----------------------------------------------------------------
|
||||
|
||||
$ ppc64_cpu --info
|
||||
Core 0: 0* 1* 2* 3* 4* 5* 6* 7*
|
||||
Core 1: 8* 9* 10* 11* 12* 13* 14* 15*
|
||||
Core 2: 16* 17* 18* 19* 20* 21* 22* 23*
|
||||
Core 3: 24* 25* 26* 27* 28* 29* 30* 31*
|
||||
Core 4: 32* 33* 34* 35* 36* 37* 38* 39*
|
||||
Core 5: 40* 41* 42* 43* 44* 45* 46* 47*
|
||||
Core 6: 48* 49* 50* 51* 52* 53* 54* 55*
|
||||
Core 7: 56* 57* 58* 59* 60* 61* 62* 63*
|
||||
Core 8: 64* 65* 66* 67* 68* 69* 70* 71*
|
||||
Core 9: 72* 73* 74* 75* 76* 77* 78* 79*
|
||||
Core 10: 80* 81* 82* 83* 84* 85* 86* 87*
|
||||
Core 11: 88* 89* 90* 91* 92* 93* 94* 95*
|
||||
Core 12: 96* 97* 98* 99* 100* 101* 102* 103*
|
||||
........................................................... *gap*
|
||||
Core 15: 120* 121* 122* 123* 124* 125* 126* 127*
|
||||
Core 16: 128* 129* 130* 131* 132* 133* 134* 135*
|
||||
Core 17: 136* 137* 138* 139* 140* 141* 142* 143*
|
||||
Core 18: 144* 145* 146* 147* 148* 149* 150* 151*
|
||||
|
||||
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
|
||||
|
||||
$ ppc64_cpu --cores-on
|
||||
Number of cores online = 17
|
||||
|
||||
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
|
||||
|
||||
$ ppc64_cpu --offline-cores
|
||||
Cores offline =
|
||||
|
||||
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
|
||||
|
||||
$ ppc64_cpu --online-cores
|
||||
Cores online = 0,1,2,3,4,5,6,7,8,9,10,11,12,15,16,17,18
|
||||
|
||||
-----------------------------------------------------------------
|
||||
|
||||
Signed-off-by: Aboorva Devarajan <aboorvad@linux.ibm.com>
|
||||
|
||||
diff --git a/src/common/cpu_info_helpers.c b/src/common/cpu_info_helpers.c
|
||||
index 756e792..e75cf6e 100644
|
||||
--- a/src/common/cpu_info_helpers.c
|
||||
+++ b/src/common/cpu_info_helpers.c
|
||||
@@ -311,67 +311,94 @@ cleanup:
|
||||
}
|
||||
|
||||
static void print_cpu_list(const cpu_set_t *cpuset, int cpuset_size,
|
||||
- int cpus_in_system)
|
||||
+ int threads_per_cpu)
|
||||
{
|
||||
- int core;
|
||||
+ int *present_cores = NULL;
|
||||
+ int num_present_cores;
|
||||
+ int start, end, i = 0;
|
||||
const char *comma = "";
|
||||
|
||||
- for (core = 0; core < cpus_in_system; core++) {
|
||||
- int begin = core;
|
||||
- if (CPU_ISSET_S(core, cpuset_size, cpuset)) {
|
||||
- while (CPU_ISSET_S(core+1, cpuset_size, cpuset))
|
||||
- core++;
|
||||
+ if (get_present_core_list(&present_cores, &num_present_cores, threads_per_cpu) != 0) {
|
||||
+ fprintf(stderr, "Failed to get present_cores list\n");
|
||||
+ return;
|
||||
+ }
|
||||
|
||||
- if (core > begin)
|
||||
- printf("%s%d-%d", comma, begin, core);
|
||||
- else
|
||||
- printf("%s%d", comma, core);
|
||||
+ while (i < num_present_cores) {
|
||||
+ start = present_cores[i];
|
||||
+ if (CPU_ISSET_S(start, cpuset_size, cpuset)) {
|
||||
+ end = start;
|
||||
+ while (i + 1 < num_present_cores &&
|
||||
+ CPU_ISSET_S(present_cores[i + 1], cpuset_size, cpuset) &&
|
||||
+ present_cores[i + 1] == end + 1) {
|
||||
+ end = present_cores[++i];
|
||||
+ }
|
||||
+ if (start == end) {
|
||||
+ printf("%s%d", comma, start);
|
||||
+ } else {
|
||||
+ printf("%s%d-%d", comma, start, end);
|
||||
+ }
|
||||
comma = ",";
|
||||
}
|
||||
+ i++;
|
||||
}
|
||||
+ free(present_cores);
|
||||
}
|
||||
|
||||
-int __do_smt(bool numeric, int cpus_in_system, int threads_per_cpu,
|
||||
- bool print_smt_state)
|
||||
+int __do_smt(bool numeric, int cpus_in_system, int threads_per_cpu, bool print_smt_state)
|
||||
{
|
||||
- int thread, c, smt_state = 0;
|
||||
cpu_set_t **cpu_states = NULL;
|
||||
- int cpu_state_size = CPU_ALLOC_SIZE(cpus_in_system);
|
||||
- int start_cpu = 0, stop_cpu = cpus_in_system;
|
||||
+ int thread, smt_state = -1;
|
||||
+ int cpu_state_size;
|
||||
int rc = 0;
|
||||
+ int i, core_id, threads_online;
|
||||
+ int *present_cores = NULL;
|
||||
+ int num_present_cores;
|
||||
|
||||
- cpu_states = (cpu_set_t **)calloc(threads_per_cpu, sizeof(cpu_set_t));
|
||||
- if (!cpu_states)
|
||||
+ if (get_present_core_list(&present_cores, &num_present_cores, threads_per_cpu) != 0) {
|
||||
+ fprintf(stderr, "Failed to get present core list\n");
|
||||
return -ENOMEM;
|
||||
+ }
|
||||
+ cpu_state_size = CPU_ALLOC_SIZE(num_present_cores);
|
||||
+ cpu_states = (cpu_set_t **)calloc(threads_per_cpu, sizeof(cpu_set_t *));
|
||||
+ if (!cpu_states) {
|
||||
+ rc = -ENOMEM;
|
||||
+ goto cleanup_present_cores;
|
||||
+ }
|
||||
|
||||
for (thread = 0; thread < threads_per_cpu; thread++) {
|
||||
- cpu_states[thread] = CPU_ALLOC(cpus_in_system);
|
||||
+ cpu_states[thread] = CPU_ALLOC(num_present_cores);
|
||||
+ if (!cpu_states[thread]) {
|
||||
+ rc = -ENOMEM;
|
||||
+ goto cleanup_cpu_states;
|
||||
+ }
|
||||
CPU_ZERO_S(cpu_state_size, cpu_states[thread]);
|
||||
}
|
||||
|
||||
- for (c = start_cpu; c < stop_cpu; c++) {
|
||||
- int threads_online = __get_one_smt_state(c, threads_per_cpu);
|
||||
-
|
||||
+ for (i = 0; i < num_present_cores; i++) {
|
||||
+ core_id = present_cores[i];
|
||||
+ threads_online = __get_one_smt_state(core_id, threads_per_cpu);
|
||||
if (threads_online < 0) {
|
||||
rc = threads_online;
|
||||
- goto cleanup_get_smt;
|
||||
+ goto cleanup_cpu_states;
|
||||
+ }
|
||||
+ if (threads_online) {
|
||||
+ CPU_SET_S(core_id, cpu_state_size, cpu_states[threads_online - 1]);
|
||||
}
|
||||
- if (threads_online)
|
||||
- CPU_SET_S(c, cpu_state_size,
|
||||
- cpu_states[threads_online - 1]);
|
||||
}
|
||||
|
||||
for (thread = 0; thread < threads_per_cpu; thread++) {
|
||||
if (CPU_COUNT_S(cpu_state_size, cpu_states[thread])) {
|
||||
- if (smt_state == 0)
|
||||
+ if (smt_state == -1)
|
||||
smt_state = thread + 1;
|
||||
else if (smt_state > 0)
|
||||
smt_state = 0; /* mix of SMT modes */
|
||||
}
|
||||
}
|
||||
|
||||
- if (!print_smt_state)
|
||||
- return smt_state;
|
||||
+ if (!print_smt_state) {
|
||||
+ rc = smt_state;
|
||||
+ goto cleanup_cpu_states;
|
||||
+ }
|
||||
|
||||
if (smt_state == 1) {
|
||||
if (numeric)
|
||||
@@ -380,11 +407,9 @@ int __do_smt(bool numeric, int cpus_in_system, int threads_per_cpu,
|
||||
printf("SMT is off\n");
|
||||
} else if (smt_state == 0) {
|
||||
for (thread = 0; thread < threads_per_cpu; thread++) {
|
||||
- if (CPU_COUNT_S(cpu_state_size,
|
||||
- cpu_states[thread])) {
|
||||
+ if (CPU_COUNT_S(cpu_state_size, cpu_states[thread])) {
|
||||
printf("SMT=%d: ", thread + 1);
|
||||
- print_cpu_list(cpu_states[thread],
|
||||
- cpu_state_size, cpus_in_system);
|
||||
+ print_cpu_list(cpu_states[thread], cpu_state_size, threads_per_cpu);
|
||||
printf("\n");
|
||||
}
|
||||
}
|
||||
@@ -392,9 +417,12 @@ int __do_smt(bool numeric, int cpus_in_system, int threads_per_cpu,
|
||||
printf("SMT=%d\n", smt_state);
|
||||
}
|
||||
|
||||
-cleanup_get_smt:
|
||||
+cleanup_cpu_states:
|
||||
for (thread = 0; thread < threads_per_cpu; thread++)
|
||||
CPU_FREE(cpu_states[thread]);
|
||||
+ free(cpu_states);
|
||||
+cleanup_present_cores:
|
||||
+ free(present_cores);
|
||||
|
||||
return rc;
|
||||
}
|
||||
diff --git a/src/ppc64_cpu.c b/src/ppc64_cpu.c
|
||||
index 4017240..0233d29 100644
|
||||
--- a/src/ppc64_cpu.c
|
||||
+++ b/src/ppc64_cpu.c
|
||||
@@ -52,7 +52,6 @@
|
||||
|
||||
#define DSCR_DEFAULT_PATH "/sys/devices/system/cpu/dscr_default"
|
||||
|
||||
-#define MAX_NR_CPUS 1024
|
||||
#define DIAGNOSTICS_RUN_MODE 42
|
||||
#define CPU_OFFLINE -1
|
||||
|
||||
@@ -266,21 +265,31 @@ static int get_one_smt_state(int core)
|
||||
static int get_smt_state(void)
|
||||
{
|
||||
int smt_state = -1;
|
||||
- int i;
|
||||
+ int i, rc;
|
||||
+ int *present_cores;
|
||||
+ int num_present_cores;
|
||||
+
|
||||
+ rc = get_present_core_list(&present_cores, &num_present_cores, threads_per_cpu);
|
||||
+ if (rc != 0) {
|
||||
+ return -1;
|
||||
+ }
|
||||
+
|
||||
+ for (i = 0; i < num_present_cores; i++) {
|
||||
+ int cpu_state = get_one_smt_state(present_cores[i]);
|
||||
|
||||
- for (i = 0; i < cpus_in_system; i++) {
|
||||
- int cpu_state = get_one_smt_state(i);
|
||||
if (cpu_state == 0)
|
||||
continue;
|
||||
|
||||
if (smt_state == -1)
|
||||
smt_state = cpu_state;
|
||||
+
|
||||
if (smt_state != cpu_state) {
|
||||
smt_state = -1;
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
||||
+ free(present_cores);
|
||||
return smt_state;
|
||||
}
|
||||
|
||||
@@ -313,20 +322,36 @@ static int set_smt_state(int smt_state)
|
||||
{
|
||||
int i, j, rc = 0;
|
||||
int error = 0;
|
||||
+ int cpu_base, cpu_id, core_id;
|
||||
+ int *present_cores = NULL;
|
||||
+ int num_present_cores;
|
||||
|
||||
if (!sysattr_is_writeable("online")) {
|
||||
perror("Cannot set smt state");
|
||||
return -1;
|
||||
}
|
||||
|
||||
- for (i = 0; i < threads_in_system; i += threads_per_cpu) {
|
||||
+ rc = get_present_core_list(&present_cores, &num_present_cores, threads_per_cpu);
|
||||
+
|
||||
+ if (rc != 0) {
|
||||
+ fprintf(stderr, "Failed to retrieve present core list\n");
|
||||
+ return rc;
|
||||
+ }
|
||||
+
|
||||
+ for (i = 0; i < num_present_cores; i++) {
|
||||
+
|
||||
+ core_id = present_cores[i];
|
||||
+ cpu_base = core_id * threads_per_cpu;
|
||||
+
|
||||
/* Online means any thread on this core running, so check all
|
||||
* threads in the core, not just the first. */
|
||||
for (j = 0; j < threads_per_cpu; j++) {
|
||||
- if (!cpu_online(i + j))
|
||||
+ cpu_id = cpu_base + j;
|
||||
+
|
||||
+ if (!cpu_online(cpu_id))
|
||||
continue;
|
||||
|
||||
- rc = set_one_smt_state(i, smt_state);
|
||||
+ rc = set_one_smt_state(cpu_base, smt_state);
|
||||
/* Record an error, but do not check result: if we
|
||||
* have failed to set this core, keep trying
|
||||
* subsequent ones. */
|
||||
@@ -336,10 +361,13 @@ static int set_smt_state(int smt_state)
|
||||
}
|
||||
}
|
||||
|
||||
+ free(present_cores);
|
||||
+
|
||||
if (error) {
|
||||
- fprintf(stderr, "One or more cpus could not be on/offlined\n");
|
||||
+ fprintf(stderr, "One or more CPUs could not be on/offlined\n");
|
||||
return -1;
|
||||
}
|
||||
+
|
||||
return rc;
|
||||
}
|
||||
|
||||
@@ -459,8 +487,8 @@ static int do_subcores_per_core(char *state)
|
||||
}
|
||||
printf("Subcores per core: %d\n", subcore_state);
|
||||
} else {
|
||||
- /* Kernel decides what values are valid, so no need to
|
||||
- * check here. */
|
||||
+ /* Kernel decides what values are valid, so no need to
|
||||
+ * check here. */
|
||||
subcore_state = strtol(state, NULL, 0);
|
||||
rc = set_attribute(SYSFS_SUBCORES, "%d", subcore_state);
|
||||
if (rc) {
|
||||
@@ -1038,7 +1066,7 @@ static int set_all_threads_off(int cpu, int smt_state)
|
||||
snprintf(path, SYSFS_PATH_MAX, SYSFS_CPUDIR"/%s", i, "online");
|
||||
rc = offline_thread(path);
|
||||
if (rc == -1)
|
||||
- printf("Unable to take cpu%d offline", i);
|
||||
+ printf("Unable to take CPU %d offline\n", i);
|
||||
}
|
||||
|
||||
return rc;
|
||||
@@ -1065,11 +1093,13 @@ static int set_one_core(int smt_state, int core, int state)
|
||||
static int do_online_cores(char *cores, int state)
|
||||
{
|
||||
int smt_state;
|
||||
- int *core_state, *desired_core_state;
|
||||
+ int *core_state = NULL, *desired_core_state = NULL;
|
||||
int i, rc = 0;
|
||||
- int core;
|
||||
+ int core, valid = 0, core_idx = 0;
|
||||
char *str, *token, *end_token;
|
||||
bool first_core = true;
|
||||
+ int *present_cores = NULL;
|
||||
+ int num_present_cores;
|
||||
|
||||
if (cores) {
|
||||
if (!sysattr_is_writeable("online")) {
|
||||
@@ -1083,49 +1113,62 @@ static int do_online_cores(char *cores, int state)
|
||||
}
|
||||
}
|
||||
|
||||
+ rc = get_present_core_list(&present_cores, &num_present_cores, threads_per_cpu);
|
||||
+ if (rc != 0) {
|
||||
+ fprintf(stderr, "Failed to retrieve present core list\n");
|
||||
+ return rc;
|
||||
+ }
|
||||
+
|
||||
smt_state = get_smt_state();
|
||||
|
||||
- core_state = calloc(cpus_in_system, sizeof(int));
|
||||
- if (!core_state)
|
||||
+ core_state = calloc(num_present_cores, sizeof(int));
|
||||
+ if (!core_state) {
|
||||
+ free(present_cores);
|
||||
return -ENOMEM;
|
||||
+ }
|
||||
|
||||
- for (i = 0; i < cpus_in_system ; i++)
|
||||
- core_state[i] = (get_one_smt_state(i) > 0);
|
||||
+ for (i = 0; i < num_present_cores; i++) {
|
||||
+ core_state[i] = (get_one_smt_state(present_cores[i]) > 0);
|
||||
+ }
|
||||
|
||||
if (!cores) {
|
||||
printf("Cores %s = ", state == 0 ? "offline" : "online");
|
||||
- for (i = 0; i < cpus_in_system; i++) {
|
||||
+ for (i = 0; i < num_present_cores; i++) {
|
||||
if (core_state[i] == state) {
|
||||
if (first_core)
|
||||
first_core = false;
|
||||
else
|
||||
printf(",");
|
||||
- printf("%d", i);
|
||||
+ printf("%d", present_cores[i]);
|
||||
}
|
||||
}
|
||||
printf("\n");
|
||||
free(core_state);
|
||||
+ free(present_cores);
|
||||
return 0;
|
||||
}
|
||||
|
||||
if (smt_state == -1) {
|
||||
printf("Bad or inconsistent SMT state: use ppc64_cpu --smt=on|off to set all\n"
|
||||
- "cores to have the same number of online threads to continue.\n");
|
||||
+ "cores to have the same number of online threads to continue.\n");
|
||||
do_info();
|
||||
+ free(present_cores);
|
||||
return -1;
|
||||
}
|
||||
|
||||
- desired_core_state = calloc(cpus_in_system, sizeof(int));
|
||||
+ desired_core_state = calloc(num_present_cores, sizeof(int));
|
||||
if (!desired_core_state) {
|
||||
free(core_state);
|
||||
+ free(present_cores);
|
||||
return -ENOMEM;
|
||||
}
|
||||
|
||||
- for (i = 0; i < cpus_in_system; i++)
|
||||
+ for (i = 0; i < num_present_cores; i++) {
|
||||
/*
|
||||
* Not specified on command-line
|
||||
*/
|
||||
desired_core_state[i] = -1;
|
||||
+ }
|
||||
|
||||
str = cores;
|
||||
while (1) {
|
||||
@@ -1141,42 +1184,57 @@ static int do_online_cores(char *cores, int state)
|
||||
rc = -1;
|
||||
continue;
|
||||
}
|
||||
- if (core >= cpus_in_system || core < 0) {
|
||||
+
|
||||
+ for (i = 0; i < num_present_cores; i++) {
|
||||
+ if (core == present_cores[i]) {
|
||||
+ valid = 1;
|
||||
+ core_idx = i;
|
||||
+ break;
|
||||
+ }
|
||||
+ }
|
||||
+
|
||||
+ if (!valid) {
|
||||
printf("Invalid core to %s: %d\n", state == 0 ? "offline" : "online", core);
|
||||
rc = -1;
|
||||
continue;
|
||||
}
|
||||
- desired_core_state[core] = state;
|
||||
+
|
||||
+ desired_core_state[core_idx] = state;
|
||||
}
|
||||
|
||||
if (rc) {
|
||||
- free(core_state);
|
||||
- free(desired_core_state);
|
||||
- return rc;
|
||||
+ goto cleanup;
|
||||
}
|
||||
|
||||
- for (i = 0; i < cpus_in_system; i++) {
|
||||
+ for (i = 0; i < num_present_cores; i++) {
|
||||
if (desired_core_state[i] != -1) {
|
||||
- rc = set_one_core(smt_state, i, state);
|
||||
- if (rc)
|
||||
+ rc = set_one_core(smt_state, present_cores[i], state);
|
||||
+ if (rc) {
|
||||
+ fprintf(stderr, "Failed to set core %d to %s\n", present_cores[i], state == 0 ? "offline" : "online");
|
||||
break;
|
||||
+ }
|
||||
}
|
||||
}
|
||||
|
||||
+cleanup:
|
||||
free(core_state);
|
||||
free(desired_core_state);
|
||||
+ free(present_cores);
|
||||
+
|
||||
return rc;
|
||||
}
|
||||
|
||||
static int do_cores_on(char *state)
|
||||
{
|
||||
int smt_state;
|
||||
- int *core_state;
|
||||
- int cores_now_online = 0;
|
||||
- int i, rc;
|
||||
+ int cores_now_online = 0, core_id = 0;
|
||||
+ int i, rc = 0;
|
||||
int number_to_have, number_to_change = 0, number_changed = 0;
|
||||
+ int *core_state = NULL;
|
||||
int new_state;
|
||||
char *end_state;
|
||||
+ int *present_cores = NULL;
|
||||
+ int num_present_cores;
|
||||
|
||||
if (state) {
|
||||
if (!sysattr_is_writeable("online")) {
|
||||
@@ -1194,24 +1252,33 @@ static int do_cores_on(char *state)
|
||||
if (!core_state)
|
||||
return -ENOMEM;
|
||||
|
||||
- for (i = 0; i < cpus_in_system ; i++) {
|
||||
- core_state[i] = (get_one_smt_state(i) > 0);
|
||||
- if (core_state[i])
|
||||
+ rc = get_present_core_list(&present_cores, &num_present_cores, threads_per_cpu);
|
||||
+ if (rc != 0) {
|
||||
+ fprintf(stderr, "Failed to retrieve present core list\n");
|
||||
+ free(core_state);
|
||||
+ return rc;
|
||||
+ }
|
||||
+ for (i = 0; i < num_present_cores; i++) {
|
||||
+ int core = present_cores[i];
|
||||
+ core_state[i] = (get_one_smt_state(core) > 0);
|
||||
+ if (core_state[i]) {
|
||||
cores_now_online++;
|
||||
+ }
|
||||
}
|
||||
|
||||
if (!state) {
|
||||
printf("Number of cores online = %d\n", cores_now_online);
|
||||
- free(core_state);
|
||||
- return 0;
|
||||
+ rc = 0;
|
||||
+ goto cleanup;
|
||||
}
|
||||
|
||||
smt_state = get_smt_state();
|
||||
if (smt_state == -1) {
|
||||
printf("Bad or inconsistent SMT state: use ppc64_cpu --smt=on|off to set all\n"
|
||||
- "cores to have the same number of online threads to continue.\n");
|
||||
+ "cores to have the same number of online threads to continue.\n");
|
||||
do_info();
|
||||
- return -1;
|
||||
+ rc = -1;
|
||||
+ goto cleanup;
|
||||
}
|
||||
|
||||
if (!strcmp(state, "all")) {
|
||||
@@ -1227,15 +1294,16 @@ static int do_cores_on(char *state)
|
||||
}
|
||||
|
||||
if (number_to_have == cores_now_online) {
|
||||
- free(core_state);
|
||||
- return 0;
|
||||
+ rc = 0;
|
||||
+ goto cleanup;
|
||||
}
|
||||
|
||||
- if (number_to_have > cpus_in_system) {
|
||||
- printf("Cannot online more cores than are present.\n");
|
||||
+ if (number_to_have <= 0 || number_to_have > cpus_in_system) {
|
||||
+ printf("Error: Invalid number of cores requested: %d, possible values \
|
||||
+ should be in range: (1-%d)\n", number_to_have, cpus_in_system);
|
||||
do_cores_present();
|
||||
- free(core_state);
|
||||
- return -1;
|
||||
+ rc = -1;
|
||||
+ goto cleanup;
|
||||
}
|
||||
|
||||
if (number_to_have > cores_now_online) {
|
||||
@@ -1248,41 +1316,50 @@ static int do_cores_on(char *state)
|
||||
|
||||
if (new_state) {
|
||||
for (i = 0; i < cpus_in_system; i++) {
|
||||
+ core_id = present_cores[i];
|
||||
if (!core_state[i]) {
|
||||
- rc = set_one_core(smt_state, i, new_state);
|
||||
- if (!rc)
|
||||
+ rc = set_one_core(smt_state, core_id, new_state);
|
||||
+ if (!rc) {
|
||||
number_changed++;
|
||||
- if (number_changed >= number_to_change)
|
||||
+ }
|
||||
+ if (number_changed >= number_to_change) {
|
||||
break;
|
||||
+ }
|
||||
}
|
||||
}
|
||||
} else {
|
||||
- for (i = cpus_in_system - 1; i > 0; i--) {
|
||||
+ for (i = cpus_in_system - 1; i >= 0; i--) {
|
||||
+ core_id = present_cores[i];
|
||||
if (core_state[i]) {
|
||||
- rc = set_one_core(smt_state, i, new_state);
|
||||
- if (!rc)
|
||||
+ rc = set_one_core(smt_state, core_id, new_state);
|
||||
+ if (!rc) {
|
||||
number_changed++;
|
||||
- if (number_changed >= number_to_change)
|
||||
+ }
|
||||
+ if (number_changed >= number_to_change) {
|
||||
break;
|
||||
+ }
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
if (number_changed != number_to_change) {
|
||||
cores_now_online = 0;
|
||||
- for (i = 0; i < cpus_in_system ; i++) {
|
||||
- if (cpu_online(i * threads_per_cpu))
|
||||
+ for (i = 0; i < cpus_in_system; i++) {
|
||||
+ core_id = present_cores[i];
|
||||
+ if (cpu_online(core_id * threads_per_cpu)) {
|
||||
cores_now_online++;
|
||||
+ }
|
||||
}
|
||||
printf("Failed to set requested number of cores online.\n"
|
||||
- "Requested: %d cores, Onlined: %d cores\n",
|
||||
- number_to_have, cores_now_online);
|
||||
- free(core_state);
|
||||
- return -1;
|
||||
+ "Requested: %d cores, Onlined: %d cores\n",
|
||||
+ number_to_have, cores_now_online);
|
||||
+ rc = -1;
|
||||
}
|
||||
|
||||
+cleanup:
|
||||
free(core_state);
|
||||
- return 0;
|
||||
+ free(present_cores);
|
||||
+ return rc;
|
||||
}
|
||||
|
||||
static bool core_is_online(int core)
|
||||
@@ -1294,35 +1371,45 @@ static int do_info(void)
|
||||
{
|
||||
int i, j, thread_num;
|
||||
char online;
|
||||
- int core, subcores = 0;
|
||||
+ int subcores = 0, core_id = 0;
|
||||
+ int *present_cores = NULL;
|
||||
+ int num_present_cores;
|
||||
|
||||
- if (is_subcore_capable())
|
||||
+ if (is_subcore_capable()) {
|
||||
subcores = num_subcores();
|
||||
+ }
|
||||
|
||||
- for (i = 0, core = 0; core < cpus_in_system; i++) {
|
||||
+ int rc = get_present_core_list(&present_cores, &num_present_cores, threads_per_cpu);
|
||||
+ if (rc != 0) {
|
||||
+ fprintf(stderr, "Failed to retrieve present core list\n");
|
||||
+ return rc;
|
||||
+ }
|
||||
|
||||
- if (!core_is_online(i))
|
||||
+ for (i = 0; i < num_present_cores; i++) {
|
||||
+ core_id = present_cores[i];
|
||||
+ if (!core_is_online(core_id)) {
|
||||
continue;
|
||||
+ }
|
||||
|
||||
if (subcores > 1) {
|
||||
- if (core % subcores == 0)
|
||||
- printf("Core %3d:\n", core/subcores);
|
||||
- printf(" Subcore %3d: ", core);
|
||||
+ if (core_id % subcores == 0) {
|
||||
+ printf("Core %3d:\n", core_id / subcores);
|
||||
+ }
|
||||
+ printf(" Subcore %3d: ", core_id);
|
||||
} else {
|
||||
- printf("Core %3d: ", core);
|
||||
+ printf("Core %3d: ", core_id);
|
||||
}
|
||||
|
||||
- thread_num = i * threads_per_cpu;
|
||||
- for (j = 0; j < threads_per_cpu; j++, thread_num++) {
|
||||
+ for (j = 0; j < threads_per_cpu; j++) {
|
||||
+ thread_num = core_id * threads_per_cpu + j;
|
||||
online = cpu_online(thread_num) ? '*' : ' ';
|
||||
printf("%4d%c ", thread_num, online);
|
||||
}
|
||||
printf("\n");
|
||||
- core++;
|
||||
}
|
||||
+ free(present_cores);
|
||||
return 0;
|
||||
}
|
||||
-
|
||||
static void usage(void)
|
||||
{
|
||||
printf(
|
@ -1,6 +1,6 @@
|
||||
Name: powerpc-utils
|
||||
Version: 1.3.13
|
||||
Release: 1%{?dist}
|
||||
Release: 2%{?dist}
|
||||
Summary: PERL-based scripts for maintaining and servicing PowerPC systems
|
||||
|
||||
License: GPL-2.0-only
|
||||
@ -10,6 +10,8 @@ Source1: nx-gzip.udev
|
||||
Patch0: powerpc-utils-1.3.11-manpages.patch
|
||||
|
||||
# upstream patches
|
||||
Patch100: powerpc-utils-1.3.13-cpu_info_helpers.patch
|
||||
Patch101: powerpc-utils-1.3.13-ppc64_cpu-Fix-handling-of-non-contiguous-CPU-IDs.patch
|
||||
|
||||
ExclusiveArch: ppc %{power64}
|
||||
|
||||
@ -211,6 +213,9 @@ systemctl enable hcn-init.service >/dev/null 2>&1 || :
|
||||
|
||||
|
||||
%changelog
|
||||
* Sat Feb 01 2025 Than Ngo <than@redhat.com> - 1.3.13-2
|
||||
- Resolves: RHEL-76285, Fix handling of non-contiguous CPU IDs
|
||||
|
||||
* Wed Nov 20 2024 Than Ngo <than@redhat.com> - 1.3.13-1
|
||||
- Update to 1.3.13
|
||||
Resolves: RHEL-24535
|
||||
|
Loading…
Reference in New Issue
Block a user