150 lines
5.0 KiB
Diff
150 lines
5.0 KiB
Diff
commit f1a8ed892e18b83cb0483e8f8f8cbc512fa8510c
|
|
Author: Laurent Dufour <ldufour@linux.ibm.com>
|
|
Date: Thu Aug 10 11:47:07 2023 +0200
|
|
|
|
ppc64_cpu/info: fix bad report when non continuous CPU ids
|
|
|
|
When CPU ids are not continuous, let's say that the kernel didn't reuse a set
|
|
of CPU ids already used on a different node, the output of ppc64_cpu
|
|
--info is not correct.
|
|
|
|
For instance, in the example below CPU ids 48 to 55 haven't been reused
|
|
by the kernel when a CPU has been added after an LPM operation.
|
|
Note that the system is running in SMT=4.
|
|
|
|
The numactl -H command is providing the correct set of CPUs:
|
|
ltczep3-lp4:~ # numactl -H
|
|
available: 2 nodes (0-1)
|
|
node 0 cpus: 0 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 64 65 66 67 68 69 70 71
|
|
node 0 size: 7177 MB
|
|
node 0 free: 4235 MB
|
|
node 1 cpus: 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47
|
|
node 1 size: 24508 MB
|
|
node 1 free: 23539 MB
|
|
node distances:
|
|
node 0 1
|
|
0: 10 40
|
|
1: 40 10
|
|
|
|
But ppc64_cpu --info is reporting CPUs 48 to 55 as offline while not
|
|
reporting CPUs 64 to 71 at all:
|
|
ltczep3-lp4:~ # ppc64_cpu --info
|
|
Core 0: 0* 1* 2* 3* 4* 5* 6* 7*
|
|
Core 1: 8* 9* 10* 11* 12* 13* 14* 15*
|
|
Core 2: 16* 17* 18* 19* 20* 21* 22* 23*
|
|
Core 3: 24* 25* 26* 27* 28* 29* 30* 31*
|
|
Core 4: 32* 33* 34* 35* 36* 37* 38* 39*
|
|
Core 5: 40* 41* 42* 43* 44* 45* 46* 47*
|
|
Core 6: 48 49 50 51 52 53 54 55
|
|
|
|
This is because it is considering that the CPU ids are continuous, which is
|
|
not the case here.
|
|
|
|
To prevent that, when looking for a core, it is now first checking that the
|
|
physical_id of the first thread in that core is defined (not -1). If that
|
|
is the case, this means that CPU/core is present.
|
|
|
|
With that patch applied, ppc64_cpu --info is reporting:
|
|
ltczep3-lp4:~ # ppc64_cpu --info
|
|
Core 0: 0* 1* 2* 3* 4 5 6 7
|
|
Core 1: 8* 9* 10* 11* 12 13 14 15
|
|
Core 2: 16* 17* 18* 19* 20 21 22 23
|
|
Core 3: 24* 25* 26* 27* 28 29 30 31
|
|
Core 4: 32* 33* 34* 35* 36 37 38 39
|
|
Core 5: 40* 41* 42* 43* 44 45 46 47
|
|
Core 6: 64* 65* 66* 67* 68 69 70 71
|
|
|
|
Signed-off-by: Laurent Dufour <ldufour@linux.ibm.com>
|
|
Signed-off-by: Tyrel Datwyler <tyreld@linux.ibm.com>
|
|
|
|
diff --git a/src/common/cpu_info_helpers.c b/src/common/cpu_info_helpers.c
|
|
index c05d96d..8c57db8 100644
|
|
--- a/src/common/cpu_info_helpers.c
|
|
+++ b/src/common/cpu_info_helpers.c
|
|
@@ -83,6 +83,20 @@ int __sysattr_is_writeable(char *attribute, int threads_in_system)
|
|
return test_sysattr(attribute, W_OK, threads_in_system);
|
|
}
|
|
|
|
+int cpu_physical_id(int thread)
|
|
+{
|
|
+ char path[SYSFS_PATH_MAX];
|
|
+ int rc, physical_id;
|
|
+
|
|
+ sprintf(path, SYSFS_CPUDIR"/physical_id", thread);
|
|
+ rc = get_attribute(path, "%d", &physical_id);
|
|
+
|
|
+ /* This attribute does not exist in kernels without hotplug enabled */
|
|
+ if (rc && errno == ENOENT)
|
|
+ return -1;
|
|
+ return physical_id;
|
|
+}
|
|
+
|
|
int cpu_online(int thread)
|
|
{
|
|
char path[SYSFS_PATH_MAX];
|
|
diff --git a/src/common/cpu_info_helpers.h b/src/common/cpu_info_helpers.h
|
|
index 8f09d79..c063fff 100644
|
|
--- a/src/common/cpu_info_helpers.h
|
|
+++ b/src/common/cpu_info_helpers.h
|
|
@@ -32,6 +32,7 @@
|
|
|
|
extern int __sysattr_is_readable(char *attribute, int threads_in_system);
|
|
extern int __sysattr_is_writeable(char *attribute, int threads_in_system);
|
|
+extern int cpu_physical_id(int thread);
|
|
extern int cpu_online(int thread);
|
|
extern int is_subcore_capable(void);
|
|
extern int num_subcores(void);
|
|
diff --git a/src/ppc64_cpu.c b/src/ppc64_cpu.c
|
|
index 5fdf86a..ad9f4dc 100644
|
|
--- a/src/ppc64_cpu.c
|
|
+++ b/src/ppc64_cpu.c
|
|
@@ -1251,31 +1251,40 @@ static int do_cores_on(char *state)
|
|
return 0;
|
|
}
|
|
|
|
+static bool core_is_online(int core)
|
|
+{
|
|
+ return cpu_physical_id(core * threads_per_cpu) != -1;
|
|
+}
|
|
+
|
|
static int do_info(void)
|
|
{
|
|
int i, j, thread_num;
|
|
char online;
|
|
- int subcores = 0;
|
|
+ int core, subcores = 0;
|
|
|
|
if (is_subcore_capable())
|
|
subcores = num_subcores();
|
|
|
|
- for (i = 0; i < cpus_in_system; i++) {
|
|
+ for (i = 0, core = 0; core < cpus_in_system; i++) {
|
|
+
|
|
+ if (!core_is_online(i))
|
|
+ continue;
|
|
|
|
if (subcores > 1) {
|
|
- if (i % subcores == 0)
|
|
- printf("Core %3d:\n", i/subcores);
|
|
- printf(" Subcore %3d: ", i);
|
|
+ if (core % subcores == 0)
|
|
+ printf("Core %3d:\n", core/subcores);
|
|
+ printf(" Subcore %3d: ", core);
|
|
} else {
|
|
- printf("Core %3d: ", i);
|
|
+ printf("Core %3d: ", core);
|
|
}
|
|
|
|
- for (j = 0; j < threads_per_cpu; j++) {
|
|
- thread_num = i*threads_per_cpu + j;
|
|
+ thread_num = i * threads_per_cpu;
|
|
+ for (j = 0; j < threads_per_cpu; j++, thread_num++) {
|
|
online = cpu_online(thread_num) ? '*' : ' ';
|
|
printf("%4d%c ", thread_num, online);
|
|
}
|
|
printf("\n");
|
|
+ core++;
|
|
}
|
|
return 0;
|
|
}
|