import UBI runc-1.1.12-6.module+el8.10.0+22722+0028f543
This commit is contained in:
parent
c2cd91596e
commit
984afe0af6
508
SOURCES/0001-1.1-Bump-runtime-spec-to-latest-git-HEAD.patch
Normal file
508
SOURCES/0001-1.1-Bump-runtime-spec-to-latest-git-HEAD.patch
Normal file
@ -0,0 +1,508 @@
|
||||
From 50f50245235097b0c87b31e97b86fd11685232a3 Mon Sep 17 00:00:00 2001
|
||||
From: Kir Kolyshkin <kolyshkin@gmail.com>
|
||||
Date: Thu, 16 Jan 2025 15:40:28 -0800
|
||||
Subject: [PATCH 1/2] [1.1] Bump runtime-spec to latest git HEAD
|
||||
|
||||
This is to include
|
||||
- https://github.com/opencontainers/runtime-spec/pull/1261
|
||||
- https://github.com/opencontainers/runtime-spec/pull/1253
|
||||
|
||||
Signed-off-by: Kir Kolyshkin <kolyshkin@gmail.com>
|
||||
---
|
||||
go.mod | 2 +-
|
||||
go.sum | 4 +-
|
||||
.../runtime-spec/specs-go/config.go | 239 ++++++++++++++++--
|
||||
.../runtime-spec/specs-go/version.go | 6 +-
|
||||
vendor/modules.txt | 2 +-
|
||||
5 files changed, 225 insertions(+), 28 deletions(-)
|
||||
|
||||
diff --git a/go.mod b/go.mod
|
||||
index f51b6432..87c8d4b4 100644
|
||||
--- a/go.mod
|
||||
+++ b/go.mod
|
||||
@@ -12,7 +12,7 @@ require (
|
||||
github.com/godbus/dbus/v5 v5.0.6
|
||||
github.com/moby/sys/mountinfo v0.5.0
|
||||
github.com/mrunalp/fileutils v0.5.1
|
||||
- github.com/opencontainers/runtime-spec v1.0.3-0.20210326190908-1c3f411f0417
|
||||
+ github.com/opencontainers/runtime-spec v1.2.1-0.20240625190033-701738418b95
|
||||
github.com/opencontainers/selinux v1.10.0
|
||||
github.com/seccomp/libseccomp-golang v0.9.2-0.20220502022130-f33da4d89646
|
||||
github.com/sirupsen/logrus v1.8.1
|
||||
diff --git a/go.sum b/go.sum
|
||||
index ecabd398..9d3bedc0 100644
|
||||
--- a/go.sum
|
||||
+++ b/go.sum
|
||||
@@ -33,8 +33,8 @@ github.com/moby/sys/mountinfo v0.5.0 h1:2Ks8/r6lopsxWi9m58nlwjaeSzUX9iiL1vj5qB/9
|
||||
github.com/moby/sys/mountinfo v0.5.0/go.mod h1:3bMD3Rg+zkqx8MRYPi7Pyb0Ie97QEBmdxbhnCLlSvSU=
|
||||
github.com/mrunalp/fileutils v0.5.1 h1:F+S7ZlNKnrwHfSwdlgNSkKo67ReVf8o9fel6C3dkm/Q=
|
||||
github.com/mrunalp/fileutils v0.5.1/go.mod h1:M1WthSahJixYnrXQl/DFQuteStB1weuxD2QJNHXfbSQ=
|
||||
-github.com/opencontainers/runtime-spec v1.0.3-0.20210326190908-1c3f411f0417 h1:3snG66yBm59tKhhSPQrQ/0bCrv1LQbKt40LnUPiUxdc=
|
||||
-github.com/opencontainers/runtime-spec v1.0.3-0.20210326190908-1c3f411f0417/go.mod h1:jwyrGlmzljRJv/Fgzds9SsS/C5hL+LL3ko9hs6T5lQ0=
|
||||
+github.com/opencontainers/runtime-spec v1.2.1-0.20240625190033-701738418b95 h1:Ghl8Z3l+yPQUDSxAp7Kg7fJLRNNXjOsR6ooDcca7PjU=
|
||||
+github.com/opencontainers/runtime-spec v1.2.1-0.20240625190033-701738418b95/go.mod h1:jwyrGlmzljRJv/Fgzds9SsS/C5hL+LL3ko9hs6T5lQ0=
|
||||
github.com/opencontainers/selinux v1.10.0 h1:rAiKF8hTcgLI3w0DHm6i0ylVVcOrlgR1kK99DRLDhyU=
|
||||
github.com/opencontainers/selinux v1.10.0/go.mod h1:2i0OySw99QjzBBQByd1Gr9gSjvuho1lHsJxIJ3gGbJI=
|
||||
github.com/pmezard/go-difflib v1.0.0 h1:4DBwDE0NGyQoBHbLQYPwSUPoCMWR5BEzIk/f1lZbAQM=
|
||||
diff --git a/vendor/github.com/opencontainers/runtime-spec/specs-go/config.go b/vendor/github.com/opencontainers/runtime-spec/specs-go/config.go
|
||||
index 6a7a91e5..671f0d01 100644
|
||||
--- a/vendor/github.com/opencontainers/runtime-spec/specs-go/config.go
|
||||
+++ b/vendor/github.com/opencontainers/runtime-spec/specs-go/config.go
|
||||
@@ -12,10 +12,12 @@ type Spec struct {
|
||||
Root *Root `json:"root,omitempty"`
|
||||
// Hostname configures the container's hostname.
|
||||
Hostname string `json:"hostname,omitempty"`
|
||||
+ // Domainname configures the container's domainname.
|
||||
+ Domainname string `json:"domainname,omitempty"`
|
||||
// Mounts configures additional mounts (on top of Root).
|
||||
Mounts []Mount `json:"mounts,omitempty"`
|
||||
// Hooks configures callbacks for container lifecycle events.
|
||||
- Hooks *Hooks `json:"hooks,omitempty" platform:"linux,solaris"`
|
||||
+ Hooks *Hooks `json:"hooks,omitempty" platform:"linux,solaris,zos"`
|
||||
// Annotations contains arbitrary metadata for the container.
|
||||
Annotations map[string]string `json:"annotations,omitempty"`
|
||||
|
||||
@@ -27,6 +29,36 @@ type Spec struct {
|
||||
Windows *Windows `json:"windows,omitempty" platform:"windows"`
|
||||
// VM specifies configuration for virtual-machine-based containers.
|
||||
VM *VM `json:"vm,omitempty" platform:"vm"`
|
||||
+ // ZOS is platform-specific configuration for z/OS based containers.
|
||||
+ ZOS *ZOS `json:"zos,omitempty" platform:"zos"`
|
||||
+}
|
||||
+
|
||||
+// Scheduler represents the scheduling attributes for a process. It is based on
|
||||
+// the Linux sched_setattr(2) syscall.
|
||||
+type Scheduler struct {
|
||||
+ // Policy represents the scheduling policy (e.g., SCHED_FIFO, SCHED_RR, SCHED_OTHER).
|
||||
+ Policy LinuxSchedulerPolicy `json:"policy"`
|
||||
+
|
||||
+ // Nice is the nice value for the process, which affects its priority.
|
||||
+ Nice int32 `json:"nice,omitempty"`
|
||||
+
|
||||
+ // Priority represents the static priority of the process.
|
||||
+ Priority int32 `json:"priority,omitempty"`
|
||||
+
|
||||
+ // Flags is an array of scheduling flags.
|
||||
+ Flags []LinuxSchedulerFlag `json:"flags,omitempty"`
|
||||
+
|
||||
+ // The following ones are used by the DEADLINE scheduler.
|
||||
+
|
||||
+ // Runtime is the amount of time in nanoseconds during which the process
|
||||
+ // is allowed to run in a given period.
|
||||
+ Runtime uint64 `json:"runtime,omitempty"`
|
||||
+
|
||||
+ // Deadline is the absolute deadline for the process to complete its execution.
|
||||
+ Deadline uint64 `json:"deadline,omitempty"`
|
||||
+
|
||||
+ // Period is the length of the period in nanoseconds used for determining the process runtime.
|
||||
+ Period uint64 `json:"period,omitempty"`
|
||||
}
|
||||
|
||||
// Process contains information to start a specific application inside the container.
|
||||
@@ -49,15 +81,21 @@ type Process struct {
|
||||
// Capabilities are Linux capabilities that are kept for the process.
|
||||
Capabilities *LinuxCapabilities `json:"capabilities,omitempty" platform:"linux"`
|
||||
// Rlimits specifies rlimit options to apply to the process.
|
||||
- Rlimits []POSIXRlimit `json:"rlimits,omitempty" platform:"linux,solaris"`
|
||||
+ Rlimits []POSIXRlimit `json:"rlimits,omitempty" platform:"linux,solaris,zos"`
|
||||
// NoNewPrivileges controls whether additional privileges could be gained by processes in the container.
|
||||
NoNewPrivileges bool `json:"noNewPrivileges,omitempty" platform:"linux"`
|
||||
// ApparmorProfile specifies the apparmor profile for the container.
|
||||
ApparmorProfile string `json:"apparmorProfile,omitempty" platform:"linux"`
|
||||
// Specify an oom_score_adj for the container.
|
||||
OOMScoreAdj *int `json:"oomScoreAdj,omitempty" platform:"linux"`
|
||||
+ // Scheduler specifies the scheduling attributes for a process
|
||||
+ Scheduler *Scheduler `json:"scheduler,omitempty" platform:"linux"`
|
||||
// SelinuxLabel specifies the selinux context that the container process is run as.
|
||||
SelinuxLabel string `json:"selinuxLabel,omitempty" platform:"linux"`
|
||||
+ // IOPriority contains the I/O priority settings for the cgroup.
|
||||
+ IOPriority *LinuxIOPriority `json:"ioPriority,omitempty" platform:"linux"`
|
||||
+ // ExecCPUAffinity specifies CPU affinity for exec processes.
|
||||
+ ExecCPUAffinity *CPUAffinity `json:"execCPUAffinity,omitempty" platform:"linux"`
|
||||
}
|
||||
|
||||
// LinuxCapabilities specifies the list of allowed capabilities that are kept for a process.
|
||||
@@ -75,6 +113,28 @@ type LinuxCapabilities struct {
|
||||
Ambient []string `json:"ambient,omitempty" platform:"linux"`
|
||||
}
|
||||
|
||||
+// LinuxIOPriority represents I/O priority settings for the container's processes within the process group.
|
||||
+type LinuxIOPriority struct {
|
||||
+ Class IOPriorityClass `json:"class"`
|
||||
+ Priority int `json:"priority"`
|
||||
+}
|
||||
+
|
||||
+// IOPriorityClass represents an I/O scheduling class.
|
||||
+type IOPriorityClass string
|
||||
+
|
||||
+// Possible values for IOPriorityClass.
|
||||
+const (
|
||||
+ IOPRIO_CLASS_RT IOPriorityClass = "IOPRIO_CLASS_RT"
|
||||
+ IOPRIO_CLASS_BE IOPriorityClass = "IOPRIO_CLASS_BE"
|
||||
+ IOPRIO_CLASS_IDLE IOPriorityClass = "IOPRIO_CLASS_IDLE"
|
||||
+)
|
||||
+
|
||||
+// CPUAffinity specifies process' CPU affinity.
|
||||
+type CPUAffinity struct {
|
||||
+ Initial string `json:"initial,omitempty"`
|
||||
+ Final string `json:"final,omitempty"`
|
||||
+}
|
||||
+
|
||||
// Box specifies dimensions of a rectangle. Used for specifying the size of a console.
|
||||
type Box struct {
|
||||
// Height is the vertical dimension of a box.
|
||||
@@ -86,11 +146,11 @@ type Box struct {
|
||||
// User specifies specific user (and group) information for the container process.
|
||||
type User struct {
|
||||
// UID is the user id.
|
||||
- UID uint32 `json:"uid" platform:"linux,solaris"`
|
||||
+ UID uint32 `json:"uid" platform:"linux,solaris,zos"`
|
||||
// GID is the group id.
|
||||
- GID uint32 `json:"gid" platform:"linux,solaris"`
|
||||
+ GID uint32 `json:"gid" platform:"linux,solaris,zos"`
|
||||
// Umask is the umask for the init process.
|
||||
- Umask *uint32 `json:"umask,omitempty" platform:"linux,solaris"`
|
||||
+ Umask *uint32 `json:"umask,omitempty" platform:"linux,solaris,zos"`
|
||||
// AdditionalGids are additional group ids set for the container's process.
|
||||
AdditionalGids []uint32 `json:"additionalGids,omitempty" platform:"linux,solaris"`
|
||||
// Username is the user name.
|
||||
@@ -110,11 +170,16 @@ type Mount struct {
|
||||
// Destination is the absolute path where the mount will be placed in the container.
|
||||
Destination string `json:"destination"`
|
||||
// Type specifies the mount kind.
|
||||
- Type string `json:"type,omitempty" platform:"linux,solaris"`
|
||||
+ Type string `json:"type,omitempty" platform:"linux,solaris,zos"`
|
||||
// Source specifies the source path of the mount.
|
||||
Source string `json:"source,omitempty"`
|
||||
// Options are fstab style mount options.
|
||||
Options []string `json:"options,omitempty"`
|
||||
+
|
||||
+ // UID/GID mappings used for changing file owners w/o calling chown, fs should support it.
|
||||
+ // Every mount point could have its own mapping.
|
||||
+ UIDMappings []LinuxIDMapping `json:"uidMappings,omitempty" platform:"linux"`
|
||||
+ GIDMappings []LinuxIDMapping `json:"gidMappings,omitempty" platform:"linux"`
|
||||
}
|
||||
|
||||
// Hook specifies a command that is run at a particular event in the lifecycle of a container
|
||||
@@ -130,6 +195,10 @@ type Hook struct {
|
||||
type Hooks struct {
|
||||
// Prestart is Deprecated. Prestart is a list of hooks to be run before the container process is executed.
|
||||
// It is called in the Runtime Namespace
|
||||
+ //
|
||||
+ // Deprecated: use [Hooks.CreateRuntime], [Hooks.CreateContainer], and
|
||||
+ // [Hooks.StartContainer] instead, which allow more granular hook control
|
||||
+ // during the create and start phase.
|
||||
Prestart []Hook `json:"prestart,omitempty"`
|
||||
// CreateRuntime is a list of hooks to be run after the container has been created but before pivot_root or any equivalent operation has been called
|
||||
// It is called in the Runtime Namespace
|
||||
@@ -178,10 +247,12 @@ type Linux struct {
|
||||
// MountLabel specifies the selinux context for the mounts in the container.
|
||||
MountLabel string `json:"mountLabel,omitempty"`
|
||||
// IntelRdt contains Intel Resource Director Technology (RDT) information for
|
||||
- // handling resource constraints (e.g., L3 cache, memory bandwidth) for the container
|
||||
+ // handling resource constraints and monitoring metrics (e.g., L3 cache, memory bandwidth) for the container
|
||||
IntelRdt *LinuxIntelRdt `json:"intelRdt,omitempty"`
|
||||
// Personality contains configuration for the Linux personality syscall
|
||||
Personality *LinuxPersonality `json:"personality,omitempty"`
|
||||
+ // TimeOffsets specifies the offset for supporting time namespaces.
|
||||
+ TimeOffsets map[string]LinuxTimeOffset `json:"timeOffsets,omitempty"`
|
||||
}
|
||||
|
||||
// LinuxNamespace is the configuration for a Linux namespace
|
||||
@@ -211,6 +282,8 @@ const (
|
||||
UserNamespace LinuxNamespaceType = "user"
|
||||
// CgroupNamespace for isolating cgroup hierarchies
|
||||
CgroupNamespace LinuxNamespaceType = "cgroup"
|
||||
+ // TimeNamespace for isolating the clocks
|
||||
+ TimeNamespace LinuxNamespaceType = "time"
|
||||
)
|
||||
|
||||
// LinuxIDMapping specifies UID/GID mappings
|
||||
@@ -223,6 +296,14 @@ type LinuxIDMapping struct {
|
||||
Size uint32 `json:"size"`
|
||||
}
|
||||
|
||||
+// LinuxTimeOffset specifies the offset for Time Namespace
|
||||
+type LinuxTimeOffset struct {
|
||||
+ // Secs is the offset of clock (in secs) in the container
|
||||
+ Secs int64 `json:"secs,omitempty"`
|
||||
+ // Nanosecs is the additional offset for Secs (in nanosecs)
|
||||
+ Nanosecs uint32 `json:"nanosecs,omitempty"`
|
||||
+}
|
||||
+
|
||||
// POSIXRlimit type and restrictions
|
||||
type POSIXRlimit struct {
|
||||
// Type of the rlimit to set
|
||||
@@ -233,12 +314,13 @@ type POSIXRlimit struct {
|
||||
Soft uint64 `json:"soft"`
|
||||
}
|
||||
|
||||
-// LinuxHugepageLimit structure corresponds to limiting kernel hugepages
|
||||
+// LinuxHugepageLimit structure corresponds to limiting kernel hugepages.
|
||||
+// Default to reservation limits if supported. Otherwise fallback to page fault limits.
|
||||
type LinuxHugepageLimit struct {
|
||||
- // Pagesize is the hugepage size
|
||||
- // Format: "<size><unit-prefix>B' (e.g. 64KB, 2MB, 1GB, etc.)
|
||||
+ // Pagesize is the hugepage size.
|
||||
+ // Format: "<size><unit-prefix>B" (e.g. 64KB, 2MB, 1GB, etc.).
|
||||
Pagesize string `json:"pageSize"`
|
||||
- // Limit is the limit of "hugepagesize" hugetlb usage
|
||||
+ // Limit is the limit of "hugepagesize" hugetlb reservations (if supported) or usage.
|
||||
Limit uint64 `json:"limit"`
|
||||
}
|
||||
|
||||
@@ -250,8 +332,8 @@ type LinuxInterfacePriority struct {
|
||||
Priority uint32 `json:"priority"`
|
||||
}
|
||||
|
||||
-// linuxBlockIODevice holds major:minor format supported in blkio cgroup
|
||||
-type linuxBlockIODevice struct {
|
||||
+// LinuxBlockIODevice holds major:minor format supported in blkio cgroup
|
||||
+type LinuxBlockIODevice struct {
|
||||
// Major is the device's major number.
|
||||
Major int64 `json:"major"`
|
||||
// Minor is the device's minor number.
|
||||
@@ -260,7 +342,7 @@ type linuxBlockIODevice struct {
|
||||
|
||||
// LinuxWeightDevice struct holds a `major:minor weight` pair for weightDevice
|
||||
type LinuxWeightDevice struct {
|
||||
- linuxBlockIODevice
|
||||
+ LinuxBlockIODevice
|
||||
// Weight is the bandwidth rate for the device.
|
||||
Weight *uint16 `json:"weight,omitempty"`
|
||||
// LeafWeight is the bandwidth rate for the device while competing with the cgroup's child cgroups, CFQ scheduler only
|
||||
@@ -269,7 +351,7 @@ type LinuxWeightDevice struct {
|
||||
|
||||
// LinuxThrottleDevice struct holds a `major:minor rate_per_second` pair
|
||||
type LinuxThrottleDevice struct {
|
||||
- linuxBlockIODevice
|
||||
+ LinuxBlockIODevice
|
||||
// Rate is the IO rate limit per cgroup per device
|
||||
Rate uint64 `json:"rate"`
|
||||
}
|
||||
@@ -301,6 +383,12 @@ type LinuxMemory struct {
|
||||
// Total memory limit (memory + swap).
|
||||
Swap *int64 `json:"swap,omitempty"`
|
||||
// Kernel memory limit (in bytes).
|
||||
+ //
|
||||
+ // Deprecated: kernel-memory limits are not supported in cgroups v2, and
|
||||
+ // were obsoleted in [kernel v5.4]. This field should no longer be used,
|
||||
+ // as it may be ignored by runtimes.
|
||||
+ //
|
||||
+ // [kernel v5.4]: https://github.com/torvalds/linux/commit/0158115f702b0ba208ab0
|
||||
Kernel *int64 `json:"kernel,omitempty"`
|
||||
// Kernel memory limit for tcp (in bytes)
|
||||
KernelTCP *int64 `json:"kernelTCP,omitempty"`
|
||||
@@ -310,6 +398,10 @@ type LinuxMemory struct {
|
||||
DisableOOMKiller *bool `json:"disableOOMKiller,omitempty"`
|
||||
// Enables hierarchical memory accounting
|
||||
UseHierarchy *bool `json:"useHierarchy,omitempty"`
|
||||
+ // CheckBeforeUpdate enables checking if a new memory limit is lower
|
||||
+ // than the current usage during update, and if so, rejecting the new
|
||||
+ // limit.
|
||||
+ CheckBeforeUpdate *bool `json:"checkBeforeUpdate,omitempty"`
|
||||
}
|
||||
|
||||
// LinuxCPU for Linux cgroup 'cpu' resource management
|
||||
@@ -318,6 +410,9 @@ type LinuxCPU struct {
|
||||
Shares *uint64 `json:"shares,omitempty"`
|
||||
// CPU hardcap limit (in usecs). Allowed cpu time in a given period.
|
||||
Quota *int64 `json:"quota,omitempty"`
|
||||
+ // CPU hardcap burst limit (in usecs). Allowed accumulated cpu time additionally for burst in a
|
||||
+ // given period.
|
||||
+ Burst *uint64 `json:"burst,omitempty"`
|
||||
// CPU period to be used for hardcapping (in usecs).
|
||||
Period *uint64 `json:"period,omitempty"`
|
||||
// How much time realtime scheduling may use (in usecs).
|
||||
@@ -328,6 +423,8 @@ type LinuxCPU struct {
|
||||
Cpus string `json:"cpus,omitempty"`
|
||||
// List of memory nodes in the cpuset. Default is to use any available memory node.
|
||||
Mems string `json:"mems,omitempty"`
|
||||
+ // cgroups are configured with minimum weight, 0: default behavior, 1: SCHED_IDLE.
|
||||
+ Idle *int64 `json:"idle,omitempty"`
|
||||
}
|
||||
|
||||
// LinuxPids for Linux cgroup 'pids' resource management (Linux 4.3)
|
||||
@@ -364,7 +461,7 @@ type LinuxResources struct {
|
||||
Pids *LinuxPids `json:"pids,omitempty"`
|
||||
// BlockIO restriction configuration
|
||||
BlockIO *LinuxBlockIO `json:"blockIO,omitempty"`
|
||||
- // Hugetlb limit (in bytes)
|
||||
+ // Hugetlb limits (in bytes). Default to reservation limits if supported.
|
||||
HugepageLimits []LinuxHugepageLimit `json:"hugepageLimits,omitempty"`
|
||||
// Network restriction configuration
|
||||
Network *LinuxNetwork `json:"network,omitempty"`
|
||||
@@ -522,11 +619,21 @@ type WindowsMemoryResources struct {
|
||||
|
||||
// WindowsCPUResources contains CPU resource management settings.
|
||||
type WindowsCPUResources struct {
|
||||
- // Number of CPUs available to the container.
|
||||
+ // Count is the number of CPUs available to the container. It represents the
|
||||
+ // fraction of the configured processor `count` in a container in relation
|
||||
+ // to the processors available in the host. The fraction ultimately
|
||||
+ // determines the portion of processor cycles that the threads in a
|
||||
+ // container can use during each scheduling interval, as the number of
|
||||
+ // cycles per 10,000 cycles.
|
||||
Count *uint64 `json:"count,omitempty"`
|
||||
- // CPU shares (relative weight to other containers with cpu shares).
|
||||
+ // Shares limits the share of processor time given to the container relative
|
||||
+ // to other workloads on the processor. The processor `shares` (`weight` at
|
||||
+ // the platform level) is a value between 0 and 10000.
|
||||
Shares *uint16 `json:"shares,omitempty"`
|
||||
- // Specifies the portion of processor cycles that this container can use as a percentage times 100.
|
||||
+ // Maximum determines the portion of processor cycles that the threads in a
|
||||
+ // container can use during each scheduling interval, as the number of
|
||||
+ // cycles per 10,000 cycles. Set processor `maximum` to a percentage times
|
||||
+ // 100.
|
||||
Maximum *uint16 `json:"maximum,omitempty"`
|
||||
}
|
||||
|
||||
@@ -613,6 +720,23 @@ type Arch string
|
||||
// LinuxSeccompFlag is a flag to pass to seccomp(2).
|
||||
type LinuxSeccompFlag string
|
||||
|
||||
+const (
|
||||
+ // LinuxSeccompFlagLog is a seccomp flag to request all returned
|
||||
+ // actions except SECCOMP_RET_ALLOW to be logged. An administrator may
|
||||
+ // override this filter flag by preventing specific actions from being
|
||||
+ // logged via the /proc/sys/kernel/seccomp/actions_logged file. (since
|
||||
+ // Linux 4.14)
|
||||
+ LinuxSeccompFlagLog LinuxSeccompFlag = "SECCOMP_FILTER_FLAG_LOG"
|
||||
+
|
||||
+ // LinuxSeccompFlagSpecAllow can be used to disable Speculative Store
|
||||
+ // Bypass mitigation. (since Linux 4.17)
|
||||
+ LinuxSeccompFlagSpecAllow LinuxSeccompFlag = "SECCOMP_FILTER_FLAG_SPEC_ALLOW"
|
||||
+
|
||||
+ // LinuxSeccompFlagWaitKillableRecv can be used to switch to the wait
|
||||
+ // killable semantics. (since Linux 5.19)
|
||||
+ LinuxSeccompFlagWaitKillableRecv LinuxSeccompFlag = "SECCOMP_FILTER_FLAG_WAIT_KILLABLE_RECV"
|
||||
+)
|
||||
+
|
||||
// Additional architectures permitted to be used for system calls
|
||||
// By default only the native architecture of the kernel is permitted
|
||||
const (
|
||||
@@ -683,8 +807,9 @@ type LinuxSyscall struct {
|
||||
Args []LinuxSeccompArg `json:"args,omitempty"`
|
||||
}
|
||||
|
||||
-// LinuxIntelRdt has container runtime resource constraints for Intel RDT
|
||||
-// CAT and MBA features which introduced in Linux 4.10 and 4.12 kernel
|
||||
+// LinuxIntelRdt has container runtime resource constraints for Intel RDT CAT and MBA
|
||||
+// features and flags enabling Intel RDT CMT and MBM features.
|
||||
+// Intel RDT features are available in Linux 4.14 and newer kernel versions.
|
||||
type LinuxIntelRdt struct {
|
||||
// The identity for RDT Class of Service
|
||||
ClosID string `json:"closID,omitempty"`
|
||||
@@ -697,4 +822,76 @@ type LinuxIntelRdt struct {
|
||||
// The unit of memory bandwidth is specified in "percentages" by
|
||||
// default, and in "MBps" if MBA Software Controller is enabled.
|
||||
MemBwSchema string `json:"memBwSchema,omitempty"`
|
||||
+
|
||||
+ // EnableCMT is the flag to indicate if the Intel RDT CMT is enabled. CMT (Cache Monitoring Technology) supports monitoring of
|
||||
+ // the last-level cache (LLC) occupancy for the container.
|
||||
+ EnableCMT bool `json:"enableCMT,omitempty"`
|
||||
+
|
||||
+ // EnableMBM is the flag to indicate if the Intel RDT MBM is enabled. MBM (Memory Bandwidth Monitoring) supports monitoring of
|
||||
+ // total and local memory bandwidth for the container.
|
||||
+ EnableMBM bool `json:"enableMBM,omitempty"`
|
||||
+}
|
||||
+
|
||||
+// ZOS contains platform-specific configuration for z/OS based containers.
|
||||
+type ZOS struct {
|
||||
+ // Devices are a list of device nodes that are created for the container
|
||||
+ Devices []ZOSDevice `json:"devices,omitempty"`
|
||||
+}
|
||||
+
|
||||
+// ZOSDevice represents the mknod information for a z/OS special device file
|
||||
+type ZOSDevice struct {
|
||||
+ // Path to the device.
|
||||
+ Path string `json:"path"`
|
||||
+ // Device type, block, char, etc.
|
||||
+ Type string `json:"type"`
|
||||
+ // Major is the device's major number.
|
||||
+ Major int64 `json:"major"`
|
||||
+ // Minor is the device's minor number.
|
||||
+ Minor int64 `json:"minor"`
|
||||
+ // FileMode permission bits for the device.
|
||||
+ FileMode *os.FileMode `json:"fileMode,omitempty"`
|
||||
+ // UID of the device.
|
||||
+ UID *uint32 `json:"uid,omitempty"`
|
||||
+ // GID of the device.
|
||||
+ GID *uint32 `json:"gid,omitempty"`
|
||||
}
|
||||
+
|
||||
+// LinuxSchedulerPolicy represents different scheduling policies used with the Linux Scheduler
|
||||
+type LinuxSchedulerPolicy string
|
||||
+
|
||||
+const (
|
||||
+ // SchedOther is the default scheduling policy
|
||||
+ SchedOther LinuxSchedulerPolicy = "SCHED_OTHER"
|
||||
+ // SchedFIFO is the First-In-First-Out scheduling policy
|
||||
+ SchedFIFO LinuxSchedulerPolicy = "SCHED_FIFO"
|
||||
+ // SchedRR is the Round-Robin scheduling policy
|
||||
+ SchedRR LinuxSchedulerPolicy = "SCHED_RR"
|
||||
+ // SchedBatch is the Batch scheduling policy
|
||||
+ SchedBatch LinuxSchedulerPolicy = "SCHED_BATCH"
|
||||
+ // SchedISO is the Isolation scheduling policy
|
||||
+ SchedISO LinuxSchedulerPolicy = "SCHED_ISO"
|
||||
+ // SchedIdle is the Idle scheduling policy
|
||||
+ SchedIdle LinuxSchedulerPolicy = "SCHED_IDLE"
|
||||
+ // SchedDeadline is the Deadline scheduling policy
|
||||
+ SchedDeadline LinuxSchedulerPolicy = "SCHED_DEADLINE"
|
||||
+)
|
||||
+
|
||||
+// LinuxSchedulerFlag represents the flags used by the Linux Scheduler.
|
||||
+type LinuxSchedulerFlag string
|
||||
+
|
||||
+const (
|
||||
+ // SchedFlagResetOnFork represents the reset on fork scheduling flag
|
||||
+ SchedFlagResetOnFork LinuxSchedulerFlag = "SCHED_FLAG_RESET_ON_FORK"
|
||||
+ // SchedFlagReclaim represents the reclaim scheduling flag
|
||||
+ SchedFlagReclaim LinuxSchedulerFlag = "SCHED_FLAG_RECLAIM"
|
||||
+ // SchedFlagDLOverrun represents the deadline overrun scheduling flag
|
||||
+ SchedFlagDLOverrun LinuxSchedulerFlag = "SCHED_FLAG_DL_OVERRUN"
|
||||
+ // SchedFlagKeepPolicy represents the keep policy scheduling flag
|
||||
+ SchedFlagKeepPolicy LinuxSchedulerFlag = "SCHED_FLAG_KEEP_POLICY"
|
||||
+ // SchedFlagKeepParams represents the keep parameters scheduling flag
|
||||
+ SchedFlagKeepParams LinuxSchedulerFlag = "SCHED_FLAG_KEEP_PARAMS"
|
||||
+ // SchedFlagUtilClampMin represents the utilization clamp minimum scheduling flag
|
||||
+ SchedFlagUtilClampMin LinuxSchedulerFlag = "SCHED_FLAG_UTIL_CLAMP_MIN"
|
||||
+ // SchedFlagUtilClampMax represents the utilization clamp maximum scheduling flag
|
||||
+ SchedFlagUtilClampMax LinuxSchedulerFlag = "SCHED_FLAG_UTIL_CLAMP_MAX"
|
||||
+)
|
||||
diff --git a/vendor/github.com/opencontainers/runtime-spec/specs-go/version.go b/vendor/github.com/opencontainers/runtime-spec/specs-go/version.go
|
||||
index 596af0c2..f6c15f6c 100644
|
||||
--- a/vendor/github.com/opencontainers/runtime-spec/specs-go/version.go
|
||||
+++ b/vendor/github.com/opencontainers/runtime-spec/specs-go/version.go
|
||||
@@ -6,12 +6,12 @@ const (
|
||||
// VersionMajor is for an API incompatible changes
|
||||
VersionMajor = 1
|
||||
// VersionMinor is for functionality in a backwards-compatible manner
|
||||
- VersionMinor = 0
|
||||
+ VersionMinor = 2
|
||||
// VersionPatch is for backwards-compatible bug fixes
|
||||
- VersionPatch = 2
|
||||
+ VersionPatch = 0
|
||||
|
||||
// VersionDev indicates development branch. Releases will be empty string.
|
||||
- VersionDev = "-dev"
|
||||
+ VersionDev = "+dev"
|
||||
)
|
||||
|
||||
// Version is the specification version that the package types support.
|
||||
diff --git a/vendor/modules.txt b/vendor/modules.txt
|
||||
index a5537dfe..40089cd4 100644
|
||||
--- a/vendor/modules.txt
|
||||
+++ b/vendor/modules.txt
|
||||
@@ -35,7 +35,7 @@ github.com/moby/sys/mountinfo
|
||||
# github.com/mrunalp/fileutils v0.5.1
|
||||
## explicit; go 1.13
|
||||
github.com/mrunalp/fileutils
|
||||
-# github.com/opencontainers/runtime-spec v1.0.3-0.20210326190908-1c3f411f0417
|
||||
+# github.com/opencontainers/runtime-spec v1.2.1-0.20240625190033-701738418b95
|
||||
## explicit
|
||||
github.com/opencontainers/runtime-spec/specs-go
|
||||
# github.com/opencontainers/selinux v1.10.0
|
||||
--
|
||||
2.47.1
|
||||
|
475
SOURCES/0002-1.1-runc-exec-implement-CPU-affinity.patch
Normal file
475
SOURCES/0002-1.1-runc-exec-implement-CPU-affinity.patch
Normal file
@ -0,0 +1,475 @@
|
||||
From 1af672a2635628ca24ce3b5ed3344d316548f1ca Mon Sep 17 00:00:00 2001
|
||||
From: Kir Kolyshkin <kolyshkin@gmail.com>
|
||||
Date: Mon, 21 Oct 2024 15:50:38 -0700
|
||||
Subject: [PATCH 2/2] [1.1] runc exec: implement CPU affinity
|
||||
|
||||
As per
|
||||
- https://github.com/opencontainers/runtime-spec/pull/1253
|
||||
- https://github.com/opencontainers/runtime-spec/pull/1261
|
||||
|
||||
CPU affinity can be set in two ways:
|
||||
1. When creating/starting a container, in config.json's
|
||||
Process.ExecCPUAffinity, which is then applied to all execs.
|
||||
2. When running an exec, in process.json's CPUAffinity, which
|
||||
is applied to a given exec and overrides the value from (1).
|
||||
|
||||
Add some basic tests.
|
||||
|
||||
Note that older kernels (RHEL8, Ubuntu 20.04) change CPU affinity of a
|
||||
process to that of a container's cgroup, as soon as it is moved to that
|
||||
cgroup, while newer kernels (Ubuntu 24.04, Fedora 41) don't do that.
|
||||
|
||||
Because of the above,
|
||||
- it's impossible to really test initial CPU affinity without adding
|
||||
debug logging to libcontainer/nsenter;
|
||||
- for older kernels, there can be a brief moment when exec's affinity
|
||||
is different than either initial or final affinity being set;
|
||||
- exec's final CPU affinity, if not specified, can be different
|
||||
depending on the kernel, therefore we don't test it.
|
||||
|
||||
Signed-off-by: Kir Kolyshkin <kolyshkin@gmail.com>
|
||||
---
|
||||
libcontainer/configs/config.go | 73 ++++++++++++++++++++
|
||||
libcontainer/container_linux.go | 4 ++
|
||||
libcontainer/init_linux.go | 1 +
|
||||
libcontainer/nsenter/nsexec.c | 36 +++++++++-
|
||||
libcontainer/process.go | 2 +
|
||||
libcontainer/process_linux.go | 51 +++++++++++++-
|
||||
libcontainer/specconv/spec_linux.go | 5 ++
|
||||
tests/integration/cpu_affinity.bats | 101 ++++++++++++++++++++++++++++
|
||||
utils_linux.go | 6 ++
|
||||
9 files changed, 275 insertions(+), 4 deletions(-)
|
||||
create mode 100644 tests/integration/cpu_affinity.bats
|
||||
|
||||
diff --git a/libcontainer/configs/config.go b/libcontainer/configs/config.go
|
||||
index 6ebf5ec7..997f2724 100644
|
||||
--- a/libcontainer/configs/config.go
|
||||
+++ b/libcontainer/configs/config.go
|
||||
@@ -3,11 +3,15 @@ package configs
|
||||
import (
|
||||
"bytes"
|
||||
"encoding/json"
|
||||
+ "errors"
|
||||
"fmt"
|
||||
"os/exec"
|
||||
+ "strconv"
|
||||
+ "strings"
|
||||
"time"
|
||||
|
||||
"github.com/sirupsen/logrus"
|
||||
+ "golang.org/x/sys/unix"
|
||||
|
||||
"github.com/opencontainers/runc/libcontainer/devices"
|
||||
"github.com/opencontainers/runtime-spec/specs-go"
|
||||
@@ -211,6 +215,75 @@ type Config struct {
|
||||
// RootlessCgroups is set when unlikely to have the full access to cgroups.
|
||||
// When RootlessCgroups is set, cgroups errors are ignored.
|
||||
RootlessCgroups bool `json:"rootless_cgroups,omitempty"`
|
||||
+
|
||||
+ // ExecCPUAffinity is CPU affinity for a non-init process to be run in the container.
|
||||
+ ExecCPUAffinity *CPUAffinity `json:"exec_cpu_affinity,omitempty"`
|
||||
+}
|
||||
+
|
||||
+type CPUAffinity struct {
|
||||
+ Initial, Final *unix.CPUSet
|
||||
+}
|
||||
+
|
||||
+func toCPUSet(str string) (*unix.CPUSet, error) {
|
||||
+ if str == "" {
|
||||
+ return nil, nil
|
||||
+ }
|
||||
+ s := new(unix.CPUSet)
|
||||
+ for _, r := range strings.Split(str, ",") {
|
||||
+ // Allow extra spaces around.
|
||||
+ r = strings.TrimSpace(r)
|
||||
+ // Allow empty elements (extra commas).
|
||||
+ if r == "" {
|
||||
+ continue
|
||||
+ }
|
||||
+ if r0, r1, found := strings.Cut(r, "-"); found {
|
||||
+ start, err := strconv.ParseUint(r0, 10, 32)
|
||||
+ if err != nil {
|
||||
+ return nil, err
|
||||
+ }
|
||||
+ end, err := strconv.ParseUint(r1, 10, 32)
|
||||
+ if err != nil {
|
||||
+ return nil, err
|
||||
+ }
|
||||
+ if start > end {
|
||||
+ return nil, errors.New("invalid range: " + r)
|
||||
+ }
|
||||
+ for i := int(start); i <= int(end); i++ {
|
||||
+ s.Set(i)
|
||||
+ }
|
||||
+ } else {
|
||||
+ val, err := strconv.ParseUint(r, 10, 32)
|
||||
+ if err != nil {
|
||||
+ return nil, err
|
||||
+ }
|
||||
+ s.Set(int(val))
|
||||
+ }
|
||||
+ }
|
||||
+
|
||||
+ return s, nil
|
||||
+}
|
||||
+
|
||||
+// ConvertCPUAffinity converts [specs.CPUAffinity] to [CPUAffinity].
|
||||
+func ConvertCPUAffinity(sa *specs.CPUAffinity) (*CPUAffinity, error) {
|
||||
+ if sa == nil {
|
||||
+ return nil, nil
|
||||
+ }
|
||||
+ initial, err := toCPUSet(sa.Initial)
|
||||
+ if err != nil {
|
||||
+ return nil, fmt.Errorf("bad CPUAffinity.Initial: %w", err)
|
||||
+ }
|
||||
+ final, err := toCPUSet(sa.Final)
|
||||
+ if err != nil {
|
||||
+ return nil, fmt.Errorf("bad CPUAffinity.Final: %w", err)
|
||||
+ }
|
||||
+ if initial == nil && final == nil {
|
||||
+ return nil, nil
|
||||
+ }
|
||||
+
|
||||
+ return &CPUAffinity{
|
||||
+ Initial: initial,
|
||||
+ Final: final,
|
||||
+ }, nil
|
||||
}
|
||||
|
||||
type (
|
||||
diff --git a/libcontainer/container_linux.go b/libcontainer/container_linux.go
|
||||
index 40b332f9..68b6a74f 100644
|
||||
--- a/libcontainer/container_linux.go
|
||||
+++ b/libcontainer/container_linux.go
|
||||
@@ -692,6 +692,7 @@ func (c *linuxContainer) newInitConfig(process *Process) *initConfig {
|
||||
AppArmorProfile: c.config.AppArmorProfile,
|
||||
ProcessLabel: c.config.ProcessLabel,
|
||||
Rlimits: c.config.Rlimits,
|
||||
+ CPUAffinity: c.config.ExecCPUAffinity,
|
||||
CreateConsole: process.ConsoleSocket != nil,
|
||||
ConsoleWidth: process.ConsoleWidth,
|
||||
ConsoleHeight: process.ConsoleHeight,
|
||||
@@ -708,6 +709,9 @@ func (c *linuxContainer) newInitConfig(process *Process) *initConfig {
|
||||
if len(process.Rlimits) > 0 {
|
||||
cfg.Rlimits = process.Rlimits
|
||||
}
|
||||
+ if process.CPUAffinity != nil {
|
||||
+ cfg.CPUAffinity = process.CPUAffinity
|
||||
+ }
|
||||
if cgroups.IsCgroup2UnifiedMode() {
|
||||
cfg.Cgroup2Path = c.cgroupManager.Path("")
|
||||
}
|
||||
diff --git a/libcontainer/init_linux.go b/libcontainer/init_linux.go
|
||||
index d9f18139..1f8562ec 100644
|
||||
--- a/libcontainer/init_linux.go
|
||||
+++ b/libcontainer/init_linux.go
|
||||
@@ -70,6 +70,7 @@ type initConfig struct {
|
||||
RootlessCgroups bool `json:"rootless_cgroups,omitempty"`
|
||||
SpecState *specs.State `json:"spec_state,omitempty"`
|
||||
Cgroup2Path string `json:"cgroup2_path,omitempty"`
|
||||
+ CPUAffinity *configs.CPUAffinity `json:"cpu_affinity,omitempty"`
|
||||
}
|
||||
|
||||
type initer interface {
|
||||
diff --git a/libcontainer/nsenter/nsexec.c b/libcontainer/nsenter/nsexec.c
|
||||
index 2d224bab..6f70aa87 100644
|
||||
--- a/libcontainer/nsenter/nsexec.c
|
||||
+++ b/libcontainer/nsenter/nsexec.c
|
||||
@@ -149,13 +149,18 @@ int setns(int fd, int nstype)
|
||||
}
|
||||
#endif
|
||||
|
||||
+bool log_enabled_for(int level)
|
||||
+{
|
||||
+ return (logfd >= 0 && level <= loglevel);
|
||||
+}
|
||||
+
|
||||
static void write_log(int level, const char *format, ...)
|
||||
{
|
||||
char *message = NULL, *stage = NULL, *json = NULL;
|
||||
va_list args;
|
||||
int ret;
|
||||
|
||||
- if (logfd < 0 || level > loglevel)
|
||||
+ if (!log_enabled_for(level))
|
||||
goto out;
|
||||
|
||||
va_start(args, format);
|
||||
@@ -851,6 +856,25 @@ void try_unshare(int flags, const char *msg)
|
||||
bail("failed to unshare %s", msg);
|
||||
}
|
||||
|
||||
+void print_cpu_affinity()
|
||||
+{
|
||||
+ cpu_set_t cpus = { };
|
||||
+ size_t i, mask = 0;
|
||||
+
|
||||
+ if (sched_getaffinity(0, sizeof(cpus), &cpus) < 0) {
|
||||
+ write_log(WARNING, "sched_getaffinity: %m");
|
||||
+ return;
|
||||
+ }
|
||||
+
|
||||
+ /* Do not print the complete mask, we only need a few first CPUs. */
|
||||
+ for (i = 0; i < sizeof(mask) * 8; i++) {
|
||||
+ if (CPU_ISSET(i, &cpus))
|
||||
+ mask |= 1 << i;
|
||||
+ }
|
||||
+
|
||||
+ write_log(DEBUG, "affinity: 0x%zx", mask);
|
||||
+}
|
||||
+
|
||||
void nsexec(void)
|
||||
{
|
||||
int pipenum;
|
||||
@@ -892,6 +916,16 @@ void nsexec(void)
|
||||
|
||||
write_log(DEBUG, "=> nsexec container setup");
|
||||
|
||||
+ /* This is for ../../tests/integration/cpu_affinity.bats test only.
|
||||
+ *
|
||||
+ * Printing this from Go code might be too late as some kernels
|
||||
+ * change the process' CPU affinity to that of container's cpuset
|
||||
+ * as soon as the process is moved into container's cgroup.
|
||||
+ */
|
||||
+ if (log_enabled_for(DEBUG)) {
|
||||
+ print_cpu_affinity();
|
||||
+ }
|
||||
+
|
||||
/* Parse all of the netlink configuration. */
|
||||
nl_parse(pipenum, &config);
|
||||
|
||||
diff --git a/libcontainer/process.go b/libcontainer/process.go
|
||||
index 8a5d340d..99167274 100644
|
||||
--- a/libcontainer/process.go
|
||||
+++ b/libcontainer/process.go
|
||||
@@ -89,6 +89,8 @@ type Process struct {
|
||||
//
|
||||
// For cgroup v2, the only key allowed is "".
|
||||
SubCgroupPaths map[string]string
|
||||
+
|
||||
+ CPUAffinity *configs.CPUAffinity
|
||||
}
|
||||
|
||||
// Wait waits for the process to exit.
|
||||
diff --git a/libcontainer/process_linux.go b/libcontainer/process_linux.go
|
||||
index 0d9ceb9c..3b48ae76 100644
|
||||
--- a/libcontainer/process_linux.go
|
||||
+++ b/libcontainer/process_linux.go
|
||||
@@ -9,6 +9,7 @@ import (
|
||||
"os"
|
||||
"os/exec"
|
||||
"path/filepath"
|
||||
+ "runtime"
|
||||
"strconv"
|
||||
"time"
|
||||
|
||||
@@ -78,12 +79,52 @@ func (p *setnsProcess) signal(sig os.Signal) error {
|
||||
return unix.Kill(p.pid(), s)
|
||||
}
|
||||
|
||||
+// Starts setns process with specified initial CPU affinity.
|
||||
+func (p *setnsProcess) startWithCPUAffinity() error {
|
||||
+ aff := p.config.CPUAffinity
|
||||
+ if aff == nil || aff.Initial == nil {
|
||||
+ return p.cmd.Start()
|
||||
+ }
|
||||
+ errCh := make(chan error)
|
||||
+ defer close(errCh)
|
||||
+
|
||||
+ // Use a goroutine to dedicate an OS thread.
|
||||
+ go func() {
|
||||
+ runtime.LockOSThread()
|
||||
+ // Command inherits the CPU affinity.
|
||||
+ if err := unix.SchedSetaffinity(unix.Gettid(), aff.Initial); err != nil {
|
||||
+ runtime.UnlockOSThread()
|
||||
+ errCh <- fmt.Errorf("error setting initial CPU affinity: %w", err)
|
||||
+ return
|
||||
+ }
|
||||
+
|
||||
+ errCh <- p.cmd.Start()
|
||||
+ // Deliberately omit runtime.UnlockOSThread here.
|
||||
+ // https://pkg.go.dev/runtime#LockOSThread says:
|
||||
+ // "If the calling goroutine exits without unlocking the
|
||||
+ // thread, the thread will be terminated".
|
||||
+ }()
|
||||
+
|
||||
+ return <-errCh
|
||||
+}
|
||||
+
|
||||
+func (p *setnsProcess) setFinalCPUAffinity() error {
|
||||
+ aff := p.config.CPUAffinity
|
||||
+ if aff == nil || aff.Final == nil {
|
||||
+ return nil
|
||||
+ }
|
||||
+ if err := unix.SchedSetaffinity(p.pid(), aff.Final); err != nil {
|
||||
+ return fmt.Errorf("error setting final CPU affinity: %w", err)
|
||||
+ }
|
||||
+ return nil
|
||||
+}
|
||||
+
|
||||
func (p *setnsProcess) start() (retErr error) {
|
||||
defer p.messageSockPair.parent.Close()
|
||||
- // get the "before" value of oom kill count
|
||||
+ // Get the "before" value of oom kill count.
|
||||
oom, _ := p.manager.OOMKillCount()
|
||||
- err := p.cmd.Start()
|
||||
- // close the write-side of the pipes (controlled by child)
|
||||
+ err := p.startWithCPUAffinity()
|
||||
+ // Close the child-side of the pipes (controlled by child).
|
||||
p.messageSockPair.child.Close()
|
||||
p.logFilePair.child.Close()
|
||||
if err != nil {
|
||||
@@ -143,6 +184,10 @@ func (p *setnsProcess) start() (retErr error) {
|
||||
}
|
||||
}
|
||||
}
|
||||
+ // Set final CPU affinity right after the process is moved into container's cgroup.
|
||||
+ if err := p.setFinalCPUAffinity(); err != nil {
|
||||
+ return err
|
||||
+ }
|
||||
if p.intelRdtPath != "" {
|
||||
// if Intel RDT "resource control" filesystem path exists
|
||||
_, err := os.Stat(p.intelRdtPath)
|
||||
diff --git a/libcontainer/specconv/spec_linux.go b/libcontainer/specconv/spec_linux.go
|
||||
index 7dbfb869..b59e0d59 100644
|
||||
--- a/libcontainer/specconv/spec_linux.go
|
||||
+++ b/libcontainer/specconv/spec_linux.go
|
||||
@@ -493,6 +493,11 @@ func CreateLibcontainerConfig(opts *CreateOpts) (*configs.Config, error) {
|
||||
Ambient: spec.Process.Capabilities.Ambient,
|
||||
}
|
||||
}
|
||||
+ config.ExecCPUAffinity, err = configs.ConvertCPUAffinity(spec.Process.ExecCPUAffinity)
|
||||
+ if err != nil {
|
||||
+ return nil, err
|
||||
+ }
|
||||
+
|
||||
}
|
||||
createHooks(spec, config)
|
||||
config.Version = specs.Version
|
||||
diff --git a/tests/integration/cpu_affinity.bats b/tests/integration/cpu_affinity.bats
|
||||
new file mode 100644
|
||||
index 00000000..f6adfa2a
|
||||
--- /dev/null
|
||||
+++ b/tests/integration/cpu_affinity.bats
|
||||
@@ -0,0 +1,101 @@
|
||||
+#!/usr/bin/env bats
|
||||
+# Exec CPU affinity tests. For more details, see:
|
||||
+# - https://github.com/opencontainers/runtime-spec/pull/1253
|
||||
+
|
||||
+load helpers
|
||||
+
|
||||
+function setup() {
|
||||
+ requires smp cgroups_cpuset
|
||||
+ setup_busybox
|
||||
+}
|
||||
+
|
||||
+function teardown() {
|
||||
+ teardown_bundle
|
||||
+}
|
||||
+
|
||||
+function first_cpu() {
|
||||
+ sed 's/[-,].*//g' </sys/devices/system/cpu/online
|
||||
+}
|
||||
+
|
||||
+# Convert list of cpus ("0,1" or "0-1") to mask as printed by nsexec.
|
||||
+# NOTE the range conversion is not proper, merely sufficient for tests here.
|
||||
+function cpus_to_mask() {
|
||||
+ local cpus=$* mask=0
|
||||
+
|
||||
+ cpus=${cpus//,/-} # 1. "," --> "-".
|
||||
+ cpus=${cpus//-/ } # 2. "-" --> " ".
|
||||
+
|
||||
+ for c in $cpus; do
|
||||
+ mask=$((mask | 1 << c))
|
||||
+ done
|
||||
+
|
||||
+ printf "0x%x" $mask
|
||||
+}
|
||||
+
|
||||
+@test "runc exec [CPU affinity, only initial set from process.json]" {
|
||||
+ first="$(first_cpu)"
|
||||
+ second=$((first + 1)) # Hacky; might not work in all environments.
|
||||
+
|
||||
+ runc run -d --console-socket "$CONSOLE_SOCKET" ct1
|
||||
+ [ "$status" -eq 0 ]
|
||||
+
|
||||
+ for cpus in "$second" "$first-$second" "$first,$second" "$first"; do
|
||||
+ proc='
|
||||
+{
|
||||
+ "terminal": false,
|
||||
+ "execCPUAffinity": {
|
||||
+ "initial": "'$cpus'"
|
||||
+ },
|
||||
+ "args": [ "/bin/true" ],
|
||||
+ "cwd": "/"
|
||||
+}'
|
||||
+ mask=$(cpus_to_mask "$cpus")
|
||||
+ echo "CPUS: $cpus, mask: $mask"
|
||||
+ runc --debug exec --process <(echo "$proc") ct1
|
||||
+ [[ "$output" == *"nsexec"*": affinity: $mask"* ]]
|
||||
+ done
|
||||
+}
|
||||
+
|
||||
+@test "runc exec [CPU affinity, initial and final set from process.json]" {
|
||||
+ first="$(first_cpu)"
|
||||
+ second=$((first + 1)) # Hacky; might not work in all environments.
|
||||
+
|
||||
+ runc run -d --console-socket "$CONSOLE_SOCKET" ct1
|
||||
+ [ "$status" -eq 0 ]
|
||||
+
|
||||
+ for cpus in "$second" "$first-$second" "$first,$second" "$first"; do
|
||||
+ proc='
|
||||
+{
|
||||
+ "terminal": false,
|
||||
+ "execCPUAffinity": {
|
||||
+ "initial": "'$cpus'",
|
||||
+ "final": "'$cpus'"
|
||||
+ },
|
||||
+ "args": [ "/bin/grep", "-F", "Cpus_allowed_list:", "/proc/self/status" ],
|
||||
+ "cwd": "/"
|
||||
+}'
|
||||
+ mask=$(cpus_to_mask "$cpus")
|
||||
+ exp=${cpus//,/-} # "," --> "-".
|
||||
+ echo "CPUS: $cpus, mask: $mask, final: $exp"
|
||||
+ runc --debug exec --process <(echo "$proc") ct1
|
||||
+ [[ "$output" == *"nsexec"*": affinity: $mask"* ]]
|
||||
+ [[ "$output" == *"Cpus_allowed_list: $exp"* ]] # Mind the literal tab.
|
||||
+ done
|
||||
+}
|
||||
+
|
||||
+@test "runc exec [CPU affinity, initial and final set from config.json]" {
|
||||
+ initial="$(first_cpu)"
|
||||
+ final=$((initial + 1)) # Hacky; might not work in all environments.
|
||||
+
|
||||
+ update_config " .process.execCPUAffinity.initial = \"$initial\"
|
||||
+ | .process.execCPUAffinity.final = \"$final\""
|
||||
+
|
||||
+ runc run -d --console-socket "$CONSOLE_SOCKET" ct1
|
||||
+ [ "$status" -eq 0 ]
|
||||
+
|
||||
+ runc --debug exec ct1 grep "Cpus_allowed_list:" /proc/self/status
|
||||
+ [ "$status" -eq 0 ]
|
||||
+ mask=$(cpus_to_mask "$initial")
|
||||
+ [[ "$output" == *"nsexec"*": affinity: $mask"* ]]
|
||||
+ [[ "$output" == *"Cpus_allowed_list: $final"* ]] # Mind the literal tab.
|
||||
+}
|
||||
diff --git a/utils_linux.go b/utils_linux.go
|
||||
index 60d534e8..30204133 100644
|
||||
--- a/utils_linux.go
|
||||
+++ b/utils_linux.go
|
||||
@@ -109,6 +109,12 @@ func newProcess(p specs.Process) (*libcontainer.Process, error) {
|
||||
}
|
||||
lp.Rlimits = append(lp.Rlimits, rl)
|
||||
}
|
||||
+ aff, err := configs.ConvertCPUAffinity(p.ExecCPUAffinity)
|
||||
+ if err != nil {
|
||||
+ return nil, err
|
||||
+ }
|
||||
+ lp.CPUAffinity = aff
|
||||
+
|
||||
return lp, nil
|
||||
}
|
||||
|
||||
--
|
||||
2.47.1
|
||||
|
@ -23,7 +23,7 @@ go build -buildmode pie -compiler gc -tags="rpm_crashtraceback libtrust_openssl
|
||||
Epoch: 1
|
||||
Name: %{repo}
|
||||
Version: 1.1.12
|
||||
Release: 5%{?dist}
|
||||
Release: 6%{?dist}
|
||||
Summary: CLI for running Open Containers
|
||||
# https://fedoraproject.org/wiki/PackagingDrafts/Go#Go_Language_Architectures
|
||||
#ExclusiveArch: %%{go_arches}
|
||||
@ -33,6 +33,8 @@ ExcludeArch: %{ix86}
|
||||
License: ASL 2.0
|
||||
URL: %{git0}
|
||||
Source0: %{git0}/archive/v%{version}.tar.gz
|
||||
Patch0: 0001-1.1-Bump-runtime-spec-to-latest-git-HEAD.patch
|
||||
Patch1: 0002-1.1-runc-exec-implement-CPU-affinity.patch
|
||||
Provides: oci-runtime
|
||||
BuildRequires: golang >= 1.21.4
|
||||
BuildRequires: git
|
||||
@ -85,6 +87,10 @@ make install install-man install-bash DESTDIR=$RPM_BUILD_ROOT PREFIX=%{_prefix}
|
||||
%{_datadir}/bash-completion/completions/%{name}
|
||||
|
||||
%changelog
|
||||
* Mon Jan 20 2025 Jindrich Novy <jnovy@redhat.com> - 1:1.1.12-6
|
||||
- Add CPU affinity feature from Kir Kolyshkin
|
||||
- Resolves: RHEL-74865
|
||||
|
||||
* Tue Oct 01 2024 Kir Kolyshkin <kir@redhat.com> - 1:1.1.12-5
|
||||
- bump golang buildrequires
|
||||
- add no_openssl build tag
|
||||
|
Loading…
Reference in New Issue
Block a user