Compare commits

...

No commits in common. "c8-stream-1.0" and "c8-stream-rhel8" have entirely different histories.

11 changed files with 1230 additions and 733 deletions

2
.gitignore vendored
View File

@ -1 +1 @@
SOURCES/runc-2abd837.tar.gz
SOURCES/v1.1.12.tar.gz

View File

@ -1 +1 @@
cf7119a838db2963e7af6ecdba90a2cc95ec0d56 SOURCES/runc-2abd837.tar.gz
3fac650358578b8694012a44b1d5b156523c3402 SOURCES/v1.1.12.tar.gz

View File

@ -0,0 +1,508 @@
From 50f50245235097b0c87b31e97b86fd11685232a3 Mon Sep 17 00:00:00 2001
From: Kir Kolyshkin <kolyshkin@gmail.com>
Date: Thu, 16 Jan 2025 15:40:28 -0800
Subject: [PATCH 1/2] [1.1] Bump runtime-spec to latest git HEAD
This is to include
- https://github.com/opencontainers/runtime-spec/pull/1261
- https://github.com/opencontainers/runtime-spec/pull/1253
Signed-off-by: Kir Kolyshkin <kolyshkin@gmail.com>
---
go.mod | 2 +-
go.sum | 4 +-
.../runtime-spec/specs-go/config.go | 239 ++++++++++++++++--
.../runtime-spec/specs-go/version.go | 6 +-
vendor/modules.txt | 2 +-
5 files changed, 225 insertions(+), 28 deletions(-)
diff --git a/go.mod b/go.mod
index f51b6432..87c8d4b4 100644
--- a/go.mod
+++ b/go.mod
@@ -12,7 +12,7 @@ require (
github.com/godbus/dbus/v5 v5.0.6
github.com/moby/sys/mountinfo v0.5.0
github.com/mrunalp/fileutils v0.5.1
- github.com/opencontainers/runtime-spec v1.0.3-0.20210326190908-1c3f411f0417
+ github.com/opencontainers/runtime-spec v1.2.1-0.20240625190033-701738418b95
github.com/opencontainers/selinux v1.10.0
github.com/seccomp/libseccomp-golang v0.9.2-0.20220502022130-f33da4d89646
github.com/sirupsen/logrus v1.8.1
diff --git a/go.sum b/go.sum
index ecabd398..9d3bedc0 100644
--- a/go.sum
+++ b/go.sum
@@ -33,8 +33,8 @@ github.com/moby/sys/mountinfo v0.5.0 h1:2Ks8/r6lopsxWi9m58nlwjaeSzUX9iiL1vj5qB/9
github.com/moby/sys/mountinfo v0.5.0/go.mod h1:3bMD3Rg+zkqx8MRYPi7Pyb0Ie97QEBmdxbhnCLlSvSU=
github.com/mrunalp/fileutils v0.5.1 h1:F+S7ZlNKnrwHfSwdlgNSkKo67ReVf8o9fel6C3dkm/Q=
github.com/mrunalp/fileutils v0.5.1/go.mod h1:M1WthSahJixYnrXQl/DFQuteStB1weuxD2QJNHXfbSQ=
-github.com/opencontainers/runtime-spec v1.0.3-0.20210326190908-1c3f411f0417 h1:3snG66yBm59tKhhSPQrQ/0bCrv1LQbKt40LnUPiUxdc=
-github.com/opencontainers/runtime-spec v1.0.3-0.20210326190908-1c3f411f0417/go.mod h1:jwyrGlmzljRJv/Fgzds9SsS/C5hL+LL3ko9hs6T5lQ0=
+github.com/opencontainers/runtime-spec v1.2.1-0.20240625190033-701738418b95 h1:Ghl8Z3l+yPQUDSxAp7Kg7fJLRNNXjOsR6ooDcca7PjU=
+github.com/opencontainers/runtime-spec v1.2.1-0.20240625190033-701738418b95/go.mod h1:jwyrGlmzljRJv/Fgzds9SsS/C5hL+LL3ko9hs6T5lQ0=
github.com/opencontainers/selinux v1.10.0 h1:rAiKF8hTcgLI3w0DHm6i0ylVVcOrlgR1kK99DRLDhyU=
github.com/opencontainers/selinux v1.10.0/go.mod h1:2i0OySw99QjzBBQByd1Gr9gSjvuho1lHsJxIJ3gGbJI=
github.com/pmezard/go-difflib v1.0.0 h1:4DBwDE0NGyQoBHbLQYPwSUPoCMWR5BEzIk/f1lZbAQM=
diff --git a/vendor/github.com/opencontainers/runtime-spec/specs-go/config.go b/vendor/github.com/opencontainers/runtime-spec/specs-go/config.go
index 6a7a91e5..671f0d01 100644
--- a/vendor/github.com/opencontainers/runtime-spec/specs-go/config.go
+++ b/vendor/github.com/opencontainers/runtime-spec/specs-go/config.go
@@ -12,10 +12,12 @@ type Spec struct {
Root *Root `json:"root,omitempty"`
// Hostname configures the container's hostname.
Hostname string `json:"hostname,omitempty"`
+ // Domainname configures the container's domainname.
+ Domainname string `json:"domainname,omitempty"`
// Mounts configures additional mounts (on top of Root).
Mounts []Mount `json:"mounts,omitempty"`
// Hooks configures callbacks for container lifecycle events.
- Hooks *Hooks `json:"hooks,omitempty" platform:"linux,solaris"`
+ Hooks *Hooks `json:"hooks,omitempty" platform:"linux,solaris,zos"`
// Annotations contains arbitrary metadata for the container.
Annotations map[string]string `json:"annotations,omitempty"`
@@ -27,6 +29,36 @@ type Spec struct {
Windows *Windows `json:"windows,omitempty" platform:"windows"`
// VM specifies configuration for virtual-machine-based containers.
VM *VM `json:"vm,omitempty" platform:"vm"`
+ // ZOS is platform-specific configuration for z/OS based containers.
+ ZOS *ZOS `json:"zos,omitempty" platform:"zos"`
+}
+
+// Scheduler represents the scheduling attributes for a process. It is based on
+// the Linux sched_setattr(2) syscall.
+type Scheduler struct {
+ // Policy represents the scheduling policy (e.g., SCHED_FIFO, SCHED_RR, SCHED_OTHER).
+ Policy LinuxSchedulerPolicy `json:"policy"`
+
+ // Nice is the nice value for the process, which affects its priority.
+ Nice int32 `json:"nice,omitempty"`
+
+ // Priority represents the static priority of the process.
+ Priority int32 `json:"priority,omitempty"`
+
+ // Flags is an array of scheduling flags.
+ Flags []LinuxSchedulerFlag `json:"flags,omitempty"`
+
+ // The following ones are used by the DEADLINE scheduler.
+
+ // Runtime is the amount of time in nanoseconds during which the process
+ // is allowed to run in a given period.
+ Runtime uint64 `json:"runtime,omitempty"`
+
+ // Deadline is the absolute deadline for the process to complete its execution.
+ Deadline uint64 `json:"deadline,omitempty"`
+
+ // Period is the length of the period in nanoseconds used for determining the process runtime.
+ Period uint64 `json:"period,omitempty"`
}
// Process contains information to start a specific application inside the container.
@@ -49,15 +81,21 @@ type Process struct {
// Capabilities are Linux capabilities that are kept for the process.
Capabilities *LinuxCapabilities `json:"capabilities,omitempty" platform:"linux"`
// Rlimits specifies rlimit options to apply to the process.
- Rlimits []POSIXRlimit `json:"rlimits,omitempty" platform:"linux,solaris"`
+ Rlimits []POSIXRlimit `json:"rlimits,omitempty" platform:"linux,solaris,zos"`
// NoNewPrivileges controls whether additional privileges could be gained by processes in the container.
NoNewPrivileges bool `json:"noNewPrivileges,omitempty" platform:"linux"`
// ApparmorProfile specifies the apparmor profile for the container.
ApparmorProfile string `json:"apparmorProfile,omitempty" platform:"linux"`
// Specify an oom_score_adj for the container.
OOMScoreAdj *int `json:"oomScoreAdj,omitempty" platform:"linux"`
+ // Scheduler specifies the scheduling attributes for a process
+ Scheduler *Scheduler `json:"scheduler,omitempty" platform:"linux"`
// SelinuxLabel specifies the selinux context that the container process is run as.
SelinuxLabel string `json:"selinuxLabel,omitempty" platform:"linux"`
+ // IOPriority contains the I/O priority settings for the cgroup.
+ IOPriority *LinuxIOPriority `json:"ioPriority,omitempty" platform:"linux"`
+ // ExecCPUAffinity specifies CPU affinity for exec processes.
+ ExecCPUAffinity *CPUAffinity `json:"execCPUAffinity,omitempty" platform:"linux"`
}
// LinuxCapabilities specifies the list of allowed capabilities that are kept for a process.
@@ -75,6 +113,28 @@ type LinuxCapabilities struct {
Ambient []string `json:"ambient,omitempty" platform:"linux"`
}
+// LinuxIOPriority represents I/O priority settings for the container's processes within the process group.
+type LinuxIOPriority struct {
+ Class IOPriorityClass `json:"class"`
+ Priority int `json:"priority"`
+}
+
+// IOPriorityClass represents an I/O scheduling class.
+type IOPriorityClass string
+
+// Possible values for IOPriorityClass.
+const (
+ IOPRIO_CLASS_RT IOPriorityClass = "IOPRIO_CLASS_RT"
+ IOPRIO_CLASS_BE IOPriorityClass = "IOPRIO_CLASS_BE"
+ IOPRIO_CLASS_IDLE IOPriorityClass = "IOPRIO_CLASS_IDLE"
+)
+
+// CPUAffinity specifies process' CPU affinity.
+type CPUAffinity struct {
+ Initial string `json:"initial,omitempty"`
+ Final string `json:"final,omitempty"`
+}
+
// Box specifies dimensions of a rectangle. Used for specifying the size of a console.
type Box struct {
// Height is the vertical dimension of a box.
@@ -86,11 +146,11 @@ type Box struct {
// User specifies specific user (and group) information for the container process.
type User struct {
// UID is the user id.
- UID uint32 `json:"uid" platform:"linux,solaris"`
+ UID uint32 `json:"uid" platform:"linux,solaris,zos"`
// GID is the group id.
- GID uint32 `json:"gid" platform:"linux,solaris"`
+ GID uint32 `json:"gid" platform:"linux,solaris,zos"`
// Umask is the umask for the init process.
- Umask *uint32 `json:"umask,omitempty" platform:"linux,solaris"`
+ Umask *uint32 `json:"umask,omitempty" platform:"linux,solaris,zos"`
// AdditionalGids are additional group ids set for the container's process.
AdditionalGids []uint32 `json:"additionalGids,omitempty" platform:"linux,solaris"`
// Username is the user name.
@@ -110,11 +170,16 @@ type Mount struct {
// Destination is the absolute path where the mount will be placed in the container.
Destination string `json:"destination"`
// Type specifies the mount kind.
- Type string `json:"type,omitempty" platform:"linux,solaris"`
+ Type string `json:"type,omitempty" platform:"linux,solaris,zos"`
// Source specifies the source path of the mount.
Source string `json:"source,omitempty"`
// Options are fstab style mount options.
Options []string `json:"options,omitempty"`
+
+ // UID/GID mappings used for changing file owners w/o calling chown, fs should support it.
+ // Every mount point could have its own mapping.
+ UIDMappings []LinuxIDMapping `json:"uidMappings,omitempty" platform:"linux"`
+ GIDMappings []LinuxIDMapping `json:"gidMappings,omitempty" platform:"linux"`
}
// Hook specifies a command that is run at a particular event in the lifecycle of a container
@@ -130,6 +195,10 @@ type Hook struct {
type Hooks struct {
// Prestart is Deprecated. Prestart is a list of hooks to be run before the container process is executed.
// It is called in the Runtime Namespace
+ //
+ // Deprecated: use [Hooks.CreateRuntime], [Hooks.CreateContainer], and
+ // [Hooks.StartContainer] instead, which allow more granular hook control
+ // during the create and start phase.
Prestart []Hook `json:"prestart,omitempty"`
// CreateRuntime is a list of hooks to be run after the container has been created but before pivot_root or any equivalent operation has been called
// It is called in the Runtime Namespace
@@ -178,10 +247,12 @@ type Linux struct {
// MountLabel specifies the selinux context for the mounts in the container.
MountLabel string `json:"mountLabel,omitempty"`
// IntelRdt contains Intel Resource Director Technology (RDT) information for
- // handling resource constraints (e.g., L3 cache, memory bandwidth) for the container
+ // handling resource constraints and monitoring metrics (e.g., L3 cache, memory bandwidth) for the container
IntelRdt *LinuxIntelRdt `json:"intelRdt,omitempty"`
// Personality contains configuration for the Linux personality syscall
Personality *LinuxPersonality `json:"personality,omitempty"`
+ // TimeOffsets specifies the offset for supporting time namespaces.
+ TimeOffsets map[string]LinuxTimeOffset `json:"timeOffsets,omitempty"`
}
// LinuxNamespace is the configuration for a Linux namespace
@@ -211,6 +282,8 @@ const (
UserNamespace LinuxNamespaceType = "user"
// CgroupNamespace for isolating cgroup hierarchies
CgroupNamespace LinuxNamespaceType = "cgroup"
+ // TimeNamespace for isolating the clocks
+ TimeNamespace LinuxNamespaceType = "time"
)
// LinuxIDMapping specifies UID/GID mappings
@@ -223,6 +296,14 @@ type LinuxIDMapping struct {
Size uint32 `json:"size"`
}
+// LinuxTimeOffset specifies the offset for Time Namespace
+type LinuxTimeOffset struct {
+ // Secs is the offset of clock (in secs) in the container
+ Secs int64 `json:"secs,omitempty"`
+ // Nanosecs is the additional offset for Secs (in nanosecs)
+ Nanosecs uint32 `json:"nanosecs,omitempty"`
+}
+
// POSIXRlimit type and restrictions
type POSIXRlimit struct {
// Type of the rlimit to set
@@ -233,12 +314,13 @@ type POSIXRlimit struct {
Soft uint64 `json:"soft"`
}
-// LinuxHugepageLimit structure corresponds to limiting kernel hugepages
+// LinuxHugepageLimit structure corresponds to limiting kernel hugepages.
+// Default to reservation limits if supported. Otherwise fallback to page fault limits.
type LinuxHugepageLimit struct {
- // Pagesize is the hugepage size
- // Format: "<size><unit-prefix>B' (e.g. 64KB, 2MB, 1GB, etc.)
+ // Pagesize is the hugepage size.
+ // Format: "<size><unit-prefix>B" (e.g. 64KB, 2MB, 1GB, etc.).
Pagesize string `json:"pageSize"`
- // Limit is the limit of "hugepagesize" hugetlb usage
+ // Limit is the limit of "hugepagesize" hugetlb reservations (if supported) or usage.
Limit uint64 `json:"limit"`
}
@@ -250,8 +332,8 @@ type LinuxInterfacePriority struct {
Priority uint32 `json:"priority"`
}
-// linuxBlockIODevice holds major:minor format supported in blkio cgroup
-type linuxBlockIODevice struct {
+// LinuxBlockIODevice holds major:minor format supported in blkio cgroup
+type LinuxBlockIODevice struct {
// Major is the device's major number.
Major int64 `json:"major"`
// Minor is the device's minor number.
@@ -260,7 +342,7 @@ type linuxBlockIODevice struct {
// LinuxWeightDevice struct holds a `major:minor weight` pair for weightDevice
type LinuxWeightDevice struct {
- linuxBlockIODevice
+ LinuxBlockIODevice
// Weight is the bandwidth rate for the device.
Weight *uint16 `json:"weight,omitempty"`
// LeafWeight is the bandwidth rate for the device while competing with the cgroup's child cgroups, CFQ scheduler only
@@ -269,7 +351,7 @@ type LinuxWeightDevice struct {
// LinuxThrottleDevice struct holds a `major:minor rate_per_second` pair
type LinuxThrottleDevice struct {
- linuxBlockIODevice
+ LinuxBlockIODevice
// Rate is the IO rate limit per cgroup per device
Rate uint64 `json:"rate"`
}
@@ -301,6 +383,12 @@ type LinuxMemory struct {
// Total memory limit (memory + swap).
Swap *int64 `json:"swap,omitempty"`
// Kernel memory limit (in bytes).
+ //
+ // Deprecated: kernel-memory limits are not supported in cgroups v2, and
+ // were obsoleted in [kernel v5.4]. This field should no longer be used,
+ // as it may be ignored by runtimes.
+ //
+ // [kernel v5.4]: https://github.com/torvalds/linux/commit/0158115f702b0ba208ab0
Kernel *int64 `json:"kernel,omitempty"`
// Kernel memory limit for tcp (in bytes)
KernelTCP *int64 `json:"kernelTCP,omitempty"`
@@ -310,6 +398,10 @@ type LinuxMemory struct {
DisableOOMKiller *bool `json:"disableOOMKiller,omitempty"`
// Enables hierarchical memory accounting
UseHierarchy *bool `json:"useHierarchy,omitempty"`
+ // CheckBeforeUpdate enables checking if a new memory limit is lower
+ // than the current usage during update, and if so, rejecting the new
+ // limit.
+ CheckBeforeUpdate *bool `json:"checkBeforeUpdate,omitempty"`
}
// LinuxCPU for Linux cgroup 'cpu' resource management
@@ -318,6 +410,9 @@ type LinuxCPU struct {
Shares *uint64 `json:"shares,omitempty"`
// CPU hardcap limit (in usecs). Allowed cpu time in a given period.
Quota *int64 `json:"quota,omitempty"`
+ // CPU hardcap burst limit (in usecs). Allowed accumulated cpu time additionally for burst in a
+ // given period.
+ Burst *uint64 `json:"burst,omitempty"`
// CPU period to be used for hardcapping (in usecs).
Period *uint64 `json:"period,omitempty"`
// How much time realtime scheduling may use (in usecs).
@@ -328,6 +423,8 @@ type LinuxCPU struct {
Cpus string `json:"cpus,omitempty"`
// List of memory nodes in the cpuset. Default is to use any available memory node.
Mems string `json:"mems,omitempty"`
+ // cgroups are configured with minimum weight, 0: default behavior, 1: SCHED_IDLE.
+ Idle *int64 `json:"idle,omitempty"`
}
// LinuxPids for Linux cgroup 'pids' resource management (Linux 4.3)
@@ -364,7 +461,7 @@ type LinuxResources struct {
Pids *LinuxPids `json:"pids,omitempty"`
// BlockIO restriction configuration
BlockIO *LinuxBlockIO `json:"blockIO,omitempty"`
- // Hugetlb limit (in bytes)
+ // Hugetlb limits (in bytes). Default to reservation limits if supported.
HugepageLimits []LinuxHugepageLimit `json:"hugepageLimits,omitempty"`
// Network restriction configuration
Network *LinuxNetwork `json:"network,omitempty"`
@@ -522,11 +619,21 @@ type WindowsMemoryResources struct {
// WindowsCPUResources contains CPU resource management settings.
type WindowsCPUResources struct {
- // Number of CPUs available to the container.
+ // Count is the number of CPUs available to the container. It represents the
+ // fraction of the configured processor `count` in a container in relation
+ // to the processors available in the host. The fraction ultimately
+ // determines the portion of processor cycles that the threads in a
+ // container can use during each scheduling interval, as the number of
+ // cycles per 10,000 cycles.
Count *uint64 `json:"count,omitempty"`
- // CPU shares (relative weight to other containers with cpu shares).
+ // Shares limits the share of processor time given to the container relative
+ // to other workloads on the processor. The processor `shares` (`weight` at
+ // the platform level) is a value between 0 and 10000.
Shares *uint16 `json:"shares,omitempty"`
- // Specifies the portion of processor cycles that this container can use as a percentage times 100.
+ // Maximum determines the portion of processor cycles that the threads in a
+ // container can use during each scheduling interval, as the number of
+ // cycles per 10,000 cycles. Set processor `maximum` to a percentage times
+ // 100.
Maximum *uint16 `json:"maximum,omitempty"`
}
@@ -613,6 +720,23 @@ type Arch string
// LinuxSeccompFlag is a flag to pass to seccomp(2).
type LinuxSeccompFlag string
+const (
+ // LinuxSeccompFlagLog is a seccomp flag to request all returned
+ // actions except SECCOMP_RET_ALLOW to be logged. An administrator may
+ // override this filter flag by preventing specific actions from being
+ // logged via the /proc/sys/kernel/seccomp/actions_logged file. (since
+ // Linux 4.14)
+ LinuxSeccompFlagLog LinuxSeccompFlag = "SECCOMP_FILTER_FLAG_LOG"
+
+ // LinuxSeccompFlagSpecAllow can be used to disable Speculative Store
+ // Bypass mitigation. (since Linux 4.17)
+ LinuxSeccompFlagSpecAllow LinuxSeccompFlag = "SECCOMP_FILTER_FLAG_SPEC_ALLOW"
+
+ // LinuxSeccompFlagWaitKillableRecv can be used to switch to the wait
+ // killable semantics. (since Linux 5.19)
+ LinuxSeccompFlagWaitKillableRecv LinuxSeccompFlag = "SECCOMP_FILTER_FLAG_WAIT_KILLABLE_RECV"
+)
+
// Additional architectures permitted to be used for system calls
// By default only the native architecture of the kernel is permitted
const (
@@ -683,8 +807,9 @@ type LinuxSyscall struct {
Args []LinuxSeccompArg `json:"args,omitempty"`
}
-// LinuxIntelRdt has container runtime resource constraints for Intel RDT
-// CAT and MBA features which introduced in Linux 4.10 and 4.12 kernel
+// LinuxIntelRdt has container runtime resource constraints for Intel RDT CAT and MBA
+// features and flags enabling Intel RDT CMT and MBM features.
+// Intel RDT features are available in Linux 4.14 and newer kernel versions.
type LinuxIntelRdt struct {
// The identity for RDT Class of Service
ClosID string `json:"closID,omitempty"`
@@ -697,4 +822,76 @@ type LinuxIntelRdt struct {
// The unit of memory bandwidth is specified in "percentages" by
// default, and in "MBps" if MBA Software Controller is enabled.
MemBwSchema string `json:"memBwSchema,omitempty"`
+
+ // EnableCMT is the flag to indicate if the Intel RDT CMT is enabled. CMT (Cache Monitoring Technology) supports monitoring of
+ // the last-level cache (LLC) occupancy for the container.
+ EnableCMT bool `json:"enableCMT,omitempty"`
+
+ // EnableMBM is the flag to indicate if the Intel RDT MBM is enabled. MBM (Memory Bandwidth Monitoring) supports monitoring of
+ // total and local memory bandwidth for the container.
+ EnableMBM bool `json:"enableMBM,omitempty"`
+}
+
+// ZOS contains platform-specific configuration for z/OS based containers.
+type ZOS struct {
+ // Devices are a list of device nodes that are created for the container
+ Devices []ZOSDevice `json:"devices,omitempty"`
+}
+
+// ZOSDevice represents the mknod information for a z/OS special device file
+type ZOSDevice struct {
+ // Path to the device.
+ Path string `json:"path"`
+ // Device type, block, char, etc.
+ Type string `json:"type"`
+ // Major is the device's major number.
+ Major int64 `json:"major"`
+ // Minor is the device's minor number.
+ Minor int64 `json:"minor"`
+ // FileMode permission bits for the device.
+ FileMode *os.FileMode `json:"fileMode,omitempty"`
+ // UID of the device.
+ UID *uint32 `json:"uid,omitempty"`
+ // GID of the device.
+ GID *uint32 `json:"gid,omitempty"`
}
+
+// LinuxSchedulerPolicy represents different scheduling policies used with the Linux Scheduler
+type LinuxSchedulerPolicy string
+
+const (
+ // SchedOther is the default scheduling policy
+ SchedOther LinuxSchedulerPolicy = "SCHED_OTHER"
+ // SchedFIFO is the First-In-First-Out scheduling policy
+ SchedFIFO LinuxSchedulerPolicy = "SCHED_FIFO"
+ // SchedRR is the Round-Robin scheduling policy
+ SchedRR LinuxSchedulerPolicy = "SCHED_RR"
+ // SchedBatch is the Batch scheduling policy
+ SchedBatch LinuxSchedulerPolicy = "SCHED_BATCH"
+ // SchedISO is the Isolation scheduling policy
+ SchedISO LinuxSchedulerPolicy = "SCHED_ISO"
+ // SchedIdle is the Idle scheduling policy
+ SchedIdle LinuxSchedulerPolicy = "SCHED_IDLE"
+ // SchedDeadline is the Deadline scheduling policy
+ SchedDeadline LinuxSchedulerPolicy = "SCHED_DEADLINE"
+)
+
+// LinuxSchedulerFlag represents the flags used by the Linux Scheduler.
+type LinuxSchedulerFlag string
+
+const (
+ // SchedFlagResetOnFork represents the reset on fork scheduling flag
+ SchedFlagResetOnFork LinuxSchedulerFlag = "SCHED_FLAG_RESET_ON_FORK"
+ // SchedFlagReclaim represents the reclaim scheduling flag
+ SchedFlagReclaim LinuxSchedulerFlag = "SCHED_FLAG_RECLAIM"
+ // SchedFlagDLOverrun represents the deadline overrun scheduling flag
+ SchedFlagDLOverrun LinuxSchedulerFlag = "SCHED_FLAG_DL_OVERRUN"
+ // SchedFlagKeepPolicy represents the keep policy scheduling flag
+ SchedFlagKeepPolicy LinuxSchedulerFlag = "SCHED_FLAG_KEEP_POLICY"
+ // SchedFlagKeepParams represents the keep parameters scheduling flag
+ SchedFlagKeepParams LinuxSchedulerFlag = "SCHED_FLAG_KEEP_PARAMS"
+ // SchedFlagUtilClampMin represents the utilization clamp minimum scheduling flag
+ SchedFlagUtilClampMin LinuxSchedulerFlag = "SCHED_FLAG_UTIL_CLAMP_MIN"
+ // SchedFlagUtilClampMax represents the utilization clamp maximum scheduling flag
+ SchedFlagUtilClampMax LinuxSchedulerFlag = "SCHED_FLAG_UTIL_CLAMP_MAX"
+)
diff --git a/vendor/github.com/opencontainers/runtime-spec/specs-go/version.go b/vendor/github.com/opencontainers/runtime-spec/specs-go/version.go
index 596af0c2..f6c15f6c 100644
--- a/vendor/github.com/opencontainers/runtime-spec/specs-go/version.go
+++ b/vendor/github.com/opencontainers/runtime-spec/specs-go/version.go
@@ -6,12 +6,12 @@ const (
// VersionMajor is for an API incompatible changes
VersionMajor = 1
// VersionMinor is for functionality in a backwards-compatible manner
- VersionMinor = 0
+ VersionMinor = 2
// VersionPatch is for backwards-compatible bug fixes
- VersionPatch = 2
+ VersionPatch = 0
// VersionDev indicates development branch. Releases will be empty string.
- VersionDev = "-dev"
+ VersionDev = "+dev"
)
// Version is the specification version that the package types support.
diff --git a/vendor/modules.txt b/vendor/modules.txt
index a5537dfe..40089cd4 100644
--- a/vendor/modules.txt
+++ b/vendor/modules.txt
@@ -35,7 +35,7 @@ github.com/moby/sys/mountinfo
# github.com/mrunalp/fileutils v0.5.1
## explicit; go 1.13
github.com/mrunalp/fileutils
-# github.com/opencontainers/runtime-spec v1.0.3-0.20210326190908-1c3f411f0417
+# github.com/opencontainers/runtime-spec v1.2.1-0.20240625190033-701738418b95
## explicit
github.com/opencontainers/runtime-spec/specs-go
# github.com/opencontainers/selinux v1.10.0
--
2.47.1

View File

@ -1,62 +0,0 @@
From dfb3496c174377b860b62872ce6af951364cc3ac Mon Sep 17 00:00:00 2001
From: Lokesh Mandvekar <lsm5@fedoraproject.org>
Date: Tue, 12 Dec 2017 13:22:42 +0530
Subject: [PATCH] Revert "Apply cgroups earlier"
This reverts commit 7062c7556b71188abc18d7516441ff4b03fbc1fc.
---
libcontainer/process_linux.go | 31 ++++++++++++++-----------------
1 file changed, 14 insertions(+), 17 deletions(-)
diff --git a/libcontainer/process_linux.go b/libcontainer/process_linux.go
index 149b1126..b8a395af 100644
--- a/libcontainer/process_linux.go
+++ b/libcontainer/process_linux.go
@@ -272,6 +272,20 @@ func (p *initProcess) start() error {
p.process.ops = nil
return newSystemErrorWithCause(err, "starting init process command")
}
+ if _, err := io.Copy(p.parentPipe, p.bootstrapData); err != nil {
+ return newSystemErrorWithCause(err, "copying bootstrap data to pipe")
+ }
+ if err := p.execSetns(); err != nil {
+ return newSystemErrorWithCause(err, "running exec setns process for init")
+ }
+ // Save the standard descriptor names before the container process
+ // can potentially move them (e.g., via dup2()). If we don't do this now,
+ // we won't know at checkpoint time which file descriptor to look up.
+ fds, err := getPipeFds(p.pid())
+ if err != nil {
+ return newSystemErrorWithCausef(err, "getting pipe fds for pid %d", p.pid())
+ }
+ p.setExternalDescriptors(fds)
// Do this before syncing with child so that no children can escape the
// cgroup. We don't need to worry about not doing this and not being root
// because we'd be using the rootless cgroup manager in that case.
@@ -292,23 +306,6 @@ func (p *initProcess) start() error {
}
}
}()
-
- if _, err := io.Copy(p.parentPipe, p.bootstrapData); err != nil {
- return newSystemErrorWithCause(err, "copying bootstrap data to pipe")
- }
-
- if err := p.execSetns(); err != nil {
- return newSystemErrorWithCause(err, "running exec setns process for init")
- }
-
- // Save the standard descriptor names before the container process
- // can potentially move them (e.g., via dup2()). If we don't do this now,
- // we won't know at checkpoint time which file descriptor to look up.
- fds, err := getPipeFds(p.pid())
- if err != nil {
- return newSystemErrorWithCausef(err, "getting pipe fds for pid %d", p.pid())
- }
- p.setExternalDescriptors(fds)
if err := p.createNetworkInterfaces(); err != nil {
return newSystemErrorWithCause(err, "creating network interfaces")
}
--
2.14.3

View File

@ -1,290 +0,0 @@
From bf6405284aa3870a39b402309003633a1c230ed9 Mon Sep 17 00:00:00 2001
From: Aleksa Sarai <asarai@suse.de>
Date: Wed, 9 Jan 2019 13:40:01 +1100
Subject: [PATCH 1/1] nsenter: clone /proc/self/exe to avoid exposing host
binary to container
There are quite a few circumstances where /proc/self/exe pointing to a
pretty important container binary is a _bad_ thing, so to avoid this we
have to make a copy (preferably doing self-clean-up and not being
writeable).
As a hotfix we require memfd_create(2), but we can always extend this to
use a scratch MNT_DETACH overlayfs or tmpfs. The main downside to this
approach is no page-cache sharing for the runc binary (which overlayfs
would give us) but this is far less complicated.
This is only done during nsenter so that it happens transparently to the
Go code, and any libcontainer users benefit from it. This also makes
ExtraFiles and --preserve-fds handling trivial (because we don't need to
worry about it).
Fixes: CVE-2019-5736
Co-developed-by: Christian Brauner <christian.brauner@ubuntu.com>
Signed-off-by: Aleksa Sarai <asarai@suse.de>
Signed-off-by: Mrunal Patel <mrunalp@gmail.com>
---
libcontainer/nsenter/cloned_binary.c | 221 +++++++++++++++++++++++++++
libcontainer/nsenter/nsexec.c | 11 ++
2 files changed, 232 insertions(+)
create mode 100644 libcontainer/nsenter/cloned_binary.c
diff --git a/libcontainer/nsenter/cloned_binary.c b/libcontainer/nsenter/cloned_binary.c
new file mode 100644
index 00000000..d9f6093a
--- /dev/null
+++ b/libcontainer/nsenter/cloned_binary.c
@@ -0,0 +1,221 @@
+#define _GNU_SOURCE
+#include <unistd.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <stdbool.h>
+#include <string.h>
+#include <limits.h>
+#include <fcntl.h>
+#include <errno.h>
+
+#include <sys/types.h>
+#include <sys/stat.h>
+#include <sys/vfs.h>
+#include <sys/mman.h>
+#include <sys/sendfile.h>
+#include <sys/syscall.h>
+
+#include <linux/magic.h>
+#include <linux/memfd.h>
+
+/* Use our own wrapper for memfd_create. */
+#if !defined(SYS_memfd_create) && defined(__NR_memfd_create)
+# define SYS_memfd_create __NR_memfd_create
+#endif
+#ifndef SYS_memfd_create
+# error "memfd_create(2) syscall not supported by this glibc version"
+#endif
+int memfd_create(const char *name, unsigned int flags)
+{
+ return syscall(SYS_memfd_create, name, flags);
+}
+
+/* This comes directly from <linux/fcntl.h>. */
+#ifndef F_LINUX_SPECIFIC_BASE
+# define F_LINUX_SPECIFIC_BASE 1024
+#endif
+#ifndef F_ADD_SEALS
+# define F_ADD_SEALS (F_LINUX_SPECIFIC_BASE + 9)
+# define F_GET_SEALS (F_LINUX_SPECIFIC_BASE + 10)
+#endif
+#ifndef F_SEAL_SEAL
+# define F_SEAL_SEAL 0x0001 /* prevent further seals from being set */
+# define F_SEAL_SHRINK 0x0002 /* prevent file from shrinking */
+# define F_SEAL_GROW 0x0004 /* prevent file from growing */
+# define F_SEAL_WRITE 0x0008 /* prevent writes */
+#endif
+
+
+#define OUR_MEMFD_COMMENT "runc_cloned:/proc/self/exe"
+#define OUR_MEMFD_SEALS \
+ (F_SEAL_SEAL | F_SEAL_SHRINK | F_SEAL_GROW | F_SEAL_WRITE)
+
+static void *must_realloc(void *ptr, size_t size)
+{
+ void *old = ptr;
+ do {
+ ptr = realloc(old, size);
+ } while(!ptr);
+ return ptr;
+}
+
+/*
+ * Verify whether we are currently in a self-cloned program (namely, is
+ * /proc/self/exe a memfd). F_GET_SEALS will only succeed for memfds (or rather
+ * for shmem files), and we want to be sure it's actually sealed.
+ */
+static int is_self_cloned(void)
+{
+ int fd, seals;
+
+ fd = open("/proc/self/exe", O_RDONLY|O_CLOEXEC);
+ if (fd < 0)
+ return -ENOTRECOVERABLE;
+
+ seals = fcntl(fd, F_GET_SEALS);
+ close(fd);
+ return seals == OUR_MEMFD_SEALS;
+}
+
+/*
+ * Reads a whole file with read(2) into a heap-allocated buffer and reports its
+ * length so you can safely treat it as an ordinary buffer. Returns NULL on error.
+ */
+static char *read_file(char *path, size_t *length)
+{
+ int fd;
+ char buf[4096], *copy = NULL;
+
+ if (!length)
+ return NULL;
+
+ fd = open(path, O_RDONLY | O_CLOEXEC);
+ if (fd < 0)
+ return NULL;
+
+ *length = 0;
+ for (;;) {
+ int n;
+
+ n = read(fd, buf, sizeof(buf));
+ if (n < 0)
+ goto error;
+ if (!n)
+ break;
+
+ copy = must_realloc(copy, (*length + n) * sizeof(*copy));
+ memcpy(copy + *length, buf, n);
+ *length += n;
+ }
+ close(fd);
+ return copy;
+
+error:
+ close(fd);
+ free(copy);
+ return NULL;
+}
+
+/*
+ * A poor-man's version of "xargs -0". Basically parses a given block of
+ * NUL-delimited data, within the given length and adds a pointer to each entry
+ * to the array of pointers.
+ */
+static int parse_xargs(char *data, int data_length, char ***output)
+{
+ int num = 0;
+ char *cur = data;
+
+ if (!data || *output != NULL)
+ return -1;
+
+ while (cur < data + data_length) {
+ num++;
+ *output = must_realloc(*output, (num + 1) * sizeof(**output));
+ (*output)[num - 1] = cur;
+ cur += strlen(cur) + 1;
+ }
+ (*output)[num] = NULL;
+ return num;
+}
+
+/*
+ * "Parse" out argv and envp from /proc/self/cmdline and /proc/self/environ.
+ * This is necessary because we are running in a context where we don't have a
+ * main() that we can just get the arguments from.
+ */
+static int fetchve(char ***argv, char ***envp)
+{
+ char *cmdline = NULL, *environ = NULL;
+ size_t cmdline_size, environ_size;
+
+ cmdline = read_file("/proc/self/cmdline", &cmdline_size);
+ if (!cmdline)
+ goto error;
+ environ = read_file("/proc/self/environ", &environ_size);
+ if (!environ)
+ goto error;
+
+ if (parse_xargs(cmdline, cmdline_size, argv) <= 0)
+ goto error;
+ if (parse_xargs(environ, environ_size, envp) <= 0)
+ goto error;
+
+ return 0;
+
+error:
+ free(environ);
+ free(cmdline);
+ return -EINVAL;
+}
+
+#define SENDFILE_MAX 0x7FFFF000 /* sendfile(2) is limited to 2GB. */
+static int clone_binary(void)
+{
+ int binfd, memfd, err;
+ ssize_t sent = 0;
+
+ memfd = memfd_create(OUR_MEMFD_COMMENT, MFD_CLOEXEC | MFD_ALLOW_SEALING);
+ if (memfd < 0)
+ return -ENOTRECOVERABLE;
+
+ binfd = open("/proc/self/exe", O_RDONLY | O_CLOEXEC);
+ if (binfd < 0)
+ goto error;
+
+ sent = sendfile(memfd, binfd, NULL, SENDFILE_MAX);
+ close(binfd);
+ if (sent < 0)
+ goto error;
+
+ err = fcntl(memfd, F_ADD_SEALS, OUR_MEMFD_SEALS);
+ if (err < 0)
+ goto error;
+
+ return memfd;
+
+error:
+ close(memfd);
+ return -EIO;
+}
+
+int ensure_cloned_binary(void)
+{
+ int execfd;
+ char **argv = NULL, **envp = NULL;
+
+ /* Check that we're not self-cloned, and if we are then bail. */
+ int cloned = is_self_cloned();
+ if (cloned > 0 || cloned == -ENOTRECOVERABLE)
+ return cloned;
+
+ if (fetchve(&argv, &envp) < 0)
+ return -EINVAL;
+
+ execfd = clone_binary();
+ if (execfd < 0)
+ return -EIO;
+
+ fexecve(execfd, argv, envp);
+ return -ENOEXEC;
+}
diff --git a/libcontainer/nsenter/nsexec.c b/libcontainer/nsenter/nsexec.c
index cb224314..784fd9b0 100644
--- a/libcontainer/nsenter/nsexec.c
+++ b/libcontainer/nsenter/nsexec.c
@@ -528,6 +528,9 @@ void join_namespaces(char *nslist)
free(namespaces);
}
+/* Defined in cloned_binary.c. */
+int ensure_cloned_binary(void);
+
void nsexec(void)
{
int pipenum;
@@ -543,6 +546,14 @@ void nsexec(void)
if (pipenum == -1)
return;
+ /*
+ * We need to re-exec if we are not in a cloned binary. This is necessary
+ * to ensure that containers won't be able to access the host binary
+ * through /proc/self/exe. See CVE-2019-5736.
+ */
+ if (ensure_cloned_binary() < 0)
+ bail("could not ensure we are a cloned binary");
+
/* Parse all of the netlink configuration. */
nl_parse(pipenum, &config);
--
2.20.1

View File

@ -0,0 +1,475 @@
From 1af672a2635628ca24ce3b5ed3344d316548f1ca Mon Sep 17 00:00:00 2001
From: Kir Kolyshkin <kolyshkin@gmail.com>
Date: Mon, 21 Oct 2024 15:50:38 -0700
Subject: [PATCH 2/2] [1.1] runc exec: implement CPU affinity
As per
- https://github.com/opencontainers/runtime-spec/pull/1253
- https://github.com/opencontainers/runtime-spec/pull/1261
CPU affinity can be set in two ways:
1. When creating/starting a container, in config.json's
Process.ExecCPUAffinity, which is then applied to all execs.
2. When running an exec, in process.json's CPUAffinity, which
is applied to a given exec and overrides the value from (1).
Add some basic tests.
Note that older kernels (RHEL8, Ubuntu 20.04) change CPU affinity of a
process to that of a container's cgroup, as soon as it is moved to that
cgroup, while newer kernels (Ubuntu 24.04, Fedora 41) don't do that.
Because of the above,
- it's impossible to really test initial CPU affinity without adding
debug logging to libcontainer/nsenter;
- for older kernels, there can be a brief moment when exec's affinity
is different than either initial or final affinity being set;
- exec's final CPU affinity, if not specified, can be different
depending on the kernel, therefore we don't test it.
Signed-off-by: Kir Kolyshkin <kolyshkin@gmail.com>
---
libcontainer/configs/config.go | 73 ++++++++++++++++++++
libcontainer/container_linux.go | 4 ++
libcontainer/init_linux.go | 1 +
libcontainer/nsenter/nsexec.c | 36 +++++++++-
libcontainer/process.go | 2 +
libcontainer/process_linux.go | 51 +++++++++++++-
libcontainer/specconv/spec_linux.go | 5 ++
tests/integration/cpu_affinity.bats | 101 ++++++++++++++++++++++++++++
utils_linux.go | 6 ++
9 files changed, 275 insertions(+), 4 deletions(-)
create mode 100644 tests/integration/cpu_affinity.bats
diff --git a/libcontainer/configs/config.go b/libcontainer/configs/config.go
index 6ebf5ec7..997f2724 100644
--- a/libcontainer/configs/config.go
+++ b/libcontainer/configs/config.go
@@ -3,11 +3,15 @@ package configs
import (
"bytes"
"encoding/json"
+ "errors"
"fmt"
"os/exec"
+ "strconv"
+ "strings"
"time"
"github.com/sirupsen/logrus"
+ "golang.org/x/sys/unix"
"github.com/opencontainers/runc/libcontainer/devices"
"github.com/opencontainers/runtime-spec/specs-go"
@@ -211,6 +215,75 @@ type Config struct {
// RootlessCgroups is set when unlikely to have the full access to cgroups.
// When RootlessCgroups is set, cgroups errors are ignored.
RootlessCgroups bool `json:"rootless_cgroups,omitempty"`
+
+ // ExecCPUAffinity is CPU affinity for a non-init process to be run in the container.
+ ExecCPUAffinity *CPUAffinity `json:"exec_cpu_affinity,omitempty"`
+}
+
+// CPUAffinity holds the CPU affinity settings for a process executed in a
+// container. Initial is the set applied before the process is started;
+// Final is the set applied once the process has been moved into the
+// container's cgroup. A nil set means the value is not specified.
+type CPUAffinity struct {
+ Initial, Final *unix.CPUSet
+}
+
+// toCPUSet parses a list of CPUs, given as comma-separated CPU numbers and
+// inclusive ranges (for example "0-3,8"), into a CPU set.
+//
+// An empty string means "not specified" and yields (nil, nil). Spaces around
+// elements and empty elements (extra commas) are tolerated. An error is
+// returned for a malformed number, a reversed range, a CPU number that does
+// not fit into unix.CPUSet, or a list that names no CPUs at all.
+func toCPUSet(str string) (*unix.CPUSet, error) {
+	if str == "" {
+		return nil, nil
+	}
+
+	// parseCPU converts a single CPU number and makes sure unix.CPUSet can
+	// hold it. (*unix.CPUSet).Set silently ignores numbers that are too
+	// large, so without this check such CPUs would be dropped without
+	// notice and a range like "0-4294967295" would loop billions of times.
+	parseCPU := func(v string) (int, error) {
+		// 31 bits keep the value a non-negative int on every platform.
+		n, err := strconv.ParseUint(v, 10, 31)
+		if err != nil {
+			return 0, err
+		}
+		cpu := int(n)
+		var probe unix.CPUSet
+		probe.Set(cpu)
+		if !probe.IsSet(cpu) {
+			return 0, fmt.Errorf("CPU number %d is too large", cpu)
+		}
+		return cpu, nil
+	}
+
+	s := new(unix.CPUSet)
+	for _, r := range strings.Split(str, ",") {
+		// Allow extra spaces around.
+		r = strings.TrimSpace(r)
+		// Allow empty elements (extra commas).
+		if r == "" {
+			continue
+		}
+		if r0, r1, found := strings.Cut(r, "-"); found {
+			start, err := parseCPU(r0)
+			if err != nil {
+				return nil, err
+			}
+			end, err := parseCPU(r1)
+			if err != nil {
+				return nil, err
+			}
+			if start > end {
+				return nil, errors.New("invalid range: " + r)
+			}
+			// Both ends were validated, so the loop is bounded by the
+			// capacity of the set.
+			for i := start; i <= end; i++ {
+				s.Set(i)
+			}
+			continue
+		}
+		cpu, err := parseCPU(r)
+		if err != nil {
+			return nil, err
+		}
+		s.Set(cpu)
+	}
+
+	// A non-empty string that names no CPU (e.g. ",") would otherwise
+	// produce an empty mask, which cannot be applied as an affinity.
+	if s.Count() == 0 {
+		return nil, fmt.Errorf("no CPUs found in %q", str)
+	}
+
+	return s, nil
+}
+
+// ConvertCPUAffinity converts [specs.CPUAffinity] to [CPUAffinity].
+//
+// It returns (nil, nil) when sa is nil or when both sa.Initial and sa.Final
+// are empty. A CPU list that fails to parse results in an error that names
+// the offending field.
+func ConvertCPUAffinity(sa *specs.CPUAffinity) (*CPUAffinity, error) {
+ if sa == nil {
+ return nil, nil
+ }
+ initial, err := toCPUSet(sa.Initial)
+ if err != nil {
+ return nil, fmt.Errorf("bad CPUAffinity.Initial: %w", err)
+ }
+ final, err := toCPUSet(sa.Final)
+ if err != nil {
+ return nil, fmt.Errorf("bad CPUAffinity.Final: %w", err)
+ }
+ // Nothing was specified: report "no affinity" rather than an empty struct.
+ if initial == nil && final == nil {
+ return nil, nil
+ }
+
+ return &CPUAffinity{
+ Initial: initial,
+ Final: final,
+ }, nil
}
type (
diff --git a/libcontainer/container_linux.go b/libcontainer/container_linux.go
index 40b332f9..68b6a74f 100644
--- a/libcontainer/container_linux.go
+++ b/libcontainer/container_linux.go
@@ -692,6 +692,7 @@ func (c *linuxContainer) newInitConfig(process *Process) *initConfig {
AppArmorProfile: c.config.AppArmorProfile,
ProcessLabel: c.config.ProcessLabel,
Rlimits: c.config.Rlimits,
+ CPUAffinity: c.config.ExecCPUAffinity,
CreateConsole: process.ConsoleSocket != nil,
ConsoleWidth: process.ConsoleWidth,
ConsoleHeight: process.ConsoleHeight,
@@ -708,6 +709,9 @@ func (c *linuxContainer) newInitConfig(process *Process) *initConfig {
if len(process.Rlimits) > 0 {
cfg.Rlimits = process.Rlimits
}
+ if process.CPUAffinity != nil {
+ cfg.CPUAffinity = process.CPUAffinity
+ }
if cgroups.IsCgroup2UnifiedMode() {
cfg.Cgroup2Path = c.cgroupManager.Path("")
}
diff --git a/libcontainer/init_linux.go b/libcontainer/init_linux.go
index d9f18139..1f8562ec 100644
--- a/libcontainer/init_linux.go
+++ b/libcontainer/init_linux.go
@@ -70,6 +70,7 @@ type initConfig struct {
RootlessCgroups bool `json:"rootless_cgroups,omitempty"`
SpecState *specs.State `json:"spec_state,omitempty"`
Cgroup2Path string `json:"cgroup2_path,omitempty"`
+ CPUAffinity *configs.CPUAffinity `json:"cpu_affinity,omitempty"`
}
type initer interface {
diff --git a/libcontainer/nsenter/nsexec.c b/libcontainer/nsenter/nsexec.c
index 2d224bab..6f70aa87 100644
--- a/libcontainer/nsenter/nsexec.c
+++ b/libcontainer/nsenter/nsexec.c
@@ -149,13 +149,18 @@ int setns(int fd, int nstype)
}
#endif
+/*
+ * Report whether a message of the given level would be written by
+ * write_log(): a log fd must be set and the level must not exceed loglevel.
+ */
+bool log_enabled_for(int level)
+{
+ return (logfd >= 0 && level <= loglevel);
+}
+
static void write_log(int level, const char *format, ...)
{
char *message = NULL, *stage = NULL, *json = NULL;
va_list args;
int ret;
- if (logfd < 0 || level > loglevel)
+ if (!log_enabled_for(level))
goto out;
va_start(args, format);
@@ -851,6 +856,25 @@ void try_unshare(int flags, const char *msg)
bail("failed to unshare %s", msg);
}
+/*
+ * Log (at DEBUG level) the CPU affinity of the calling process as a
+ * hexadecimal bit mask. Only as many CPUs as fit into a size_t are shown.
+ * If the affinity cannot be read, a warning is logged instead.
+ */
+void print_cpu_affinity(void)
+{
+	cpu_set_t cpus = { };
+	size_t i, mask = 0;
+
+	if (sched_getaffinity(0, sizeof(cpus), &cpus) < 0) {
+		write_log(WARNING, "sched_getaffinity: %m");
+		return;
+	}
+
+	/* Do not print the complete mask, we only need a few first CPUs. */
+	for (i = 0; i < sizeof(mask) * 8; i++) {
+		/*
+		 * Shift a size_t, not an int: "1 << i" is undefined once i
+		 * reaches the width of int, which corrupts the mask for
+		 * CPUs 31 and above.
+		 */
+		if (CPU_ISSET(i, &cpus))
+			mask |= (size_t)1 << i;
+	}
+
+	write_log(DEBUG, "affinity: 0x%zx", mask);
+}
+
void nsexec(void)
{
int pipenum;
@@ -892,6 +916,16 @@ void nsexec(void)
write_log(DEBUG, "=> nsexec container setup");
+ /* This is for ../../tests/integration/cpu_affinity.bats test only.
+ *
+ * Printing this from Go code might be too late as some kernels
+ * change the process' CPU affinity to that of container's cpuset
+ * as soon as the process is moved into container's cgroup.
+ */
+ if (log_enabled_for(DEBUG)) {
+ print_cpu_affinity();
+ }
+
/* Parse all of the netlink configuration. */
nl_parse(pipenum, &config);
diff --git a/libcontainer/process.go b/libcontainer/process.go
index 8a5d340d..99167274 100644
--- a/libcontainer/process.go
+++ b/libcontainer/process.go
@@ -89,6 +89,8 @@ type Process struct {
//
// For cgroup v2, the only key allowed is "".
SubCgroupPaths map[string]string
+
+ CPUAffinity *configs.CPUAffinity
}
// Wait waits for the process to exit.
diff --git a/libcontainer/process_linux.go b/libcontainer/process_linux.go
index 0d9ceb9c..3b48ae76 100644
--- a/libcontainer/process_linux.go
+++ b/libcontainer/process_linux.go
@@ -9,6 +9,7 @@ import (
"os"
"os/exec"
"path/filepath"
+ "runtime"
"strconv"
"time"
@@ -78,12 +79,52 @@ func (p *setnsProcess) signal(sig os.Signal) error {
return unix.Kill(p.pid(), s)
}
+// startWithCPUAffinity starts the setns process (p.cmd). When an initial CPU
+// affinity is configured, the command is started from an OS thread that has
+// that affinity applied, so that the new process inherits it; otherwise the
+// command is started directly. It returns the error from setting the
+// affinity or from starting the command.
+func (p *setnsProcess) startWithCPUAffinity() error {
+ aff := p.config.CPUAffinity
+ if aff == nil || aff.Initial == nil {
+ return p.cmd.Start()
+ }
+ errCh := make(chan error)
+ defer close(errCh)
+
+ // Use a goroutine to dedicate an OS thread.
+ go func() {
+ runtime.LockOSThread()
+ // Command inherits the CPU affinity.
+ if err := unix.SchedSetaffinity(unix.Gettid(), aff.Initial); err != nil {
+ runtime.UnlockOSThread()
+ errCh <- fmt.Errorf("error setting initial CPU affinity: %w", err)
+ return
+ }
+
+ errCh <- p.cmd.Start()
+ // Deliberately omit runtime.UnlockOSThread here.
+ // https://pkg.go.dev/runtime#LockOSThread says:
+ // "If the calling goroutine exits without unlocking the
+ // thread, the thread will be terminated".
+ }()
+
+ // The goroutine sends exactly one value on every path, so this receive
+ // completes before the deferred close of errCh runs.
+ return <-errCh
+}
+
+// setFinalCPUAffinity applies the configured final CPU affinity, if any, to
+// the already started setns process (identified by p.pid()). It is a no-op
+// when no final affinity is configured.
+func (p *setnsProcess) setFinalCPUAffinity() error {
+ aff := p.config.CPUAffinity
+ if aff == nil || aff.Final == nil {
+ return nil
+ }
+ if err := unix.SchedSetaffinity(p.pid(), aff.Final); err != nil {
+ return fmt.Errorf("error setting final CPU affinity: %w", err)
+ }
+ return nil
+}
+
func (p *setnsProcess) start() (retErr error) {
defer p.messageSockPair.parent.Close()
- // get the "before" value of oom kill count
+ // Get the "before" value of oom kill count.
oom, _ := p.manager.OOMKillCount()
- err := p.cmd.Start()
- // close the write-side of the pipes (controlled by child)
+ err := p.startWithCPUAffinity()
+ // Close the child-side of the pipes (controlled by child).
p.messageSockPair.child.Close()
p.logFilePair.child.Close()
if err != nil {
@@ -143,6 +184,10 @@ func (p *setnsProcess) start() (retErr error) {
}
}
}
+ // Set final CPU affinity right after the process is moved into container's cgroup.
+ if err := p.setFinalCPUAffinity(); err != nil {
+ return err
+ }
if p.intelRdtPath != "" {
// if Intel RDT "resource control" filesystem path exists
_, err := os.Stat(p.intelRdtPath)
diff --git a/libcontainer/specconv/spec_linux.go b/libcontainer/specconv/spec_linux.go
index 7dbfb869..b59e0d59 100644
--- a/libcontainer/specconv/spec_linux.go
+++ b/libcontainer/specconv/spec_linux.go
@@ -493,6 +493,11 @@ func CreateLibcontainerConfig(opts *CreateOpts) (*configs.Config, error) {
Ambient: spec.Process.Capabilities.Ambient,
}
}
+ config.ExecCPUAffinity, err = configs.ConvertCPUAffinity(spec.Process.ExecCPUAffinity)
+ if err != nil {
+ return nil, err
+ }
+
}
createHooks(spec, config)
config.Version = specs.Version
diff --git a/tests/integration/cpu_affinity.bats b/tests/integration/cpu_affinity.bats
new file mode 100644
index 00000000..f6adfa2a
--- /dev/null
+++ b/tests/integration/cpu_affinity.bats
@@ -0,0 +1,101 @@
+#!/usr/bin/env bats
+# Exec CPU affinity tests. For more details, see:
+# - https://github.com/opencontainers/runtime-spec/pull/1253
+
+load helpers
+
+# Runs before every test: declares the "smp" and "cgroups_cpuset"
+# requirements and prepares a busybox bundle.
+function setup() {
+ requires smp cgroups_cpuset
+ setup_busybox
+}
+
+# Runs after every test: cleans up the bundle.
+function teardown() {
+ teardown_bundle
+}
+
+# Print the first online CPU: everything from the first "-" or "," onwards
+# in /sys/devices/system/cpu/online is cut off.
+function first_cpu() {
+ sed 's/[-,].*//g' </sys/devices/system/cpu/online
+}
+
+# Convert list of cpus ("0,1" or "0-1") to mask as printed by nsexec.
+# NOTE the range conversion is not proper, merely sufficient for tests here:
+# both "," and "-" are treated as plain separators, so only the numbers that
+# literally appear in the list get a bit set (the inside of a range wider
+# than two CPUs is not filled in).
+function cpus_to_mask() {
+ local cpus=$* mask=0
+
+ cpus=${cpus//,/-} # 1. "," --> "-".
+ cpus=${cpus//-/ } # 2. "-" --> " ".
+
+ for c in $cpus; do
+ mask=$((mask | 1 << c))
+ done
+
+ printf "0x%x" $mask
+}
+
+# For several CPU lists, run an exec whose process.json sets only the
+# initial affinity and check the mask that nsexec logs in debug mode.
+@test "runc exec [CPU affinity, only initial set from process.json]" {
+	first="$(first_cpu)"
+	second=$((first + 1)) # Hacky; might not work in all environments.
+
+	runc run -d --console-socket "$CONSOLE_SOCKET" ct1
+	[ "$status" -eq 0 ]
+
+	for cpus in "$second" "$first-$second" "$first,$second" "$first"; do
+		proc='
+{
+ "terminal": false,
+ "execCPUAffinity": {
+ "initial": "'$cpus'"
+ },
+ "args": [ "/bin/true" ],
+ "cwd": "/"
+}'
+		mask=$(cpus_to_mask "$cpus")
+		echo "CPUS: $cpus, mask: $mask"
+		runc --debug exec --process <(echo "$proc") ct1
+		# The affinity line is logged before the command runs, so also
+		# make sure the exec itself succeeded.
+		[ "$status" -eq 0 ]
+		[[ "$output" == *"nsexec"*": affinity: $mask"* ]]
+	done
+}
+
+# For several CPU lists, run an exec whose process.json sets both the
+# initial and the final affinity to the same list, then check the mask
+# logged by nsexec and the Cpus_allowed_list seen by the exec'ed process.
+@test "runc exec [CPU affinity, initial and final set from process.json]" {
+ first="$(first_cpu)"
+ second=$((first + 1)) # Hacky; might not work in all environments.
+
+ runc run -d --console-socket "$CONSOLE_SOCKET" ct1
+ [ "$status" -eq 0 ]
+
+ for cpus in "$second" "$first-$second" "$first,$second" "$first"; do
+ proc='
+{
+ "terminal": false,
+ "execCPUAffinity": {
+ "initial": "'$cpus'",
+ "final": "'$cpus'"
+ },
+ "args": [ "/bin/grep", "-F", "Cpus_allowed_list:", "/proc/self/status" ],
+ "cwd": "/"
+}'
+ mask=$(cpus_to_mask "$cpus")
+ exp=${cpus//,/-} # "," --> "-".
+ echo "CPUS: $cpus, mask: $mask, final: $exp"
+ runc --debug exec --process <(echo "$proc") ct1
+ [[ "$output" == *"nsexec"*": affinity: $mask"* ]]
+ [[ "$output" == *"Cpus_allowed_list: $exp"* ]] # Mind the literal tab.
+ done
+}
+
+# Set different initial and final affinities in the container's config.json
+# and check that a plain "runc exec" picks both up: the initial one in the
+# mask logged by nsexec, the final one in Cpus_allowed_list of the process.
+@test "runc exec [CPU affinity, initial and final set from config.json]" {
+ initial="$(first_cpu)"
+ final=$((initial + 1)) # Hacky; might not work in all environments.
+
+ update_config " .process.execCPUAffinity.initial = \"$initial\"
+ | .process.execCPUAffinity.final = \"$final\""
+
+ runc run -d --console-socket "$CONSOLE_SOCKET" ct1
+ [ "$status" -eq 0 ]
+
+ runc --debug exec ct1 grep "Cpus_allowed_list:" /proc/self/status
+ [ "$status" -eq 0 ]
+ mask=$(cpus_to_mask "$initial")
+ [[ "$output" == *"nsexec"*": affinity: $mask"* ]]
+ [[ "$output" == *"Cpus_allowed_list: $final"* ]] # Mind the literal tab.
+}
diff --git a/utils_linux.go b/utils_linux.go
index 60d534e8..30204133 100644
--- a/utils_linux.go
+++ b/utils_linux.go
@@ -109,6 +109,12 @@ func newProcess(p specs.Process) (*libcontainer.Process, error) {
}
lp.Rlimits = append(lp.Rlimits, rl)
}
+ aff, err := configs.ConvertCPUAffinity(p.ExecCPUAffinity)
+ if err != nil {
+ return nil, err
+ }
+ lp.CPUAffinity = aff
+
return lp, nil
}
--
2.47.1

View File

@ -1,200 +0,0 @@
From ecf53c23545092019602578583031c28fde4d2a1 Mon Sep 17 00:00:00 2001
From: Giuseppe Scrivano <gscrivan@redhat.com>
Date: Fri, 25 May 2018 18:04:06 +0200
Subject: [PATCH] sd-notify: do not hang when NOTIFY_SOCKET is used with create
if NOTIFY_SOCKET is used, do not block the main runc process waiting
for events on the notify socket. Change the logic to create a new
process that monitors exclusively the notify socket until an event is
received.
Signed-off-by: Giuseppe Scrivano <gscrivan@redhat.com>
---
init.go | 12 +++++++
notify_socket.go | 101 ++++++++++++++++++++++++++++++++++++++++++++++---------
signals.go | 5 +--
3 files changed, 99 insertions(+), 19 deletions(-)
diff --git a/init.go b/init.go
index c8f453192..6a3d9e91c 100644
--- a/init.go
+++ b/init.go
@@ -20,6 +20,18 @@ var initCommand = cli.Command{
Name: "init",
Usage: `initialize the namespaces and launch the process (do not call it outside of runc)`,
Action: func(context *cli.Context) error {
+ // If NOTIFY_SOCKET is used create a new process that stays around
+ // so to not block "runc start". It will automatically exits when the
+ // container notifies that it is ready, or when the container is deleted
+ if os.Getenv("_NOTIFY_SOCKET_FD") != "" {
+ fd := os.Getenv("_NOTIFY_SOCKET_FD")
+ pid := os.Getenv("_NOTIFY_SOCKET_PID")
+ hostNotifySocket := os.Getenv("_NOTIFY_SOCKET_HOST")
+ notifySocketPath := os.Getenv("_NOTIFY_SOCKET_PATH")
+ notifySocketInit(fd, pid, hostNotifySocket, notifySocketPath)
+ os.Exit(0)
+ }
+
factory, _ := libcontainer.New("")
if err := factory.StartInitialization(); err != nil {
// as the error is sent back to the parent there is no need to log
diff --git a/notify_socket.go b/notify_socket.go
index cd6c0a989..e04e9d660 100644
--- a/notify_socket.go
+++ b/notify_socket.go
@@ -6,10 +6,13 @@ import (
"bytes"
"fmt"
"net"
+ "os"
+ "os/exec"
"path/filepath"
+ "strconv"
+ "time"
"github.com/opencontainers/runtime-spec/specs-go"
-
"github.com/sirupsen/logrus"
"github.com/urfave/cli"
)
@@ -64,24 +67,94 @@ func (s *notifySocket) setupSocket() error {
return nil
}
+// notifyNewPid sends a "MAINPID=<pid>" datagram to the host notify socket.
+// This is best effort: if the socket cannot be dialed or the write fails,
+// the notification is silently skipped.
+func (notifySocket *notifySocket) notifyNewPid(pid int) {
+	notifySocketHostAddr := net.UnixAddr{Name: notifySocket.host, Net: "unixgram"}
+	client, err := net.DialUnix("unixgram", nil, &notifySocketHostAddr)
+	if err != nil {
+		return
+	}
+	// Release the socket once the datagram has been handed to the kernel;
+	// previously the connection was never closed.
+	defer client.Close()
+
+	newPid := fmt.Sprintf("MAINPID=%d\n", pid)
+	client.Write([]byte(newPid))
+}
+
// pid1 must be set only with -d, as it is used to set the new process as the main process
// for the service in systemd
func (notifySocket *notifySocket) run(pid1 int) {
- buf := make([]byte, 512)
- notifySocketHostAddr := net.UnixAddr{Name: notifySocket.host, Net: "unixgram"}
- client, err := net.DialUnix("unixgram", nil, &notifySocketHostAddr)
+ file, err := notifySocket.socket.File()
if err != nil {
logrus.Error(err)
return
}
- for {
- r, err := notifySocket.socket.Read(buf)
- if err != nil {
- break
+ defer file.Close()
+ defer notifySocket.socket.Close()
+
+ cmd := exec.Command("/proc/self/exe", "init")
+ cmd.ExtraFiles = []*os.File{file}
+ cmd.Env = append(cmd.Env, "_NOTIFY_SOCKET_FD=3",
+ fmt.Sprintf("_NOTIFY_SOCKET_PID=%d", pid1),
+ fmt.Sprintf("_NOTIFY_SOCKET_HOST=%s", notifySocket.host),
+ fmt.Sprintf("_NOTIFY_SOCKET_PATH=%s", notifySocket.socketPath))
+
+ if err := cmd.Start(); err != nil {
+ logrus.Fatal(err)
+ }
+ notifySocket.notifyNewPid(cmd.Process.Pid)
+ cmd.Process.Release()
+}
+
+func notifySocketInit(envFd string, envPid string, notifySocketHost string, notifySocketPath string) {
+ intFd, err := strconv.Atoi(envFd)
+ if err != nil {
+ return
+ }
+ pid1, err := strconv.Atoi(envPid)
+ if err != nil {
+ return
+ }
+
+ file := os.NewFile(uintptr(intFd), "unixgram")
+ defer file.Close()
+
+ fileChan := make(chan []byte)
+ exitChan := make(chan bool)
+
+ go func() {
+ for {
+ buf := make([]byte, 512)
+ r, err := file.Read(buf)
+ if err != nil {
+ return
+ }
+ fileChan <- buf[0:r]
}
- var out bytes.Buffer
- for _, line := range bytes.Split(buf[0:r], []byte{'\n'}) {
- if bytes.HasPrefix(line, []byte("READY=")) {
+ }()
+ go func() {
+ for {
+ if _, err := os.Stat(notifySocketPath); os.IsNotExist(err) {
+ exitChan <- true
+ return
+ }
+ time.Sleep(time.Second)
+ }
+ }()
+
+ notifySocketHostAddr := net.UnixAddr{Name: notifySocketHost, Net: "unixgram"}
+ client, err := net.DialUnix("unixgram", nil, &notifySocketHostAddr)
+ if err != nil {
+ return
+ }
+
+ for {
+ select {
+ case <-exitChan:
+ return
+ case b := <-fileChan:
+ for _, line := range bytes.Split(b, []byte{'\n'}) {
+ if !bytes.HasPrefix(line, []byte("READY=")) {
+ continue
+ }
+
+ var out bytes.Buffer
_, err = out.Write(line)
if err != nil {
return
@@ -98,10 +171,8 @@ func (notifySocket *notifySocket) run(pid1 int) {
}
// now we can inform systemd to use pid1 as the pid to monitor
- if pid1 > 0 {
- newPid := fmt.Sprintf("MAINPID=%d\n", pid1)
- client.Write([]byte(newPid))
- }
+ newPid := fmt.Sprintf("MAINPID=%d\n", pid1)
+ client.Write([]byte(newPid))
return
}
}
diff --git a/signals.go b/signals.go
index 1811de837..d0988cb39 100644
--- a/signals.go
+++ b/signals.go
@@ -70,7 +70,7 @@ func (h *signalHandler) forward(process *libcontainer.Process, tty *tty, detach
h.notifySocket.run(pid1)
return 0, nil
} else {
- go h.notifySocket.run(0)
+ h.notifySocket.run(os.Getpid())
}
}
@@ -98,9 +98,6 @@ func (h *signalHandler) forward(process *libcontainer.Process, tty *tty, detach
// status because we must ensure that any of the go specific process
// fun such as flushing pipes are complete before we return.
process.Wait()
- if h.notifySocket != nil {
- h.notifySocket.Close()
- }
return e.status, nil
}
}

View File

@ -1 +0,0 @@
fs.may_detach_mounts=1

View File

@ -1,61 +0,0 @@
diff --git a/list.go b/list.go
index 0313d8c..328798b 100644
--- a/list.go
+++ b/list.go
@@ -50,7 +50,7 @@ var listCommand = cli.Command{
ArgsUsage: `
Where the given root is specified via the global option "--root"
-(default: "/run/runc").
+(default: "/run/runc-ctrs").
EXAMPLE 1:
To list containers created via the default "--root":
diff --git a/main.go b/main.go
index 278399a..0f49fce 100644
--- a/main.go
+++ b/main.go
@@ -62,7 +62,7 @@ func main() {
v = append(v, fmt.Sprintf("spec: %s", specs.Version))
app.Version = strings.Join(v, "\n")
- root := "/run/runc"
+ root := "/run/runc-ctrs"
rootless, err := isRootless(nil)
if err != nil {
fatal(err)
@@ -70,7 +70,7 @@ func main() {
if rootless {
runtimeDir := os.Getenv("XDG_RUNTIME_DIR")
if runtimeDir != "" {
- root = runtimeDir + "/runc"
+ root = runtimeDir + "/runc-ctrs"
// According to the XDG specification, we need to set anything in
// XDG_RUNTIME_DIR to have a sticky bit if we don't want it to get
// auto-pruned.
diff --git a/man/runc-list.8.md b/man/runc-list.8.md
index f737424..107220e 100644
--- a/man/runc-list.8.md
+++ b/man/runc-list.8.md
@@ -6,7 +6,7 @@
# EXAMPLE
Where the given root is specified via the global option "--root"
-(default: "/run/runc").
+(default: "/run/runc-ctrs").
To list containers created via the default "--root":
# runc list
diff --git a/man/runc.8.md b/man/runc.8.md
index 6d0ddff..337bc73 100644
--- a/man/runc.8.md
+++ b/man/runc.8.md
@@ -51,7 +51,7 @@ value for "bundle" is the current directory.
--debug enable debug output for logging
--log value set the log file path where internal debug information is written (default: "/dev/null")
--log-format value set the format used by logs ('text' (default), or 'json') (default: "text")
- --root value root directory for storage of container state (this should be located in tmpfs) (default: "/run/runc" or $XDG_RUNTIME_DIR/runc for rootless containers)
+ --root value root directory for storage of container state (this should be located in tmpfs) (default: "/run/runc-ctrs" or $XDG_RUNTIME_DIR/runc-ctrs for rootless containers)
--criu value path to the criu binary used for checkpoint and restore (default: "criu")
--systemd-cgroup enable systemd cgroup support, expects cgroupsPath to be of form "slice:prefix:name" for e.g. "system.slice:runc:434234"
--rootless value enable rootless mode ('true', 'false', or 'auto') (default: "auto")

View File

@ -1,72 +0,0 @@
From 28a697cce3e4f905dca700eda81d681a30eef9cd Mon Sep 17 00:00:00 2001
From: Giuseppe Scrivano <gscrivan@redhat.com>
Date: Fri, 11 Jan 2019 21:53:45 +0100
Subject: [PATCH] rootfs: umount all procfs and sysfs with --no-pivot
When creating a new user namespace, the kernel doesn't allow to mount
a new procfs or sysfs file system if there is not already one instance
fully visible in the current mount namespace.
When using --no-pivot we were effectively inhibiting this protection
from the kernel, as /proc and /sys from the host are still present in
the container mount namespace.
A container without full access to /proc could then create a new user
namespace, and from there able to mount a fully visible /proc, bypassing
the limitations in the container.
A simple reproducer for this issue is:
unshare -mrfp sh -c "mount -t proc none /proc && echo c > /proc/sysrq-trigger"
Signed-off-by: Giuseppe Scrivano <gscrivan@redhat.com>
---
libcontainer/rootfs_linux.go | 35 +++++++++++++++++++++++++++++++++++
1 file changed, 35 insertions(+)
diff --git a/libcontainer/rootfs_linux.go b/libcontainer/rootfs_linux.go
index e7c2f8ada..6bd6da74a 100644
--- a/libcontainer/rootfs_linux.go
+++ b/libcontainer/rootfs_linux.go
@@ -748,6 +748,41 @@ func pivotRoot(rootfs string) error {
}
func msMoveRoot(rootfs string) error {
+ mountinfos, err := mount.GetMounts()
+ if err != nil {
+ return err
+ }
+
+ absRootfs, err := filepath.Abs(rootfs)
+ if err != nil {
+ return err
+ }
+
+ for _, info := range mountinfos {
+ p, err := filepath.Abs(info.Mountpoint)
+ if err != nil {
+ return err
+ }
+ // Umount every syfs and proc file systems, except those under the container rootfs
+ if (info.Fstype != "proc" && info.Fstype != "sysfs") || filepath.HasPrefix(p, absRootfs) {
+ continue
+ }
+ // Be sure umount events are not propagated to the host.
+ if err := unix.Mount("", p, "", unix.MS_SLAVE|unix.MS_REC, ""); err != nil {
+ return err
+ }
+ if err := unix.Unmount(p, unix.MNT_DETACH); err != nil {
+ if err != unix.EINVAL && err != unix.EPERM {
+ return err
+ } else {
+ // If we have not privileges for umounting (e.g. rootless), then
+ // cover the path.
+ if err := unix.Mount("tmpfs", p, "tmpfs", 0, ""); err != nil {
+ return err
+ }
+ }
+ }
+ }
if err := unix.Mount(rootfs, "/", "", unix.MS_MOVE, ""); err != nil {
return err
}

View File

@ -1,52 +1,47 @@
%global with_debug 1
%global with_bundled 1
%global with_check 0
%if 0%{?with_debug}
%global _find_debuginfo_dwz_opts %{nil}
%global _dwz_low_mem_die_limit 0
%else
%global debug_package %{nil}
%endif
%if 0%{?rhel} > 7 && ! 0%{?fedora}
%define gobuild(o:) \
go build -buildmode pie -compiler gc -tags="rpm_crashtraceback no_openssl ${BUILDTAGS:-}" -ldflags "${LDFLAGS:-} -compressdwarf=false -B 0x$(head -c20 /dev/urandom|od -An -tx1|tr -d ' \\n') -extldflags '%__global_ldflags'" -a -v -x %{?**};
%endif # distro
go build -buildmode pie -compiler gc -tags="rpm_crashtraceback libtrust_openssl ${BUILDTAGS:-}" -ldflags "${LDFLAGS:-} -linkmode=external -compressdwarf=false -B 0x$(head -c20 /dev/urandom|od -An -tx1|tr -d ' \\n') -extldflags '%__global_ldflags'" -a -v %{?**};
%else
%if ! 0%{?gobuild:1}
%define gobuild(o:) GO111MODULE=off go build -buildmode pie -compiler gc -tags="rpm_crashtraceback ${BUILDTAGS:-}" -ldflags "${LDFLAGS:-} -linkmode=external -B 0x$(head -c20 /dev/urandom|od -An -tx1|tr -d ' \\n') -extldflags '-Wl,-z,relro -Wl,-z,now -specs=/usr/lib/rpm/redhat/redhat-hardened-ld '" -a -v %{?**};
%endif
%endif
%global provider github
%global provider_tld com
%global project opencontainers
%global repo runc
# https://github.com/opencontainers/runc
%global provider_prefix %{provider}.%{provider_tld}/%{project}/%{repo}
%global import_path %{provider_prefix}
%global git0 https://github.com/opencontainers/runc
%global commit0 2abd837c8c25b0102ac4ce14f17bc0bc7ddffba7
%global shortcommit0 %(c=%{commit0}; echo ${c:0:7})
%global import_path %{provider}.%{provider_tld}/%{project}/%{repo}
%global git0 https://%{import_path}
Epoch: 1
Name: %{repo}
Version: 1.0.0
Release: 56.rc5.dev.git%{shortcommit0}%{?dist}
Version: 1.1.12
Release: 6%{?dist}
Summary: CLI for running Open Containers
# https://fedoraproject.org/wiki/PackagingDrafts/Go#Go_Language_Architectures
#ExclusiveArch: %%{go_arches}
# still use arch exclude as the macro above still refers %%{ix86} in RHEL8.4:
# https://bugzilla.redhat.com/show_bug.cgi?id=1905383
ExcludeArch: %{ix86}
License: ASL 2.0
URL: http//%{provider_prefix}
Source0: %{git0}/archive/%{commit0}/%{repo}-%{shortcommit0}.tar.gz
Source1: 99-containers.conf
Patch0: change-default-root.patch
Patch1: 0001-Revert-Apply-cgroups-earlier.patch
Patch2: 1807.patch
Patch3: 0001-nsenter-clone-proc-self-exe-to-avoid-exposing-host-b-runc.patch
Patch4: pivot-root.patch
Requires: criu
Requires(pre): container-selinux >= 2:2.2-2
# If go_compiler is not set to 1, there is no virtual provide. Use golang instead.
BuildRequires: %{?go_compiler:compiler(go-compiler)}%{!?go_compiler:golang} >= 1.6.2
URL: %{git0}
Source0: %{git0}/archive/v%{version}.tar.gz
Patch0: 0001-1.1-Bump-runtime-spec-to-latest-git-HEAD.patch
Patch1: 0002-1.1-runc-exec-implement-CPU-affinity.patch
Provides: oci-runtime
BuildRequires: golang >= 1.21.4
BuildRequires: git
BuildRequires: go-md2man
BuildRequires: libseccomp-devel
BuildRequires: /usr/bin/go-md2man
BuildRequires: libseccomp-devel >= 2.5
Requires: libseccomp >= 2.5
Requires: criu
%description
The runc command can be used to start containers which are packaged
@ -54,7 +49,7 @@ in accordance with the Open Container Initiative's specifications,
and to manage containers running under runc.
%prep
%autosetup -Sgit -n %{repo}-%{commit0}
%autosetup -Sgit
sed -i '/\#\!\/bin\/bash/d' contrib/completions/bash/%{name}
%build
@ -65,24 +60,19 @@ pushd GOPATH
popd
pushd GOPATH/src/%{import_path}
export GO111MODULE=off
export GOPATH=%{gopath}:$(pwd)/GOPATH
export BUILDTAGS="selinux seccomp"
%gobuild -o %{name} %{import_path}
export CGO_CFLAGS="%{optflags} -D_GNU_SOURCE -D_LARGEFILE_SOURCE -D_LARGEFILE64_SOURCE -D_FILE_OFFSET_BITS=64"
export BUILDTAGS="selinux seccomp no_openssl"
export LDFLAGS="-X main.gitCommit= -X main.version=%{version}"
%gobuild -o %{name} %{import_path}
pushd man
./md2man-all.sh
popd
%install
install -d -p %{buildroot}%{_bindir}
install -p -m 755 %{name} %{buildroot}%{_bindir}
# install man pages
install -d -p %{buildroot}%{_mandir}/man8
install -p -m 644 man/man8/* %{buildroot}%{_mandir}/man8
# install bash completion
install -d -p %{buildroot}%{_datadir}/bash-completion/completions
install -p -m 0644 contrib/completions/bash/%{name} %{buildroot}%{_datadir}/bash-completion/completions
make install install-man install-bash DESTDIR=$RPM_BUILD_ROOT PREFIX=%{_prefix} LIBDIR=%{_libdir} BINDIR=%{_bindir}
%check
@ -97,12 +87,222 @@ install -p -m 0644 contrib/completions/bash/%{name} %{buildroot}%{_datadir}/bash
%{_datadir}/bash-completion/completions/%{name}
%changelog
* Thu Nov 28 2019 Jindrich Novy <jnovy@redhat.com> - 1.0.0-56.rc5.dev.git2abd837
* Mon Jan 20 2025 Jindrich Novy <jnovy@redhat.com> - 1:1.1.12-6
- Add CPU affinity feature from Kir Kolyshkin
- Resolves: RHEL-74865
* Tue Oct 01 2024 Kir Kolyshkin <kir@redhat.com> - 1:1.1.12-5
- bump golang buildrequires
- add no_openssl build tag
- Resolves: RHEL-55757
* Mon Aug 05 2024 Jindrich Novy <jnovy@redhat.com> - 1:1.1.12-4
- rebuild for golang fixes
- Related: RHEL-28452
* Thu Aug 01 2024 Jindrich Novy <jnovy@redhat.com> - 1:1.1.12-3
- rebuild for golang fixes
- Related: RHEL-28452
* Fri Jun 21 2024 Jindrich Novy <jnovy@redhat.com> - 1:1.1.12-2
- rebuild for CVE-2024-1394
- Resolves: RHEL-24297
* Thu Feb 01 2024 Jindrich Novy <jnovy@redhat.com> - 1:1.1.12-1
- update to https://github.com/opencontainers/runc/releases/tag/v1.1.12
- Related: Jira:RHEL-2110
* Tue Jan 02 2024 Jindrich Novy <jnovy@redhat.com> - 1:1.1.11-1
- update to https://github.com/opencontainers/runc/releases/tag/v1.1.11
- Related: Jira:RHEL-2110
* Wed Nov 08 2023 Jindrich Novy <jnovy@redhat.com> - 1:1.1.10-1
- update to https://github.com/opencontainers/runc/releases/tag/v1.1.10
- require container-selinux >= 2.224.0 for dmz feature
- Related: Jira:RHEL-2110
* Fri Aug 11 2023 Jindrich Novy <jnovy@redhat.com> - 1:1.1.9-1
- update to https://github.com/opencontainers/runc/releases/tag/v1.1.9
- Related: #2176055
* Fri Jul 21 2023 Jindrich Novy <jnovy@redhat.com> - 1:1.1.8-1
- update to https://github.com/opencontainers/runc/releases/tag/v1.1.8
- Related: #2176055
* Fri Jun 16 2023 Jindrich Novy <jnovy@redhat.com> - 1:1.1.7-2
- rebuild for following CVEs:
CVE-2022-41724
- Resolves: #2179972
* Wed May 03 2023 Jindrich Novy <jnovy@redhat.com> - 1:1.1.7-1
- update to https://github.com/opencontainers/runc/releases/tag/v1.1.7
- Related: #2176055
* Wed Apr 12 2023 Jindrich Novy <jnovy@redhat.com> - 1:1.1.6-1
- update to https://github.com/opencontainers/runc/releases/tag/v1.1.6
- Related: #2176055
* Fri Mar 31 2023 Jindrich Novy <jnovy@redhat.com> - 1:1.1.5-1
- update to https://github.com/opencontainers/runc/releases/tag/v1.1.5
- Related: #2176055
* Thu Mar 09 2023 Jindrich Novy <jnovy@redhat.com> - 1:1.1.4-2
- update to https://github.com/opencontainers/runc/releases/tag/v1.1.4
- Related: #2176055
* Fri Aug 26 2022 Jindrich Novy <jnovy@redhat.com> - 1:1.1.4-1
- update to https://github.com/opencontainers/runc/releases/tag/v1.1.4
- Related: #2061390
* Thu Aug 25 2022 Jindrich Novy <jnovy@redhat.com> - 1:1.1.3-3
- fix "Error: runc: exec failed: unable to start container process:
open /dev/pts/0: operation not permitted: OCI permission denied"
- Related: #2061390
* Wed Jun 15 2022 Jindrich Novy <jnovy@redhat.com> - 1:1.1.3-2
- add patch in attempt to fix gating tests - thanks to Kir Kolyshkin
- Related: #2061390
* Thu Jun 09 2022 Jindrich Novy <jnovy@redhat.com> - 1:1.1.3-1
- update to https://github.com/opencontainers/runc/releases/tag/v1.1.3
- Related: #2061390
* Fri Jun 03 2022 Jindrich Novy <jnovy@redhat.com> - 1:1.1.2-1
- update to https://github.com/opencontainers/runc/releases/tag/v1.1.2
- Related: #2061390
* Thu May 12 2022 Jindrich Novy <jnovy@redhat.com> - 1:1.0.3-6
- Fix every podman run invocation generating two "Couldn't stat device
/dev/char/10:200: No such file or directory" lines in the journal
- Related: #2061390
* Wed May 11 2022 Jindrich Novy <jnovy@redhat.com> - 1:1.0.3-5
- BuildRequires: /usr/bin/go-md2man
- Related: #2061390
* Fri Apr 08 2022 Jindrich Novy <jnovy@redhat.com> - 1:1.0.3-4
- Related: #2061390
* Tue Mar 08 2022 Jindrich Novy <jnovy@redhat.com> - 1:1.0.3-3
- require at least libseccomp >= 2.5
- Resolves: #2053990
* Wed Feb 16 2022 Jindrich Novy <jnovy@redhat.com> - 1.0.3-2
- rollback to 1.0.3 due to gating test issues
- Related: #2001445
* Tue Jan 18 2022 Jindrich Novy <jnovy@redhat.com> - 1.1.0-1
- update to https://github.com/opencontainers/runc/releases/tag/v1.1.0
- Related: #2001445
* Mon Dec 06 2021 Jindrich Novy <jnovy@redhat.com> - 1.0.3-1
- update to https://github.com/opencontainers/runc/releases/tag/v1.0.3
- Related: #2001445
* Wed Aug 25 2021 Jindrich Novy <jnovy@redhat.com> - 1.0.2-1
- update to https://github.com/opencontainers/runc/releases/tag/v1.0.2
- Related: #1934415
* Fri Aug 06 2021 Jindrich Novy <jnovy@redhat.com> - 1.0.1-5
- do not use versioned provide
- Related: #1934415
* Thu Jul 29 2021 Jindrich Novy <jnovy@redhat.com> - 1.0.1-4
- fix "unknown version" displayed by runc -v
- Related: #1934415
* Mon Jul 26 2021 Jindrich Novy <jnovy@redhat.com> - 1.0.1-3
- be sure to compile runc binaries the right way
- Related: #1934415
* Mon Jul 26 2021 Jindrich Novy <jnovy@redhat.com> - 1.0.1-2
- use Makefile
- Related: #1934415
* Wed Jul 21 2021 Jindrich Novy <jnovy@redhat.com> - 1.0.1-1
- update to https://github.com/opencontainers/runc/releases/tag/v1.0.1
- Related: #1934415
* Thu May 20 2021 Jindrich Novy <jnovy@redhat.com> - 1.0.0-76.rc95
- updated to rc95 to fix CVE-2021-30465
- Related: #1934415
* Tue May 18 2021 Jindrich Novy <jnovy@redhat.com> - 1.0.0-75.rc94
- set GO111MODULE=off to fix build
- Related: #1934415
* Fri May 14 2021 Jindrich Novy <jnovy@redhat.com> - 1.0.0-74.rc94
- update to https://github.com/opencontainers/runc/releases/tag/v1.0.0-rc94
- Related: #1934415
* Tue May 11 2021 Jindrich Novy <jnovy@redhat.com> - 1.0.0-73.rc93
- fix CVE-2021-30465
- Related: #1934415
* Tue Mar 30 2021 Jindrich Novy <jnovy@redhat.com> - 1.0.0-72.rc93
- upload rc93 tarball
- Related: #1934415
* Tue Mar 30 2021 Jindrich Novy <jnovy@redhat.com> - 1.0.0-71.rc93
- update to rc93
- Related: #1934415
* Fri Jan 29 2021 Jindrich Novy <jnovy@redhat.com> - 1.0.0-70.rc92
- add missing Provides: oci-runtime = 1
- Related: #1883490
* Tue Dec 08 2020 Jindrich Novy <jnovy@redhat.com> - 1.0.0-69.rc92
- still use ExcludeArch as go_arches macro is broken for 8.4
- Related: #1883490
* Tue Aug 11 2020 Jindrich Novy <jnovy@redhat.com> - 1.0.0-68.rc92
- update to https://github.com/opencontainers/runc/releases/tag/v1.0.0-rc92
- propagate proper CFLAGS to CGO_CFLAGS to assure code hardening and optimization
- Related: #1821193
* Thu Jul 02 2020 Jindrich Novy <jnovy@redhat.com> - 1.0.0-67.rc91
- update to https://github.com/opencontainers/runc/releases/tag/v1.0.0-rc91
- Related: #1821193
* Tue May 12 2020 Jindrich Novy <jnovy@redhat.com> - 1.0.0-66.rc10
- synchronize container-tools 8.3.0 with 8.2.1
- Related: #1821193
* Wed Feb 12 2020 Jindrich Novy <jnovy@redhat.com> - 1.0.0-65.rc10
- address CVE-2019-19921 by updating to rc10
- Resolves: #1801887
* Wed Dec 11 2019 Jindrich Novy <jnovy@redhat.com> - 1.0.0-64.rc9
- use no_openssl in BUILDTAGS (no vendored crypto in runc)
- Related: RHELPLAN-25139
* Mon Dec 09 2019 Jindrich Novy <jnovy@redhat.com> - 1.0.0-63.rc9
- be sure to use golang >= 1.12.12-4
- Related: RHELPLAN-25139
* Thu Nov 28 2019 Jindrich Novy <jnovy@redhat.com> - 1.0.0-62.rc9
- rebuild because of CVE-2019-9512 and CVE-2019-9514
- Resolves: #1766328, #1766300
- Resolves: #1766331, #1766303
* Thu Nov 21 2019 Jindrich Novy <jnovy@redhat.com> - 1.0.0-61.rc9
- update to runc 1.0.0-rc9 release
- amend golang deps
- fixes CVE-2019-16884
- Resolves: #1759651
* Mon Jun 17 2019 Lokesh Mandvekar <lsm5@redhat.com> - 1.0.0-60.rc8
- Resolves: #1721247 - enable fips mode
* Mon Jun 17 2019 Lokesh Mandvekar <lsm5@redhat.com> - 1.0.0-59.rc8
- Resolves: #1720654 - rebase to v1.0.0-rc8
* Thu Apr 11 2019 Eduardo Santiago <santiago@redhat.com> - 1.0.0-57.rc5.dev.git2abd837
- Resolves: #1693424 - podman rootless: cannot specify gid= mount options
* Wed Feb 27 2019 Lokesh Mandvekar <lsm5@redhat.com> - 1.0.0-56.rc5.dev.git2abd837
- change-default-root patch not needed as there's no docker on rhel8
* Tue Feb 12 2019 Lokesh Mandvekar <lsm5@redhat.com> - 1.0.0-55.rc5.dev.git2abd837
- Resolves: #1665770 - rootfs: umount all procfs and sysfs with --no-pivot
- Resolves: CVE-2019-5736
* Tue Dec 18 2018 Frantisek Kluknavsky <fkluknav@redhat.com> - 1.0.0-54.rc5.dev.git2abd837