diff --git a/.gitignore b/.gitignore index d9972d0..d42edf0 100644 --- a/.gitignore +++ b/.gitignore @@ -1 +1 @@ -SOURCES/v1.1.12.tar.gz +SOURCES/v1.2.5.tar.gz diff --git a/.runc.metadata b/.runc.metadata index 88ea42c..d8eb787 100644 --- a/.runc.metadata +++ b/.runc.metadata @@ -1 +1 @@ -3fac650358578b8694012a44b1d5b156523c3402 SOURCES/v1.1.12.tar.gz +35e5289a5b1ac1a12a35c3475b7d0bee2232ef39 SOURCES/v1.2.5.tar.gz diff --git a/SOURCES/0001-1.1-Bump-runtime-spec-to-latest-git-HEAD.patch b/SOURCES/0001-1.1-Bump-runtime-spec-to-latest-git-HEAD.patch deleted file mode 100644 index 2a48db6..0000000 --- a/SOURCES/0001-1.1-Bump-runtime-spec-to-latest-git-HEAD.patch +++ /dev/null @@ -1,508 +0,0 @@ -From 50f50245235097b0c87b31e97b86fd11685232a3 Mon Sep 17 00:00:00 2001 -From: Kir Kolyshkin -Date: Thu, 16 Jan 2025 15:40:28 -0800 -Subject: [PATCH 1/2] [1.1] Bump runtime-spec to latest git HEAD - -This is to include - - https://github.com/opencontainers/runtime-spec/pull/1261 - - https://github.com/opencontainers/runtime-spec/pull/1253 - -Signed-off-by: Kir Kolyshkin ---- - go.mod | 2 +- - go.sum | 4 +- - .../runtime-spec/specs-go/config.go | 239 ++++++++++++++++-- - .../runtime-spec/specs-go/version.go | 6 +- - vendor/modules.txt | 2 +- - 5 files changed, 225 insertions(+), 28 deletions(-) - -diff --git a/go.mod b/go.mod -index f51b6432..87c8d4b4 100644 ---- a/go.mod -+++ b/go.mod -@@ -12,7 +12,7 @@ require ( - github.com/godbus/dbus/v5 v5.0.6 - github.com/moby/sys/mountinfo v0.5.0 - github.com/mrunalp/fileutils v0.5.1 -- github.com/opencontainers/runtime-spec v1.0.3-0.20210326190908-1c3f411f0417 -+ github.com/opencontainers/runtime-spec v1.2.1-0.20240625190033-701738418b95 - github.com/opencontainers/selinux v1.10.0 - github.com/seccomp/libseccomp-golang v0.9.2-0.20220502022130-f33da4d89646 - github.com/sirupsen/logrus v1.8.1 -diff --git a/go.sum b/go.sum -index ecabd398..9d3bedc0 100644 ---- a/go.sum -+++ b/go.sum -@@ -33,8 +33,8 @@ github.com/moby/sys/mountinfo v0.5.0 
h1:2Ks8/r6lopsxWi9m58nlwjaeSzUX9iiL1vj5qB/9 - github.com/moby/sys/mountinfo v0.5.0/go.mod h1:3bMD3Rg+zkqx8MRYPi7Pyb0Ie97QEBmdxbhnCLlSvSU= - github.com/mrunalp/fileutils v0.5.1 h1:F+S7ZlNKnrwHfSwdlgNSkKo67ReVf8o9fel6C3dkm/Q= - github.com/mrunalp/fileutils v0.5.1/go.mod h1:M1WthSahJixYnrXQl/DFQuteStB1weuxD2QJNHXfbSQ= --github.com/opencontainers/runtime-spec v1.0.3-0.20210326190908-1c3f411f0417 h1:3snG66yBm59tKhhSPQrQ/0bCrv1LQbKt40LnUPiUxdc= --github.com/opencontainers/runtime-spec v1.0.3-0.20210326190908-1c3f411f0417/go.mod h1:jwyrGlmzljRJv/Fgzds9SsS/C5hL+LL3ko9hs6T5lQ0= -+github.com/opencontainers/runtime-spec v1.2.1-0.20240625190033-701738418b95 h1:Ghl8Z3l+yPQUDSxAp7Kg7fJLRNNXjOsR6ooDcca7PjU= -+github.com/opencontainers/runtime-spec v1.2.1-0.20240625190033-701738418b95/go.mod h1:jwyrGlmzljRJv/Fgzds9SsS/C5hL+LL3ko9hs6T5lQ0= - github.com/opencontainers/selinux v1.10.0 h1:rAiKF8hTcgLI3w0DHm6i0ylVVcOrlgR1kK99DRLDhyU= - github.com/opencontainers/selinux v1.10.0/go.mod h1:2i0OySw99QjzBBQByd1Gr9gSjvuho1lHsJxIJ3gGbJI= - github.com/pmezard/go-difflib v1.0.0 h1:4DBwDE0NGyQoBHbLQYPwSUPoCMWR5BEzIk/f1lZbAQM= -diff --git a/vendor/github.com/opencontainers/runtime-spec/specs-go/config.go b/vendor/github.com/opencontainers/runtime-spec/specs-go/config.go -index 6a7a91e5..671f0d01 100644 ---- a/vendor/github.com/opencontainers/runtime-spec/specs-go/config.go -+++ b/vendor/github.com/opencontainers/runtime-spec/specs-go/config.go -@@ -12,10 +12,12 @@ type Spec struct { - Root *Root `json:"root,omitempty"` - // Hostname configures the container's hostname. - Hostname string `json:"hostname,omitempty"` -+ // Domainname configures the container's domainname. -+ Domainname string `json:"domainname,omitempty"` - // Mounts configures additional mounts (on top of Root). - Mounts []Mount `json:"mounts,omitempty"` - // Hooks configures callbacks for container lifecycle events. 
-- Hooks *Hooks `json:"hooks,omitempty" platform:"linux,solaris"` -+ Hooks *Hooks `json:"hooks,omitempty" platform:"linux,solaris,zos"` - // Annotations contains arbitrary metadata for the container. - Annotations map[string]string `json:"annotations,omitempty"` - -@@ -27,6 +29,36 @@ type Spec struct { - Windows *Windows `json:"windows,omitempty" platform:"windows"` - // VM specifies configuration for virtual-machine-based containers. - VM *VM `json:"vm,omitempty" platform:"vm"` -+ // ZOS is platform-specific configuration for z/OS based containers. -+ ZOS *ZOS `json:"zos,omitempty" platform:"zos"` -+} -+ -+// Scheduler represents the scheduling attributes for a process. It is based on -+// the Linux sched_setattr(2) syscall. -+type Scheduler struct { -+ // Policy represents the scheduling policy (e.g., SCHED_FIFO, SCHED_RR, SCHED_OTHER). -+ Policy LinuxSchedulerPolicy `json:"policy"` -+ -+ // Nice is the nice value for the process, which affects its priority. -+ Nice int32 `json:"nice,omitempty"` -+ -+ // Priority represents the static priority of the process. -+ Priority int32 `json:"priority,omitempty"` -+ -+ // Flags is an array of scheduling flags. -+ Flags []LinuxSchedulerFlag `json:"flags,omitempty"` -+ -+ // The following ones are used by the DEADLINE scheduler. -+ -+ // Runtime is the amount of time in nanoseconds during which the process -+ // is allowed to run in a given period. -+ Runtime uint64 `json:"runtime,omitempty"` -+ -+ // Deadline is the absolute deadline for the process to complete its execution. -+ Deadline uint64 `json:"deadline,omitempty"` -+ -+ // Period is the length of the period in nanoseconds used for determining the process runtime. -+ Period uint64 `json:"period,omitempty"` - } - - // Process contains information to start a specific application inside the container. -@@ -49,15 +81,21 @@ type Process struct { - // Capabilities are Linux capabilities that are kept for the process. 
- Capabilities *LinuxCapabilities `json:"capabilities,omitempty" platform:"linux"` - // Rlimits specifies rlimit options to apply to the process. -- Rlimits []POSIXRlimit `json:"rlimits,omitempty" platform:"linux,solaris"` -+ Rlimits []POSIXRlimit `json:"rlimits,omitempty" platform:"linux,solaris,zos"` - // NoNewPrivileges controls whether additional privileges could be gained by processes in the container. - NoNewPrivileges bool `json:"noNewPrivileges,omitempty" platform:"linux"` - // ApparmorProfile specifies the apparmor profile for the container. - ApparmorProfile string `json:"apparmorProfile,omitempty" platform:"linux"` - // Specify an oom_score_adj for the container. - OOMScoreAdj *int `json:"oomScoreAdj,omitempty" platform:"linux"` -+ // Scheduler specifies the scheduling attributes for a process -+ Scheduler *Scheduler `json:"scheduler,omitempty" platform:"linux"` - // SelinuxLabel specifies the selinux context that the container process is run as. - SelinuxLabel string `json:"selinuxLabel,omitempty" platform:"linux"` -+ // IOPriority contains the I/O priority settings for the cgroup. -+ IOPriority *LinuxIOPriority `json:"ioPriority,omitempty" platform:"linux"` -+ // ExecCPUAffinity specifies CPU affinity for exec processes. -+ ExecCPUAffinity *CPUAffinity `json:"execCPUAffinity,omitempty" platform:"linux"` - } - - // LinuxCapabilities specifies the list of allowed capabilities that are kept for a process. -@@ -75,6 +113,28 @@ type LinuxCapabilities struct { - Ambient []string `json:"ambient,omitempty" platform:"linux"` - } - -+// IOPriority represents I/O priority settings for the container's processes within the process group. -+type LinuxIOPriority struct { -+ Class IOPriorityClass `json:"class"` -+ Priority int `json:"priority"` -+} -+ -+// IOPriorityClass represents an I/O scheduling class. -+type IOPriorityClass string -+ -+// Possible values for IOPriorityClass. 
-+const ( -+ IOPRIO_CLASS_RT IOPriorityClass = "IOPRIO_CLASS_RT" -+ IOPRIO_CLASS_BE IOPriorityClass = "IOPRIO_CLASS_BE" -+ IOPRIO_CLASS_IDLE IOPriorityClass = "IOPRIO_CLASS_IDLE" -+) -+ -+// CPUAffinity specifies process' CPU affinity. -+type CPUAffinity struct { -+ Initial string `json:"initial,omitempty"` -+ Final string `json:"final,omitempty"` -+} -+ - // Box specifies dimensions of a rectangle. Used for specifying the size of a console. - type Box struct { - // Height is the vertical dimension of a box. -@@ -86,11 +146,11 @@ type Box struct { - // User specifies specific user (and group) information for the container process. - type User struct { - // UID is the user id. -- UID uint32 `json:"uid" platform:"linux,solaris"` -+ UID uint32 `json:"uid" platform:"linux,solaris,zos"` - // GID is the group id. -- GID uint32 `json:"gid" platform:"linux,solaris"` -+ GID uint32 `json:"gid" platform:"linux,solaris,zos"` - // Umask is the umask for the init process. -- Umask *uint32 `json:"umask,omitempty" platform:"linux,solaris"` -+ Umask *uint32 `json:"umask,omitempty" platform:"linux,solaris,zos"` - // AdditionalGids are additional group ids set for the container's process. - AdditionalGids []uint32 `json:"additionalGids,omitempty" platform:"linux,solaris"` - // Username is the user name. -@@ -110,11 +170,16 @@ type Mount struct { - // Destination is the absolute path where the mount will be placed in the container. - Destination string `json:"destination"` - // Type specifies the mount kind. -- Type string `json:"type,omitempty" platform:"linux,solaris"` -+ Type string `json:"type,omitempty" platform:"linux,solaris,zos"` - // Source specifies the source path of the mount. - Source string `json:"source,omitempty"` - // Options are fstab style mount options. - Options []string `json:"options,omitempty"` -+ -+ // UID/GID mappings used for changing file owners w/o calling chown, fs should support it. -+ // Every mount point could have its own mapping. 
-+ UIDMappings []LinuxIDMapping `json:"uidMappings,omitempty" platform:"linux"` -+ GIDMappings []LinuxIDMapping `json:"gidMappings,omitempty" platform:"linux"` - } - - // Hook specifies a command that is run at a particular event in the lifecycle of a container -@@ -130,6 +195,10 @@ type Hook struct { - type Hooks struct { - // Prestart is Deprecated. Prestart is a list of hooks to be run before the container process is executed. - // It is called in the Runtime Namespace -+ // -+ // Deprecated: use [Hooks.CreateRuntime], [Hooks.CreateContainer], and -+ // [Hooks.StartContainer] instead, which allow more granular hook control -+ // during the create and start phase. - Prestart []Hook `json:"prestart,omitempty"` - // CreateRuntime is a list of hooks to be run after the container has been created but before pivot_root or any equivalent operation has been called - // It is called in the Runtime Namespace -@@ -178,10 +247,12 @@ type Linux struct { - // MountLabel specifies the selinux context for the mounts in the container. - MountLabel string `json:"mountLabel,omitempty"` - // IntelRdt contains Intel Resource Director Technology (RDT) information for -- // handling resource constraints (e.g., L3 cache, memory bandwidth) for the container -+ // handling resource constraints and monitoring metrics (e.g., L3 cache, memory bandwidth) for the container - IntelRdt *LinuxIntelRdt `json:"intelRdt,omitempty"` - // Personality contains configuration for the Linux personality syscall - Personality *LinuxPersonality `json:"personality,omitempty"` -+ // TimeOffsets specifies the offset for supporting time namespaces. 
-+ TimeOffsets map[string]LinuxTimeOffset `json:"timeOffsets,omitempty"` - } - - // LinuxNamespace is the configuration for a Linux namespace -@@ -211,6 +282,8 @@ const ( - UserNamespace LinuxNamespaceType = "user" - // CgroupNamespace for isolating cgroup hierarchies - CgroupNamespace LinuxNamespaceType = "cgroup" -+ // TimeNamespace for isolating the clocks -+ TimeNamespace LinuxNamespaceType = "time" - ) - - // LinuxIDMapping specifies UID/GID mappings -@@ -223,6 +296,14 @@ type LinuxIDMapping struct { - Size uint32 `json:"size"` - } - -+// LinuxTimeOffset specifies the offset for Time Namespace -+type LinuxTimeOffset struct { -+ // Secs is the offset of clock (in secs) in the container -+ Secs int64 `json:"secs,omitempty"` -+ // Nanosecs is the additional offset for Secs (in nanosecs) -+ Nanosecs uint32 `json:"nanosecs,omitempty"` -+} -+ - // POSIXRlimit type and restrictions - type POSIXRlimit struct { - // Type of the rlimit to set -@@ -233,12 +314,13 @@ type POSIXRlimit struct { - Soft uint64 `json:"soft"` - } - --// LinuxHugepageLimit structure corresponds to limiting kernel hugepages -+// LinuxHugepageLimit structure corresponds to limiting kernel hugepages. -+// Default to reservation limits if supported. Otherwise fallback to page fault limits. - type LinuxHugepageLimit struct { -- // Pagesize is the hugepage size -- // Format: "B' (e.g. 64KB, 2MB, 1GB, etc.) -+ // Pagesize is the hugepage size. -+ // Format: "B' (e.g. 64KB, 2MB, 1GB, etc.). - Pagesize string `json:"pageSize"` -- // Limit is the limit of "hugepagesize" hugetlb usage -+ // Limit is the limit of "hugepagesize" hugetlb reservations (if supported) or usage. 
- Limit uint64 `json:"limit"` - } - -@@ -250,8 +332,8 @@ type LinuxInterfacePriority struct { - Priority uint32 `json:"priority"` - } - --// linuxBlockIODevice holds major:minor format supported in blkio cgroup --type linuxBlockIODevice struct { -+// LinuxBlockIODevice holds major:minor format supported in blkio cgroup -+type LinuxBlockIODevice struct { - // Major is the device's major number. - Major int64 `json:"major"` - // Minor is the device's minor number. -@@ -260,7 +342,7 @@ type linuxBlockIODevice struct { - - // LinuxWeightDevice struct holds a `major:minor weight` pair for weightDevice - type LinuxWeightDevice struct { -- linuxBlockIODevice -+ LinuxBlockIODevice - // Weight is the bandwidth rate for the device. - Weight *uint16 `json:"weight,omitempty"` - // LeafWeight is the bandwidth rate for the device while competing with the cgroup's child cgroups, CFQ scheduler only -@@ -269,7 +351,7 @@ type LinuxWeightDevice struct { - - // LinuxThrottleDevice struct holds a `major:minor rate_per_second` pair - type LinuxThrottleDevice struct { -- linuxBlockIODevice -+ LinuxBlockIODevice - // Rate is the IO rate limit per cgroup per device - Rate uint64 `json:"rate"` - } -@@ -301,6 +383,12 @@ type LinuxMemory struct { - // Total memory limit (memory + swap). - Swap *int64 `json:"swap,omitempty"` - // Kernel memory limit (in bytes). -+ // -+ // Deprecated: kernel-memory limits are not supported in cgroups v2, and -+ // were obsoleted in [kernel v5.4]. This field should no longer be used, -+ // as it may be ignored by runtimes. 
-+ // -+ // [kernel v5.4]: https://github.com/torvalds/linux/commit/0158115f702b0ba208ab0 - Kernel *int64 `json:"kernel,omitempty"` - // Kernel memory limit for tcp (in bytes) - KernelTCP *int64 `json:"kernelTCP,omitempty"` -@@ -310,6 +398,10 @@ type LinuxMemory struct { - DisableOOMKiller *bool `json:"disableOOMKiller,omitempty"` - // Enables hierarchical memory accounting - UseHierarchy *bool `json:"useHierarchy,omitempty"` -+ // CheckBeforeUpdate enables checking if a new memory limit is lower -+ // than the current usage during update, and if so, rejecting the new -+ // limit. -+ CheckBeforeUpdate *bool `json:"checkBeforeUpdate,omitempty"` - } - - // LinuxCPU for Linux cgroup 'cpu' resource management -@@ -318,6 +410,9 @@ type LinuxCPU struct { - Shares *uint64 `json:"shares,omitempty"` - // CPU hardcap limit (in usecs). Allowed cpu time in a given period. - Quota *int64 `json:"quota,omitempty"` -+ // CPU hardcap burst limit (in usecs). Allowed accumulated cpu time additionally for burst in a -+ // given period. -+ Burst *uint64 `json:"burst,omitempty"` - // CPU period to be used for hardcapping (in usecs). - Period *uint64 `json:"period,omitempty"` - // How much time realtime scheduling may use (in usecs). -@@ -328,6 +423,8 @@ type LinuxCPU struct { - Cpus string `json:"cpus,omitempty"` - // List of memory nodes in the cpuset. Default is to use any available memory node. - Mems string `json:"mems,omitempty"` -+ // cgroups are configured with minimum weight, 0: default behavior, 1: SCHED_IDLE. -+ Idle *int64 `json:"idle,omitempty"` - } - - // LinuxPids for Linux cgroup 'pids' resource management (Linux 4.3) -@@ -364,7 +461,7 @@ type LinuxResources struct { - Pids *LinuxPids `json:"pids,omitempty"` - // BlockIO restriction configuration - BlockIO *LinuxBlockIO `json:"blockIO,omitempty"` -- // Hugetlb limit (in bytes) -+ // Hugetlb limits (in bytes). Default to reservation limits if supported. 
- HugepageLimits []LinuxHugepageLimit `json:"hugepageLimits,omitempty"` - // Network restriction configuration - Network *LinuxNetwork `json:"network,omitempty"` -@@ -522,11 +619,21 @@ type WindowsMemoryResources struct { - - // WindowsCPUResources contains CPU resource management settings. - type WindowsCPUResources struct { -- // Number of CPUs available to the container. -+ // Count is the number of CPUs available to the container. It represents the -+ // fraction of the configured processor `count` in a container in relation -+ // to the processors available in the host. The fraction ultimately -+ // determines the portion of processor cycles that the threads in a -+ // container can use during each scheduling interval, as the number of -+ // cycles per 10,000 cycles. - Count *uint64 `json:"count,omitempty"` -- // CPU shares (relative weight to other containers with cpu shares). -+ // Shares limits the share of processor time given to the container relative -+ // to other workloads on the processor. The processor `shares` (`weight` at -+ // the platform level) is a value between 0 and 10000. - Shares *uint16 `json:"shares,omitempty"` -- // Specifies the portion of processor cycles that this container can use as a percentage times 100. -+ // Maximum determines the portion of processor cycles that the threads in a -+ // container can use during each scheduling interval, as the number of -+ // cycles per 10,000 cycles. Set processor `maximum` to a percentage times -+ // 100. - Maximum *uint16 `json:"maximum,omitempty"` - } - -@@ -613,6 +720,23 @@ type Arch string - // LinuxSeccompFlag is a flag to pass to seccomp(2). - type LinuxSeccompFlag string - -+const ( -+ // LinuxSeccompFlagLog is a seccomp flag to request all returned -+ // actions except SECCOMP_RET_ALLOW to be logged. An administrator may -+ // override this filter flag by preventing specific actions from being -+ // logged via the /proc/sys/kernel/seccomp/actions_logged file. 
(since -+ // Linux 4.14) -+ LinuxSeccompFlagLog LinuxSeccompFlag = "SECCOMP_FILTER_FLAG_LOG" -+ -+ // LinuxSeccompFlagSpecAllow can be used to disable Speculative Store -+ // Bypass mitigation. (since Linux 4.17) -+ LinuxSeccompFlagSpecAllow LinuxSeccompFlag = "SECCOMP_FILTER_FLAG_SPEC_ALLOW" -+ -+ // LinuxSeccompFlagWaitKillableRecv can be used to switch to the wait -+ // killable semantics. (since Linux 5.19) -+ LinuxSeccompFlagWaitKillableRecv LinuxSeccompFlag = "SECCOMP_FILTER_FLAG_WAIT_KILLABLE_RECV" -+) -+ - // Additional architectures permitted to be used for system calls - // By default only the native architecture of the kernel is permitted - const ( -@@ -683,8 +807,9 @@ type LinuxSyscall struct { - Args []LinuxSeccompArg `json:"args,omitempty"` - } - --// LinuxIntelRdt has container runtime resource constraints for Intel RDT --// CAT and MBA features which introduced in Linux 4.10 and 4.12 kernel -+// LinuxIntelRdt has container runtime resource constraints for Intel RDT CAT and MBA -+// features and flags enabling Intel RDT CMT and MBM features. -+// Intel RDT features are available in Linux 4.14 and newer kernel versions. - type LinuxIntelRdt struct { - // The identity for RDT Class of Service - ClosID string `json:"closID,omitempty"` -@@ -697,4 +822,76 @@ type LinuxIntelRdt struct { - // The unit of memory bandwidth is specified in "percentages" by - // default, and in "MBps" if MBA Software Controller is enabled. - MemBwSchema string `json:"memBwSchema,omitempty"` -+ -+ // EnableCMT is the flag to indicate if the Intel RDT CMT is enabled. CMT (Cache Monitoring Technology) supports monitoring of -+ // the last-level cache (LLC) occupancy for the container. -+ EnableCMT bool `json:"enableCMT,omitempty"` -+ -+ // EnableMBM is the flag to indicate if the Intel RDT MBM is enabled. MBM (Memory Bandwidth Monitoring) supports monitoring of -+ // total and local memory bandwidth for the container. 
-+ EnableMBM bool `json:"enableMBM,omitempty"` -+} -+ -+// ZOS contains platform-specific configuration for z/OS based containers. -+type ZOS struct { -+ // Devices are a list of device nodes that are created for the container -+ Devices []ZOSDevice `json:"devices,omitempty"` -+} -+ -+// ZOSDevice represents the mknod information for a z/OS special device file -+type ZOSDevice struct { -+ // Path to the device. -+ Path string `json:"path"` -+ // Device type, block, char, etc. -+ Type string `json:"type"` -+ // Major is the device's major number. -+ Major int64 `json:"major"` -+ // Minor is the device's minor number. -+ Minor int64 `json:"minor"` -+ // FileMode permission bits for the device. -+ FileMode *os.FileMode `json:"fileMode,omitempty"` -+ // UID of the device. -+ UID *uint32 `json:"uid,omitempty"` -+ // Gid of the device. -+ GID *uint32 `json:"gid,omitempty"` - } -+ -+// LinuxSchedulerPolicy represents different scheduling policies used with the Linux Scheduler -+type LinuxSchedulerPolicy string -+ -+const ( -+ // SchedOther is the default scheduling policy -+ SchedOther LinuxSchedulerPolicy = "SCHED_OTHER" -+ // SchedFIFO is the First-In-First-Out scheduling policy -+ SchedFIFO LinuxSchedulerPolicy = "SCHED_FIFO" -+ // SchedRR is the Round-Robin scheduling policy -+ SchedRR LinuxSchedulerPolicy = "SCHED_RR" -+ // SchedBatch is the Batch scheduling policy -+ SchedBatch LinuxSchedulerPolicy = "SCHED_BATCH" -+ // SchedISO is the Isolation scheduling policy -+ SchedISO LinuxSchedulerPolicy = "SCHED_ISO" -+ // SchedIdle is the Idle scheduling policy -+ SchedIdle LinuxSchedulerPolicy = "SCHED_IDLE" -+ // SchedDeadline is the Deadline scheduling policy -+ SchedDeadline LinuxSchedulerPolicy = "SCHED_DEADLINE" -+) -+ -+// LinuxSchedulerFlag represents the flags used by the Linux Scheduler. 
-+type LinuxSchedulerFlag string -+ -+const ( -+ // SchedFlagResetOnFork represents the reset on fork scheduling flag -+ SchedFlagResetOnFork LinuxSchedulerFlag = "SCHED_FLAG_RESET_ON_FORK" -+ // SchedFlagReclaim represents the reclaim scheduling flag -+ SchedFlagReclaim LinuxSchedulerFlag = "SCHED_FLAG_RECLAIM" -+ // SchedFlagDLOverrun represents the deadline overrun scheduling flag -+ SchedFlagDLOverrun LinuxSchedulerFlag = "SCHED_FLAG_DL_OVERRUN" -+ // SchedFlagKeepPolicy represents the keep policy scheduling flag -+ SchedFlagKeepPolicy LinuxSchedulerFlag = "SCHED_FLAG_KEEP_POLICY" -+ // SchedFlagKeepParams represents the keep parameters scheduling flag -+ SchedFlagKeepParams LinuxSchedulerFlag = "SCHED_FLAG_KEEP_PARAMS" -+ // SchedFlagUtilClampMin represents the utilization clamp minimum scheduling flag -+ SchedFlagUtilClampMin LinuxSchedulerFlag = "SCHED_FLAG_UTIL_CLAMP_MIN" -+ // SchedFlagUtilClampMin represents the utilization clamp maximum scheduling flag -+ SchedFlagUtilClampMax LinuxSchedulerFlag = "SCHED_FLAG_UTIL_CLAMP_MAX" -+) -diff --git a/vendor/github.com/opencontainers/runtime-spec/specs-go/version.go b/vendor/github.com/opencontainers/runtime-spec/specs-go/version.go -index 596af0c2..f6c15f6c 100644 ---- a/vendor/github.com/opencontainers/runtime-spec/specs-go/version.go -+++ b/vendor/github.com/opencontainers/runtime-spec/specs-go/version.go -@@ -6,12 +6,12 @@ const ( - // VersionMajor is for an API incompatible changes - VersionMajor = 1 - // VersionMinor is for functionality in a backwards-compatible manner -- VersionMinor = 0 -+ VersionMinor = 2 - // VersionPatch is for backwards-compatible bug fixes -- VersionPatch = 2 -+ VersionPatch = 0 - - // VersionDev indicates development branch. Releases will be empty string. -- VersionDev = "-dev" -+ VersionDev = "+dev" - ) - - // Version is the specification version that the package types support. 
-diff --git a/vendor/modules.txt b/vendor/modules.txt -index a5537dfe..40089cd4 100644 ---- a/vendor/modules.txt -+++ b/vendor/modules.txt -@@ -35,7 +35,7 @@ github.com/moby/sys/mountinfo - # github.com/mrunalp/fileutils v0.5.1 - ## explicit; go 1.13 - github.com/mrunalp/fileutils --# github.com/opencontainers/runtime-spec v1.0.3-0.20210326190908-1c3f411f0417 -+# github.com/opencontainers/runtime-spec v1.2.1-0.20240625190033-701738418b95 - ## explicit - github.com/opencontainers/runtime-spec/specs-go - # github.com/opencontainers/selinux v1.10.0 --- -2.47.1 - diff --git a/SOURCES/0001-1.2-openat2-improve-resilience-on-busy-systems.patch b/SOURCES/0001-1.2-openat2-improve-resilience-on-busy-systems.patch new file mode 100644 index 0000000..ce491cf --- /dev/null +++ b/SOURCES/0001-1.2-openat2-improve-resilience-on-busy-systems.patch @@ -0,0 +1,416 @@ +From 4ad5d01eeda006ba9ae067cbf999a77fe096fe00 Mon Sep 17 00:00:00 2001 +From: Aleksa Sarai +Date: Sat, 1 Nov 2025 17:21:36 +1100 +Subject: [PATCH 1/2] [1.2] openat2: improve resilience on busy systems + +Previously, we would see a ~3% failure rate when starting containers +with mounts that contain ".." (which can trigger -EAGAIN). To counteract +this, filepath-securejoin v0.5.1 includes a bump of the internal retry +limit from 32 to 128, which lowers the failure rate to 0.12%. + +However, there is still a risk of spurious failure on regular systems. +In order to try to provide more resilience (while avoiding DoS attacks), +this patch also includes an additional retry loop that terminates based +on a deadline rather than retry count. The deadline is 2ms, as my +testing found that ~800us for a single pathrs operation was the longest +latency due to -EAGAIN retries, and that was an outlier compared to the +more common ~400us latencies -- so 2ms should be more than enough for +any real system. 
+ +The failure rates above were based on more than 50k runs of runc with an +attack script (from libpathrs) running a rename attack on all cores of a +16-core system, which is arguably a worst-case but heavily utilised +servers could likely approach similar results. + +Signed-off-by: Aleksa Sarai +Signed-off-by: Kir Kolyshkin +--- + go.mod | 2 +- + go.sum | 4 +- + internal/pathrs/mkdirall_pathrslite.go | 4 +- + internal/pathrs/procfs_pathrslite.go | 22 ++++--- + internal/pathrs/retry.go | 66 +++++++++++++++++++ + internal/pathrs/root_pathrslite.go | 7 +- + .../cyphar/filepath-securejoin/CHANGELOG.md | 34 +++++++++- + .../cyphar/filepath-securejoin/VERSION | 2 +- + .../internal/{errors.go => errors_linux.go} | 15 ++++- + .../pathrs-lite/internal/fd/openat2_linux.go | 12 ++-- + vendor/modules.txt | 2 +- + 11 files changed, 144 insertions(+), 26 deletions(-) + create mode 100644 internal/pathrs/retry.go + rename vendor/github.com/cyphar/filepath-securejoin/pathrs-lite/internal/{errors.go => errors_linux.go} (70%) + +diff --git a/go.mod b/go.mod +index 5f00a576..90fa2e5b 100644 +--- a/go.mod ++++ b/go.mod +@@ -12,7 +12,7 @@ require ( + github.com/cilium/ebpf v0.16.0 + github.com/containerd/console v1.0.5 + github.com/coreos/go-systemd/v22 v22.5.0 +- github.com/cyphar/filepath-securejoin v0.5.0 ++ github.com/cyphar/filepath-securejoin v0.5.1 + github.com/docker/go-units v0.5.0 + github.com/godbus/dbus/v5 v5.1.0 + github.com/moby/sys/mountinfo v0.7.1 +diff --git a/go.sum b/go.sum +index 1f930ce4..049597b6 100644 +--- a/go.sum ++++ b/go.sum +@@ -9,8 +9,8 @@ github.com/coreos/go-systemd/v22 v22.5.0 h1:RrqgGjYQKalulkV8NGVIfkXQf6YYmOyiJKk8 + github.com/coreos/go-systemd/v22 v22.5.0/go.mod h1:Y58oyj3AT4RCenI/lSvhwexgC+NSVTIJ3seZv2GcEnc= + github.com/cpuguy83/go-md2man/v2 v2.0.2 h1:p1EgwI/C7NhT0JmVkwCD2ZBK8j4aeHQX2pMHHBfMQ6w= + github.com/cpuguy83/go-md2man/v2 v2.0.2/go.mod h1:tgQtvFlXSQOSOSIRvRPT7W67SCa46tRHOmNcaadrF8o= +-github.com/cyphar/filepath-securejoin v0.5.0 
h1:hIAhkRBMQ8nIeuVwcAoymp7MY4oherZdAxD+m0u9zaw= +-github.com/cyphar/filepath-securejoin v0.5.0/go.mod h1:Sdj7gXlvMcPZsbhwhQ33GguGLDGQL7h7bg04C/+u9jI= ++github.com/cyphar/filepath-securejoin v0.5.1 h1:eYgfMq5yryL4fbWfkLpFFy2ukSELzaJOTaUTuh+oF48= ++github.com/cyphar/filepath-securejoin v0.5.1/go.mod h1:Sdj7gXlvMcPZsbhwhQ33GguGLDGQL7h7bg04C/+u9jI= + github.com/davecgh/go-spew v1.1.0/go.mod h1:J7Y8YcW2NihsgmVo/mv3lAwl/skON4iLHjSsI+c5H38= + github.com/davecgh/go-spew v1.1.1 h1:vj9j/u1bqnvCEfJOwUhtlOARqs3+rkHYY13jYWTU97c= + github.com/davecgh/go-spew v1.1.1/go.mod h1:J7Y8YcW2NihsgmVo/mv3lAwl/skON4iLHjSsI+c5H38= +diff --git a/internal/pathrs/mkdirall_pathrslite.go b/internal/pathrs/mkdirall_pathrslite.go +index fb4f7842..a9a0157c 100644 +--- a/internal/pathrs/mkdirall_pathrslite.go ++++ b/internal/pathrs/mkdirall_pathrslite.go +@@ -83,7 +83,9 @@ func MkdirAllInRootOpen(root, unsafePath string, mode os.FileMode) (*os.File, er + } + defer rootDir.Close() + +- return pathrs.MkdirAllHandle(rootDir, unsafePath, mode) ++ return retryEAGAIN(func() (*os.File, error) { ++ return pathrs.MkdirAllHandle(rootDir, unsafePath, mode) ++ }) + } + + // MkdirAllInRoot is a wrapper around MkdirAllInRootOpen which closes the +diff --git a/internal/pathrs/procfs_pathrslite.go b/internal/pathrs/procfs_pathrslite.go +index a02b0d39..37450a0e 100644 +--- a/internal/pathrs/procfs_pathrslite.go ++++ b/internal/pathrs/procfs_pathrslite.go +@@ -27,13 +27,15 @@ import ( + ) + + func procOpenReopen(openFn func(subpath string) (*os.File, error), subpath string, flags int) (*os.File, error) { +- handle, err := openFn(subpath) ++ handle, err := retryEAGAIN(func() (*os.File, error) { ++ return openFn(subpath) ++ }) + if err != nil { + return nil, err + } + defer handle.Close() + +- f, err := pathrs.Reopen(handle, flags) ++ f, err := Reopen(handle, flags) + if err != nil { + return nil, fmt.Errorf("reopen %s: %w", handle.Name(), err) + } +@@ -44,7 +46,7 @@ func procOpenReopen(openFn func(subpath string) 
(*os.File, error), subpath strin + // [pathrs.Reopen], to let you one-shot open a procfs file with the given + // flags. + func ProcSelfOpen(subpath string, flags int) (*os.File, error) { +- proc, err := procfs.OpenProcRoot() ++ proc, err := retryEAGAIN(procfs.OpenProcRoot) + if err != nil { + return nil, err + } +@@ -55,7 +57,7 @@ func ProcSelfOpen(subpath string, flags int) (*os.File, error) { + // ProcPidOpen is a wrapper around [procfs.Handle.OpenPid] and [pathrs.Reopen], + // to let you one-shot open a procfs file with the given flags. + func ProcPidOpen(pid int, subpath string, flags int) (*os.File, error) { +- proc, err := procfs.OpenProcRoot() ++ proc, err := retryEAGAIN(procfs.OpenProcRoot) + if err != nil { + return nil, err + } +@@ -70,13 +72,15 @@ func ProcPidOpen(pid int, subpath string, flags int) (*os.File, error) { + // flags. The returned [procfs.ProcThreadSelfCloser] needs the same handling as + // when using pathrs-lite. + func ProcThreadSelfOpen(subpath string, flags int) (_ *os.File, _ procfs.ProcThreadSelfCloser, Err error) { +- proc, err := procfs.OpenProcRoot() ++ proc, err := retryEAGAIN(procfs.OpenProcRoot) + if err != nil { + return nil, nil, err + } + defer proc.Close() + +- handle, closer, err := proc.OpenThreadSelf(subpath) ++ handle, closer, err := retryEAGAIN2(func() (*os.File, procfs.ProcThreadSelfCloser, error) { ++ return proc.OpenThreadSelf(subpath) ++ }) + if err != nil { + return nil, nil, err + } +@@ -89,7 +93,7 @@ func ProcThreadSelfOpen(subpath string, flags int) (_ *os.File, _ procfs.ProcThr + } + defer handle.Close() + +- f, err := pathrs.Reopen(handle, flags) ++ f, err := Reopen(handle, flags) + if err != nil { + return nil, nil, fmt.Errorf("reopen %s: %w", handle.Name(), err) + } +@@ -98,5 +102,7 @@ func ProcThreadSelfOpen(subpath string, flags int) (_ *os.File, _ procfs.ProcThr + + // Reopen is a wrapper around pathrs.Reopen. 
+ func Reopen(file *os.File, flags int) (*os.File, error) { +- return pathrs.Reopen(file, flags) ++ return retryEAGAIN(func() (*os.File, error) { ++ return pathrs.Reopen(file, flags) ++ }) + } +diff --git a/internal/pathrs/retry.go b/internal/pathrs/retry.go +new file mode 100644 +index 00000000..a51d335c +--- /dev/null ++++ b/internal/pathrs/retry.go +@@ -0,0 +1,66 @@ ++// SPDX-License-Identifier: Apache-2.0 ++/* ++ * Copyright (C) 2024-2025 Aleksa Sarai ++ * Copyright (C) 2024-2025 SUSE LLC ++ * ++ * Licensed under the Apache License, Version 2.0 (the "License"); ++ * you may not use this file except in compliance with the License. ++ * You may obtain a copy of the License at ++ * ++ * http://www.apache.org/licenses/LICENSE-2.0 ++ * ++ * Unless required by applicable law or agreed to in writing, software ++ * distributed under the License is distributed on an "AS IS" BASIS, ++ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. ++ * See the License for the specific language governing permissions and ++ * limitations under the License. ++ */ ++ ++package pathrs ++ ++import ( ++ "errors" ++ "fmt" ++ "time" ++ ++ "golang.org/x/sys/unix" ++) ++ ++// Based on >50k tests running "runc run" on a 16-core system with very heavy ++// rename(2) load, the single longest latency caused by -EAGAIN retries was ++// ~800us (with the vast majority being closer to 400us). So, a 2ms limit ++// should give more than enough headroom for any real system in practice. ++const retryDeadline = 2 * time.Millisecond ++ ++// retryEAGAIN is a top-level retry loop for pathrs to try to avoid returning ++// spurious errors in most normal user cases when using openat2 (libpathrs ++// itself does up to 128 retries already, but this method takes a ++// wallclock-deadline approach to simply retry until a timer elapses). 
++func retryEAGAIN[T any](fn func() (T, error)) (T, error) { ++ deadline := time.After(retryDeadline) ++ for { ++ v, err := fn() ++ if !errors.Is(err, unix.EAGAIN) { ++ return v, err ++ } ++ select { ++ case <-deadline: ++ return *new(T), fmt.Errorf("%v retry deadline exceeded: %w", retryDeadline, err) ++ default: ++ // retry ++ } ++ } ++} ++ ++// retryEAGAIN2 is like retryEAGAIN except it returns two values. ++func retryEAGAIN2[T1, T2 any](fn func() (T1, T2, error)) (T1, T2, error) { ++ type ret struct { ++ v1 T1 ++ v2 T2 ++ } ++ v, err := retryEAGAIN(func() (ret, error) { ++ v1, v2, err := fn() ++ return ret{v1: v1, v2: v2}, err ++ }) ++ return v.v1, v.v2, err ++} +diff --git a/internal/pathrs/root_pathrslite.go b/internal/pathrs/root_pathrslite.go +index 0ef81fae..899af270 100644 +--- a/internal/pathrs/root_pathrslite.go ++++ b/internal/pathrs/root_pathrslite.go +@@ -31,12 +31,15 @@ import ( + // is effectively shorthand for [securejoin.OpenInRoot] followed by + // [securejoin.Reopen]. + func OpenInRoot(root, subpath string, flags int) (*os.File, error) { +- handle, err := pathrs.OpenInRoot(root, subpath) ++ handle, err := retryEAGAIN(func() (*os.File, error) { ++ return pathrs.OpenInRoot(root, subpath) ++ }) + if err != nil { + return nil, err + } + defer handle.Close() +- return pathrs.Reopen(handle, flags) ++ ++ return Reopen(handle, flags) + } + + // CreateInRoot creates a new file inside a root (as well as any missing parent +diff --git a/vendor/github.com/cyphar/filepath-securejoin/CHANGELOG.md b/vendor/github.com/cyphar/filepath-securejoin/CHANGELOG.md +index 6862467c..3faee0bc 100644 +--- a/vendor/github.com/cyphar/filepath-securejoin/CHANGELOG.md ++++ b/vendor/github.com/cyphar/filepath-securejoin/CHANGELOG.md +@@ -4,7 +4,36 @@ All notable changes to this project will be documented in this file. + The format is based on [Keep a Changelog](http://keepachangelog.com/) + and this project adheres to [Semantic Versioning](http://semver.org/). 
+ +-## [Unreleased] ## ++## [Unreleased 0.5.z] ## ++ ++## [0.5.1] - 2025-10-31 ## ++ ++> Spooky scary skeletons send shivers down your spine! ++ ++### Changed ### ++- `openat2` can return `-EAGAIN` if it detects a possible attack in certain ++ scenarios (namely if there was a rename or mount while walking a path with a ++ `..` component). While this is necessary to avoid a denial-of-service in the ++ kernel, it does require retry loops in userspace. ++ ++ In previous versions, `pathrs-lite` would retry `openat2` 32 times before ++ returning an error, but we've received user reports that this limit can be ++ hit on systems with very heavy load. In some synthetic benchmarks (testing ++ the worst-case of an attacker doing renames in a tight loop on every core of ++ a 16-core machine) we managed to get a ~3% failure rate in runc. We have ++ improved this situation in two ways: ++ ++ * We have now increased this limit to 128, which should be good enough for ++ most use-cases without becoming a denial-of-service vector (the number of ++ syscalls called by the `O_PATH` resolver in a typical case is within the ++ same ballpark). The same benchmarks show a failure rate of ~0.12% which ++ (while not zero) is probably sufficient for most users. ++ ++ * In addition, we now return a `unix.EAGAIN` error that is bubbled up and can ++ be detected by callers. This means that callers with stricter requirements ++ to avoid spurious errors can choose to do their own infinite `EAGAIN` retry ++ loop (though we would strongly recommend users use time-based deadlines in ++ such retry loops to avoid potentially unbounded denials-of-service). + + ## [0.5.0] - 2025-09-26 ## + +@@ -354,7 +383,8 @@ This is our first release of `github.com/cyphar/filepath-securejoin`, + containing a full implementation with a coverage of 93.5% (the only missing + cases are the error cases, which are hard to mocktest at the moment). 
+ +-[Unreleased]: https://github.com/cyphar/filepath-securejoin/compare/v0.5.0...HEAD ++[Unreleased 0.5.z]: https://github.com/cyphar/filepath-securejoin/compare/v0.5.1...release-0.5 ++[0.5.1]: https://github.com/cyphar/filepath-securejoin/compare/v0.5.0...v0.5.1 + [0.5.0]: https://github.com/cyphar/filepath-securejoin/compare/v0.4.1...v0.5.0 + [0.4.1]: https://github.com/cyphar/filepath-securejoin/compare/v0.4.0...v0.4.1 + [0.4.0]: https://github.com/cyphar/filepath-securejoin/compare/v0.3.6...v0.4.0 +diff --git a/vendor/github.com/cyphar/filepath-securejoin/VERSION b/vendor/github.com/cyphar/filepath-securejoin/VERSION +index 8f0916f7..4b9fcbec 100644 +--- a/vendor/github.com/cyphar/filepath-securejoin/VERSION ++++ b/vendor/github.com/cyphar/filepath-securejoin/VERSION +@@ -1 +1 @@ +-0.5.0 ++0.5.1 +diff --git a/vendor/github.com/cyphar/filepath-securejoin/pathrs-lite/internal/errors.go b/vendor/github.com/cyphar/filepath-securejoin/pathrs-lite/internal/errors_linux.go +similarity index 70% +rename from vendor/github.com/cyphar/filepath-securejoin/pathrs-lite/internal/errors.go +rename to vendor/github.com/cyphar/filepath-securejoin/pathrs-lite/internal/errors_linux.go +index c26e440e..d0b200f4 100644 +--- a/vendor/github.com/cyphar/filepath-securejoin/pathrs-lite/internal/errors.go ++++ b/vendor/github.com/cyphar/filepath-securejoin/pathrs-lite/internal/errors_linux.go +@@ -1,5 +1,7 @@ + // SPDX-License-Identifier: MPL-2.0 + ++//go:build linux ++ + // Copyright (C) 2024-2025 Aleksa Sarai + // Copyright (C) 2024-2025 SUSE LLC + // +@@ -12,15 +14,24 @@ package internal + + import ( + "errors" ++ ++ "golang.org/x/sys/unix" + ) + ++type xdevErrorish struct { ++ description string ++} ++ ++func (err xdevErrorish) Error() string { return err.description } ++func (err xdevErrorish) Is(target error) bool { return target == unix.EXDEV } ++ + var ( + // ErrPossibleAttack indicates that some attack was detected. 
+- ErrPossibleAttack = errors.New("possible attack detected") ++ ErrPossibleAttack error = xdevErrorish{"possible attack detected"} + + // ErrPossibleBreakout indicates that during an operation we ended up in a + // state that could be a breakout but we detected it. +- ErrPossibleBreakout = errors.New("possible breakout detected") ++ ErrPossibleBreakout error = xdevErrorish{"possible breakout detected"} + + // ErrInvalidDirectory indicates an unlinked directory. + ErrInvalidDirectory = errors.New("wandered into deleted directory") +diff --git a/vendor/github.com/cyphar/filepath-securejoin/pathrs-lite/internal/fd/openat2_linux.go b/vendor/github.com/cyphar/filepath-securejoin/pathrs-lite/internal/fd/openat2_linux.go +index 23053083..3e937fe3 100644 +--- a/vendor/github.com/cyphar/filepath-securejoin/pathrs-lite/internal/fd/openat2_linux.go ++++ b/vendor/github.com/cyphar/filepath-securejoin/pathrs-lite/internal/fd/openat2_linux.go +@@ -17,8 +17,6 @@ import ( + "runtime" + + "golang.org/x/sys/unix" +- +- "github.com/cyphar/filepath-securejoin/pathrs-lite/internal" + ) + + func scopedLookupShouldRetry(how *unix.OpenHow, err error) bool { +@@ -34,7 +32,10 @@ func scopedLookupShouldRetry(how *unix.OpenHow, err error) bool { + (errors.Is(err, unix.EAGAIN) || errors.Is(err, unix.EXDEV)) + } + +-const scopedLookupMaxRetries = 32 ++// This is a fairly arbitrary limit we have just to avoid an attacker being ++// able to make us spin in an infinite retry loop -- callers can choose to ++// retry on EAGAIN if they prefer. ++const scopedLookupMaxRetries = 128 + + // Openat2 is an [Fd]-based wrapper around unix.Openat2, but with some retry + // logic in case of EAGAIN errors. +@@ -43,10 +44,10 @@ func Openat2(dir Fd, path string, how *unix.OpenHow) (*os.File, error) { + // Make sure we always set O_CLOEXEC. 
+ how.Flags |= unix.O_CLOEXEC + var tries int +- for tries < scopedLookupMaxRetries { ++ for { + fd, err := unix.Openat2(dirFd, path, how) + if err != nil { +- if scopedLookupShouldRetry(how, err) { ++ if scopedLookupShouldRetry(how, err) && tries < scopedLookupMaxRetries { + // We retry a couple of times to avoid the spurious errors, and + // if we are being attacked then returning -EAGAIN is the best + // we can do. +@@ -58,5 +59,4 @@ func Openat2(dir Fd, path string, how *unix.OpenHow) (*os.File, error) { + runtime.KeepAlive(dir) + return os.NewFile(uintptr(fd), fullPath), nil + } +- return nil, &os.PathError{Op: "openat2", Path: fullPath, Err: internal.ErrPossibleAttack} + } +diff --git a/vendor/modules.txt b/vendor/modules.txt +index 4e7e0ef8..64524598 100644 +--- a/vendor/modules.txt ++++ b/vendor/modules.txt +@@ -25,7 +25,7 @@ github.com/coreos/go-systemd/v22/dbus + # github.com/cpuguy83/go-md2man/v2 v2.0.2 + ## explicit; go 1.11 + github.com/cpuguy83/go-md2man/v2/md2man +-# github.com/cyphar/filepath-securejoin v0.5.0 ++# github.com/cyphar/filepath-securejoin v0.5.1 + ## explicit; go 1.18 + github.com/cyphar/filepath-securejoin + github.com/cyphar/filepath-securejoin/internal/consts +-- +2.51.1 + diff --git a/SOURCES/0001-1.2-rootfs-only-set-mode-for-tmpfs-mount-if-target-alrea.patch b/SOURCES/0001-1.2-rootfs-only-set-mode-for-tmpfs-mount-if-target-alrea.patch new file mode 100644 index 0000000..fe1da7d --- /dev/null +++ b/SOURCES/0001-1.2-rootfs-only-set-mode-for-tmpfs-mount-if-target-alrea.patch @@ -0,0 +1,161 @@ +From c8588560cdebd80e9d1823a4a8e39172ee4650bb Mon Sep 17 00:00:00 2001 +From: Aleksa Sarai +Date: Fri, 7 Nov 2025 14:52:09 +1100 +Subject: [PATCH] rootfs: only set mode= for tmpfs mount if target already + existed + +This was always the intended behaviour but commit 72fbb34f5006 ("rootfs: +switch to fd-based handling of mountpoint targets") regressed it when +adding a mechanism to create a file handle to the target if it didn't +already exist 
(causing the later stat to always succeed). + +A lot of people depend on this functionality, so add some tests to make +sure we don't break it in the future. + +Fixes: 72fbb34f5006 ("rootfs: switch to fd-based handling of mountpoint targets") +Signed-off-by: Aleksa Sarai +(cherry picked from commit 9a9719eeb4978e73c64740b3fc796c1b12987b05) +Signed-off-by: Aleksa Sarai +--- + libcontainer/rootfs_linux.go | 25 ++++++----- + tests/integration/mounts.bats | 81 +++++++++++++++++++++++++++++++++++ + 2 files changed, 93 insertions(+), 13 deletions(-) + +diff --git a/libcontainer/rootfs_linux.go b/libcontainer/rootfs_linux.go +index 204e6a80..ab5a260d 100644 +--- a/libcontainer/rootfs_linux.go ++++ b/libcontainer/rootfs_linux.go +@@ -511,6 +511,18 @@ func (m *mountEntry) createOpenMountpoint(rootfs string) (Err error) { + _ = dstFile.Close() + } + }() ++ if err == nil && m.Device == "tmpfs" { ++ // If the original target exists, copy the mode for the tmpfs mount. ++ stat, err := dstFile.Stat() ++ if err != nil { ++ return fmt.Errorf("check tmpfs source mode: %w", err) ++ } ++ dt := fmt.Sprintf("mode=%04o", syscallMode(stat.Mode())) ++ if m.Data != "" { ++ dt = dt + "," + m.Data ++ } ++ m.Data = dt ++ } + if err != nil { + if !errors.Is(err, unix.ENOENT) { + return fmt.Errorf("lookup mountpoint target: %w", err) +@@ -551,19 +563,6 @@ func (m *mountEntry) createOpenMountpoint(rootfs string) (Err error) { + } + } + +- if m.Device == "tmpfs" { +- // If the original target exists, copy the mode for the tmpfs mount. 
+- stat, err := dstFile.Stat() +- if err != nil { +- return fmt.Errorf("check tmpfs source mode: %w", err) +- } +- dt := fmt.Sprintf("mode=%04o", syscallMode(stat.Mode())) +- if m.Data != "" { +- dt = dt + "," + m.Data +- } +- m.Data = dt +- } +- + dstFullPath, err := procfs.ProcSelfFdReadlink(dstFile) + if err != nil { + return fmt.Errorf("get mount destination real path: %w", err) +diff --git a/tests/integration/mounts.bats b/tests/integration/mounts.bats +index 11fb2cfc..b60c88ae 100644 +--- a/tests/integration/mounts.bats ++++ b/tests/integration/mounts.bats +@@ -234,6 +234,87 @@ function test_mount_order() { + [[ "$(stat -c %a rootfs/setgid/a/b/c)" == 2755 ]] + } + ++# https://github.com/opencontainers/runc/issues/4971 ++@test "runc run [tmpfs mount mode= inherit]" { ++ mkdir rootfs/tmpfs ++ chmod "=0710" rootfs/tmpfs ++ ++ update_config '.mounts += [{ ++ type: "tmpfs", ++ source: "tmpfs", ++ destination: "/tmpfs", ++ options: ["rw", "nodev", "nosuid"] ++ }]' ++ update_config '.process.args = ["stat", "-c", "%a", "/tmpfs"]' ++ ++ runc run test_busybox ++ [ "$status" -eq 0 ] ++ [[ "$output" == "710" ]] ++ ++ update_config '.process.args = ["cat", "/proc/self/mounts"]' ++ runc run test_busybox ++ [ "$status" -eq 0 ] ++ grep -Ex "tmpfs /tmpfs tmpfs [^ ]*\bmode=710\b[^ ]* .*" <<<"$output" ++} ++ ++# https://github.com/opencontainers/runc/issues/4971 ++@test "runc run [tmpfs mount explicit mode=]" { ++ mkdir rootfs/tmpfs ++ chmod "=0710" rootfs/tmpfs ++ ++ update_config '.mounts += [{ ++ type: "tmpfs", ++ source: "tmpfs", ++ destination: "/tmpfs", ++ options: ["rw", "nodev", "nosuid", "mode=1500"] ++ }]' ++ update_config '.process.args = ["stat", "-c", "%a", "/tmpfs"]' ++ ++ # Explicitly setting mode= overrides whatever mode we would've inherited. 
++ runc run test_busybox ++ [ "$status" -eq 0 ] ++ [[ "$output" == "1500" ]] ++ ++ update_config '.process.args = ["cat", "/proc/self/mounts"]' ++ runc run test_busybox ++ [ "$status" -eq 0 ] ++ grep -Ex "tmpfs /tmpfs tmpfs [^ ]*\bmode=1500\b[^ ]* .*" <<<"$output" ++ ++ # Verify that the actual directory was not chmod-ed. ++ [[ "$(stat -c %a rootfs/tmpfs)" == 710 ]] ++} ++ ++# https://github.com/opencontainers/runc/issues/4971 ++@test "runc run [tmpfs mount mode=1777 default]" { ++ update_config '.mounts += [{ ++ type: "tmpfs", ++ source: "tmpfs", ++ destination: "/non-existent/foo/bar/baz", ++ options: ["rw", "nodev", "nosuid"] ++ }]' ++ update_config '.process.args = ["stat", "-c", "%a", "/non-existent/foo/bar/baz"]' ++ ++ rm -rf rootfs/non-existent ++ runc run test_busybox ++ [ "$status" -eq 0 ] ++ [[ "$output" == "1777" ]] ++ ++ update_config '.process.args = ["cat", "/proc/self/mounts"]' ++ ++ rm -rf rootfs/non-existent ++ runc run test_busybox ++ [ "$status" -eq 0 ] ++ # We don't explicitly set a mode= in this case, it is just the tmpfs default. ++ grep -Ex "tmpfs /non-existent/foo/bar/baz tmpfs .*" <<<"$output" ++ run ! grep -Ex "tmpfs /non-existent/foo/bar/baz tmpfs [^ ]*\bmode=[0-7]+\b[^ ]* .*" <<<"$output" ++ ++ # Verify that the actual modes are *not* 1777. ++ [[ "$(stat -c %a rootfs/non-existent)" == 755 ]] ++ [[ "$(stat -c %a rootfs/non-existent/foo)" == 755 ]] ++ [[ "$(stat -c %a rootfs/non-existent/foo/bar)" == 755 ]] ++ [[ "$(stat -c %a rootfs/non-existent/foo/bar/baz)" == 755 ]] ++} ++ + @test "runc run [ro /sys/fs/cgroup mounts]" { + # Without cgroup namespace. 
+ update_config '.linux.namespaces -= [{"type": "cgroup"}]' +-- +2.51.1 + diff --git a/SOURCES/0001-1.2.5-1.el9-CVEs-mega-patch.patch b/SOURCES/0001-1.2.5-1.el9-CVEs-mega-patch.patch new file mode 100644 index 0000000..28701ea --- /dev/null +++ b/SOURCES/0001-1.2.5-1.el9-CVEs-mega-patch.patch @@ -0,0 +1,13925 @@ +From b6cebe30cbb1d41a357087ec8fc6c01ac4e6d317 Mon Sep 17 00:00:00 2001 +From: Aleksa Sarai +Date: Tue, 30 Sep 2025 23:04:02 +1000 +Subject: [PATCH] [1.2.5-1.el9] CVEs mega patch + +> This is a combination of 27 commits. +> This is the 1st commit message: + +internal: linux: add package doc-comment + +This is necessary for the pre-1.4 backports because internal/linux was +not present and the linters get angry when a new package without a doc +comment gets added. + +Signed-off-by: Aleksa Sarai + +> This is the commit message #2: + +internal/sys: add VerifyInode helper + +This will be used for a few security patches in later patches in this +patchset. The need to verify what kind of inode we are operating on in a +race-free way turns out to be quite a common pattern... + +Signed-off-by: Aleksa Sarai + +> This is the commit message #3: + +internal: move utils.MkdirAllInRoot to internal/pathrs + +We will have more wrappers around filepath-securejoin, and so move them +to their own specific package so that we can eventually use libpathrs +fairly cleanly (by swapping out the implementation). + +Signed-off-by: Aleksa Sarai + +> This is the commit message #4: + +*: switch to safer securejoin.Reopen + +filepath-securejoin v0.3 gave us a much safer re-open primitive, we +should use it to avoid any theoretical attacks. Rather than using it +directly, add a small pathrs wrapper to make libpathrs migrations in the +future easier... + +Signed-off-by: Aleksa Sarai + +> This is the commit message #5: + +libct: add/use isDevNull, verifyDevNull + +The /dev/null in a container should not be trusted, because when /dev +is a bind mount, /dev/null is not created by runc itself.
+ +1. Add isDevNull which checks the fd minor/major and device type, + and verifyDevNull which does the stat and the check. + +2. Rewrite maskPath to open and check /dev/null, and use its fd to + perform mounts. Move the loop over the MaskPaths into the function, + and rename it to maskPaths. + +3. reOpenDevNull: use verifyDevNull and isDevNull. + +4. fixStdioPermissions: use isDevNull instead of stat. + +Fixes: GHSA-9493-h29p-rfm2 CVE-2025-31133 +Co-authored-by: Rodrigo Campos +Signed-off-by: Kir Kolyshkin +Signed-off-by: Aleksa Sarai + +> This is the commit message #6: + +libct: maskPaths: only ignore ENOENT on mount dest + +When mounting a path being masked, the /dev/null might disappear from +under us, and mount (even on an opened /dev/null file descriptor) will +return ENOENT, which we deliberately ignore, as there's no need to mask +non-existent paths. + +Let's open the destination path and ignore ENOENT during open, then +mount via the destination file descriptor, not ignoring ENOENT. + +Reported-by: lifubang +Signed-off-by: Kir Kolyshkin + +> This is the commit message #7: + +libct: maskPaths: don't rely on ENOTDIR for mount + +Currently, we rely on mount returning ENOTDIR when the destination is a +directory (and so mount tells us that the source is not), and fall back +to read-only tmpfs bind mount for such cases. + +Theoretically, ENOTDIR can also be returned in some other cases, +resulting in the wrong type of mount being used. + +Let's be more straightforward here -- call fstat on destination file +descriptor, and use the proper mount depending on whether it is a +directory. + +Reported-by: Rodrigo Campos +Signed-off-by: Kir Kolyshkin + +> This is the commit message #8: + +console: use TIOCGPTPEER when allocating peer PTY + +When opening the peer end of a pty, the old kernel API required us to +open /dev/pts/$num inside the container (at least since we fixed console +handling many years ago in commit 244c9fc426ae ("*: console rewrite")). 
+ +The problem is that in a hostile container it is possible for +/dev/pts/$num to be an attacker-controlled symlink that runc can be +tricked into resolving when doing bind-mounts. This allows the attacker +to (among other things) persist /proc/... entries that are later masked +by runc, allowing an attacker to escape through the kernel.core_pattern +sysctl (/proc/sys/kernel/core_pattern). This is the original issue +reported by Lei Wang and Li Fu Bang in CVE-2025-52565. + +However, it should be noted that this is not entirely a newly-discovered +problem. Way back in Linux 4.13 (2017), I added the TIOCGPTPEER ioctl, +which allows us to get a pty peer without touching the /dev/pts inside +the container. The original threat model was around an attacker +replacing /dev/pts/$n or /dev/pts/ptmx with some malicious inode (a DoS +inode, or possibly a PTY they wanted a confused deputy to operate on). +Unfortunately, there was no practical way for runc to cache a safe +O_PATH handle to /dev/pts/ptmx (unlike other runtimes like LXC, which +switched to TIOCGPTPEER way back in 2017). Since it wasn't clear how we +could protect against the main attack TIOCGPTPEER was meant to protect +against, we never switched to it (even though I implemented it +specifically to harden container runtimes). + +Unfortunately, It turns out that mount *sources* are a threat we didn't +fully consider. Since TIOCGPTPEER already solves this problem entirely +for us in a race free way, we should just use that. In a later patch, we +will add some hardening for /dev/pts/$num opening to maintain support +for very old kernels (Linux 4.13 is very old at this point, but RHEL 7 +is still kicking and is stuck on Linux 3.10). 
+ +Fixes: GHSA-qw9x-cqr3-wc7r CVE-2025-52565 +Reported-by: Lei Wang (CVE-2025-52565) +Reported-by: lfbzhm (CVE-2025-52565) +Reported-by: Aleksa Sarai (TIOCGPTPEER) +Signed-off-by: Aleksa Sarai + +> This is the commit message #9: + +console: add fallback for pre-TIOCGPTPEER kernels + +The pty driver has very consistent allocation rules for the major:minor +numbers of /dev/pts/$n inodes, so it is possible to somewhat safely open +/dev/pts/* paths if we validate that the inode is the one we expect. + +It is possible for an attacker to have over-mounted a pts peer from a +different devpts instance, but to fix this would require more tracking +of devpts instances than runc currently can do. + +This means runc should continue to work on very old kernels. + +Signed-off-by: Aleksa Sarai + +> This is the commit message #10: + +console: avoid trivial symlink attacks for /dev/console + +An attacker could make /dev/console a symlink. This presents two +possible issues: + + 1. os.Create will happily truncate targets, which could have resulted + in a worse version of CVE-2024-4531. Luckily, this all happens after + pivot_root(2) so the scope of that particular attack is fairly + limited (you are unlikely to be able to easily access host rootfs + files -- though it might be possible to take advantage of leaks such + as in CVE-2024-21626). However, O_CREAT|O_NOFOLLOW is what we should + be doing for all file creations. + + 2. Because we passed /dev/console as the only mount path (as opposed to + using a /proc/self/fd/$n path), an attacker could swap the symlink + to point to any other path and thus cause us to mount over some + other path. This is not as big of a problem because all the mounts + are in the container namespace after pivot_root(2), and users + usually can create arbitrary mount targets inside the container. + +These issues don't seem particularly exploitable, but they deserve to be +hardened regardless. 
+ +Signed-off-by: Aleksa Sarai + +> This is the commit message #11: + +console: verify /dev/pts/ptmx before use + +This is primarily done out of an abundance of caution against runc exec +being attacked by a container where /dev/pts/ptmx has been replaced with +some other bad inode (a disconnected NFS handle, a symlink that goes +through a leaked runc file descriptor to reference a host ptmx, etc). + +Unfortunately, we cannot trivially verify that /dev/pts/ptmx is actually +the /dev/pts from the container without storing stuff like the fsid in +the runc state.json, which is probably not worth the extra effort. This +should at least avoid the most concerning cases. + +Reported-by: Aleksa Sarai +Signed-off-by: Aleksa Sarai + +> This is the commit message #12: + +go.mod: update to github.com/cyphar/filepath-securejoin@v0.5.0 + +In order to avoid lint errors due to the deprecation of the top-level +securejoin methods ported from libpathrs, we need to adjust +internal/pathrs to use the new pathrs-lite subpackage instead. + +Signed-off-by: Aleksa Sarai + +> This is the commit message #13: + +internal: add wrappers for securejoin.Proc* + +Signed-off-by: Aleksa Sarai + +> This is the commit message #14: + +rootfs: avoid using os.Create for new device inodes + +If an attacker were to make the target of a device inode creation be a +symlink to some host path, os.Create would happily truncate the target +which could lead to all sorts of issues. This exploit is probably not as +exploitable because device inodes are usually only bind-mounted for +rootless containers, which cannot overwrite important host files (though +user files would still be up for grabs). + +The regular inode creation logic could also theoretically be tricked +into changing the access mode and ownership of host files if the +newly-created device inode was swapped with a symlink to a host path.
+ +Signed-off-by: Aleksa Sarai + +> This is the commit message #15: + +apparmor: use safe procfs API for labels + +EnsureProcHandle only protects us against a tmpfs mount, but the risk of +a procfs path being used (such as /proc/self/sched) has been known for a +while. Now that filepath-securejoin has a reasonably safe procfs API, +switch to it. + +Fixes: GHSA-cgrx-mc8f-2prm CVE-2025-52881 +Signed-off-by: Aleksa Sarai + +> This is the commit message #16: + +utils: use safe procfs for /proc/self/fd loop code + +From a safety perspective this might not be strictly required, but it +paves the way for us to remove utils.ProcThreadSelf. + +Signed-off-by: Aleksa Sarai + +> This is the commit message #17: + +utils: remove unneeded EnsureProcHandle + +All of the callers of EnsureProcHandle now use filepath-securejoin's +ProcThreadSelf to get a file handle, which has much stricter +verification to avoid procfs attacks than EnsureProcHandle's very +simplistic filesystem type check. + +Signed-off-by: Aleksa Sarai + +> This is the commit message #18: + +init: write sysctls using safe procfs API + +sysctls could in principle also be used as a write gadget for arbitrary +procfs files. As this requires getting a non-subset=pid /proc handle we +amortise this by only allocating a single procfs handle for all sysctl +writes. + +Fixes: GHSA-cgrx-mc8f-2prm CVE-2025-52881 +Signed-off-by: Aleksa Sarai + +> This is the commit message #19: + +init: use securejoin for /proc/self/setgroups + +Signed-off-by: Aleksa Sarai + +> This is the commit message #20: + +libct/system: use securejoin for /proc/$pid/stat + +Signed-off-by: Aleksa Sarai + +> This is the commit message #21: + +libct: align param type for mountCgroupV1/V2 functions + +Signed-off-by: lifubang + +> This is the commit message #22: + +criu: improve prepareCriuRestoreMounts + +1. Replace the big "if !" block with the if block and continue, + simplifying the code flow. + +2. Move comments closer to the code, improving readability. 
+ +This commit is best reviewed with --ignore-all-space or similar. + +Signed-off-by: Kir Kolyshkin +(cherry picked from commit 0c93d41c65b6a1055e945d1d3e56943b07b8405b) +Signed-off-by: Kir Kolyshkin + +> This is the commit message #23: + +criu: ignore cgroup early in prepareCriuRestoreMounts + +It makes sense to ignore cgroup mounts much early in the code, +saving some time on unnecessary operations. + +Signed-off-by: Kir Kolyshkin +(cherry picked from commit b8aa5481db42b5222b1725e5af939bec829937c5) +Signed-off-by: Kir Kolyshkin + +> This is the commit message #24: + +criu: inline makeCriuRestoreMountpoints + +Since its code is now trivial, and it is only called from a single +place, it does not make sense to have it as a separate function. + +Signed-off-by: Kir Kolyshkin +(cherry picked from commit f91fbd34d9e819a833c7da00c6c88f5371a82ac5) +Signed-off-by: Kir Kolyshkin + +> This is the commit message #25: + +criu: simplify isOnTmpfs check in prepareCriuRestoreMounts + +Instead of generating a list of tmpfs mount and have a special function +to check whether the path is in the list, let's go over the list of +mounts directly. This simplifies the code and improves readability. + +Signed-off-by: Kir Kolyshkin +(cherry picked from commit ce3cd4234c9cd90f8109a33ab86f3456c2edf947) +Signed-off-by: Kir Kolyshkin + +> This is the commit message #26: + +rootfs: switch to fd-based handling of mountpoint targets + +An attacker could race with us during mount configuration in order to +trick us into mounting over an unexpected path. This would bypass +checkProcMount() and would allow for security profiles to be left +unapplied by mounting over /proc/self/attr/... (or even more serious +outcomes such as killing the entire system by tricking runc into writing +strings to /proc/sysrq-trigger). 
+ +This is a larger issue with our current mount infrastructure, and the +ideal solution would be to rewrite it all to be fd-based (which would +also allow us to support the "new" mount API, which also avoids a bunch +of other issues with mount(8)). However, such a rewrite is not really +workable as a security fix, so this patch is a bit of a compromise +approach to fix the issue while also moving us a bit towards that +eventual end-goal. + +The core issue in CVE-2025-52881 is that we currently use the (insecure) +SecureJoin to re-resolve mountpoint target paths multiple times during +mounting. Rather than generating a string from createMountpoint(), we +instead open an *os.File handle to the target mountpoint directly and +then operate on that handle. This will make it easier to remove +utils.WithProcfd() and rework mountViaFds() in the future. + +The only real issue we need to work around is that we need to re-open +the mount target after doing the mount in order to get a handle to the +mountpoint -- pathrs.Reopen() doesn't work in this case (it just +re-opens the inode under the mountpoint) so we need to do a naive +re-open using the full path. Note that if we used move_mount(2) this +wouldn't be a problem because we would have a handle to the mountpoint +itself. + +Note that this is still somewhat of a temporary solution -- ideally +mountViaFds would use *os.File directly to let us avoid some other +issues with using bare /proc/... paths, as well as also letting us more +easily use the new mount API on modern kernels. + +Fixes: GHSA-cgrx-mc8f-2prm CVE-2025-52881 +Co-developed-by: lifubang +Signed-off-by: Aleksa Sarai + +> This is the commit message #27: + +selinux: use safe procfs API for labels + +Due to the sensitive nature of these fixes, it was not possible to +submit these upstream and vendor the upstream library. 
Instead, this +patch uses a fork of github.com/opencontainers/selinux, branched at +commit opencontainers/selinux@879a755db558501df06f4ea59461ebc2d0c4a991. + +In order to permit downstreams to build with this patched version, a +snapshot of the forked version has been included in +internal/third_party/selinux. Note that since we use "go mod vendor", +the patched code is usable even without being "go get"-able. Once the +embargo for this issue is lifted we can submit the patches upstream and +switch back to a proper upstream go.mod entry. + +Also, this requires us to temporarily disable the CI job we have that +disallows "replace" directives. + +Fixes: GHSA-cgrx-mc8f-2prm CVE-2025-52881 +Signed-off-by: Aleksa Sarai +Signed-off-by: Kir Kolyshkin +--- + go.mod | 11 +- + go.sum | 10 +- + internal/linux/doc.go | 3 + + internal/linux/linux.go | 44 + + internal/pathrs/doc.go | 23 + + internal/pathrs/mkdirall_pathrslite.go | 97 ++ + internal/pathrs/path.go | 34 + + internal/pathrs/path_test.go | 53 + + internal/pathrs/procfs_pathrslite.go | 102 ++ + internal/pathrs/root_pathrslite.go | 69 + + internal/sys/doc.go | 5 + + internal/sys/opath_linux.go | 53 + + internal/sys/sysctl_linux.go | 54 + + internal/sys/verify_inode_unix.go | 30 + + internal/third_party/selinux/.codespellrc | 2 + + .../selinux/.github/dependabot.yml | 10 + + .../selinux/.github/workflows/validate.yml | 163 ++ + internal/third_party/selinux/.gitignore | 1 + + internal/third_party/selinux/.golangci.yml | 44 + + internal/third_party/selinux/CODEOWNERS | 1 + + internal/third_party/selinux/CONTRIBUTING.md | 119 ++ + internal/third_party/selinux/LICENSE | 201 +++ + internal/third_party/selinux/MAINTAINERS | 5 + + internal/third_party/selinux/Makefile | 37 + + internal/third_party/selinux/README.md | 23 + + .../third_party/selinux/go-selinux/doc.go | 13 + + .../selinux/go-selinux/label/label.go | 48 + + .../selinux/go-selinux/label/label_linux.go | 136 ++ + .../go-selinux/label/label_linux_test.go | 130 ++ + 
.../selinux/go-selinux/label/label_stub.go | 44 + + .../go-selinux/label/label_stub_test.go | 76 + + .../selinux/go-selinux/label/label_test.go | 35 + + .../third_party/selinux/go-selinux/selinux.go | 322 ++++ + .../selinux/go-selinux/selinux_linux.go | 1405 +++++++++++++++++ + .../selinux/go-selinux/selinux_linux_test.go | 711 +++++++++ + .../selinux/go-selinux/selinux_stub.go | 159 ++ + .../selinux/go-selinux/selinux_stub_test.go | 127 ++ + .../selinux/go-selinux/xattrs_linux.go | 71 + + internal/third_party/selinux/go.mod | 8 + + internal/third_party/selinux/go.sum | 8 + + .../third_party/selinux/pkg/pwalk/README.md | 52 + + .../third_party/selinux/pkg/pwalk/pwalk.go | 131 ++ + .../selinux/pkg/pwalk/pwalk_test.go | 236 +++ + .../selinux/pkg/pwalkdir/README.md | 56 + + .../selinux/pkg/pwalkdir/pwalkdir.go | 123 ++ + .../selinux/pkg/pwalkdir/pwalkdir_test.go | 239 +++ + libcontainer/apparmor/apparmor_linux.go | 13 +- + libcontainer/console_linux.go | 163 +- + libcontainer/criu_linux.go | 103 +- + libcontainer/dmz/cloned_binary_linux.go | 3 +- + libcontainer/init_linux.go | 27 +- + libcontainer/integration/exec_test.go | 15 +- + libcontainer/rootfs_linux.go | 419 +++-- + libcontainer/standard_init_linux.go | 28 +- + libcontainer/system/linux.go | 20 + + libcontainer/system/proc.go | 16 +- + libcontainer/utils/utils.go | 4 +- + libcontainer/utils/utils_test.go | 4 +- + libcontainer/utils/utils_unix.go | 127 +- + utils_linux.go | 9 +- + .../containerd/console/console_other.go | 4 +- + .../containerd/console/console_unix.go | 9 + + .../containerd/console/tc_darwin.go | 5 +- + .../containerd/console/tc_freebsd_cgo.go | 5 +- + .../containerd/console/tc_freebsd_nocgo.go | 5 +- + .../github.com/containerd/console/tc_linux.go | 5 +- + .../containerd/console/tc_netbsd.go | 5 +- + .../containerd/console/tc_openbsd_cgo.go | 6 +- + .../containerd/console/tc_openbsd_nocgo.go | 6 +- + .../github.com/containerd/console/tc_zos.go | 5 +- + 
.../cyphar/filepath-securejoin/.golangci.yml | 56 + + .../cyphar/filepath-securejoin/CHANGELOG.md | 121 +- + .../cyphar/filepath-securejoin/COPYING.md | 447 ++++++ + .../{LICENSE => LICENSE.BSD} | 0 + .../filepath-securejoin/LICENSE.MPL-2.0 | 373 +++++ + .../cyphar/filepath-securejoin/README.md | 21 +- + .../cyphar/filepath-securejoin/VERSION | 2 +- + .../cyphar/filepath-securejoin/codecov.yml | 29 + + .../filepath-securejoin/deprecated_linux.go | 48 + + .../cyphar/filepath-securejoin/doc.go | 34 +- + .../gocompat_generics_go121.go | 32 - + .../gocompat_generics_unsupported.go | 124 -- + .../internal/consts/consts.go | 15 + + .../cyphar/filepath-securejoin/join.go | 23 +- + .../filepath-securejoin/openat2_linux.go | 127 -- + .../filepath-securejoin/openat_linux.go | 59 - + .../filepath-securejoin/pathrs-lite/README.md | 33 + + .../filepath-securejoin/pathrs-lite/doc.go | 14 + + .../pathrs-lite/internal/assert/assert.go | 30 + + .../pathrs-lite/internal/errors.go | 30 + + .../pathrs-lite/internal/fd/at_linux.go | 148 ++ + .../pathrs-lite/internal/fd/fd.go | 55 + + .../pathrs-lite/internal/fd/fd_linux.go | 78 + + .../pathrs-lite/internal/fd/mount_linux.go | 54 + + .../pathrs-lite/internal/fd/openat2_linux.go | 62 + + .../pathrs-lite/internal/gocompat/README.md | 10 + + .../pathrs-lite/internal/gocompat/doc.go | 13 + + .../gocompat}/gocompat_errors_go120.go | 7 +- + .../gocompat}/gocompat_errors_unsupported.go | 8 +- + .../gocompat/gocompat_generics_go121.go | 53 + + .../gocompat/gocompat_generics_unsupported.go | 187 +++ + .../internal/kernelversion/kernel_linux.go | 123 ++ + .../pathrs-lite/internal/linux/doc.go | 12 + + .../pathrs-lite/internal/linux/mount_linux.go | 47 + + .../internal/linux/openat2_linux.go | 31 + + .../internal/procfs/procfs_linux.go | 544 +++++++ + .../internal/procfs/procfs_lookup_linux.go | 222 +++ + .../{ => pathrs-lite}/lookup_linux.go | 61 +- + .../{ => pathrs-lite}/mkdir_linux.go | 46 +- + .../{ => pathrs-lite}/open_linux.go | 59 +- + 
.../pathrs-lite/openat2_linux.go | 101 ++ + .../pathrs-lite/procfs/procfs_linux.go | 157 ++ + .../filepath-securejoin/procfs_linux.go | 452 ------ + .../cyphar/filepath-securejoin/vfs.go | 2 + + .../selinux/go-selinux/label/label.go | 67 - + .../selinux/go-selinux/label/label_linux.go | 22 +- + .../selinux/go-selinux/label/label_stub.go | 20 +- + .../selinux/go-selinux/selinux.go | 26 +- + .../selinux/go-selinux/selinux_linux.go | 322 ++-- + .../selinux/go-selinux/selinux_stub.go | 52 +- + .../selinux/go-selinux/xattrs_linux.go | 4 +- + .../selinux/pkg/pwalkdir/README.md | 6 +- + .../selinux/pkg/pwalkdir/pwalkdir.go | 7 + + vendor/modules.txt | 17 +- + 124 files changed, 9452 insertions(+), 1571 deletions(-) + create mode 100644 internal/linux/doc.go + create mode 100644 internal/linux/linux.go + create mode 100644 internal/pathrs/doc.go + create mode 100644 internal/pathrs/mkdirall_pathrslite.go + create mode 100644 internal/pathrs/path.go + create mode 100644 internal/pathrs/path_test.go + create mode 100644 internal/pathrs/procfs_pathrslite.go + create mode 100644 internal/pathrs/root_pathrslite.go + create mode 100644 internal/sys/doc.go + create mode 100644 internal/sys/opath_linux.go + create mode 100644 internal/sys/sysctl_linux.go + create mode 100644 internal/sys/verify_inode_unix.go + create mode 100644 internal/third_party/selinux/.codespellrc + create mode 100644 internal/third_party/selinux/.github/dependabot.yml + create mode 100644 internal/third_party/selinux/.github/workflows/validate.yml + create mode 100644 internal/third_party/selinux/.gitignore + create mode 100644 internal/third_party/selinux/.golangci.yml + create mode 100644 internal/third_party/selinux/CODEOWNERS + create mode 100644 internal/third_party/selinux/CONTRIBUTING.md + create mode 100644 internal/third_party/selinux/LICENSE + create mode 100644 internal/third_party/selinux/MAINTAINERS + create mode 100644 internal/third_party/selinux/Makefile + create mode 100644 
internal/third_party/selinux/README.md + create mode 100644 internal/third_party/selinux/go-selinux/doc.go + create mode 100644 internal/third_party/selinux/go-selinux/label/label.go + create mode 100644 internal/third_party/selinux/go-selinux/label/label_linux.go + create mode 100644 internal/third_party/selinux/go-selinux/label/label_linux_test.go + create mode 100644 internal/third_party/selinux/go-selinux/label/label_stub.go + create mode 100644 internal/third_party/selinux/go-selinux/label/label_stub_test.go + create mode 100644 internal/third_party/selinux/go-selinux/label/label_test.go + create mode 100644 internal/third_party/selinux/go-selinux/selinux.go + create mode 100644 internal/third_party/selinux/go-selinux/selinux_linux.go + create mode 100644 internal/third_party/selinux/go-selinux/selinux_linux_test.go + create mode 100644 internal/third_party/selinux/go-selinux/selinux_stub.go + create mode 100644 internal/third_party/selinux/go-selinux/selinux_stub_test.go + create mode 100644 internal/third_party/selinux/go-selinux/xattrs_linux.go + create mode 100644 internal/third_party/selinux/go.mod + create mode 100644 internal/third_party/selinux/go.sum + create mode 100644 internal/third_party/selinux/pkg/pwalk/README.md + create mode 100644 internal/third_party/selinux/pkg/pwalk/pwalk.go + create mode 100644 internal/third_party/selinux/pkg/pwalk/pwalk_test.go + create mode 100644 internal/third_party/selinux/pkg/pwalkdir/README.md + create mode 100644 internal/third_party/selinux/pkg/pwalkdir/pwalkdir.go + create mode 100644 internal/third_party/selinux/pkg/pwalkdir/pwalkdir_test.go + create mode 100644 vendor/github.com/cyphar/filepath-securejoin/.golangci.yml + create mode 100644 vendor/github.com/cyphar/filepath-securejoin/COPYING.md + rename vendor/github.com/cyphar/filepath-securejoin/{LICENSE => LICENSE.BSD} (100%) + create mode 100644 vendor/github.com/cyphar/filepath-securejoin/LICENSE.MPL-2.0 + create mode 100644 
vendor/github.com/cyphar/filepath-securejoin/codecov.yml + create mode 100644 vendor/github.com/cyphar/filepath-securejoin/deprecated_linux.go + delete mode 100644 vendor/github.com/cyphar/filepath-securejoin/gocompat_generics_go121.go + delete mode 100644 vendor/github.com/cyphar/filepath-securejoin/gocompat_generics_unsupported.go + create mode 100644 vendor/github.com/cyphar/filepath-securejoin/internal/consts/consts.go + delete mode 100644 vendor/github.com/cyphar/filepath-securejoin/openat2_linux.go + delete mode 100644 vendor/github.com/cyphar/filepath-securejoin/openat_linux.go + create mode 100644 vendor/github.com/cyphar/filepath-securejoin/pathrs-lite/README.md + create mode 100644 vendor/github.com/cyphar/filepath-securejoin/pathrs-lite/doc.go + create mode 100644 vendor/github.com/cyphar/filepath-securejoin/pathrs-lite/internal/assert/assert.go + create mode 100644 vendor/github.com/cyphar/filepath-securejoin/pathrs-lite/internal/errors.go + create mode 100644 vendor/github.com/cyphar/filepath-securejoin/pathrs-lite/internal/fd/at_linux.go + create mode 100644 vendor/github.com/cyphar/filepath-securejoin/pathrs-lite/internal/fd/fd.go + create mode 100644 vendor/github.com/cyphar/filepath-securejoin/pathrs-lite/internal/fd/fd_linux.go + create mode 100644 vendor/github.com/cyphar/filepath-securejoin/pathrs-lite/internal/fd/mount_linux.go + create mode 100644 vendor/github.com/cyphar/filepath-securejoin/pathrs-lite/internal/fd/openat2_linux.go + create mode 100644 vendor/github.com/cyphar/filepath-securejoin/pathrs-lite/internal/gocompat/README.md + create mode 100644 vendor/github.com/cyphar/filepath-securejoin/pathrs-lite/internal/gocompat/doc.go + rename vendor/github.com/cyphar/filepath-securejoin/{ => pathrs-lite/internal/gocompat}/gocompat_errors_go120.go (69%) + rename vendor/github.com/cyphar/filepath-securejoin/{ => pathrs-lite/internal/gocompat}/gocompat_errors_unsupported.go (80%) + create mode 100644 
vendor/github.com/cyphar/filepath-securejoin/pathrs-lite/internal/gocompat/gocompat_generics_go121.go + create mode 100644 vendor/github.com/cyphar/filepath-securejoin/pathrs-lite/internal/gocompat/gocompat_generics_unsupported.go + create mode 100644 vendor/github.com/cyphar/filepath-securejoin/pathrs-lite/internal/kernelversion/kernel_linux.go + create mode 100644 vendor/github.com/cyphar/filepath-securejoin/pathrs-lite/internal/linux/doc.go + create mode 100644 vendor/github.com/cyphar/filepath-securejoin/pathrs-lite/internal/linux/mount_linux.go + create mode 100644 vendor/github.com/cyphar/filepath-securejoin/pathrs-lite/internal/linux/openat2_linux.go + create mode 100644 vendor/github.com/cyphar/filepath-securejoin/pathrs-lite/internal/procfs/procfs_linux.go + create mode 100644 vendor/github.com/cyphar/filepath-securejoin/pathrs-lite/internal/procfs/procfs_lookup_linux.go + rename vendor/github.com/cyphar/filepath-securejoin/{ => pathrs-lite}/lookup_linux.go (86%) + rename vendor/github.com/cyphar/filepath-securejoin/{ => pathrs-lite}/mkdir_linux.go (86%) + rename vendor/github.com/cyphar/filepath-securejoin/{ => pathrs-lite}/open_linux.go (56%) + create mode 100644 vendor/github.com/cyphar/filepath-securejoin/pathrs-lite/openat2_linux.go + create mode 100644 vendor/github.com/cyphar/filepath-securejoin/pathrs-lite/procfs/procfs_linux.go + delete mode 100644 vendor/github.com/cyphar/filepath-securejoin/procfs_linux.go + +diff --git a/go.mod b/go.mod +index 8281d740..5f00a576 100644 +--- a/go.mod ++++ b/go.mod +@@ -10,9 +10,9 @@ toolchain go1.22.4 + require ( + github.com/checkpoint-restore/go-criu/v6 v6.3.0 + github.com/cilium/ebpf v0.16.0 +- github.com/containerd/console v1.0.4 ++ github.com/containerd/console v1.0.5 + github.com/coreos/go-systemd/v22 v22.5.0 +- github.com/cyphar/filepath-securejoin v0.4.1 ++ github.com/cyphar/filepath-securejoin v0.5.0 + github.com/docker/go-units v0.5.0 + github.com/godbus/dbus/v5 v5.1.0 + github.com/moby/sys/mountinfo 
v0.7.1 +@@ -20,7 +20,7 @@ require ( + github.com/moby/sys/userns v0.1.0 + github.com/mrunalp/fileutils v0.5.1 + github.com/opencontainers/runtime-spec v1.2.1-0.20240625190033-701738418b95 +- github.com/opencontainers/selinux v1.11.0 ++ github.com/opencontainers/selinux v1.12.0 + github.com/seccomp/libseccomp-golang v0.10.0 + github.com/sirupsen/logrus v1.9.3 + github.com/syndtr/gocapability v0.0.0-20200815063812-42c35b437635 +@@ -37,3 +37,8 @@ require ( + github.com/vishvananda/netns v0.0.0-20191106174202-0a2b9b5464df // indirect + golang.org/x/exp v0.0.0-20230224173230-c95f2b4c22f2 // indirect + ) ++ ++// FIXME: This is only intended as a short-term solution to include a patch for ++// CVE-2025-52881 in go-selinux without pushing the patches upstream. This ++// should be removed as soon as possible after the embargo is lifted. ++replace github.com/opencontainers/selinux => ./internal/third_party/selinux +diff --git a/go.sum b/go.sum +index a6d81357..1f930ce4 100644 +--- a/go.sum ++++ b/go.sum +@@ -3,14 +3,14 @@ github.com/checkpoint-restore/go-criu/v6 v6.3.0 h1:mIdrSO2cPNWQY1truPg6uHLXyKHk3 + github.com/checkpoint-restore/go-criu/v6 v6.3.0/go.mod h1:rrRTN/uSwY2X+BPRl/gkulo9gsKOSAeVp9/K2tv7xZI= + github.com/cilium/ebpf v0.16.0 h1:+BiEnHL6Z7lXnlGUsXQPPAE7+kenAd4ES8MQ5min0Ok= + github.com/cilium/ebpf v0.16.0/go.mod h1:L7u2Blt2jMM/vLAVgjxluxtBKlz3/GWjB0dMOEngfwE= +-github.com/containerd/console v1.0.4 h1:F2g4+oChYvBTsASRTz8NP6iIAi97J3TtSAsLbIFn4ro= +-github.com/containerd/console v1.0.4/go.mod h1:YynlIjWYF8myEu6sdkwKIvGQq+cOckRm6So2avqoYAk= ++github.com/containerd/console v1.0.5 h1:R0ymNeydRqH2DmakFNdmjR2k0t7UPuiOV/N/27/qqsc= ++github.com/containerd/console v1.0.5/go.mod h1:YynlIjWYF8myEu6sdkwKIvGQq+cOckRm6So2avqoYAk= + github.com/coreos/go-systemd/v22 v22.5.0 h1:RrqgGjYQKalulkV8NGVIfkXQf6YYmOyiJKk8iXXhfZs= + github.com/coreos/go-systemd/v22 v22.5.0/go.mod h1:Y58oyj3AT4RCenI/lSvhwexgC+NSVTIJ3seZv2GcEnc= + github.com/cpuguy83/go-md2man/v2 v2.0.2 
h1:p1EgwI/C7NhT0JmVkwCD2ZBK8j4aeHQX2pMHHBfMQ6w= + github.com/cpuguy83/go-md2man/v2 v2.0.2/go.mod h1:tgQtvFlXSQOSOSIRvRPT7W67SCa46tRHOmNcaadrF8o= +-github.com/cyphar/filepath-securejoin v0.4.1 h1:JyxxyPEaktOD+GAnqIqTf9A8tHyAG22rowi7HkoSU1s= +-github.com/cyphar/filepath-securejoin v0.4.1/go.mod h1:Sdj7gXlvMcPZsbhwhQ33GguGLDGQL7h7bg04C/+u9jI= ++github.com/cyphar/filepath-securejoin v0.5.0 h1:hIAhkRBMQ8nIeuVwcAoymp7MY4oherZdAxD+m0u9zaw= ++github.com/cyphar/filepath-securejoin v0.5.0/go.mod h1:Sdj7gXlvMcPZsbhwhQ33GguGLDGQL7h7bg04C/+u9jI= + github.com/davecgh/go-spew v1.1.0/go.mod h1:J7Y8YcW2NihsgmVo/mv3lAwl/skON4iLHjSsI+c5H38= + github.com/davecgh/go-spew v1.1.1 h1:vj9j/u1bqnvCEfJOwUhtlOARqs3+rkHYY13jYWTU97c= + github.com/davecgh/go-spew v1.1.1/go.mod h1:J7Y8YcW2NihsgmVo/mv3lAwl/skON4iLHjSsI+c5H38= +@@ -48,8 +48,6 @@ github.com/mrunalp/fileutils v0.5.1 h1:F+S7ZlNKnrwHfSwdlgNSkKo67ReVf8o9fel6C3dkm + github.com/mrunalp/fileutils v0.5.1/go.mod h1:M1WthSahJixYnrXQl/DFQuteStB1weuxD2QJNHXfbSQ= + github.com/opencontainers/runtime-spec v1.2.1-0.20240625190033-701738418b95 h1:Ghl8Z3l+yPQUDSxAp7Kg7fJLRNNXjOsR6ooDcca7PjU= + github.com/opencontainers/runtime-spec v1.2.1-0.20240625190033-701738418b95/go.mod h1:jwyrGlmzljRJv/Fgzds9SsS/C5hL+LL3ko9hs6T5lQ0= +-github.com/opencontainers/selinux v1.11.0 h1:+5Zbo97w3Lbmb3PeqQtpmTkMwsW5nRI3YaLpt7tQ7oU= +-github.com/opencontainers/selinux v1.11.0/go.mod h1:E5dMC3VPuVvVHDYmi78qvhJp8+M586T4DlDRYpFkyec= + github.com/pmezard/go-difflib v1.0.0 h1:4DBwDE0NGyQoBHbLQYPwSUPoCMWR5BEzIk/f1lZbAQM= + github.com/pmezard/go-difflib v1.0.0/go.mod h1:iKH77koFhYxTK1pcRnkKkqfTogsbg7gZNVY4sRDYZ/4= + github.com/rogpeppe/go-internal v1.11.0 h1:cWPaGQEPrBb5/AsnsZesgZZ9yb1OQ+GOISoDNXVBh4M= +diff --git a/internal/linux/doc.go b/internal/linux/doc.go +new file mode 100644 +index 00000000..4d1eb900 +--- /dev/null ++++ b/internal/linux/doc.go +@@ -0,0 +1,3 @@ ++// Package linux provides minimal wrappers around Linux system calls, primarily ++// to provide support for 
automatic EINTR-retries. ++package linux +diff --git a/internal/linux/linux.go b/internal/linux/linux.go +new file mode 100644 +index 00000000..f9e67534 +--- /dev/null ++++ b/internal/linux/linux.go +@@ -0,0 +1,44 @@ ++package linux ++ ++import ( ++ "os" ++ ++ "golang.org/x/sys/unix" ++) ++ ++// Readlinkat wraps [unix.Readlinkat]. ++func Readlinkat(dir *os.File, path string) (string, error) { ++ size := 4096 ++ for { ++ linkBuf := make([]byte, size) ++ n, err := unix.Readlinkat(int(dir.Fd()), path, linkBuf) ++ if err != nil { ++ return "", &os.PathError{Op: "readlinkat", Path: dir.Name() + "/" + path, Err: err} ++ } ++ if n != size { ++ return string(linkBuf[:n]), nil ++ } ++ // Possible truncation, resize the buffer. ++ size *= 2 ++ } ++} ++ ++// GetPtyPeer is a wrapper for ioctl(TIOCGPTPEER). ++func GetPtyPeer(ptyFd uintptr, unsafePeerPath string, flags int) (*os.File, error) { ++ // Make sure O_NOCTTY is always set -- otherwise runc might accidentally ++ // gain it as a controlling terminal. O_CLOEXEC also needs to be set to ++ // make sure we don't leak the handle either. ++ flags |= unix.O_NOCTTY | unix.O_CLOEXEC ++ ++ // There is no nice wrapper for this kind of ioctl in unix. ++ peerFd, _, errno := unix.Syscall( ++ unix.SYS_IOCTL, ++ ptyFd, ++ uintptr(unix.TIOCGPTPEER), ++ uintptr(flags), ++ ) ++ if errno != 0 { ++ return nil, os.NewSyscallError("ioctl TIOCGPTPEER", errno) ++ } ++ return os.NewFile(peerFd, unsafePeerPath), nil ++} +diff --git a/internal/pathrs/doc.go b/internal/pathrs/doc.go +new file mode 100644 +index 00000000..496ca595 +--- /dev/null ++++ b/internal/pathrs/doc.go +@@ -0,0 +1,23 @@ ++// SPDX-License-Identifier: Apache-2.0 ++/* ++ * Copyright (C) 2024-2025 Aleksa Sarai ++ * Copyright (C) 2024-2025 SUSE LLC ++ * ++ * Licensed under the Apache License, Version 2.0 (the "License"); ++ * you may not use this file except in compliance with the License. 
++ * You may obtain a copy of the License at ++ * ++ * http://www.apache.org/licenses/LICENSE-2.0 ++ * ++ * Unless required by applicable law or agreed to in writing, software ++ * distributed under the License is distributed on an "AS IS" BASIS, ++ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. ++ * See the License for the specific language governing permissions and ++ * limitations under the License. ++ */ ++ ++// Package pathrs provides wrappers around filepath-securejoin to add the ++// minimum set of features needed from libpathrs that are not provided by ++// filepath-securejoin, with the eventual goal being that these can be used to ++// ease the transition by converting them to stubs when enabling libpathrs builds. ++package pathrs +diff --git a/internal/pathrs/mkdirall_pathrslite.go b/internal/pathrs/mkdirall_pathrslite.go +new file mode 100644 +index 00000000..fb4f7842 +--- /dev/null ++++ b/internal/pathrs/mkdirall_pathrslite.go +@@ -0,0 +1,97 @@ ++// SPDX-License-Identifier: Apache-2.0 ++/* ++ * Copyright (C) 2024-2025 Aleksa Sarai ++ * Copyright (C) 2024-2025 SUSE LLC ++ * ++ * Licensed under the Apache License, Version 2.0 (the "License"); ++ * you may not use this file except in compliance with the License. ++ * You may obtain a copy of the License at ++ * ++ * http://www.apache.org/licenses/LICENSE-2.0 ++ * ++ * Unless required by applicable law or agreed to in writing, software ++ * distributed under the License is distributed on an "AS IS" BASIS, ++ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. ++ * See the License for the specific language governing permissions and ++ * limitations under the License.
++ */ ++ ++package pathrs ++ ++import ( ++ "fmt" ++ "os" ++ "path/filepath" ++ ++ "github.com/cyphar/filepath-securejoin/pathrs-lite" ++ "github.com/sirupsen/logrus" ++ "golang.org/x/sys/unix" ++) ++ ++// MkdirAllInRootOpen attempts to make ++// ++// path, _ := securejoin.SecureJoin(root, unsafePath) ++// os.MkdirAll(path, mode) ++// os.Open(path) ++// ++// safer against attacks where components in the path are changed between ++// SecureJoin returning and MkdirAll (or Open) being called. In particular, we ++// try to detect any symlink components in the path while we are doing the ++// MkdirAll. ++// ++// NOTE: If unsafePath is a subpath of root, we assume that you have already ++// called SecureJoin and so we use the provided path verbatim without resolving ++// any symlinks (this is done in a way that avoids symlink-exchange races). ++// This means that the path also must not contain ".." elements, otherwise an ++// error will occur. ++// ++// This uses (pathrs-lite).MkdirAllHandle under the hood, but it has special ++// handling if unsafePath has already been scoped within the rootfs (this is ++// needed for a lot of runc callers and fixing this would require reworking a ++// lot of path logic). ++func MkdirAllInRootOpen(root, unsafePath string, mode os.FileMode) (*os.File, error) { ++ // If the path is already "within" the root, get the path relative to the ++ // root and use that as the unsafe path. This is necessary because a lot of ++ // MkdirAllInRootOpen callers have already done SecureJoin, and refactoring ++ // all of them to stop using these SecureJoin'd paths would require a fair ++ // amount of work. ++ // TODO(cyphar): Do the refactor to libpathrs once it's ready. ++ if IsLexicallyInRoot(root, unsafePath) { ++ subPath, err := filepath.Rel(root, unsafePath) ++ if err != nil { ++ return nil, err ++ } ++ unsafePath = subPath ++ } ++ ++ // Check for any silly mode bits. 
++ if mode&^0o7777 != 0 { ++ return nil, fmt.Errorf("tried to include non-mode bits in MkdirAll mode: 0o%.3o", mode) ++ } ++ // Linux (and thus os.MkdirAll) silently ignores the suid and sgid bits if ++ // passed. While it would make sense to return an error in that case (since ++ // the user has asked for a mode that won't be applied), for compatibility ++ // reasons we have to ignore these bits. ++ if ignoredBits := mode &^ 0o1777; ignoredBits != 0 { ++ logrus.Warnf("MkdirAll called with no-op mode bits that are ignored by Linux: 0o%.3o", ignoredBits) ++ mode &= 0o1777 ++ } ++ ++ rootDir, err := os.OpenFile(root, unix.O_DIRECTORY|unix.O_CLOEXEC, 0) ++ if err != nil { ++ return nil, fmt.Errorf("open root handle: %w", err) ++ } ++ defer rootDir.Close() ++ ++ return pathrs.MkdirAllHandle(rootDir, unsafePath, mode) ++} ++ ++// MkdirAllInRoot is a wrapper around MkdirAllInRootOpen which closes the ++// returned handle, for callers that don't need to use it. ++func MkdirAllInRoot(root, unsafePath string, mode os.FileMode) error { ++ f, err := MkdirAllInRootOpen(root, unsafePath, mode) ++ if err == nil { ++ _ = f.Close() ++ } ++ return err ++} +diff --git a/internal/pathrs/path.go b/internal/pathrs/path.go +new file mode 100644 +index 00000000..1ee7c795 +--- /dev/null ++++ b/internal/pathrs/path.go +@@ -0,0 +1,34 @@ ++// SPDX-License-Identifier: Apache-2.0 ++/* ++ * Copyright (C) 2024-2025 Aleksa Sarai ++ * Copyright (C) 2024-2025 SUSE LLC ++ * ++ * Licensed under the Apache License, Version 2.0 (the "License"); ++ * you may not use this file except in compliance with the License. ++ * You may obtain a copy of the License at ++ * ++ * http://www.apache.org/licenses/LICENSE-2.0 ++ * ++ * Unless required by applicable law or agreed to in writing, software ++ * distributed under the License is distributed on an "AS IS" BASIS, ++ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
++ * See the License for the specific language governing permissions and ++ * limitations under the License. ++ */ ++ ++package pathrs ++ ++import ( ++ "strings" ++) ++ ++// IsLexicallyInRoot is shorthand for strings.HasPrefix(path+"/", root+"/"), ++// but properly handling the case where path or root have a "/" suffix. ++// ++// NOTE: The return value only makes sense if the path is already mostly cleaned ++// (i.e., doesn't contain "..", ".", nor unneeded "/"s). ++func IsLexicallyInRoot(root, path string) bool { ++ root = strings.TrimRight(root, "/") ++ path = strings.TrimRight(path, "/") ++ return strings.HasPrefix(path+"/", root+"/") ++} +diff --git a/internal/pathrs/path_test.go b/internal/pathrs/path_test.go +new file mode 100644 +index 00000000..19d577fb +--- /dev/null ++++ b/internal/pathrs/path_test.go +@@ -0,0 +1,53 @@ ++// SPDX-License-Identifier: Apache-2.0 ++/* ++ * Copyright (C) 2024-2025 Aleksa Sarai ++ * Copyright (C) 2024-2025 SUSE LLC ++ * ++ * Licensed under the Apache License, Version 2.0 (the "License"); ++ * you may not use this file except in compliance with the License. ++ * You may obtain a copy of the License at ++ * ++ * http://www.apache.org/licenses/LICENSE-2.0 ++ * ++ * Unless required by applicable law or agreed to in writing, software ++ * distributed under the License is distributed on an "AS IS" BASIS, ++ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
++ */ ++ ++package pathrs ++ ++import "testing" ++ ++func TestIsLexicallyInRoot(t *testing.T) { ++ for _, test := range []struct { ++ name string ++ root, path string ++ expected bool ++ }{ ++ {"Equal1", "/foo", "/foo", true}, ++ {"Equal2", "/bar/baz", "/bar/baz", true}, ++ {"Equal3", "/bar/baz/", "/bar/baz/", true}, ++ {"Root", "/", "/foo/bar", true}, ++ {"Root-Equal", "/", "/", true}, ++ {"InRoot-Basic1", "/foo/bar", "/foo/bar/baz/abcd", true}, ++ {"InRoot-Basic2", "/a/b/c/d", "/a/b/c/d/e/f/g/h", true}, ++ {"InRoot-Long", "/var/lib/docker/container/1234abcde/rootfs", "/var/lib/docker/container/1234abcde/rootfs/a/b/c", true}, ++ {"InRoot-TrailingSlash1", "/foo/bar/", "/foo/bar", true}, ++ {"InRoot-TrailingSlash2", "/foo/", "/foo/bar/baz/boop", true}, ++ {"NotInRoot-Basic1", "/foo", "/bar", false}, ++ {"NotInRoot-Basic2", "/foo", "/bar", false}, ++ {"NotInRoot-Basic3", "/foo/bar/baz", "/foo/boo/baz/abc", false}, ++ {"NotInRoot-Long", "/var/lib/docker/container/1234abcde/rootfs", "/a/b/c", false}, ++ {"NotInRoot-Tricky1", "/foo/bar", "/foo/bara", false}, ++ {"NotInRoot-Tricky2", "/foo/bar", "/foo/ba/r", false}, ++ } { ++ t.Run(test.name, func(t *testing.T) { ++ got := IsLexicallyInRoot(test.root, test.path) ++ if test.expected != got { ++ t.Errorf("IsLexicallyInRoot(%q, %q) = %v (expected %v)", test.root, test.path, got, test.expected) ++ } ++ }) ++ } ++} +diff --git a/internal/pathrs/procfs_pathrslite.go b/internal/pathrs/procfs_pathrslite.go +new file mode 100644 +index 00000000..a02b0d39 +--- /dev/null ++++ b/internal/pathrs/procfs_pathrslite.go +@@ -0,0 +1,102 @@ ++// SPDX-License-Identifier: Apache-2.0 ++/* ++ * Copyright (C) 2025 Aleksa Sarai ++ * Copyright (C) 2025 SUSE LLC ++ * ++ * Licensed under the Apache License, Version 2.0 (the "License"); ++ * you may not use this file except in compliance with the License. 
++ * You may obtain a copy of the License at ++ * ++ * http://www.apache.org/licenses/LICENSE-2.0 ++ * ++ * Unless required by applicable law or agreed to in writing, software ++ * distributed under the License is distributed on an "AS IS" BASIS, ++ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. ++ * See the License for the specific language governing permissions and ++ * limitations under the License. ++ */ ++ ++package pathrs ++ ++import ( ++ "fmt" ++ "os" ++ ++ "github.com/cyphar/filepath-securejoin/pathrs-lite" ++ "github.com/cyphar/filepath-securejoin/pathrs-lite/procfs" ++) ++ ++func procOpenReopen(openFn func(subpath string) (*os.File, error), subpath string, flags int) (*os.File, error) { ++ handle, err := openFn(subpath) ++ if err != nil { ++ return nil, err ++ } ++ defer handle.Close() ++ ++ f, err := pathrs.Reopen(handle, flags) ++ if err != nil { ++ return nil, fmt.Errorf("reopen %s: %w", handle.Name(), err) ++ } ++ return f, nil ++} ++ ++// ProcSelfOpen is a wrapper around [procfs.Handle.OpenSelf] and ++// [pathrs.Reopen], to let you one-shot open a procfs file with the given ++// flags. ++func ProcSelfOpen(subpath string, flags int) (*os.File, error) { ++ proc, err := procfs.OpenProcRoot() ++ if err != nil { ++ return nil, err ++ } ++ defer proc.Close() ++ return procOpenReopen(proc.OpenSelf, subpath, flags) ++} ++ ++// ProcPidOpen is a wrapper around [procfs.Handle.OpenPid] and [pathrs.Reopen], ++// to let you one-shot open a procfs file with the given flags. 
++func ProcPidOpen(pid int, subpath string, flags int) (*os.File, error) { ++ proc, err := procfs.OpenProcRoot() ++ if err != nil { ++ return nil, err ++ } ++ defer proc.Close() ++ return procOpenReopen(func(subpath string) (*os.File, error) { ++ return proc.OpenPid(pid, subpath) ++ }, subpath, flags) ++} ++ ++// ProcThreadSelfOpen is a wrapper around [procfs.Handle.OpenThreadSelf] and ++// [pathrs.Reopen], to let you one-shot open a procfs file with the given ++// flags. The returned [procfs.ProcThreadSelfCloser] needs the same handling as ++// when using pathrs-lite. ++func ProcThreadSelfOpen(subpath string, flags int) (_ *os.File, _ procfs.ProcThreadSelfCloser, Err error) { ++ proc, err := procfs.OpenProcRoot() ++ if err != nil { ++ return nil, nil, err ++ } ++ defer proc.Close() ++ ++ handle, closer, err := proc.OpenThreadSelf(subpath) ++ if err != nil { ++ return nil, nil, err ++ } ++ if closer != nil { ++ defer func() { ++ if Err != nil { ++ closer() ++ } ++ }() ++ } ++ defer handle.Close() ++ ++ f, err := pathrs.Reopen(handle, flags) ++ if err != nil { ++ return nil, nil, fmt.Errorf("reopen %s: %w", handle.Name(), err) ++ } ++ return f, closer, nil ++} ++ ++// Reopen is a wrapper around pathrs.Reopen. ++func Reopen(file *os.File, flags int) (*os.File, error) { ++ return pathrs.Reopen(file, flags) ++} +diff --git a/internal/pathrs/root_pathrslite.go b/internal/pathrs/root_pathrslite.go +new file mode 100644 +index 00000000..0ef81fae +--- /dev/null ++++ b/internal/pathrs/root_pathrslite.go +@@ -0,0 +1,69 @@ ++// SPDX-License-Identifier: Apache-2.0 ++/* ++ * Copyright (C) 2024-2025 Aleksa Sarai ++ * Copyright (C) 2024-2025 SUSE LLC ++ * ++ * Licensed under the Apache License, Version 2.0 (the "License"); ++ * you may not use this file except in compliance with the License. 
++ * You may obtain a copy of the License at ++ * ++ * http://www.apache.org/licenses/LICENSE-2.0 ++ * ++ * Unless required by applicable law or agreed to in writing, software ++ * distributed under the License is distributed on an "AS IS" BASIS, ++ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. ++ * See the License for the specific language governing permissions and ++ * limitations under the License. ++ */ ++ ++package pathrs ++ ++import ( ++ "fmt" ++ "os" ++ "path/filepath" ++ ++ "github.com/cyphar/filepath-securejoin/pathrs-lite" ++ "golang.org/x/sys/unix" ++) ++ ++// OpenInRoot opens the given path inside the root with the provided flags. It ++// is effectively shorthand for [pathrs.OpenInRoot] followed by ++// [pathrs.Reopen]. ++func OpenInRoot(root, subpath string, flags int) (*os.File, error) { ++ handle, err := pathrs.OpenInRoot(root, subpath) ++ if err != nil { ++ return nil, err ++ } ++ defer handle.Close() ++ return pathrs.Reopen(handle, flags) ++} ++ ++// CreateInRoot creates a new file inside a root (as well as any missing parent ++// directories) and returns a handle to said file. This effectively has ++// open(O_CREAT|O_NOFOLLOW) semantics. If you want the creation to use O_EXCL, ++// include it in the passed flags. The fileMode argument uses unix.* mode bits, ++// *not* os.FileMode. ++func CreateInRoot(root, subpath string, flags int, fileMode uint32) (*os.File, error) { ++ dir, filename := filepath.Split(subpath) ++ if filepath.Join("/", filename) == "/" { ++ return nil, fmt.Errorf("create in root subpath %q has bad trailing component %q", subpath, filename) ++ } ++ ++ dirFd, err := MkdirAllInRootOpen(root, dir, 0o755) ++ if err != nil { ++ return nil, err ++ } ++ defer dirFd.Close() ++ ++ // We know that the filename does not have any "/" components, and that ++ // dirFd is inside the root. O_NOFOLLOW will stop us from following ++ // trailing symlinks, so this is safe to do.
libpathrs's Root::create_file ++ // works the same way. ++ flags |= unix.O_CREAT | unix.O_NOFOLLOW ++ fd, err := unix.Openat(int(dirFd.Fd()), filename, flags, fileMode) ++ if err != nil { ++ return nil, err ++ } ++ return os.NewFile(uintptr(fd), root+"/"+subpath), nil ++} +diff --git a/internal/sys/doc.go b/internal/sys/doc.go +new file mode 100644 +index 00000000..075387f7 +--- /dev/null ++++ b/internal/sys/doc.go +@@ -0,0 +1,5 @@ ++// Package sys is an internal package that contains helper methods for dealing ++// with Linux that are more complicated than basic wrappers. Basic wrappers ++// usually belong in internal/linux. If you feel something belongs in ++// libcontainer/utils or libcontainer/system, it probably belongs here instead. ++package sys +diff --git a/internal/sys/opath_linux.go b/internal/sys/opath_linux.go +new file mode 100644 +index 00000000..17a216bc +--- /dev/null ++++ b/internal/sys/opath_linux.go +@@ -0,0 +1,53 @@ ++package sys ++ ++import ( ++ "fmt" ++ "os" ++ "runtime" ++ "strconv" ++ ++ "golang.org/x/sys/unix" ++ ++ "github.com/opencontainers/runc/internal/pathrs" ++) ++ ++// FchmodFile is a wrapper around fchmodat2(AT_EMPTY_PATH) with fallbacks for ++// older kernels. This is distinct from [File.Chmod] and [unix.Fchmod] in that ++// it works on O_PATH file descriptors. ++func FchmodFile(f *os.File, mode uint32) error { ++ err := unix.Fchmodat(int(f.Fd()), "", mode, unix.AT_EMPTY_PATH) ++ // If fchmodat2(2) is not available at all, golang.org/x/sys/unix (probably ++ // in order to mirror glibc) returns EOPNOTSUPP rather than EINVAL ++ // (what the kernel actually returns for invalid flags, which is being ++ // emulated) or ENOSYS (which is what glibc actually sees).
++ if err != unix.EINVAL && err != unix.EOPNOTSUPP { //nolint:errorlint // unix errors are bare ++ // err == nil is implicitly handled ++ return os.NewSyscallError("fchmodat2 AT_EMPTY_PATH", err) ++ } ++ ++ // AT_EMPTY_PATH support was added to fchmodat2 in Linux 6.6 ++ // (5daeb41a6fc9d0d81cb2291884b7410e062d8fa1). The alternative for ++ // older kernels is to go through /proc. ++ fdDir, closer, err2 := pathrs.ProcThreadSelfOpen("fd/", unix.O_DIRECTORY) ++ if err2 != nil { ++ return fmt.Errorf("fchmodat2 AT_EMPTY_PATH fallback: %w", err2) ++ } ++ defer closer() ++ defer fdDir.Close() ++ ++ err = unix.Fchmodat(int(fdDir.Fd()), strconv.Itoa(int(f.Fd())), mode, 0) ++ if err != nil { ++ err = fmt.Errorf("fchmodat /proc/self/fd/%d: %w", f.Fd(), err) ++ } ++ runtime.KeepAlive(f) ++ return err ++} ++ ++// FchownFile is a wrapper around fchownat(AT_EMPTY_PATH). This is distinct ++// from [File.Chown] and [unix.Fchown] in that it works on O_PATH file ++// descriptors. ++func FchownFile(f *os.File, uid, gid int) error { ++ err := unix.Fchownat(int(f.Fd()), "", uid, gid, unix.AT_EMPTY_PATH) ++ runtime.KeepAlive(f) ++ return os.NewSyscallError("fchownat AT_EMPTY_PATH", err) ++} +diff --git a/internal/sys/sysctl_linux.go b/internal/sys/sysctl_linux.go +new file mode 100644 +index 00000000..96876a55 +--- /dev/null ++++ b/internal/sys/sysctl_linux.go +@@ -0,0 +1,54 @@ ++package sys ++ ++import ( ++ "fmt" ++ "io" ++ "os" ++ "strings" ++ ++ "golang.org/x/sys/unix" ++ ++ "github.com/cyphar/filepath-securejoin/pathrs-lite" ++ "github.com/cyphar/filepath-securejoin/pathrs-lite/procfs" ++) ++ ++func procfsOpenRoot(proc *procfs.Handle, subpath string, flags int) (*os.File, error) { ++ handle, err := proc.OpenRoot(subpath) ++ if err != nil { ++ return nil, err ++ } ++ defer handle.Close() ++ ++ return pathrs.Reopen(handle, flags) ++} ++ ++// WriteSysctls sets the given sysctls to the requested values. 
++func WriteSysctls(sysctls map[string]string) error { ++ // We are going to write multiple sysctls, which require writing to an ++ // unmasked procfs which is not going to be cached. To avoid creating a new ++ // procfs instance for each one, just allocate one handle for all of them. ++ proc, err := procfs.OpenUnsafeProcRoot() ++ if err != nil { ++ return err ++ } ++ defer proc.Close() ++ ++ for key, value := range sysctls { ++ keyPath := strings.ReplaceAll(key, ".", "/") ++ ++ sysctlFile, err := procfsOpenRoot(proc, "sys/"+keyPath, unix.O_WRONLY|unix.O_TRUNC|unix.O_CLOEXEC) ++ if err != nil { ++ return fmt.Errorf("open sysctl %s file: %w", key, err) ++ } ++ defer sysctlFile.Close() ++ ++ n, err := io.WriteString(sysctlFile, value) ++ if n != len(value) && err == nil { ++ err = fmt.Errorf("short write to file (%d bytes != %d bytes)", n, len(value)) ++ } ++ if err != nil { ++ return fmt.Errorf("failed to write sysctl %s = %q: %w", key, value, err) ++ } ++ } ++ return nil ++} +diff --git a/internal/sys/verify_inode_unix.go b/internal/sys/verify_inode_unix.go +new file mode 100644 +index 00000000..d5019db5 +--- /dev/null ++++ b/internal/sys/verify_inode_unix.go +@@ -0,0 +1,30 @@ ++package sys ++ ++import ( ++ "fmt" ++ "os" ++ "runtime" ++ ++ "golang.org/x/sys/unix" ++) ++ ++// VerifyInodeFunc is the callback passed to [VerifyInode] to check if the ++// inode is the expected type (and on the correct filesystem type, in the case ++// of filesystem-specific inodes). ++type VerifyInodeFunc func(stat *unix.Stat_t, statfs *unix.Statfs_t) error ++ ++// VerifyInode verifies that the underlying inode for the given file matches an ++// expected inode type (possibly on a particular kind of filesystem). This is ++// mainly a wrapper around [VerifyInodeFunc]. 
++func VerifyInode(file *os.File, checkFunc VerifyInodeFunc) error { ++ var stat unix.Stat_t ++ if err := unix.Fstat(int(file.Fd()), &stat); err != nil { ++ return fmt.Errorf("fstat %q: %w", file.Name(), err) ++ } ++ var statfs unix.Statfs_t ++ if err := unix.Fstatfs(int(file.Fd()), &statfs); err != nil { ++ return fmt.Errorf("fstatfs %q: %w", file.Name(), err) ++ } ++ runtime.KeepAlive(file) ++ return checkFunc(&stat, &statfs) ++} +diff --git a/internal/third_party/selinux/.codespellrc b/internal/third_party/selinux/.codespellrc +new file mode 100644 +index 00000000..8f0866a3 +--- /dev/null ++++ b/internal/third_party/selinux/.codespellrc +@@ -0,0 +1,2 @@ ++[codespell] ++skip = ./.git,./go.sum,./go-selinux/testdata +diff --git a/internal/third_party/selinux/.github/dependabot.yml b/internal/third_party/selinux/.github/dependabot.yml +new file mode 100644 +index 00000000..b534a2b9 +--- /dev/null ++++ b/internal/third_party/selinux/.github/dependabot.yml +@@ -0,0 +1,10 @@ ++# Please see the documentation for all configuration options: ++# https://docs.github.com/en/code-security/dependabot/dependabot-version-updates/configuration-options-for-the-dependabot.yml-file ++ ++version: 2 ++updates: ++ # Dependencies listed in .github/workflows/*.yml ++ - package-ecosystem: "github-actions" ++ directory: "/" ++ schedule: ++ interval: "daily" +diff --git a/internal/third_party/selinux/.github/workflows/validate.yml b/internal/third_party/selinux/.github/workflows/validate.yml +new file mode 100644 +index 00000000..fab1cb49 +--- /dev/null ++++ b/internal/third_party/selinux/.github/workflows/validate.yml +@@ -0,0 +1,163 @@ ++name: validate ++on: ++ push: ++ tags: ++ - v* ++ branches: ++ - master ++ pull_request: ++ ++jobs: ++ ++ commit: ++ runs-on: ubuntu-24.04 ++ # Only check commits on pull requests. 
++ if: github.event_name == 'pull_request' ++ steps: ++ - name: get pr commits ++ id: 'get-pr-commits' ++ uses: tim-actions/get-pr-commits@v1.3.1 ++ with: ++ token: ${{ secrets.GITHUB_TOKEN }} ++ ++ - name: check subject line length ++ uses: tim-actions/commit-message-checker-with-regex@v0.3.2 ++ with: ++ commits: ${{ steps.get-pr-commits.outputs.commits }} ++ pattern: '^.{0,72}(\n.*)*$' ++ error: 'Subject too long (max 72)' ++ ++ lint: ++ runs-on: ubuntu-24.04 ++ steps: ++ - uses: actions/checkout@v5 ++ - uses: actions/setup-go@v6 ++ with: ++ go-version: 1.24.x ++ - uses: golangci/golangci-lint-action@v7 ++ with: ++ version: v2.0 ++ ++ codespell: ++ runs-on: ubuntu-24.04 ++ steps: ++ - uses: actions/checkout@v5 ++ - name: install deps ++ # Version of codespell bundled with Ubuntu is way old, so use pip. ++ run: pip install codespell ++ - name: run codespell ++ run: codespell ++ ++ cross: ++ runs-on: ubuntu-24.04 ++ steps: ++ - uses: actions/checkout@v5 ++ - name: cross ++ run: make build-cross ++ ++ test-stubs: ++ runs-on: macos-latest ++ steps: ++ - uses: actions/checkout@v5 ++ - uses: actions/setup-go@v6 ++ with: ++ go-version: 1.24.x ++ - uses: golangci/golangci-lint-action@v7 ++ with: ++ version: v2.0 ++ - name: test-stubs ++ run: make test ++ ++ test: ++ strategy: ++ fail-fast: false ++ matrix: ++ go-version: [1.19.x, 1.23.x, 1.24.x] ++ race: ["-race", ""] ++ runs-on: ubuntu-24.04 ++ steps: ++ - uses: actions/checkout@v5 ++ ++ - name: install go ${{ matrix.go-version }} ++ uses: actions/setup-go@v6 ++ with: ++ go-version: ${{ matrix.go-version }} ++ ++ - name: build ++ run: make BUILDFLAGS="${{ matrix.race }}" build ++ ++ - name: test ++ run: make TESTFLAGS="${{ matrix.race }}" test ++ ++ vm: ++ name: "VM" ++ strategy: ++ fail-fast: false ++ matrix: ++ template: ++ - template://almalinux-8 ++ - template://centos-stream-9 ++ - template://fedora ++ - template://experimental/opensuse-tumbleweed ++ runs-on: ubuntu-24.04 ++ steps: ++ - uses: actions/checkout@v5 ++ 
++ - name: "Install Lima" ++ uses: lima-vm/lima-actions/setup@v1 ++ id: lima-actions-setup ++ ++ - name: "Cache ~/.cache/lima" ++ uses: actions/cache@v4 ++ with: ++ path: ~/.cache/lima ++ key: lima-${{ steps.lima-actions-setup.outputs.version }}-${{ matrix.template }} ++ ++ - name: "Start VM" ++ # --plain is set to disable file sharing, port forwarding, built-in containerd, etc. for faster start up ++ run: limactl start --plain --name=default ${{ matrix.template }} ++ ++ - name: "Initialize VM" ++ run: | ++ set -eux -o pipefail ++ # Sync the current directory to /tmp/selinux in the guest ++ limactl cp -r . default:/tmp/selinux ++ # Install packages ++ if lima command -v dnf >/dev/null; then ++ lima sudo dnf install --setopt=install_weak_deps=false --setopt=tsflags=nodocs -y git-core make golang ++ elif lima command -v zypper >/dev/null; then ++ lima sudo zypper install -y git make go ++ else ++ echo >&2 "Unsupported distribution" ++ exit 1 ++ fi ++ ++ - name: "make test" ++ continue-on-error: true ++ run: lima make -C /tmp/selinux test ++ ++ - name: "32-bit test" ++ continue-on-error: true ++ run: lima make -C /tmp/selinux GOARCH=386 test ++ ++ # https://github.com/opencontainers/selinux/issues/222 ++ # https://github.com/opencontainers/selinux/issues/225 ++ - name: "racy test" ++ continue-on-error: true ++ run: lima bash -c 'cd /tmp/selinux && go test -timeout 10m -count 100000 ./go-selinux' ++ ++ - name: "Show AVC denials" ++ run: lima sudo ausearch -m AVC,USER_AVC || true ++ ++ all-done: ++ needs: ++ - commit ++ - lint ++ - codespell ++ - cross ++ - test-stubs ++ - test ++ - vm ++ runs-on: ubuntu-24.04 ++ steps: ++ - run: echo "All jobs completed" +diff --git a/internal/third_party/selinux/.gitignore b/internal/third_party/selinux/.gitignore +new file mode 100644 +index 00000000..378eac25 +--- /dev/null ++++ b/internal/third_party/selinux/.gitignore +@@ -0,0 +1 @@ ++build +diff --git a/internal/third_party/selinux/.golangci.yml 
b/internal/third_party/selinux/.golangci.yml +new file mode 100644 +index 00000000..b1b98925 +--- /dev/null ++++ b/internal/third_party/selinux/.golangci.yml +@@ -0,0 +1,44 @@ ++version: "2" ++ ++formatters: ++ enable: ++ - gofumpt ++ ++linters: ++ enable: ++ # - copyloopvar # Detects places where loop variables are copied. TODO enable for Go 1.22+ ++ - dupword # Detects duplicate words. ++ - errorlint # Detects code that may cause problems with Go 1.13 error wrapping. ++ - gocritic # Metalinter; detects bugs, performance, and styling issues. ++ - gosec # Detects security problems. ++ - misspell # Detects commonly misspelled English words in comments. ++ - nilerr # Detects code that returns nil even if it checks that the error is not nil. ++ - nolintlint # Detects ill-formed or insufficient nolint directives. ++ - prealloc # Detects slice declarations that could potentially be pre-allocated. ++ - predeclared # Detects code that shadows one of Go's predeclared identifiers ++ - revive # Metalinter; drop-in replacement for golint. ++ - thelper # Detects test helpers without t.Helper(). ++ - tparallel # Detects inappropriate usage of t.Parallel(). ++ - unconvert # Detects unnecessary type conversions. ++ - usetesting # Reports uses of functions with replacement inside the testing package. 
++ settings: ++ govet: ++ enable-all: true ++ settings: ++ shadow: ++ strict: true ++ exclusions: ++ generated: strict ++ presets: ++ - comments ++ - common-false-positives ++ - legacy ++ - std-error-handling ++ rules: ++ - linters: ++ - govet ++ text: '^shadow: declaration of "err" shadows declaration' ++ ++issues: ++ max-issues-per-linter: 0 ++ max-same-issues: 0 +diff --git a/internal/third_party/selinux/CODEOWNERS b/internal/third_party/selinux/CODEOWNERS +new file mode 100644 +index 00000000..14392178 +--- /dev/null ++++ b/internal/third_party/selinux/CODEOWNERS +@@ -0,0 +1 @@ ++* @kolyshkin @mrunalp @rhatdan @runcom @thajeztah +diff --git a/internal/third_party/selinux/CONTRIBUTING.md b/internal/third_party/selinux/CONTRIBUTING.md +new file mode 100644 +index 00000000..dc3ff6a5 +--- /dev/null ++++ b/internal/third_party/selinux/CONTRIBUTING.md +@@ -0,0 +1,119 @@ ++## Contribution Guidelines ++ ++### Security issues ++ ++If you are reporting a security issue, do not create an issue or file a pull ++request on GitHub. Instead, disclose the issue responsibly by sending an email ++to security@opencontainers.org (which is inhabited only by the maintainers of ++the various OCI projects). ++ ++### Pull requests are always welcome ++ ++We are always thrilled to receive pull requests, and do our best to ++process them as fast as possible. Not sure if that typo is worth a pull ++request? Do it! We will appreciate it. ++ ++If your pull request is not accepted on the first try, don't be ++discouraged! If there's a problem with the implementation, hopefully you ++received feedback on what to improve. ++ ++We're trying very hard to keep the project lean and focused. We don't want it ++to do everything for everybody. This means that we might decide against ++incorporating a new feature. ++ ++ ++### Conventions ++ ++Fork the repo and make changes on your fork in a feature branch. 
++For larger bugs and enhancements, consider filing a leader issue or mailing-list thread for discussion that is independent of the implementation. ++Small changes or changes that have been discussed on the project mailing list may be submitted without a leader issue. ++ ++If the project has a test suite, submit unit tests for your changes. Take a ++look at existing tests for inspiration. Run the full test suite on your branch ++before submitting a pull request. ++ ++Update the documentation when creating or modifying features. Test ++your documentation changes for clarity, concision, and correctness, as ++well as a clean documentation build. See ``docs/README.md`` for more ++information on building the docs and how docs get released. ++ ++Write clean code. Universally formatted code promotes ease of writing, reading, ++and maintenance. Always run `gofmt -s -w file.go` on each changed file before ++committing your changes. Most editors have plugins that do this automatically. ++ ++Pull requests descriptions should be as clear as possible and include a ++reference to all the issues that they address. ++ ++Commit messages must start with a capitalized and short summary ++written in the imperative, followed by an optional, more detailed ++explanatory text which is separated from the summary by an empty line. ++ ++Code review comments may be added to your pull request. Discuss, then make the ++suggested modifications and push additional commits to your feature branch. Be ++sure to post a comment after pushing. The new commits will show up in the pull ++request automatically, but the reviewers will not be notified unless you ++comment. ++ ++Before the pull request is merged, make sure that you squash your commits into ++logical units of work using `git rebase -i` and `git push -f`. After every ++commit the test suite (if any) should be passing. Include documentation changes ++in the same commit so that a revert would remove all traces of the feature or ++fix. 
++ ++Commits that fix or close an issue should include a reference like `Closes #XXX` ++or `Fixes #XXX`, which will automatically close the issue when merged. ++ ++### Sign your work ++ ++The sign-off is a simple line at the end of the explanation for the ++patch, which certifies that you wrote it or otherwise have the right to ++pass it on as an open-source patch. The rules are pretty simple: if you ++can certify the below (from ++[developercertificate.org](http://developercertificate.org/)): ++ ++``` ++Developer Certificate of Origin ++Version 1.1 ++ ++Copyright (C) 2004, 2006 The Linux Foundation and its contributors. ++660 York Street, Suite 102, ++San Francisco, CA 94110 USA ++ ++Everyone is permitted to copy and distribute verbatim copies of this ++license document, but changing it is not allowed. ++ ++ ++Developer's Certificate of Origin 1.1 ++ ++By making a contribution to this project, I certify that: ++ ++(a) The contribution was created in whole or in part by me and I ++ have the right to submit it under the open source license ++ indicated in the file; or ++ ++(b) The contribution is based upon previous work that, to the best ++ of my knowledge, is covered under an appropriate open source ++ license and I have the right under that license to submit that ++ work with modifications, whether created in whole or in part ++ by me, under the same open source license (unless I am ++ permitted to submit under a different license), as indicated ++ in the file; or ++ ++(c) The contribution was provided directly to me by some other ++ person who certified (a), (b) or (c) and I have not modified ++ it. ++ ++(d) I understand and agree that this project and the contribution ++ are public and that a record of the contribution (including all ++ personal information I submit with it, including my sign-off) is ++ maintained indefinitely and may be redistributed consistent with ++ this project or the open source license(s) involved. 
++``` ++ ++then you just add a line to every git commit message: ++ ++ Signed-off-by: Joe Smith ++ ++using your real name (sorry, no pseudonyms or anonymous contributions.) ++ ++You can add the sign off when creating the git commit via `git commit -s`. +diff --git a/internal/third_party/selinux/LICENSE b/internal/third_party/selinux/LICENSE +new file mode 100644 +index 00000000..8dada3ed +--- /dev/null ++++ b/internal/third_party/selinux/LICENSE +@@ -0,0 +1,201 @@ ++ Apache License ++ Version 2.0, January 2004 ++ http://www.apache.org/licenses/ ++ ++ TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION ++ ++ 1. Definitions. ++ ++ "License" shall mean the terms and conditions for use, reproduction, ++ and distribution as defined by Sections 1 through 9 of this document. ++ ++ "Licensor" shall mean the copyright owner or entity authorized by ++ the copyright owner that is granting the License. ++ ++ "Legal Entity" shall mean the union of the acting entity and all ++ other entities that control, are controlled by, or are under common ++ control with that entity. For the purposes of this definition, ++ "control" means (i) the power, direct or indirect, to cause the ++ direction or management of such entity, whether by contract or ++ otherwise, or (ii) ownership of fifty percent (50%) or more of the ++ outstanding shares, or (iii) beneficial ownership of such entity. ++ ++ "You" (or "Your") shall mean an individual or Legal Entity ++ exercising permissions granted by this License. ++ ++ "Source" form shall mean the preferred form for making modifications, ++ including but not limited to software source code, documentation ++ source, and configuration files. ++ ++ "Object" form shall mean any form resulting from mechanical ++ transformation or translation of a Source form, including but ++ not limited to compiled object code, generated documentation, ++ and conversions to other media types. 
++ ++ "Work" shall mean the work of authorship, whether in Source or ++ Object form, made available under the License, as indicated by a ++ copyright notice that is included in or attached to the work ++ (an example is provided in the Appendix below). ++ ++ "Derivative Works" shall mean any work, whether in Source or Object ++ form, that is based on (or derived from) the Work and for which the ++ editorial revisions, annotations, elaborations, or other modifications ++ represent, as a whole, an original work of authorship. For the purposes ++ of this License, Derivative Works shall not include works that remain ++ separable from, or merely link (or bind by name) to the interfaces of, ++ the Work and Derivative Works thereof. ++ ++ "Contribution" shall mean any work of authorship, including ++ the original version of the Work and any modifications or additions ++ to that Work or Derivative Works thereof, that is intentionally ++ submitted to Licensor for inclusion in the Work by the copyright owner ++ or by an individual or Legal Entity authorized to submit on behalf of ++ the copyright owner. For the purposes of this definition, "submitted" ++ means any form of electronic, verbal, or written communication sent ++ to the Licensor or its representatives, including but not limited to ++ communication on electronic mailing lists, source code control systems, ++ and issue tracking systems that are managed by, or on behalf of, the ++ Licensor for the purpose of discussing and improving the Work, but ++ excluding communication that is conspicuously marked or otherwise ++ designated in writing by the copyright owner as "Not a Contribution." ++ ++ "Contributor" shall mean Licensor and any individual or Legal Entity ++ on behalf of whom a Contribution has been received by Licensor and ++ subsequently incorporated within the Work. ++ ++ 2. Grant of Copyright License. 
Subject to the terms and conditions of ++ this License, each Contributor hereby grants to You a perpetual, ++ worldwide, non-exclusive, no-charge, royalty-free, irrevocable ++ copyright license to reproduce, prepare Derivative Works of, ++ publicly display, publicly perform, sublicense, and distribute the ++ Work and such Derivative Works in Source or Object form. ++ ++ 3. Grant of Patent License. Subject to the terms and conditions of ++ this License, each Contributor hereby grants to You a perpetual, ++ worldwide, non-exclusive, no-charge, royalty-free, irrevocable ++ (except as stated in this section) patent license to make, have made, ++ use, offer to sell, sell, import, and otherwise transfer the Work, ++ where such license applies only to those patent claims licensable ++ by such Contributor that are necessarily infringed by their ++ Contribution(s) alone or by combination of their Contribution(s) ++ with the Work to which such Contribution(s) was submitted. If You ++ institute patent litigation against any entity (including a ++ cross-claim or counterclaim in a lawsuit) alleging that the Work ++ or a Contribution incorporated within the Work constitutes direct ++ or contributory patent infringement, then any patent licenses ++ granted to You under this License for that Work shall terminate ++ as of the date such litigation is filed. ++ ++ 4. Redistribution. 
You may reproduce and distribute copies of the ++ Work or Derivative Works thereof in any medium, with or without ++ modifications, and in Source or Object form, provided that You ++ meet the following conditions: ++ ++ (a) You must give any other recipients of the Work or ++ Derivative Works a copy of this License; and ++ ++ (b) You must cause any modified files to carry prominent notices ++ stating that You changed the files; and ++ ++ (c) You must retain, in the Source form of any Derivative Works ++ that You distribute, all copyright, patent, trademark, and ++ attribution notices from the Source form of the Work, ++ excluding those notices that do not pertain to any part of ++ the Derivative Works; and ++ ++ (d) If the Work includes a "NOTICE" text file as part of its ++ distribution, then any Derivative Works that You distribute must ++ include a readable copy of the attribution notices contained ++ within such NOTICE file, excluding those notices that do not ++ pertain to any part of the Derivative Works, in at least one ++ of the following places: within a NOTICE text file distributed ++ as part of the Derivative Works; within the Source form or ++ documentation, if provided along with the Derivative Works; or, ++ within a display generated by the Derivative Works, if and ++ wherever such third-party notices normally appear. The contents ++ of the NOTICE file are for informational purposes only and ++ do not modify the License. You may add Your own attribution ++ notices within Derivative Works that You distribute, alongside ++ or as an addendum to the NOTICE text from the Work, provided ++ that such additional attribution notices cannot be construed ++ as modifying the License. 
++ ++ You may add Your own copyright statement to Your modifications and ++ may provide additional or different license terms and conditions ++ for use, reproduction, or distribution of Your modifications, or ++ for any such Derivative Works as a whole, provided Your use, ++ reproduction, and distribution of the Work otherwise complies with ++ the conditions stated in this License. ++ ++ 5. Submission of Contributions. Unless You explicitly state otherwise, ++ any Contribution intentionally submitted for inclusion in the Work ++ by You to the Licensor shall be under the terms and conditions of ++ this License, without any additional terms or conditions. ++ Notwithstanding the above, nothing herein shall supersede or modify ++ the terms of any separate license agreement you may have executed ++ with Licensor regarding such Contributions. ++ ++ 6. Trademarks. This License does not grant permission to use the trade ++ names, trademarks, service marks, or product names of the Licensor, ++ except as required for reasonable and customary use in describing the ++ origin of the Work and reproducing the content of the NOTICE file. ++ ++ 7. Disclaimer of Warranty. Unless required by applicable law or ++ agreed to in writing, Licensor provides the Work (and each ++ Contributor provides its Contributions) on an "AS IS" BASIS, ++ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or ++ implied, including, without limitation, any warranties or conditions ++ of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A ++ PARTICULAR PURPOSE. You are solely responsible for determining the ++ appropriateness of using or redistributing the Work and assume any ++ risks associated with Your exercise of permissions under this License. ++ ++ 8. Limitation of Liability. 
In no event and under no legal theory, ++ whether in tort (including negligence), contract, or otherwise, ++ unless required by applicable law (such as deliberate and grossly ++ negligent acts) or agreed to in writing, shall any Contributor be ++ liable to You for damages, including any direct, indirect, special, ++ incidental, or consequential damages of any character arising as a ++ result of this License or out of the use or inability to use the ++ Work (including but not limited to damages for loss of goodwill, ++ work stoppage, computer failure or malfunction, or any and all ++ other commercial damages or losses), even if such Contributor ++ has been advised of the possibility of such damages. ++ ++ 9. Accepting Warranty or Additional Liability. While redistributing ++ the Work or Derivative Works thereof, You may choose to offer, ++ and charge a fee for, acceptance of support, warranty, indemnity, ++ or other liability obligations and/or rights consistent with this ++ License. However, in accepting such obligations, You may act only ++ on Your own behalf and on Your sole responsibility, not on behalf ++ of any other Contributor, and only if You agree to indemnify, ++ defend, and hold each Contributor harmless for any liability ++ incurred by, or claims asserted against, such Contributor by reason ++ of your accepting any such warranty or additional liability. ++ ++ END OF TERMS AND CONDITIONS ++ ++ APPENDIX: How to apply the Apache License to your work. ++ ++ To apply the Apache License to your work, attach the following ++ boilerplate notice, with the fields enclosed by brackets "{}" ++ replaced with your own identifying information. (Don't include ++ the brackets!) The text should be enclosed in the appropriate ++ comment syntax for the file format. We also recommend that a ++ file or class name and description of purpose be included on the ++ same "printed page" as the copyright notice for easier ++ identification within third-party archives. 
++ ++ Copyright {yyyy} {name of copyright owner} ++ ++ Licensed under the Apache License, Version 2.0 (the "License"); ++ you may not use this file except in compliance with the License. ++ You may obtain a copy of the License at ++ ++ http://www.apache.org/licenses/LICENSE-2.0 ++ ++ Unless required by applicable law or agreed to in writing, software ++ distributed under the License is distributed on an "AS IS" BASIS, ++ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. ++ See the License for the specific language governing permissions and ++ limitations under the License. +diff --git a/internal/third_party/selinux/MAINTAINERS b/internal/third_party/selinux/MAINTAINERS +new file mode 100644 +index 00000000..748c18b4 +--- /dev/null ++++ b/internal/third_party/selinux/MAINTAINERS +@@ -0,0 +1,5 @@ ++Antonio Murdaca (@runcom) ++Daniel J Walsh (@rhatdan) ++Mrunal Patel (@mrunalp) ++Sebastiaan van Stijn (@thaJeztah) ++Kirill Kolyshikin (@kolyshkin) +diff --git a/internal/third_party/selinux/Makefile b/internal/third_party/selinux/Makefile +new file mode 100644 +index 00000000..f7b9c3da +--- /dev/null ++++ b/internal/third_party/selinux/Makefile +@@ -0,0 +1,37 @@ ++GO ?= go ++ ++all: build build-cross ++ ++define go-build ++ GOOS=$(1) GOARCH=$(2) $(GO) build ${BUILDFLAGS} ./... ++endef ++ ++.PHONY: build ++build: ++ $(call go-build,linux,amd64) ++ ++.PHONY: build-cross ++build-cross: ++ $(call go-build,linux,386) ++ $(call go-build,linux,arm) ++ $(call go-build,linux,arm64) ++ $(call go-build,linux,ppc64le) ++ $(call go-build,linux,s390x) ++ $(call go-build,linux,mips64le) ++ $(call go-build,linux,riscv64) ++ $(call go-build,windows,amd64) ++ $(call go-build,windows,386) ++ ++ ++.PHONY: test ++test: ++ $(GO) test -timeout 3m ${TESTFLAGS} -v ./... 
++ ++.PHONY: lint ++lint: ++ golangci-lint run ++ ++.PHONY: vendor ++vendor: ++ $(GO) mod tidy ++ $(GO) mod verify +diff --git a/internal/third_party/selinux/README.md b/internal/third_party/selinux/README.md +new file mode 100644 +index 00000000..cd6a60f8 +--- /dev/null ++++ b/internal/third_party/selinux/README.md +@@ -0,0 +1,23 @@ ++# selinux ++ ++[![GoDoc](https://godoc.org/github.com/opencontainers/selinux?status.svg)](https://godoc.org/github.com/opencontainers/selinux) [![Go Report Card](https://goreportcard.com/badge/github.com/opencontainers/selinux)](https://goreportcard.com/report/github.com/opencontainers/selinux) [![Build Status](https://travis-ci.org/opencontainers/selinux.svg?branch=master)](https://travis-ci.org/opencontainers/selinux) ++ ++Common SELinux package used across the container ecosystem. ++ ++## Usage ++ ++Prior to v1.8.0, the `selinux` build tag had to be used to enable selinux functionality for compiling consumers of this project. ++Starting with v1.8.0, the `selinux` build tag is no longer needed. ++ ++For complete documentation, see [godoc](https://godoc.org/github.com/opencontainers/selinux). ++ ++## Code of Conduct ++ ++Participation in the OpenContainers community is governed by [OpenContainer's Code of Conduct][code-of-conduct]. ++ ++## Security ++ ++If you find an issue, please follow the [security][security] protocol to report it. ++ ++[security]: https://github.com/opencontainers/org/blob/master/SECURITY.md ++[code-of-conduct]: https://github.com/opencontainers/org/blob/master/CODE_OF_CONDUCT.md +diff --git a/internal/third_party/selinux/go-selinux/doc.go b/internal/third_party/selinux/go-selinux/doc.go +new file mode 100644 +index 00000000..57a15c9a +--- /dev/null ++++ b/internal/third_party/selinux/go-selinux/doc.go +@@ -0,0 +1,13 @@ ++/* ++Package selinux provides a high-level interface for interacting with selinux. 
++ ++Usage: ++ ++ import "github.com/opencontainers/selinux/go-selinux" ++ ++ // Ensure that selinux is enforcing mode. ++ if selinux.EnforceMode() != selinux.Enforcing { ++ selinux.SetEnforceMode(selinux.Enforcing) ++ } ++*/ ++package selinux +diff --git a/internal/third_party/selinux/go-selinux/label/label.go b/internal/third_party/selinux/go-selinux/label/label.go +new file mode 100644 +index 00000000..884a8b80 +--- /dev/null ++++ b/internal/third_party/selinux/go-selinux/label/label.go +@@ -0,0 +1,48 @@ ++package label ++ ++import ( ++ "fmt" ++ ++ "github.com/opencontainers/selinux/go-selinux" ++) ++ ++// Init initialises the labeling system ++func Init() { ++ _ = selinux.GetEnabled() ++} ++ ++// FormatMountLabel returns a string to be used by the mount command. Using ++// the SELinux `context` mount option. Changing labels of files on mount ++// points with this option can never be changed. ++// FormatMountLabel returns a string to be used by the mount command. ++// The format of this string will be used to alter the labeling of the mountpoint. ++// The string returned is suitable to be used as the options field of the mount command. ++// If you need to have additional mount point options, you can pass them in as ++// the first parameter. Second parameter is the label that you wish to apply ++// to all content in the mount point. ++func FormatMountLabel(src, mountLabel string) string { ++ return FormatMountLabelByType(src, mountLabel, "context") ++} ++ ++// FormatMountLabelByType returns a string to be used by the mount command. ++// Allow caller to specify the mount options. For example using the SELinux ++// `fscontext` mount option would allow certain container processes to change ++// labels of files created on the mount points, where as `context` option does ++// not. ++// FormatMountLabelByType returns a string to be used by the mount command. ++// The format of this string will be used to alter the labeling of the mountpoint. 
++// The string returned is suitable to be used as the options field of the mount command. ++// If you need to have additional mount point options, you can pass them in as ++// the first parameter. Second parameter is the label that you wish to apply ++// to all content in the mount point. ++func FormatMountLabelByType(src, mountLabel, contextType string) string { ++ if mountLabel != "" { ++ switch src { ++ case "": ++ src = fmt.Sprintf("%s=%q", contextType, mountLabel) ++ default: ++ src = fmt.Sprintf("%s,%s=%q", src, contextType, mountLabel) ++ } ++ } ++ return src ++} +diff --git a/internal/third_party/selinux/go-selinux/label/label_linux.go b/internal/third_party/selinux/go-selinux/label/label_linux.go +new file mode 100644 +index 00000000..95f29e21 +--- /dev/null ++++ b/internal/third_party/selinux/go-selinux/label/label_linux.go +@@ -0,0 +1,136 @@ ++package label ++ ++import ( ++ "errors" ++ "fmt" ++ "strings" ++ ++ "github.com/opencontainers/selinux/go-selinux" ++) ++ ++// Valid Label Options ++var validOptions = map[string]bool{ ++ "disable": true, ++ "type": true, ++ "filetype": true, ++ "user": true, ++ "role": true, ++ "level": true, ++} ++ ++var ErrIncompatibleLabel = errors.New("bad SELinux option: z and Z can not be used together") ++ ++// InitLabels returns the process label and file labels to be used within ++// the container. A list of options can be passed into this function to alter ++// the labels. The labels returned will include a random MCS String, that is ++// guaranteed to be unique. ++// If the disabled flag is passed in, the process label will not be set, but the mount label will be set ++// to the container_file label with the maximum category. This label is not usable by any confined label. 
++func InitLabels(options []string) (plabel string, mlabel string, retErr error) { ++ if !selinux.GetEnabled() { ++ return "", "", nil ++ } ++ processLabel, mountLabel := selinux.ContainerLabels() ++ if processLabel != "" { ++ defer func() { ++ if retErr != nil { ++ selinux.ReleaseLabel(mountLabel) ++ } ++ }() ++ pcon, err := selinux.NewContext(processLabel) ++ if err != nil { ++ return "", "", err ++ } ++ mcsLevel := pcon["level"] ++ mcon, err := selinux.NewContext(mountLabel) ++ if err != nil { ++ return "", "", err ++ } ++ for _, opt := range options { ++ if opt == "disable" { ++ selinux.ReleaseLabel(mountLabel) ++ return "", selinux.PrivContainerMountLabel(), nil ++ } ++ if i := strings.Index(opt, ":"); i == -1 { ++ return "", "", fmt.Errorf("bad label option %q, valid options 'disable' or \n'user, role, level, type, filetype' followed by ':' and a value", opt) ++ } ++ con := strings.SplitN(opt, ":", 2) ++ if !validOptions[con[0]] { ++ return "", "", fmt.Errorf("bad label option %q, valid options 'disable, user, role, level, type, filetype'", con[0]) ++ } ++ if con[0] == "filetype" { ++ mcon["type"] = con[1] ++ continue ++ } ++ pcon[con[0]] = con[1] ++ if con[0] == "level" || con[0] == "user" { ++ mcon[con[0]] = con[1] ++ } ++ } ++ if pcon.Get() != processLabel { ++ if pcon["level"] != mcsLevel { ++ selinux.ReleaseLabel(processLabel) ++ } ++ processLabel = pcon.Get() ++ selinux.ReserveLabel(processLabel) ++ } ++ mountLabel = mcon.Get() ++ } ++ return processLabel, mountLabel, nil ++} ++ ++// SetFileLabel modifies the "path" label to the specified file label ++func SetFileLabel(path string, fileLabel string) error { ++ if !selinux.GetEnabled() || fileLabel == "" { ++ return nil ++ } ++ return selinux.SetFileLabel(path, fileLabel) ++} ++ ++// SetFileCreateLabel tells the kernel the label for all files to be created ++func SetFileCreateLabel(fileLabel string) error { ++ if !selinux.GetEnabled() { ++ return nil ++ } ++ return selinux.SetFSCreateLabel(fileLabel) ++} 
++ ++// Relabel changes the label of path and all the entries beneath the path. ++// It changes the MCS label to s0 if shared is true. ++// This will allow all containers to share the content. ++// ++// The path itself is guaranteed to be relabeled last. ++func Relabel(path string, fileLabel string, shared bool) error { ++ if !selinux.GetEnabled() || fileLabel == "" { ++ return nil ++ } ++ ++ if shared { ++ c, err := selinux.NewContext(fileLabel) ++ if err != nil { ++ return err ++ } ++ ++ c["level"] = "s0" ++ fileLabel = c.Get() ++ } ++ return selinux.Chcon(path, fileLabel, true) ++} ++ ++// Validate checks that the label does not include unexpected options ++func Validate(label string) error { ++ if strings.Contains(label, "z") && strings.Contains(label, "Z") { ++ return ErrIncompatibleLabel ++ } ++ return nil ++} ++ ++// RelabelNeeded checks whether the user requested a relabel ++func RelabelNeeded(label string) bool { ++ return strings.Contains(label, "z") || strings.Contains(label, "Z") ++} ++ ++// IsShared checks that the label includes a "shared" mark ++func IsShared(label string) bool { ++ return strings.Contains(label, "z") ++} +diff --git a/internal/third_party/selinux/go-selinux/label/label_linux_test.go b/internal/third_party/selinux/go-selinux/label/label_linux_test.go +new file mode 100644 +index 00000000..e25ead79 +--- /dev/null ++++ b/internal/third_party/selinux/go-selinux/label/label_linux_test.go +@@ -0,0 +1,130 @@ ++package label ++ ++import ( ++ "errors" ++ "os" ++ "testing" ++ ++ "github.com/opencontainers/selinux/go-selinux" ++) ++ ++func needSELinux(t *testing.T) { ++ t.Helper() ++ if !selinux.GetEnabled() { ++ t.Skip("SELinux not enabled, skipping.") ++ } ++} ++ ++func TestInit(t *testing.T) { ++ needSELinux(t) ++ ++ var testNull []string ++ _, _, err := InitLabels(testNull) ++ if err != nil { ++ t.Fatalf("InitLabels failed: %v:", err) ++ } ++ testDisabled := []string{"disable"} ++ if selinux.ROFileLabel() == "" { ++ 
t.Fatal("selinux.ROFileLabel: empty") ++ } ++ plabel, mlabel, err := InitLabels(testDisabled) ++ if err != nil { ++ t.Fatalf("InitLabels(disabled) failed: %v", err) ++ } ++ if plabel != "" { ++ t.Fatalf("InitLabels(disabled): %q not empty", plabel) ++ } ++ if mlabel != "system_u:object_r:container_file_t:s0:c1022,c1023" { ++ t.Fatalf("InitLabels Disabled mlabel Failed, %s", mlabel) ++ } ++ ++ testUser := []string{"user:user_u", "role:user_r", "type:user_t", "level:s0:c1,c15"} ++ plabel, mlabel, err = InitLabels(testUser) ++ if err != nil { ++ t.Fatalf("InitLabels(user) failed: %v", err) ++ } ++ if plabel != "user_u:user_r:user_t:s0:c1,c15" || (mlabel != "user_u:object_r:container_file_t:s0:c1,c15" && mlabel != "user_u:object_r:svirt_sandbox_file_t:s0:c1,c15") { ++ t.Fatalf("InitLabels(user) failed (plabel=%q, mlabel=%q)", plabel, mlabel) ++ } ++ ++ testBadData := []string{"user", "role:user_r", "type:user_t", "level:s0:c1,c15"} ++ if _, _, err = InitLabels(testBadData); err == nil { ++ t.Fatal("InitLabels(bad): expected error, got nil") ++ } ++} ++ ++func TestRelabel(t *testing.T) { ++ needSELinux(t) ++ ++ testdir := t.TempDir() ++ label := "system_u:object_r:container_file_t:s0:c1,c2" ++ if err := Relabel(testdir, "", true); err != nil { ++ t.Fatalf("Relabel with no label failed: %v", err) ++ } ++ if err := Relabel(testdir, label, true); err != nil { ++ t.Fatalf("Relabel shared failed: %v", err) ++ } ++ if err := Relabel(testdir, label, false); err != nil { ++ t.Fatalf("Relabel unshared failed: %v", err) ++ } ++ if err := Relabel("/etc", label, false); err == nil { ++ t.Fatalf("Relabel /etc succeeded") ++ } ++ if err := Relabel("/", label, false); err == nil { ++ t.Fatalf("Relabel / succeeded") ++ } ++ if err := Relabel("/usr", label, false); err == nil { ++ t.Fatalf("Relabel /usr succeeded") ++ } ++ if err := Relabel("/usr/", label, false); err == nil { ++ t.Fatalf("Relabel /usr/ succeeded") ++ } ++ if err := Relabel("/etc/passwd", label, false); err == nil { ++ 
t.Fatalf("Relabel /etc/passwd succeeded") ++ } ++ if home := os.Getenv("HOME"); home != "" { ++ if err := Relabel(home, label, false); err == nil { ++ t.Fatalf("Relabel %s succeeded", home) ++ } ++ } ++} ++ ++func TestValidate(t *testing.T) { ++ if err := Validate("zZ"); !errors.Is(err, ErrIncompatibleLabel) { ++ t.Fatalf("Expected incompatible error, got %v", err) ++ } ++ if err := Validate("Z"); err != nil { ++ t.Fatal(err) ++ } ++ if err := Validate("z"); err != nil { ++ t.Fatal(err) ++ } ++ if err := Validate(""); err != nil { ++ t.Fatal(err) ++ } ++} ++ ++func TestIsShared(t *testing.T) { ++ if shared := IsShared("Z"); shared { ++ t.Fatalf("Expected label `Z` to not be shared, got %v", shared) ++ } ++ if shared := IsShared("z"); !shared { ++ t.Fatalf("Expected label `z` to be shared, got %v", shared) ++ } ++ if shared := IsShared("Zz"); !shared { ++ t.Fatalf("Expected label `Zz` to be shared, got %v", shared) ++ } ++} ++ ++func TestFileLabel(t *testing.T) { ++ needSELinux(t) ++ ++ testUser := []string{"filetype:test_file_t", "level:s0:c1,c15"} ++ _, mlabel, err := InitLabels(testUser) ++ if err != nil { ++ t.Fatalf("InitLabels(user) failed: %v", err) ++ } ++ if mlabel != "system_u:object_r:test_file_t:s0:c1,c15" { ++ t.Fatalf("InitLabels(filetype) failed: %v", err) ++ } ++} +diff --git a/internal/third_party/selinux/go-selinux/label/label_stub.go b/internal/third_party/selinux/go-selinux/label/label_stub.go +new file mode 100644 +index 00000000..7a54afc5 +--- /dev/null ++++ b/internal/third_party/selinux/go-selinux/label/label_stub.go +@@ -0,0 +1,44 @@ ++//go:build !linux ++// +build !linux ++ ++package label ++ ++// InitLabels returns the process label and file labels to be used within ++// the container. A list of options can be passed into this function to alter ++// the labels. 
++func InitLabels([]string) (string, string, error) { ++ return "", "", nil ++} ++ ++func SetFileLabel(string, string) error { ++ return nil ++} ++ ++func SetFileCreateLabel(string) error { ++ return nil ++} ++ ++func Relabel(string, string, bool) error { ++ return nil ++} ++ ++// DisableSecOpt returns a security opt that can disable labeling ++// support for future container processes ++func DisableSecOpt() []string { ++ return nil ++} ++ ++// Validate checks that the label does not include unexpected options ++func Validate(string) error { ++ return nil ++} ++ ++// RelabelNeeded checks whether the user requested a relabel ++func RelabelNeeded(string) bool { ++ return false ++} ++ ++// IsShared checks that the label includes a "shared" mark ++func IsShared(string) bool { ++ return false ++} +diff --git a/internal/third_party/selinux/go-selinux/label/label_stub_test.go b/internal/third_party/selinux/go-selinux/label/label_stub_test.go +new file mode 100644 +index 00000000..e92cc8b9 +--- /dev/null ++++ b/internal/third_party/selinux/go-selinux/label/label_stub_test.go +@@ -0,0 +1,76 @@ ++//go:build !linux ++// +build !linux ++ ++package label ++ ++import ( ++ "testing" ++ ++ "github.com/opencontainers/selinux/go-selinux" ++) ++ ++const testLabel = "system_u:object_r:container_file_t:s0:c1,c2" ++ ++func TestInit(t *testing.T) { ++ var testNull []string ++ _, _, err := InitLabels(testNull) ++ if err != nil { ++ t.Log("InitLabels Failed") ++ t.Fatal(err) ++ } ++ testDisabled := []string{"disable"} ++ if selinux.ROFileLabel() != "" { ++ t.Error("selinux.ROFileLabel Failed") ++ } ++ plabel, mlabel, err := InitLabels(testDisabled) ++ if err != nil { ++ t.Log("InitLabels Disabled Failed") ++ t.Fatal(err) ++ } ++ if plabel != "" { ++ t.Fatal("InitLabels Disabled Failed") ++ } ++ if mlabel != "" { ++ t.Fatal("InitLabels Disabled mlabel Failed") ++ } ++ testUser := []string{"user:user_u", "role:user_r", "type:user_t", "level:s0:c1,c15"} ++ _, _, err = InitLabels(testUser) ++ 
if err != nil { ++ t.Log("InitLabels User Failed") ++ t.Fatal(err) ++ } ++} ++ ++func TestRelabel(t *testing.T) { ++ if err := Relabel("/etc", testLabel, false); err != nil { ++ t.Fatalf("Relabel /etc succeeded") ++ } ++} ++ ++func TestCheckLabelCompile(t *testing.T) { ++ if _, _, err := InitLabels(nil); err != nil { ++ t.Fatal(err) ++ } ++ ++ tmpDir := t.TempDir() ++ ++ if err := SetFileLabel(tmpDir, "foobar"); err != nil { ++ t.Fatal(err) ++ } ++ ++ if err := SetFileCreateLabel("foobar"); err != nil { ++ t.Fatal(err) ++ } ++ ++ DisableSecOpt() ++ ++ if err := Validate("foobar"); err != nil { ++ t.Fatal(err) ++ } ++ if relabel := RelabelNeeded("foobar"); relabel { ++ t.Fatal("Relabel failed") ++ } ++ if shared := IsShared("foobar"); shared { ++ t.Fatal("isshared failed") ++ } ++} +diff --git a/internal/third_party/selinux/go-selinux/label/label_test.go b/internal/third_party/selinux/go-selinux/label/label_test.go +new file mode 100644 +index 00000000..fb172f3f +--- /dev/null ++++ b/internal/third_party/selinux/go-selinux/label/label_test.go +@@ -0,0 +1,35 @@ ++package label ++ ++import "testing" ++ ++func TestFormatMountLabel(t *testing.T) { ++ expected := `context="foobar"` ++ if test := FormatMountLabel("", "foobar"); test != expected { ++ t.Fatalf("Format failed. Expected %s, got %s", expected, test) ++ } ++ ++ expected = `src,context="foobar"` ++ if test := FormatMountLabel("src", "foobar"); test != expected { ++ t.Fatalf("Format failed. Expected %s, got %s", expected, test) ++ } ++ ++ expected = `src` ++ if test := FormatMountLabel("src", ""); test != expected { ++ t.Fatalf("Format failed. Expected %s, got %s", expected, test) ++ } ++ ++ expected = `fscontext="foobar"` ++ if test := FormatMountLabelByType("", "foobar", "fscontext"); test != expected { ++ t.Fatalf("Format failed. 
Expected %s, got %s", expected, test) ++ } ++ ++ expected = `src,fscontext="foobar"` ++ if test := FormatMountLabelByType("src", "foobar", "fscontext"); test != expected { ++ t.Fatalf("Format failed. Expected %s, got %s", expected, test) ++ } ++ ++ expected = `src` ++ if test := FormatMountLabelByType("src", "", "rootcontext"); test != expected { ++ t.Fatalf("Format failed. Expected %s, got %s", expected, test) ++ } ++} +diff --git a/internal/third_party/selinux/go-selinux/selinux.go b/internal/third_party/selinux/go-selinux/selinux.go +new file mode 100644 +index 00000000..15150d47 +--- /dev/null ++++ b/internal/third_party/selinux/go-selinux/selinux.go +@@ -0,0 +1,322 @@ ++package selinux ++ ++import ( ++ "errors" ++) ++ ++const ( ++ // Enforcing constant indicate SELinux is in enforcing mode ++ Enforcing = 1 ++ // Permissive constant to indicate SELinux is in permissive mode ++ Permissive = 0 ++ // Disabled constant to indicate SELinux is disabled ++ Disabled = -1 ++ // maxCategory is the maximum number of categories used within containers ++ maxCategory = 1024 ++ // DefaultCategoryRange is the upper bound on the category range ++ DefaultCategoryRange = uint32(maxCategory) ++) ++ ++var ( ++ // ErrMCSAlreadyExists is returned when trying to allocate a duplicate MCS. ++ ErrMCSAlreadyExists = errors.New("MCS label already exists") ++ // ErrEmptyPath is returned when an empty path has been specified. ++ ErrEmptyPath = errors.New("empty path") ++ ++ // ErrInvalidLabel is returned when an invalid label is specified. ++ ErrInvalidLabel = errors.New("invalid Label") ++ ++ // InvalidLabel is returned when an invalid label is specified. ++ // ++ // Deprecated: use [ErrInvalidLabel]. 
++ InvalidLabel = ErrInvalidLabel ++ ++ // ErrIncomparable is returned two levels are not comparable ++ ErrIncomparable = errors.New("incomparable levels") ++ // ErrLevelSyntax is returned when a sensitivity or category do not have correct syntax in a level ++ ErrLevelSyntax = errors.New("invalid level syntax") ++ ++ // ErrContextMissing is returned if a requested context is not found in a file. ++ ErrContextMissing = errors.New("context does not have a match") ++ // ErrVerifierNil is returned when a context verifier function is nil. ++ ErrVerifierNil = errors.New("verifier function is nil") ++ ++ // ErrNotTGLeader is returned by [SetKeyLabel] if the calling thread ++ // is not the thread group leader. ++ ErrNotTGLeader = errors.New("calling thread is not the thread group leader") ++ ++ // CategoryRange allows the upper bound on the category range to be adjusted ++ CategoryRange = DefaultCategoryRange ++ ++ privContainerMountLabel string ++) ++ ++// Context is a representation of the SELinux label broken into 4 parts ++type Context map[string]string ++ ++// SetDisabled disables SELinux support for the package ++func SetDisabled() { ++ setDisabled() ++} ++ ++// GetEnabled returns whether SELinux is currently enabled. ++func GetEnabled() bool { ++ return getEnabled() ++} ++ ++// ClassIndex returns the int index for an object class in the loaded policy, ++// or -1 and an error ++func ClassIndex(class string) (int, error) { ++ return classIndex(class) ++} ++ ++// SetFileLabel sets the SELinux label for this path, following symlinks, ++// or returns an error. ++func SetFileLabel(fpath string, label string) error { ++ return setFileLabel(fpath, label) ++} ++ ++// LsetFileLabel sets the SELinux label for this path, not following symlinks, ++// or returns an error. ++func LsetFileLabel(fpath string, label string) error { ++ return lSetFileLabel(fpath, label) ++} ++ ++// FileLabel returns the SELinux label for this path, following symlinks, ++// or returns an error. 
++func FileLabel(fpath string) (string, error) { ++ return fileLabel(fpath) ++} ++ ++// LfileLabel returns the SELinux label for this path, not following symlinks, ++// or returns an error. ++func LfileLabel(fpath string) (string, error) { ++ return lFileLabel(fpath) ++} ++ ++// SetFSCreateLabel tells the kernel what label to use for all file system objects ++// created by this task. ++// Set the label to an empty string to return to the default label. Calls to SetFSCreateLabel ++// should be wrapped in runtime.LockOSThread()/runtime.UnlockOSThread() until file system ++// objects created by this task are finished to guarantee another goroutine does not migrate ++// to the current thread before execution is complete. ++func SetFSCreateLabel(label string) error { ++ return setFSCreateLabel(label) ++} ++ ++// FSCreateLabel returns the default label the kernel which the kernel is using ++// for file system objects created by this task. "" indicates default. ++func FSCreateLabel() (string, error) { ++ return fsCreateLabel() ++} ++ ++// CurrentLabel returns the SELinux label of the current process thread, or an error. ++func CurrentLabel() (string, error) { ++ return currentLabel() ++} ++ ++// PidLabel returns the SELinux label of the given pid, or an error. ++func PidLabel(pid int) (string, error) { ++ return pidLabel(pid) ++} ++ ++// ExecLabel returns the SELinux label that the kernel will use for any programs ++// that are executed by the current process thread, or an error. ++func ExecLabel() (string, error) { ++ return execLabel() ++} ++ ++// CanonicalizeContext takes a context string and writes it to the kernel ++// the function then returns the context that the kernel will use. Use this ++// function to check if two contexts are equivalent ++func CanonicalizeContext(val string) (string, error) { ++ return canonicalizeContext(val) ++} ++ ++// ComputeCreateContext requests the type transition from source to target for ++// class from the kernel. 
++func ComputeCreateContext(source string, target string, class string) (string, error) { ++ return computeCreateContext(source, target, class) ++} ++ ++// CalculateGlbLub computes the glb (greatest lower bound) and lub (least upper bound) ++// of a source and target range. ++// The glblub is calculated as the greater of the low sensitivities and ++// the lower of the high sensitivities and the and of each category bitset. ++func CalculateGlbLub(sourceRange, targetRange string) (string, error) { ++ return calculateGlbLub(sourceRange, targetRange) ++} ++ ++// SetExecLabel sets the SELinux label that the kernel will use for any programs ++// that are executed by the current process thread, or an error. Calls to SetExecLabel ++// should be wrapped in runtime.LockOSThread()/runtime.UnlockOSThread() until execution ++// of the program is finished to guarantee another goroutine does not migrate to the current ++// thread before execution is complete. ++func SetExecLabel(label string) error { ++ return writeConThreadSelf("attr/exec", label) ++} ++ ++// SetTaskLabel sets the SELinux label for the current thread, or an error. ++// This requires the dyntransition permission. Calls to SetTaskLabel should ++// be wrapped in runtime.LockOSThread()/runtime.UnlockOSThread() to guarantee ++// the current thread does not run in a new mislabeled thread. ++func SetTaskLabel(label string) error { ++ return writeConThreadSelf("attr/current", label) ++} ++ ++// SetSocketLabel takes a process label and tells the kernel to assign the ++// label to the next socket that gets created. Calls to SetSocketLabel ++// should be wrapped in runtime.LockOSThread()/runtime.UnlockOSThread() until ++// the socket is created to guarantee another goroutine does not migrate ++// to the current thread before execution is complete. 
++func SetSocketLabel(label string) error { ++ return writeConThreadSelf("attr/sockcreate", label) ++} ++ ++// SocketLabel retrieves the current socket label setting ++func SocketLabel() (string, error) { ++ return readConThreadSelf("attr/sockcreate") ++} ++ ++// PeerLabel retrieves the label of the client on the other side of a socket ++func PeerLabel(fd uintptr) (string, error) { ++ return peerLabel(fd) ++} ++ ++// SetKeyLabel takes a process label and tells the kernel to assign the ++// label to the next kernel keyring that gets created. ++// ++// Calls to SetKeyLabel should be wrapped in ++// runtime.LockOSThread()/runtime.UnlockOSThread() until the kernel keyring is ++// created to guarantee another goroutine does not migrate to the current ++// thread before execution is complete. ++// ++// Only the thread group leader can set key label. ++func SetKeyLabel(label string) error { ++ return setKeyLabel(label) ++} ++ ++// KeyLabel retrieves the current kernel keyring label setting ++func KeyLabel() (string, error) { ++ return keyLabel() ++} ++ ++// Get returns the Context as a string ++func (c Context) Get() string { ++ return c.get() ++} ++ ++// NewContext creates a new Context struct from the specified label ++func NewContext(label string) (Context, error) { ++ return newContext(label) ++} ++ ++// ClearLabels clears all reserved labels ++func ClearLabels() { ++ clearLabels() ++} ++ ++// ReserveLabel reserves the MLS/MCS level component of the specified label ++func ReserveLabel(label string) { ++ reserveLabel(label) ++} ++ ++// MLSEnabled checks if MLS is enabled. ++func MLSEnabled() bool { ++ return isMLSEnabled() ++} ++ ++// EnforceMode returns the current SELinux mode Enforcing, Permissive, Disabled ++func EnforceMode() int { ++ return enforceMode() ++} ++ ++// SetEnforceMode sets the current SELinux mode Enforcing, Permissive. ++// Disabled is not valid, since this needs to be set at boot time. 
++func SetEnforceMode(mode int) error { ++ return setEnforceMode(mode) ++} ++ ++// DefaultEnforceMode returns the systems default SELinux mode Enforcing, ++// Permissive or Disabled. Note this is just the default at boot time. ++// EnforceMode tells you the systems current mode. ++func DefaultEnforceMode() int { ++ return defaultEnforceMode() ++} ++ ++// ReleaseLabel un-reserves the MLS/MCS Level field of the specified label, ++// allowing it to be used by another process. ++func ReleaseLabel(label string) { ++ releaseLabel(label) ++} ++ ++// ROFileLabel returns the specified SELinux readonly file label ++func ROFileLabel() string { ++ return roFileLabel() ++} ++ ++// KVMContainerLabels returns the default processLabel and mountLabel to be used ++// for kvm containers by the calling process. ++func KVMContainerLabels() (string, string) { ++ return kvmContainerLabels() ++} ++ ++// InitContainerLabels returns the default processLabel and file labels to be ++// used for containers running an init system like systemd by the calling process. ++func InitContainerLabels() (string, string) { ++ return initContainerLabels() ++} ++ ++// ContainerLabels returns an allocated processLabel and fileLabel to be used for ++// container labeling by the calling process. ++func ContainerLabels() (processLabel string, fileLabel string) { ++ return containerLabels() ++} ++ ++// SecurityCheckContext validates that the SELinux label is understood by the kernel ++func SecurityCheckContext(val string) error { ++ return securityCheckContext(val) ++} ++ ++// CopyLevel returns a label with the MLS/MCS level from src label replaced on ++// the dest label. ++func CopyLevel(src, dest string) (string, error) { ++ return copyLevel(src, dest) ++} ++ ++// Chcon changes the fpath file object to the SELinux label. ++// If fpath is a directory and recurse is true, then Chcon walks the ++// directory tree setting the label. ++// ++// The fpath itself is guaranteed to be relabeled last. 
++func Chcon(fpath string, label string, recurse bool) error { ++ return chcon(fpath, label, recurse) ++} ++ ++// DupSecOpt takes an SELinux process label and returns security options that ++// can be used to set the SELinux Type and Level for future container processes. ++func DupSecOpt(src string) ([]string, error) { ++ return dupSecOpt(src) ++} ++ ++// DisableSecOpt returns a security opt that can be used to disable SELinux ++// labeling support for future container processes. ++func DisableSecOpt() []string { ++ return []string{"disable"} ++} ++ ++// GetDefaultContextWithLevel gets a single context for the specified SELinux user ++// identity that is reachable from the specified scon context. The context is based ++// on the per-user /etc/selinux/{SELINUXTYPE}/contexts/users/ if it exists, ++// and falls back to the global /etc/selinux/{SELINUXTYPE}/contexts/default_contexts ++// file. ++func GetDefaultContextWithLevel(user, level, scon string) (string, error) { ++ return getDefaultContextWithLevel(user, level, scon) ++} ++ ++// PrivContainerMountLabel returns mount label for privileged containers ++func PrivContainerMountLabel() string { ++ // Make sure label is initialized. 
++ _ = label("") ++ return privContainerMountLabel ++} +diff --git a/internal/third_party/selinux/go-selinux/selinux_linux.go b/internal/third_party/selinux/go-selinux/selinux_linux.go +new file mode 100644 +index 00000000..70392d98 +--- /dev/null ++++ b/internal/third_party/selinux/go-selinux/selinux_linux.go +@@ -0,0 +1,1405 @@ ++package selinux ++ ++import ( ++ "bufio" ++ "bytes" ++ "crypto/rand" ++ "encoding/binary" ++ "errors" ++ "fmt" ++ "io" ++ "io/fs" ++ "math/big" ++ "os" ++ "os/user" ++ "path/filepath" ++ "strconv" ++ "strings" ++ "sync" ++ ++ "github.com/cyphar/filepath-securejoin/pathrs-lite" ++ "github.com/cyphar/filepath-securejoin/pathrs-lite/procfs" ++ "golang.org/x/sys/unix" ++ ++ "github.com/opencontainers/selinux/pkg/pwalkdir" ++) ++ ++const ( ++ minSensLen = 2 ++ contextFile = "/usr/share/containers/selinux/contexts" ++ selinuxDir = "/etc/selinux/" ++ selinuxUsersDir = "contexts/users" ++ defaultContexts = "contexts/default_contexts" ++ selinuxConfig = selinuxDir + "config" ++ selinuxfsMount = "/sys/fs/selinux" ++ selinuxTypeTag = "SELINUXTYPE" ++ selinuxTag = "SELINUX" ++ xattrNameSelinux = "security.selinux" ++) ++ ++type selinuxState struct { ++ mcsList map[string]bool ++ selinuxfs string ++ selinuxfsOnce sync.Once ++ enabledSet bool ++ enabled bool ++ sync.Mutex ++} ++ ++type level struct { ++ cats *big.Int ++ sens int ++} ++ ++type mlsRange struct { ++ low *level ++ high *level ++} ++ ++type defaultSECtx struct { ++ userRdr io.Reader ++ verifier func(string) error ++ defaultRdr io.Reader ++ user, level, scon string ++} ++ ++type levelItem byte ++ ++const ( ++ sensitivity levelItem = 's' ++ category levelItem = 'c' ++) ++ ++var ( ++ readOnlyFileLabel string ++ state = selinuxState{ ++ mcsList: make(map[string]bool), ++ } ++ ++ // for policyRoot() ++ policyRootOnce sync.Once ++ policyRootVal string ++ ++ // for label() ++ loadLabelsOnce sync.Once ++ labels map[string]string ++) ++ ++func policyRoot() string { ++ policyRootOnce.Do(func() { ++ 
policyRootVal = filepath.Join(selinuxDir, readConfig(selinuxTypeTag)) ++ }) ++ ++ return policyRootVal ++} ++ ++func (s *selinuxState) setEnable(enabled bool) bool { ++ s.Lock() ++ defer s.Unlock() ++ s.enabledSet = true ++ s.enabled = enabled ++ return s.enabled ++} ++ ++func (s *selinuxState) getEnabled() bool { ++ s.Lock() ++ enabled := s.enabled ++ enabledSet := s.enabledSet ++ s.Unlock() ++ if enabledSet { ++ return enabled ++ } ++ ++ enabled = false ++ if fs := getSelinuxMountPoint(); fs != "" { ++ if con, _ := CurrentLabel(); con != "kernel" { ++ enabled = true ++ } ++ } ++ return s.setEnable(enabled) ++} ++ ++// setDisabled disables SELinux support for the package ++func setDisabled() { ++ state.setEnable(false) ++} ++ ++func verifySELinuxfsMount(mnt string) bool { ++ var buf unix.Statfs_t ++ for { ++ err := unix.Statfs(mnt, &buf) ++ if err == nil { ++ break ++ } ++ if err == unix.EAGAIN || err == unix.EINTR { ++ continue ++ } ++ return false ++ } ++ ++ //#nosec G115 -- there is no overflow here. 
++ if uint32(buf.Type) != uint32(unix.SELINUX_MAGIC) { ++ return false ++ } ++ if (buf.Flags & unix.ST_RDONLY) != 0 { ++ return false ++ } ++ ++ return true ++} ++ ++func findSELinuxfs() string { ++ // fast path: check the default mount first ++ if verifySELinuxfsMount(selinuxfsMount) { ++ return selinuxfsMount ++ } ++ ++ // check if selinuxfs is available before going the slow path ++ fs, err := os.ReadFile("/proc/filesystems") ++ if err != nil { ++ return "" ++ } ++ if !bytes.Contains(fs, []byte("\tselinuxfs\n")) { ++ return "" ++ } ++ ++ // slow path: try to find among the mounts ++ f, err := os.Open("/proc/self/mountinfo") ++ if err != nil { ++ return "" ++ } ++ defer f.Close() ++ ++ scanner := bufio.NewScanner(f) ++ for { ++ mnt := findSELinuxfsMount(scanner) ++ if mnt == "" { // error or not found ++ return "" ++ } ++ if verifySELinuxfsMount(mnt) { ++ return mnt ++ } ++ } ++} ++ ++// findSELinuxfsMount returns a next selinuxfs mount point found, ++// if there is one, or an empty string in case of EOF or error. ++func findSELinuxfsMount(s *bufio.Scanner) string { ++ for s.Scan() { ++ txt := s.Bytes() ++ // The first field after - is fs type. ++ // Safe as spaces in mountpoints are encoded as \040 ++ if !bytes.Contains(txt, []byte(" - selinuxfs ")) { ++ continue ++ } ++ const mPos = 5 // mount point is 5th field ++ fields := bytes.SplitN(txt, []byte(" "), mPos+1) ++ if len(fields) < mPos+1 { ++ continue ++ } ++ return string(fields[mPos-1]) ++ } ++ ++ return "" ++} ++ ++func (s *selinuxState) getSELinuxfs() string { ++ s.selinuxfsOnce.Do(func() { ++ s.selinuxfs = findSELinuxfs() ++ }) ++ ++ return s.selinuxfs ++} ++ ++// getSelinuxMountPoint returns the path to the mountpoint of an selinuxfs ++// filesystem or an empty string if no mountpoint is found. Selinuxfs is ++// a proc-like pseudo-filesystem that exposes the SELinux policy API to ++// processes. The existence of an selinuxfs mount is used to determine ++// whether SELinux is currently enabled or not. 
++func getSelinuxMountPoint() string { ++ return state.getSELinuxfs() ++} ++ ++// getEnabled returns whether SELinux is currently enabled. ++func getEnabled() bool { ++ return state.getEnabled() ++} ++ ++func readConfig(target string) string { ++ in, err := os.Open(selinuxConfig) ++ if err != nil { ++ return "" ++ } ++ defer in.Close() ++ ++ scanner := bufio.NewScanner(in) ++ ++ for scanner.Scan() { ++ line := bytes.TrimSpace(scanner.Bytes()) ++ if len(line) == 0 { ++ // Skip blank lines ++ continue ++ } ++ if line[0] == ';' || line[0] == '#' { ++ // Skip comments ++ continue ++ } ++ fields := bytes.SplitN(line, []byte{'='}, 2) ++ if len(fields) != 2 { ++ continue ++ } ++ if bytes.Equal(fields[0], []byte(target)) { ++ return string(bytes.Trim(fields[1], `"`)) ++ } ++ } ++ return "" ++} ++ ++func readConFd(in *os.File) (string, error) { ++ data, err := io.ReadAll(in) ++ if err != nil { ++ return "", err ++ } ++ return string(bytes.TrimSuffix(data, []byte{0})), nil ++} ++ ++func writeConFd(out *os.File, val string) error { ++ var err error ++ if val != "" { ++ _, err = out.Write([]byte(val)) ++ } else { ++ _, err = out.Write(nil) ++ } ++ return err ++} ++ ++// openProcThreadSelf is a small wrapper around [OpenThreadSelf] and ++// [pathrs.Reopen] to make "one-shot opens" slightly more ergonomic. The ++// provided mode must be os.O_* flags to indicate what mode the returned file ++// should be opened with (flags like os.O_CREAT and os.O_EXCL are not ++// supported). ++// ++// If no error occurred, the returned handle is guaranteed to be exactly ++// /proc/thread-self/ with no tricky mounts or symlinks causing you to ++// operate on an unexpected path (with some caveats on pre-openat2 or ++// pre-fsopen kernels). 
++// ++// [OpenThreadSelf]: https://pkg.go.dev/github.com/cyphar/filepath-securejoin/pathrs-lite/procfs#Handle.OpenThreadSelf ++func openProcThreadSelf(subpath string, mode int) (*os.File, procfs.ProcThreadSelfCloser, error) { ++ if subpath == "" { ++ return nil, nil, ErrEmptyPath ++ } ++ ++ proc, err := procfs.OpenProcRoot() ++ if err != nil { ++ return nil, nil, err ++ } ++ defer proc.Close() ++ ++ handle, closer, err := proc.OpenThreadSelf(subpath) ++ if err != nil { ++ return nil, nil, fmt.Errorf("open /proc/thread-self/%s handle: %w", subpath, err) ++ } ++ defer handle.Close() // we will return a re-opened handle ++ ++ file, err := pathrs.Reopen(handle, mode) ++ if err != nil { ++ closer() ++ return nil, nil, fmt.Errorf("reopen /proc/thread-self/%s handle (%#x): %w", subpath, mode, err) ++ } ++ return file, closer, nil ++} ++ ++// Read the contents of /proc/thread-self/. ++func readConThreadSelf(fpath string) (string, error) { ++ in, closer, err := openProcThreadSelf(fpath, os.O_RDONLY|unix.O_CLOEXEC) ++ if err != nil { ++ return "", err ++ } ++ defer closer() ++ defer in.Close() ++ ++ return readConFd(in) ++} ++ ++// Write to /proc/thread-self/. ++func writeConThreadSelf(fpath, val string) error { ++ if val == "" { ++ if !getEnabled() { ++ return nil ++ } ++ } ++ ++ out, closer, err := openProcThreadSelf(fpath, os.O_WRONLY|unix.O_CLOEXEC) ++ if err != nil { ++ return err ++ } ++ defer closer() ++ defer out.Close() ++ ++ return writeConFd(out, val) ++} ++ ++// openProcSelf is a small wrapper around [OpenSelf] and [pathrs.Reopen] to ++// make "one-shot opens" slightly more ergonomic. The provided mode must be ++// os.O_* flags to indicate what mode the returned file should be opened with ++// (flags like os.O_CREAT and os.O_EXCL are not supported). 
++// ++// If no error occurred, the returned handle is guaranteed to be exactly ++// /proc/self/ with no tricky mounts or symlinks causing you to ++// operate on an unexpected path (with some caveats on pre-openat2 or ++// pre-fsopen kernels). ++// ++// [OpenSelf]: https://pkg.go.dev/github.com/cyphar/filepath-securejoin/pathrs-lite/procfs#Handle.OpenSelf ++func openProcSelf(subpath string, mode int) (*os.File, error) { ++ if subpath == "" { ++ return nil, ErrEmptyPath ++ } ++ ++ proc, err := procfs.OpenProcRoot() ++ if err != nil { ++ return nil, err ++ } ++ defer proc.Close() ++ ++ handle, err := proc.OpenSelf(subpath) ++ if err != nil { ++ return nil, fmt.Errorf("open /proc/self/%s handle: %w", subpath, err) ++ } ++ defer handle.Close() // we will return a re-opened handle ++ ++ file, err := pathrs.Reopen(handle, mode) ++ if err != nil { ++ return nil, fmt.Errorf("reopen /proc/self/%s handle (%#x): %w", subpath, mode, err) ++ } ++ return file, nil ++} ++ ++// Read the contents of /proc/self/. ++func readConSelf(fpath string) (string, error) { ++ in, err := openProcSelf(fpath, os.O_RDONLY|unix.O_CLOEXEC) ++ if err != nil { ++ return "", err ++ } ++ defer in.Close() ++ ++ return readConFd(in) ++} ++ ++// Write to /proc/self/. ++func writeConSelf(fpath, val string) error { ++ if val == "" { ++ if !getEnabled() { ++ return nil ++ } ++ } ++ ++ out, err := openProcSelf(fpath, os.O_WRONLY|unix.O_CLOEXEC) ++ if err != nil { ++ return err ++ } ++ defer out.Close() ++ ++ return writeConFd(out, val) ++} ++ ++// openProcPid is a small wrapper around [OpenPid] and [pathrs.Reopen] to make ++// "one-shot opens" slightly more ergonomic. The provided mode must be os.O_* ++// flags to indicate what mode the returned file should be opened with (flags ++// like os.O_CREAT and os.O_EXCL are not supported). 
++// ++// If no error occurred, the returned handle is guaranteed to be exactly ++// /proc/<pid>/<subpath> with no tricky mounts or symlinks causing you to ++// operate on an unexpected path (with some caveats on pre-openat2 or ++// pre-fsopen kernels). ++// ++// [OpenPid]: https://pkg.go.dev/github.com/cyphar/filepath-securejoin/pathrs-lite/procfs#Handle.OpenPid ++func openProcPid(pid int, subpath string, mode int) (*os.File, error) { ++ if subpath == "" { ++ return nil, ErrEmptyPath ++ } ++ ++ proc, err := procfs.OpenProcRoot() ++ if err != nil { ++ return nil, err ++ } ++ defer proc.Close() ++ ++ handle, err := proc.OpenPid(pid, subpath) ++ if err != nil { ++ return nil, fmt.Errorf("open /proc/%d/%s handle: %w", pid, subpath, err) ++ } ++ defer handle.Close() // we will return a re-opened handle ++ ++ file, err := pathrs.Reopen(handle, mode) ++ if err != nil { ++ return nil, fmt.Errorf("reopen /proc/%d/%s handle (%#x): %w", pid, subpath, mode, err) ++ } ++ return file, nil ++} ++ ++// classIndex returns the int index for an object class in the loaded policy, ++// or -1 and an error ++func classIndex(class string) (int, error) { ++ permpath := fmt.Sprintf("class/%s/index", class) ++ indexpath := filepath.Join(getSelinuxMountPoint(), permpath) ++ ++ indexB, err := os.ReadFile(indexpath) ++ if err != nil { ++ return -1, err ++ } ++ index, err := strconv.Atoi(string(indexB)) ++ if err != nil { ++ return -1, err ++ } ++ ++ return index, nil ++} ++ ++// lSetFileLabel sets the SELinux label for this path, not following symlinks, ++// or returns an error. 
++func lSetFileLabel(fpath string, label string) error { ++ if fpath == "" { ++ return ErrEmptyPath ++ } ++ for { ++ err := unix.Lsetxattr(fpath, xattrNameSelinux, []byte(label), 0) ++ if err == nil { ++ break ++ } ++ if err != unix.EINTR { ++ return &os.PathError{Op: fmt.Sprintf("lsetxattr(label=%s)", label), Path: fpath, Err: err} ++ } ++ } ++ ++ return nil ++} ++ ++// setFileLabel sets the SELinux label for this path, following symlinks, ++// or returns an error. ++func setFileLabel(fpath string, label string) error { ++ if fpath == "" { ++ return ErrEmptyPath ++ } ++ for { ++ err := unix.Setxattr(fpath, xattrNameSelinux, []byte(label), 0) ++ if err == nil { ++ break ++ } ++ if err != unix.EINTR { ++ return &os.PathError{Op: fmt.Sprintf("setxattr(label=%s)", label), Path: fpath, Err: err} ++ } ++ } ++ ++ return nil ++} ++ ++// fileLabel returns the SELinux label for this path, following symlinks, ++// or returns an error. ++func fileLabel(fpath string) (string, error) { ++ if fpath == "" { ++ return "", ErrEmptyPath ++ } ++ ++ label, err := getxattr(fpath, xattrNameSelinux) ++ if err != nil { ++ return "", &os.PathError{Op: "getxattr", Path: fpath, Err: err} ++ } ++ // Trim the NUL byte at the end of the byte buffer, if present. ++ if len(label) > 0 && label[len(label)-1] == '\x00' { ++ label = label[:len(label)-1] ++ } ++ return string(label), nil ++} ++ ++// lFileLabel returns the SELinux label for this path, not following symlinks, ++// or returns an error. ++func lFileLabel(fpath string) (string, error) { ++ if fpath == "" { ++ return "", ErrEmptyPath ++ } ++ ++ label, err := lgetxattr(fpath, xattrNameSelinux) ++ if err != nil { ++ return "", &os.PathError{Op: "lgetxattr", Path: fpath, Err: err} ++ } ++ // Trim the NUL byte at the end of the byte buffer, if present. 
++ if len(label) > 0 && label[len(label)-1] == '\x00' { ++ label = label[:len(label)-1] ++ } ++ return string(label), nil ++} ++ ++func setFSCreateLabel(label string) error { ++ return writeConThreadSelf("attr/fscreate", label) ++} ++ ++// fsCreateLabel returns the default label which the kernel is using ++// for file system objects created by this task. "" indicates default. ++func fsCreateLabel() (string, error) { ++ return readConThreadSelf("attr/fscreate") ++} ++ ++// currentLabel returns the SELinux label of the current process thread, or an error. ++func currentLabel() (string, error) { ++ return readConThreadSelf("attr/current") ++} ++ ++// pidLabel returns the SELinux label of the given pid, or an error. ++func pidLabel(pid int) (string, error) { ++ it, err := openProcPid(pid, "attr/current", os.O_RDONLY|unix.O_CLOEXEC) ++ if err != nil { ++ return "", err ++ } ++ defer it.Close() ++ return readConFd(it) ++} ++ ++// execLabel returns the SELinux label that the kernel will use for any programs ++// that are executed by the current process thread, or an error. ++func execLabel() (string, error) { ++ return readConThreadSelf("attr/exec") ++} ++ ++// canonicalizeContext takes a context string and writes it to the kernel ++// the function then returns the context that the kernel will use. Use this ++// function to check if two contexts are equivalent ++func canonicalizeContext(val string) (string, error) { ++ return readWriteCon(filepath.Join(getSelinuxMountPoint(), "context"), val) ++} ++ ++// computeCreateContext requests the type transition from source to target for ++// class from the kernel. ++func computeCreateContext(source string, target string, class string) (string, error) { ++ classidx, err := classIndex(class) ++ if err != nil { ++ return "", err ++ } ++ ++ return readWriteCon(filepath.Join(getSelinuxMountPoint(), "create"), fmt.Sprintf("%s %s %d", source, target, classidx)) ++} ++ ++// catsToBitset stores categories in a bitset. 
++func catsToBitset(cats string) (*big.Int, error) { ++ bitset := new(big.Int) ++ ++ catlist := strings.Split(cats, ",") ++ for _, r := range catlist { ++ ranges := strings.SplitN(r, ".", 2) ++ if len(ranges) > 1 { ++ catstart, err := parseLevelItem(ranges[0], category) ++ if err != nil { ++ return nil, err ++ } ++ catend, err := parseLevelItem(ranges[1], category) ++ if err != nil { ++ return nil, err ++ } ++ for i := catstart; i <= catend; i++ { ++ bitset.SetBit(bitset, i, 1) ++ } ++ } else { ++ cat, err := parseLevelItem(ranges[0], category) ++ if err != nil { ++ return nil, err ++ } ++ bitset.SetBit(bitset, cat, 1) ++ } ++ } ++ ++ return bitset, nil ++} ++ ++// parseLevelItem parses and verifies that a sensitivity or category are valid ++func parseLevelItem(s string, sep levelItem) (int, error) { ++ if len(s) < minSensLen || levelItem(s[0]) != sep { ++ return 0, ErrLevelSyntax ++ } ++ const bitSize = 31 // Make sure the result fits into signed int32. ++ val, err := strconv.ParseUint(s[1:], 10, bitSize) ++ if err != nil { ++ return 0, err ++ } ++ ++ return int(val), nil ++} ++ ++// parseLevel fills a level from a string that contains ++// a sensitivity and categories ++func (l *level) parseLevel(levelStr string) error { ++ lvl := strings.SplitN(levelStr, ":", 2) ++ sens, err := parseLevelItem(lvl[0], sensitivity) ++ if err != nil { ++ return fmt.Errorf("failed to parse sensitivity: %w", err) ++ } ++ l.sens = sens ++ if len(lvl) > 1 { ++ cats, err := catsToBitset(lvl[1]) ++ if err != nil { ++ return fmt.Errorf("failed to parse categories: %w", err) ++ } ++ l.cats = cats ++ } ++ ++ return nil ++} ++ ++// rangeStrToMLSRange marshals a string representation of a range. ++func rangeStrToMLSRange(rangeStr string) (*mlsRange, error) { ++ r := &mlsRange{} ++ l := strings.SplitN(rangeStr, "-", 2) ++ ++ switch len(l) { ++ // rangeStr that has a low and a high level, e.g. 
s4:c0.c1023-s6:c0.c1023 ++ case 2: ++ r.high = &level{} ++ if err := r.high.parseLevel(l[1]); err != nil { ++ return nil, fmt.Errorf("failed to parse high level %q: %w", l[1], err) ++ } ++ fallthrough ++ // rangeStr that is single level, e.g. s6:c0,c3,c5,c30.c1023 ++ case 1: ++ r.low = &level{} ++ if err := r.low.parseLevel(l[0]); err != nil { ++ return nil, fmt.Errorf("failed to parse low level %q: %w", l[0], err) ++ } ++ } ++ ++ if r.high == nil { ++ r.high = r.low ++ } ++ ++ return r, nil ++} ++ ++// bitsetToStr takes a category bitset and returns it in the ++// canonical selinux syntax ++func bitsetToStr(c *big.Int) string { ++ var str string ++ ++ length := 0 ++ i0 := int(c.TrailingZeroBits()) //#nosec G115 -- don't expect TralingZeroBits to return values with highest bit set. ++ for i := i0; i < c.BitLen(); i++ { ++ if c.Bit(i) == 0 { ++ continue ++ } ++ if length == 0 { ++ if str != "" { ++ str += "," ++ } ++ str += "c" + strconv.Itoa(i) ++ } ++ if c.Bit(i+1) == 1 { ++ length++ ++ continue ++ } ++ if length == 1 { ++ str += ",c" + strconv.Itoa(i) ++ } else if length > 1 { ++ str += ".c" + strconv.Itoa(i) ++ } ++ length = 0 ++ } ++ ++ return str ++} ++ ++func (l *level) equal(l2 *level) bool { ++ if l2 == nil || l == nil { ++ return l == l2 ++ } ++ if l2.sens != l.sens { ++ return false ++ } ++ if l2.cats == nil || l.cats == nil { ++ return l2.cats == l.cats ++ } ++ return l.cats.Cmp(l2.cats) == 0 ++} ++ ++// String returns an mlsRange as a string. ++func (m mlsRange) String() string { ++ low := "s" + strconv.Itoa(m.low.sens) ++ if m.low.cats != nil && m.low.cats.BitLen() > 0 { ++ low += ":" + bitsetToStr(m.low.cats) ++ } ++ ++ if m.low.equal(m.high) { ++ return low ++ } ++ ++ high := "s" + strconv.Itoa(m.high.sens) ++ if m.high.cats != nil && m.high.cats.BitLen() > 0 { ++ high += ":" + bitsetToStr(m.high.cats) ++ } ++ ++ return low + "-" + high ++} ++ ++// TODO: remove these in favor of built-in min/max ++// once we stop supporting Go < 1.21. 
++func maxInt(a, b int) int { ++ if a > b { ++ return a ++ } ++ return b ++} ++ ++func minInt(a, b int) int { ++ if a < b { ++ return a ++ } ++ return b ++} ++ ++// calculateGlbLub computes the glb (greatest lower bound) and lub (least upper bound) ++// of a source and target range. ++// The glblub is calculated as the greater of the low sensitivities and ++// the lower of the high sensitivities and the and of each category bitset. ++func calculateGlbLub(sourceRange, targetRange string) (string, error) { ++ s, err := rangeStrToMLSRange(sourceRange) ++ if err != nil { ++ return "", err ++ } ++ t, err := rangeStrToMLSRange(targetRange) ++ if err != nil { ++ return "", err ++ } ++ ++ if s.high.sens < t.low.sens || t.high.sens < s.low.sens { ++ /* these ranges have no common sensitivities */ ++ return "", ErrIncomparable ++ } ++ ++ outrange := &mlsRange{low: &level{}, high: &level{}} ++ ++ /* take the greatest of the low */ ++ outrange.low.sens = maxInt(s.low.sens, t.low.sens) ++ ++ /* take the least of the high */ ++ outrange.high.sens = minInt(s.high.sens, t.high.sens) ++ ++ /* find the intersecting categories */ ++ if s.low.cats != nil && t.low.cats != nil { ++ outrange.low.cats = new(big.Int) ++ outrange.low.cats.And(s.low.cats, t.low.cats) ++ } ++ if s.high.cats != nil && t.high.cats != nil { ++ outrange.high.cats = new(big.Int) ++ outrange.high.cats.And(s.high.cats, t.high.cats) ++ } ++ ++ return outrange.String(), nil ++} ++ ++func readWriteCon(fpath string, val string) (string, error) { ++ if fpath == "" { ++ return "", ErrEmptyPath ++ } ++ f, err := os.OpenFile(fpath, os.O_RDWR, 0) ++ if err != nil { ++ return "", err ++ } ++ defer f.Close() ++ ++ _, err = f.Write([]byte(val)) ++ if err != nil { ++ return "", err ++ } ++ ++ return readConFd(f) ++} ++ ++// peerLabel retrieves the label of the client on the other side of a socket ++func peerLabel(fd uintptr) (string, error) { ++ l, err := unix.GetsockoptString(int(fd), unix.SOL_SOCKET, unix.SO_PEERSEC) ++ if err 
!= nil { ++ return "", &os.PathError{Op: "getsockopt", Path: "fd " + strconv.Itoa(int(fd)), Err: err} ++ } ++ return l, nil ++} ++ ++// setKeyLabel takes a process label and tells the kernel to assign the ++// label to the next kernel keyring that gets created ++func setKeyLabel(label string) error { ++ // Rather than using /proc/thread-self, we want to use /proc/self to ++ // operate on the thread-group leader. ++ err := writeConSelf("attr/keycreate", label) ++ if errors.Is(err, os.ErrNotExist) { ++ return nil ++ } ++ if label == "" && errors.Is(err, os.ErrPermission) { ++ return nil ++ } ++ if errors.Is(err, unix.EACCES) && unix.Getpid() != unix.Gettid() { ++ return ErrNotTGLeader ++ } ++ return err ++} ++ ++// KeyLabel retrieves the current kernel keyring label setting for this ++// thread-group. ++func keyLabel() (string, error) { ++ // Rather than using /proc/thread-self, we want to use /proc/self to ++ // operate on the thread-group leader. ++ return readConSelf("attr/keycreate") ++} ++ ++// get returns the Context as a string ++func (c Context) get() string { ++ if l := c["level"]; l != "" { ++ return c["user"] + ":" + c["role"] + ":" + c["type"] + ":" + l ++ } ++ return c["user"] + ":" + c["role"] + ":" + c["type"] ++} ++ ++// newContext creates a new Context struct from the specified label ++func newContext(label string) (Context, error) { ++ c := make(Context) ++ ++ if len(label) != 0 { ++ con := strings.SplitN(label, ":", 4) ++ if len(con) < 3 { ++ return c, ErrInvalidLabel ++ } ++ c["user"] = con[0] ++ c["role"] = con[1] ++ c["type"] = con[2] ++ if len(con) > 3 { ++ c["level"] = con[3] ++ } ++ } ++ return c, nil ++} ++ ++// clearLabels clears all reserved labels ++func clearLabels() { ++ state.Lock() ++ state.mcsList = make(map[string]bool) ++ state.Unlock() ++} ++ ++// reserveLabel reserves the MLS/MCS level component of the specified label ++func reserveLabel(label string) { ++ if len(label) != 0 { ++ con := strings.SplitN(label, ":", 4) ++ if 
len(con) > 3 { ++ _ = mcsAdd(con[3]) ++ } ++ } ++} ++ ++func selinuxEnforcePath() string { ++ return filepath.Join(getSelinuxMountPoint(), "enforce") ++} ++ ++// isMLSEnabled checks if MLS is enabled. ++func isMLSEnabled() bool { ++ enabledB, err := os.ReadFile(filepath.Join(getSelinuxMountPoint(), "mls")) ++ if err != nil { ++ return false ++ } ++ return bytes.Equal(enabledB, []byte{'1'}) ++} ++ ++// enforceMode returns the current SELinux mode Enforcing, Permissive, Disabled ++func enforceMode() int { ++ var enforce int ++ ++ enforceB, err := os.ReadFile(selinuxEnforcePath()) ++ if err != nil { ++ return -1 ++ } ++ enforce, err = strconv.Atoi(string(enforceB)) ++ if err != nil { ++ return -1 ++ } ++ return enforce ++} ++ ++// setEnforceMode sets the current SELinux mode Enforcing, Permissive. ++// Disabled is not valid, since this needs to be set at boot time. ++func setEnforceMode(mode int) error { ++ return os.WriteFile(selinuxEnforcePath(), []byte(strconv.Itoa(mode)), 0) ++} ++ ++// defaultEnforceMode returns the systems default SELinux mode Enforcing, ++// Permissive or Disabled. Note this is just the default at boot time. ++// EnforceMode tells you the systems current mode. 
++func defaultEnforceMode() int { ++ switch readConfig(selinuxTag) { ++ case "enforcing": ++ return Enforcing ++ case "permissive": ++ return Permissive ++ } ++ return Disabled ++} ++ ++func mcsAdd(mcs string) error { ++ if mcs == "" { ++ return nil ++ } ++ state.Lock() ++ defer state.Unlock() ++ if state.mcsList[mcs] { ++ return ErrMCSAlreadyExists ++ } ++ state.mcsList[mcs] = true ++ return nil ++} ++ ++func mcsDelete(mcs string) { ++ if mcs == "" { ++ return ++ } ++ state.Lock() ++ defer state.Unlock() ++ state.mcsList[mcs] = false ++} ++ ++func intToMcs(id int, catRange uint32) string { ++ var ( ++ SETSIZE = int(catRange) ++ TIER = SETSIZE ++ ORD = id ++ ) ++ ++ if id < 1 || id > 523776 { ++ return "" ++ } ++ ++ for ORD > TIER { ++ ORD -= TIER ++ TIER-- ++ } ++ TIER = SETSIZE - TIER ++ ORD += TIER ++ return fmt.Sprintf("s0:c%d,c%d", TIER, ORD) ++} ++ ++func uniqMcs(catRange uint32) string { ++ var ( ++ n uint32 ++ c1, c2 uint32 ++ mcs string ++ ) ++ ++ for { ++ _ = binary.Read(rand.Reader, binary.LittleEndian, &n) ++ c1 = n % catRange ++ _ = binary.Read(rand.Reader, binary.LittleEndian, &n) ++ c2 = n % catRange ++ if c1 == c2 { ++ continue ++ } else if c1 > c2 { ++ c1, c2 = c2, c1 ++ } ++ mcs = fmt.Sprintf("s0:c%d,c%d", c1, c2) ++ if err := mcsAdd(mcs); err != nil { ++ continue ++ } ++ break ++ } ++ return mcs ++} ++ ++// releaseLabel un-reserves the MLS/MCS Level field of the specified label, ++// allowing it to be used by another process. 
++func releaseLabel(label string) { ++ if len(label) != 0 { ++ con := strings.SplitN(label, ":", 4) ++ if len(con) > 3 { ++ mcsDelete(con[3]) ++ } ++ } ++} ++ ++// roFileLabel returns the specified SELinux readonly file label ++func roFileLabel() string { ++ return readOnlyFileLabel ++} ++ ++func openContextFile() (*os.File, error) { ++ if f, err := os.Open(contextFile); err == nil { ++ return f, nil ++ } ++ return os.Open(filepath.Join(policyRoot(), "contexts", "lxc_contexts")) ++} ++ ++func loadLabels() { ++ labels = make(map[string]string) ++ in, err := openContextFile() ++ if err != nil { ++ return ++ } ++ defer in.Close() ++ ++ scanner := bufio.NewScanner(in) ++ ++ for scanner.Scan() { ++ line := bytes.TrimSpace(scanner.Bytes()) ++ if len(line) == 0 { ++ // Skip blank lines ++ continue ++ } ++ if line[0] == ';' || line[0] == '#' { ++ // Skip comments ++ continue ++ } ++ fields := bytes.SplitN(line, []byte{'='}, 2) ++ if len(fields) != 2 { ++ continue ++ } ++ key, val := bytes.TrimSpace(fields[0]), bytes.TrimSpace(fields[1]) ++ labels[string(key)] = string(bytes.Trim(val, `"`)) ++ } ++ ++ con, _ := NewContext(labels["file"]) ++ con["level"] = fmt.Sprintf("s0:c%d,c%d", maxCategory-2, maxCategory-1) ++ privContainerMountLabel = con.get() ++ reserveLabel(privContainerMountLabel) ++} ++ ++func label(key string) string { ++ loadLabelsOnce.Do(func() { ++ loadLabels() ++ }) ++ return labels[key] ++} ++ ++// kvmContainerLabels returns the default processLabel and mountLabel to be used ++// for kvm containers by the calling process. ++func kvmContainerLabels() (string, string) { ++ processLabel := label("kvm_process") ++ if processLabel == "" { ++ processLabel = label("process") ++ } ++ ++ return addMcs(processLabel, label("file")) ++} ++ ++// initContainerLabels returns the default processLabel and file labels to be ++// used for containers running an init system like systemd by the calling process. 
++func initContainerLabels() (string, string) { ++ processLabel := label("init_process") ++ if processLabel == "" { ++ processLabel = label("process") ++ } ++ ++ return addMcs(processLabel, label("file")) ++} ++ ++// containerLabels returns an allocated processLabel and fileLabel to be used for ++// container labeling by the calling process. ++func containerLabels() (processLabel string, fileLabel string) { ++ if !getEnabled() { ++ return "", "" ++ } ++ ++ processLabel = label("process") ++ fileLabel = label("file") ++ readOnlyFileLabel = label("ro_file") ++ ++ if processLabel == "" || fileLabel == "" { ++ return "", fileLabel ++ } ++ ++ if readOnlyFileLabel == "" { ++ readOnlyFileLabel = fileLabel ++ } ++ ++ return addMcs(processLabel, fileLabel) ++} ++ ++func addMcs(processLabel, fileLabel string) (string, string) { ++ scon, _ := NewContext(processLabel) ++ if scon["level"] != "" { ++ mcs := uniqMcs(CategoryRange) ++ scon["level"] = mcs ++ processLabel = scon.Get() ++ scon, _ = NewContext(fileLabel) ++ scon["level"] = mcs ++ fileLabel = scon.Get() ++ } ++ return processLabel, fileLabel ++} ++ ++// securityCheckContext validates that the SELinux label is understood by the kernel ++func securityCheckContext(val string) error { ++ return os.WriteFile(filepath.Join(getSelinuxMountPoint(), "context"), []byte(val), 0) ++} ++ ++// copyLevel returns a label with the MLS/MCS level from src label replaced on ++// the dest label. 
++func copyLevel(src, dest string) (string, error) { ++ if src == "" { ++ return "", nil ++ } ++ if err := SecurityCheckContext(src); err != nil { ++ return "", err ++ } ++ if err := SecurityCheckContext(dest); err != nil { ++ return "", err ++ } ++ scon, err := NewContext(src) ++ if err != nil { ++ return "", err ++ } ++ tcon, err := NewContext(dest) ++ if err != nil { ++ return "", err ++ } ++ mcsDelete(tcon["level"]) ++ _ = mcsAdd(scon["level"]) ++ tcon["level"] = scon["level"] ++ return tcon.Get(), nil ++} ++ ++// chcon changes the fpath file object to the SELinux label. ++// If fpath is a directory and recurse is true, then chcon walks the ++// directory tree setting the label. ++func chcon(fpath string, label string, recurse bool) error { ++ if fpath == "" { ++ return ErrEmptyPath ++ } ++ if label == "" { ++ return nil ++ } ++ ++ excludePaths := map[string]bool{ ++ "/": true, ++ "/bin": true, ++ "/boot": true, ++ "/dev": true, ++ "/etc": true, ++ "/etc/passwd": true, ++ "/etc/pki": true, ++ "/etc/shadow": true, ++ "/home": true, ++ "/lib": true, ++ "/lib64": true, ++ "/media": true, ++ "/opt": true, ++ "/proc": true, ++ "/root": true, ++ "/run": true, ++ "/sbin": true, ++ "/srv": true, ++ "/sys": true, ++ "/tmp": true, ++ "/usr": true, ++ "/var": true, ++ "/var/lib": true, ++ "/var/log": true, ++ } ++ ++ if home := os.Getenv("HOME"); home != "" { ++ excludePaths[home] = true ++ } ++ ++ if sudoUser := os.Getenv("SUDO_USER"); sudoUser != "" { ++ if usr, err := user.Lookup(sudoUser); err == nil { ++ excludePaths[usr.HomeDir] = true ++ } ++ } ++ ++ if fpath != "/" { ++ fpath = strings.TrimSuffix(fpath, "/") ++ } ++ if excludePaths[fpath] { ++ return fmt.Errorf("SELinux relabeling of %s is not allowed", fpath) ++ } ++ ++ if !recurse { ++ err := lSetFileLabel(fpath, label) ++ if err != nil { ++ // Check if file doesn't exist, must have been removed ++ if errors.Is(err, os.ErrNotExist) { ++ return nil ++ } ++ // Check if current label is correct on disk ++ flabel, 
nerr := lFileLabel(fpath) ++ if nerr == nil && flabel == label { ++ return nil ++ } ++ // Check if file doesn't exist, must have been removed ++ if errors.Is(nerr, os.ErrNotExist) { ++ return nil ++ } ++ return err ++ } ++ return nil ++ } ++ ++ return rchcon(fpath, label) ++} ++ ++func rchcon(fpath, label string) error { //revive:disable:cognitive-complexity ++ fastMode := false ++ // If the current label matches the new label, assume ++ // other labels are correct. ++ if cLabel, err := lFileLabel(fpath); err == nil && cLabel == label { ++ fastMode = true ++ } ++ return pwalkdir.Walk(fpath, func(p string, _ fs.DirEntry, _ error) error { ++ if fastMode { ++ if cLabel, err := lFileLabel(p); err == nil && cLabel == label { ++ return nil ++ } ++ } ++ err := lSetFileLabel(p, label) ++ // Walk a file tree can race with removal, so ignore ENOENT. ++ if errors.Is(err, os.ErrNotExist) { ++ return nil ++ } ++ return err ++ }) ++} ++ ++// dupSecOpt takes an SELinux process label and returns security options that ++// can be used to set the SELinux Type and Level for future container processes. ++func dupSecOpt(src string) ([]string, error) { ++ if src == "" { ++ return nil, nil ++ } ++ con, err := NewContext(src) ++ if err != nil { ++ return nil, err ++ } ++ if con["user"] == "" || ++ con["role"] == "" || ++ con["type"] == "" { ++ return nil, nil ++ } ++ dup := []string{ ++ "user:" + con["user"], ++ "role:" + con["role"], ++ "type:" + con["type"], ++ } ++ ++ if con["level"] != "" { ++ dup = append(dup, "level:"+con["level"]) ++ } ++ ++ return dup, nil ++} ++ ++// findUserInContext scans the reader for a valid SELinux context ++// match that is verified with the verifier. Invalid contexts are ++// skipped. It returns a matched context or an empty string if no ++// match is found. If a scanner error occurs, it is returned. 
++func findUserInContext(context Context, r io.Reader, verifier func(string) error) (string, error) { ++ fromRole := context["role"] ++ fromType := context["type"] ++ scanner := bufio.NewScanner(r) ++ ++ for scanner.Scan() { ++ fromConns := strings.Fields(scanner.Text()) ++ if len(fromConns) == 0 { ++ // Skip blank lines ++ continue ++ } ++ ++ line := fromConns[0] ++ ++ if line[0] == ';' || line[0] == '#' { ++ // Skip comments ++ continue ++ } ++ ++ // user context files contexts are formatted as ++ // role_r:type_t:s0 where the user is missing. ++ lineArr := strings.SplitN(line, ":", 4) ++ // skip context with typo, or role and type do not match ++ if len(lineArr) != 3 || ++ lineArr[0] != fromRole || ++ lineArr[1] != fromType { ++ continue ++ } ++ ++ for _, cc := range fromConns[1:] { ++ toConns := strings.SplitN(cc, ":", 4) ++ if len(toConns) != 3 { ++ continue ++ } ++ ++ context["role"] = toConns[0] ++ context["type"] = toConns[1] ++ ++ outConn := context.get() ++ if err := verifier(outConn); err != nil { ++ continue ++ } ++ ++ return outConn, nil ++ } ++ } ++ if err := scanner.Err(); err != nil { ++ return "", fmt.Errorf("failed to scan for context: %w", err) ++ } ++ ++ return "", nil ++} ++ ++func getDefaultContextFromReaders(c *defaultSECtx) (string, error) { ++ if c.verifier == nil { ++ return "", ErrVerifierNil ++ } ++ ++ context, err := newContext(c.scon) ++ if err != nil { ++ return "", fmt.Errorf("failed to create label for %s: %w", c.scon, err) ++ } ++ ++ // set so the verifier validates the matched context with the provided user and level. 
++ context["user"] = c.user ++ context["level"] = c.level ++ ++ conn, err := findUserInContext(context, c.userRdr, c.verifier) ++ if err != nil { ++ return "", err ++ } ++ ++ if conn != "" { ++ return conn, nil ++ } ++ ++ conn, err = findUserInContext(context, c.defaultRdr, c.verifier) ++ if err != nil { ++ return "", err ++ } ++ ++ if conn != "" { ++ return conn, nil ++ } ++ ++ return "", fmt.Errorf("context %q not found: %w", c.scon, ErrContextMissing) ++} ++ ++func getDefaultContextWithLevel(user, level, scon string) (string, error) { ++ userPath := filepath.Join(policyRoot(), selinuxUsersDir, user) ++ fu, err := os.Open(userPath) ++ if err != nil { ++ return "", err ++ } ++ defer fu.Close() ++ ++ defaultPath := filepath.Join(policyRoot(), defaultContexts) ++ fd, err := os.Open(defaultPath) ++ if err != nil { ++ return "", err ++ } ++ defer fd.Close() ++ ++ c := defaultSECtx{ ++ user: user, ++ level: level, ++ scon: scon, ++ userRdr: fu, ++ defaultRdr: fd, ++ verifier: securityCheckContext, ++ } ++ ++ return getDefaultContextFromReaders(&c) ++} +diff --git a/internal/third_party/selinux/go-selinux/selinux_linux_test.go b/internal/third_party/selinux/go-selinux/selinux_linux_test.go +new file mode 100644 +index 00000000..71aa0b82 +--- /dev/null ++++ b/internal/third_party/selinux/go-selinux/selinux_linux_test.go +@@ -0,0 +1,711 @@ ++package selinux ++ ++import ( ++ "bufio" ++ "bytes" ++ "errors" ++ "fmt" ++ "os" ++ "path/filepath" ++ "runtime" ++ "strconv" ++ "strings" ++ "testing" ++ ++ "golang.org/x/sys/unix" ++) ++ ++func TestSetFileLabel(t *testing.T) { ++ if !GetEnabled() { ++ t.Skip("SELinux not enabled, skipping.") ++ } ++ ++ const ( ++ tmpFile = "selinux_test" ++ tmpLink = "selinux_test_link" ++ con = "system_u:object_r:bin_t:s0:c1,c2" ++ con2 = "system_u:object_r:bin_t:s0:c3,c4" ++ ) ++ ++ _ = os.Remove(tmpFile) ++ out, err := os.OpenFile(tmpFile, os.O_WRONLY|os.O_CREATE, 0) ++ if err != nil { ++ t.Fatal(err) ++ } ++ out.Close() ++ defer 
os.Remove(tmpFile) ++ ++ _ = os.Remove(tmpLink) ++ if err := os.Symlink(tmpFile, tmpLink); err != nil { ++ t.Fatal(err) ++ } ++ defer os.Remove(tmpLink) ++ ++ if err := SetFileLabel(tmpLink, con); err != nil { ++ t.Fatalf("SetFileLabel failed: %s", err) ++ } ++ filelabel, err := FileLabel(tmpLink) ++ if err != nil { ++ t.Fatalf("FileLabel failed: %s", err) ++ } ++ if filelabel != con { ++ t.Fatalf("FileLabel failed, returned %s expected %s", filelabel, con) ++ } ++ ++ // Using LfileLabel to verify that the symlink itself is not labeled. ++ linkLabel, err := LfileLabel(tmpLink) ++ if err != nil { ++ t.Fatalf("LfileLabel failed: %s", err) ++ } ++ if linkLabel == con { ++ t.Fatalf("Label on symlink should not be set, got: %q", linkLabel) ++ } ++ ++ // Use LsetFileLabel to set a label on the symlink itself. ++ if err := LsetFileLabel(tmpLink, con2); err != nil { ++ t.Fatalf("LsetFileLabel failed: %s", err) ++ } ++ filelabel, err = FileLabel(tmpFile) ++ if err != nil { ++ t.Fatalf("FileLabel failed: %s", err) ++ } ++ if filelabel != con { ++ t.Fatalf("FileLabel was updated, returned %s expected %s", filelabel, con) ++ } ++ ++ linkLabel, err = LfileLabel(tmpLink) ++ if err != nil { ++ t.Fatalf("LfileLabel failed: %s", err) ++ } ++ if linkLabel != con2 { ++ t.Fatalf("LfileLabel failed: returned %s expected %s", linkLabel, con2) ++ } ++} ++ ++func TestKVMLabels(t *testing.T) { ++ if !GetEnabled() { ++ t.Skip("SELinux not enabled, skipping.") ++ } ++ ++ plabel, flabel := KVMContainerLabels() ++ if plabel == "" { ++ t.Log("Failed to read kvm label") ++ } ++ t.Log(plabel) ++ t.Log(flabel) ++ if _, err := CanonicalizeContext(plabel); err != nil { ++ t.Fatal(err) ++ } ++ if _, err := CanonicalizeContext(flabel); err != nil { ++ t.Fatal(err) ++ } ++ ++ ReleaseLabel(plabel) ++} ++ ++func TestInitLabels(t *testing.T) { ++ if !GetEnabled() { ++ t.Skip("SELinux not enabled, skipping.") ++ } ++ ++ plabel, flabel := InitContainerLabels() ++ if plabel == "" { ++ t.Log("Failed to read 
init label") ++ } ++ t.Log(plabel) ++ t.Log(flabel) ++ if _, err := CanonicalizeContext(plabel); err != nil { ++ t.Fatal(err) ++ } ++ if _, err := CanonicalizeContext(flabel); err != nil { ++ t.Fatal(err) ++ } ++ ReleaseLabel(plabel) ++} ++ ++func TestDuplicateLabel(t *testing.T) { ++ secopt, err := DupSecOpt("system_u:system_r:container_t:s0:c1,c2") ++ if err != nil { ++ t.Fatalf("DupSecOpt: %v", err) ++ } ++ for _, opt := range secopt { ++ con := strings.SplitN(opt, ":", 2) ++ if con[0] == "user" { ++ if con[1] != "system_u" { ++ t.Errorf("DupSecOpt Failed user incorrect") ++ } ++ continue ++ } ++ if con[0] == "role" { ++ if con[1] != "system_r" { ++ t.Errorf("DupSecOpt Failed role incorrect") ++ } ++ continue ++ } ++ if con[0] == "type" { ++ if con[1] != "container_t" { ++ t.Errorf("DupSecOpt Failed type incorrect") ++ } ++ continue ++ } ++ if con[0] == "level" { ++ if con[1] != "s0:c1,c2" { ++ t.Errorf("DupSecOpt Failed level incorrect") ++ } ++ continue ++ } ++ t.Errorf("DupSecOpt failed: invalid field %q", con[0]) ++ } ++ secopt = DisableSecOpt() ++ if secopt[0] != "disable" { ++ t.Errorf(`DisableSecOpt failed: want "disable", got %q`, secopt[0]) ++ } ++} ++ ++func TestSELinuxNoLevel(t *testing.T) { ++ if !GetEnabled() { ++ t.Skip("SELinux not enabled, skipping.") ++ } ++ ++ tlabel := "system_u:system_r:container_t" ++ dup, err := DupSecOpt(tlabel) ++ if err != nil { ++ t.Fatal(err) ++ } ++ ++ if len(dup) != 3 { ++ t.Errorf("DupSecOpt failed on non mls label: want 3, got %d", len(dup)) ++ } ++ con, err := NewContext(tlabel) ++ if err != nil { ++ t.Fatal(err) ++ } ++ if con.Get() != tlabel { ++ t.Errorf("NewContext and con.Get() failed on non mls label: want %q, got %q", tlabel, con.Get()) ++ } ++} ++ ++func TestSocketLabel(t *testing.T) { ++ if !GetEnabled() { ++ t.Skip("SELinux not enabled, skipping.") ++ } ++ ++ // Ensure the thread stays the same for duration of the test. 
++ // Otherwise Go runtime can switch this to a different thread, ++ // which results in EACCES in call to SetSocketLabel. ++ runtime.LockOSThread() ++ defer runtime.UnlockOSThread() ++ ++ label := "system_u:object_r:container_t:s0:c1,c2" ++ if err := SetSocketLabel(label); err != nil { ++ t.Fatal(err) ++ } ++ nlabel, err := SocketLabel() ++ if err != nil { ++ t.Fatal(err) ++ } ++ if label != nlabel { ++ t.Errorf("SocketLabel %s != %s", nlabel, label) ++ } ++} ++ ++func TestKeyLabel(t *testing.T) { ++ if !GetEnabled() { ++ t.Skip("SELinux not enabled, skipping.") ++ } ++ ++ // Ensure the thread stays the same for duration of the test. ++ // Otherwise Go runtime can switch this to a different thread, ++ // which results in EACCES in call to SetKeyLabel. ++ runtime.LockOSThread() ++ defer runtime.UnlockOSThread() ++ ++ if unix.Getpid() != unix.Gettid() { ++ t.Skip(ErrNotTGLeader) ++ } ++ ++ label := "system_u:object_r:container_t:s0:c1,c2" ++ if err := SetKeyLabel(label); err != nil { ++ t.Fatal(err) ++ } ++ nlabel, err := KeyLabel() ++ if err != nil { ++ t.Fatal(err) ++ } ++ if label != nlabel { ++ t.Errorf("KeyLabel: want %q, got %q", label, nlabel) ++ } ++} ++ ++func BenchmarkContextGet(b *testing.B) { ++ ctx, err := NewContext("system_u:object_r:container_file_t:s0:c1022,c1023") ++ if err != nil { ++ b.Fatal(err) ++ } ++ str := "" ++ for i := 0; i < b.N; i++ { ++ str = ctx.get() ++ } ++ b.Log(str) ++} ++ ++func TestSELinux(t *testing.T) { ++ if !GetEnabled() { ++ t.Skip("SELinux not enabled, skipping.") ++ } ++ ++ // Ensure the thread stays the same for duration of the test. ++ // Otherwise Go runtime can switch this to a different thread, ++ // which results in EACCES in call to SetFSCreateLabel. 
++ runtime.LockOSThread() ++ defer runtime.UnlockOSThread() ++ ++ var ( ++ err error ++ plabel, flabel string ++ ) ++ ++ plabel, flabel = ContainerLabels() ++ t.Log(plabel) ++ t.Log(flabel) ++ plabel, flabel = ContainerLabels() ++ t.Log(plabel) ++ t.Log(flabel) ++ ReleaseLabel(plabel) ++ ++ plabel, flabel = ContainerLabels() ++ t.Log(plabel) ++ t.Log(flabel) ++ ClearLabels() ++ t.Log("ClearLabels") ++ plabel, flabel = ContainerLabels() ++ t.Log(plabel) ++ t.Log(flabel) ++ ReleaseLabel(plabel) ++ ++ pid := os.Getpid() ++ t.Logf("PID:%d MCS:%s", pid, intToMcs(pid, 1023)) ++ err = SetFSCreateLabel("unconfined_u:unconfined_r:unconfined_t:s0") ++ if err != nil { ++ t.Fatal("SetFSCreateLabel failed:", err) ++ } ++ t.Log(FSCreateLabel()) ++ err = SetFSCreateLabel("") ++ if err != nil { ++ t.Fatal("SetFSCreateLabel failed:", err) ++ } ++ t.Log(FSCreateLabel()) ++ t.Log(PidLabel(1)) ++} ++ ++func TestSetEnforceMode(t *testing.T) { ++ if !GetEnabled() { ++ t.Skip("SELinux not enabled, skipping.") ++ } ++ if os.Geteuid() != 0 { ++ t.Skip("root required, skipping") ++ } ++ ++ t.Log("Enforcing Mode:", EnforceMode()) ++ mode := DefaultEnforceMode() ++ t.Log("Default Enforce Mode:", mode) ++ defer func() { ++ _ = SetEnforceMode(mode) ++ }() ++ ++ if err := SetEnforceMode(Enforcing); err != nil { ++ t.Fatalf("setting selinux mode to enforcing failed: %v", err) ++ } ++ if err := SetEnforceMode(Permissive); err != nil { ++ t.Fatalf("setting selinux mode to permissive failed: %v", err) ++ } ++} ++ ++func TestCanonicalizeContext(t *testing.T) { ++ if !GetEnabled() { ++ t.Skip("SELinux not enabled, skipping.") ++ } ++ ++ con := "system_u:object_r:bin_t:s0:c1,c2,c3" ++ checkcon := "system_u:object_r:bin_t:s0:c1.c3" ++ newcon, err := CanonicalizeContext(con) ++ if err != nil { ++ t.Fatal(err) ++ } ++ if newcon != checkcon { ++ t.Fatalf("CanonicalizeContext(%s) returned %s expected %s", con, newcon, checkcon) ++ } ++ con = "system_u:object_r:bin_t:s0:c5,c2" ++ checkcon = 
"system_u:object_r:bin_t:s0:c2,c5" ++ newcon, err = CanonicalizeContext(con) ++ if err != nil { ++ t.Fatal(err) ++ } ++ if newcon != checkcon { ++ t.Fatalf("CanonicalizeContext(%s) returned %s expected %s", con, newcon, checkcon) ++ } ++} ++ ++func TestFindSELinuxfsInMountinfo(t *testing.T) { ++ //nolint:dupword // ignore duplicate words (sysfs sysfs) ++ const mountinfo = `18 62 0:17 / /sys rw,nosuid,nodev,noexec,relatime shared:6 - sysfs sysfs rw,seclabel ++19 62 0:3 / /proc rw,nosuid,nodev,noexec,relatime shared:5 - proc proc rw ++20 62 0:5 / /dev rw,nosuid shared:2 - devtmpfs devtmpfs rw,seclabel,size=3995472k,nr_inodes=998868,mode=755 ++21 18 0:16 / /sys/kernel/security rw,nosuid,nodev,noexec,relatime shared:7 - securityfs securityfs rw ++22 20 0:18 / /dev/shm rw,nosuid,nodev shared:3 - tmpfs tmpfs rw,seclabel ++23 20 0:11 / /dev/pts rw,nosuid,noexec,relatime shared:4 - devpts devpts rw,seclabel,gid=5,mode=620,ptmxmode=000 ++24 62 0:19 / /run rw,nosuid,nodev shared:23 - tmpfs tmpfs rw,seclabel,mode=755 ++25 18 0:20 / /sys/fs/cgroup ro,nosuid,nodev,noexec shared:8 - tmpfs tmpfs ro,seclabel,mode=755 ++26 25 0:21 / /sys/fs/cgroup/systemd rw,nosuid,nodev,noexec,relatime shared:9 - cgroup cgroup rw,xattr,release_agent=/usr/lib/systemd/systemd-cgroups-agent,name=systemd ++27 18 0:22 / /sys/fs/pstore rw,nosuid,nodev,noexec,relatime shared:20 - pstore pstore rw ++28 25 0:23 / /sys/fs/cgroup/perf_event rw,nosuid,nodev,noexec,relatime shared:10 - cgroup cgroup rw,perf_event ++29 25 0:24 / /sys/fs/cgroup/devices rw,nosuid,nodev,noexec,relatime shared:11 - cgroup cgroup rw,devices ++30 25 0:25 / /sys/fs/cgroup/cpu,cpuacct rw,nosuid,nodev,noexec,relatime shared:12 - cgroup cgroup rw,cpuacct,cpu ++31 25 0:26 / /sys/fs/cgroup/freezer rw,nosuid,nodev,noexec,relatime shared:13 - cgroup cgroup rw,freezer ++32 25 0:27 / /sys/fs/cgroup/net_cls,net_prio rw,nosuid,nodev,noexec,relatime shared:14 - cgroup cgroup rw,net_prio,net_cls ++33 25 0:28 / /sys/fs/cgroup/cpuset 
rw,nosuid,nodev,noexec,relatime shared:15 - cgroup cgroup rw,cpuset ++34 25 0:29 / /sys/fs/cgroup/memory rw,nosuid,nodev,noexec,relatime shared:16 - cgroup cgroup rw,memory ++35 25 0:30 / /sys/fs/cgroup/pids rw,nosuid,nodev,noexec,relatime shared:17 - cgroup cgroup rw,pids ++36 25 0:31 / /sys/fs/cgroup/hugetlb rw,nosuid,nodev,noexec,relatime shared:18 - cgroup cgroup rw,hugetlb ++37 25 0:32 / /sys/fs/cgroup/blkio rw,nosuid,nodev,noexec,relatime shared:19 - cgroup cgroup rw,blkio ++59 18 0:33 / /sys/kernel/config rw,relatime shared:21 - configfs configfs rw ++62 1 253:1 / / rw,relatime shared:1 - ext4 /dev/vda1 rw,seclabel,data=ordered ++38 18 0:15 / /sys/fs/selinux rw,relatime shared:22 - selinuxfs selinuxfs rw ++39 19 0:35 / /proc/sys/fs/binfmt_misc rw,relatime shared:24 - autofs systemd-1 rw,fd=29,pgrp=1,timeout=0,minproto=5,maxproto=5,direct,pipe_ino=11601 ++40 20 0:36 / /dev/hugepages rw,relatime shared:25 - hugetlbfs hugetlbfs rw,seclabel ++41 20 0:14 / /dev/mqueue rw,relatime shared:26 - mqueue mqueue rw,seclabel ++42 18 0:6 / /sys/kernel/debug rw,relatime shared:27 - debugfs debugfs rw ++112 62 253:1 /var/lib/docker/plugins /var/lib/docker/plugins rw,relatime - ext4 /dev/vda1 rw,seclabel,data=ordered ++115 62 253:1 /var/lib/docker/overlay2 /var/lib/docker/overlay2 rw,relatime - ext4 /dev/vda1 rw,seclabel,data=ordered ++118 62 7:0 / /root/mnt rw,relatime shared:66 - ext4 /dev/loop0 rw,seclabel,data=ordered ++121 115 0:38 / /var/lib/docker/overlay2/8cdbabf81bc89b14ea54eaf418c1922068f06917fff57e184aa26541ff291073/merged rw,relatime - overlay overlay rw,seclabel,lowerdir=/var/lib/docker/overlay2/l/CPD4XI7UD4GGTGSJVPQSHWZKTK:/var/lib/docker/overlay2/l/NQKORR3IS7KNQDER35AZECLH4Z,upperdir=/var/lib/docker/overlay2/8cdbabf81bc89b14ea54eaf418c1922068f06917fff57e184aa26541ff291073/diff,workdir=/var/lib/docker/overlay2/8cdbabf81bc89b14ea54eaf418c1922068f06917fff57e184aa26541ff291073/work ++125 62 0:39 / 
/var/lib/docker/containers/5e3fce422957c291a5b502c2cf33d512fc1fcac424e4113136c808360e5b7215/shm rw,nosuid,nodev,noexec,relatime shared:68 - tmpfs shm rw,seclabel,size=65536k ++186 24 0:3 / /run/docker/netns/0a08e7496c6d rw,nosuid,nodev,noexec,relatime shared:5 - proc proc rw ++130 62 0:15 / /root/chroot/selinux rw,relatime shared:22 - selinuxfs selinuxfs rw ++109 24 0:37 / /run/user/0 rw,nosuid,nodev,relatime shared:62 - tmpfs tmpfs rw,seclabel,size=801032k,mode=700 ++` ++ s := bufio.NewScanner(bytes.NewBuffer([]byte(mountinfo))) ++ for _, expected := range []string{"/sys/fs/selinux", "/root/chroot/selinux", ""} { ++ mnt := findSELinuxfsMount(s) ++ t.Logf("found %q", mnt) ++ if mnt != expected { ++ t.Fatalf("expected %q, got %q", expected, mnt) ++ } ++ } ++} ++ ++func TestSecurityCheckContext(t *testing.T) { ++ if !GetEnabled() { ++ t.Skip("SELinux not enabled, skipping.") ++ } ++ ++ // check with valid context ++ context, err := CurrentLabel() ++ if err != nil { ++ t.Fatalf("CurrentLabel() error: %v", err) ++ } ++ if context != "" { ++ t.Logf("SecurityCheckContext(%q)", context) ++ err = SecurityCheckContext(context) ++ if err != nil { ++ t.Errorf("SecurityCheckContext(%q) error: %v", context, err) ++ } ++ } ++ ++ context = "not-syntactically-valid" ++ err = SecurityCheckContext(context) ++ if err == nil { ++ t.Errorf("SecurityCheckContext(%q) succeeded, expected to fail", context) ++ } ++} ++ ++func TestClassIndex(t *testing.T) { ++ if !GetEnabled() { ++ t.Skip("SELinux not enabled, skipping.") ++ } ++ ++ idx, err := ClassIndex("process") ++ if err != nil { ++ t.Errorf("Classindex error: %v", err) ++ } ++ // Every known policy has process as index 2, but it isn't guaranteed ++ if idx != 2 { ++ t.Errorf("ClassIndex unexpected answer %d, possibly not reference policy", idx) ++ } ++ ++ _, err = ClassIndex("foobar") ++ if err == nil { ++ t.Errorf("ClassIndex(\"foobar\") succeeded, expected to fail:") ++ } ++} ++ ++func TestComputeCreateContext(t *testing.T) { ++ if 
!GetEnabled() { ++ t.Skip("SELinux not enabled, skipping.") ++ } ++ ++ // This may or may not be in the loaded policy but any refpolicy based policy should have it ++ init := "system_u:system_r:init_t:s0" ++ tmp := "system_u:object_r:tmp_t:s0" ++ file := "file" ++ t.Logf("ComputeCreateContext(%s, %s, %s)", init, tmp, file) ++ context, err := ComputeCreateContext(init, tmp, file) ++ if err != nil { ++ t.Errorf("ComputeCreateContext error: %v", err) ++ } ++ if context != "system_u:object_r:init_tmp_t:s0" { ++ t.Errorf("ComputeCreateContext unexpected answer %s, possibly not reference policy", context) ++ } ++ ++ badcon := "badcon" ++ process := "process" ++ // Test to ensure that a bad context returns an error ++ t.Logf("ComputeCreateContext(%s, %s, %s)", badcon, tmp, process) ++ _, err = ComputeCreateContext(badcon, tmp, process) ++ if err == nil { ++ t.Errorf("ComputeCreateContext(%s, %s, %s) succeeded, expected failure", badcon, tmp, process) ++ } ++} ++ ++func TestGlbLub(t *testing.T) { ++ tests := []struct { ++ expectedErr error ++ sourceRange string ++ targetRange string ++ expectedRange string ++ }{ ++ { ++ sourceRange: "s0:c0.c100-s10:c0.c150", ++ targetRange: "s5:c50.c100-s15:c0.c149", ++ expectedRange: "s5:c50.c100-s10:c0.c149", ++ }, ++ { ++ sourceRange: "s5:c50.c100-s15:c0.c149", ++ targetRange: "s0:c0.c100-s10:c0.c150", ++ expectedRange: "s5:c50.c100-s10:c0.c149", ++ }, ++ { ++ sourceRange: "s0:c0.c100-s10:c0.c150", ++ targetRange: "s0", ++ expectedRange: "s0", ++ }, ++ { ++ sourceRange: "s6:c0.c1023", ++ targetRange: "s6:c0,c2,c11,c201.c429,c431.c511", ++ expectedRange: "s6:c0,c2,c11,c201.c429,c431.c511", ++ }, ++ { ++ sourceRange: "s0-s15:c0.c1023", ++ targetRange: "s6:c0,c2,c11,c201.c429,c431.c511", ++ expectedRange: "s6-s6:c0,c2,c11,c201.c429,c431.c511", ++ }, ++ { ++ sourceRange: "s0:c0.c100,c125,c140,c150-s10", ++ targetRange: "s4:c0.c50,c140", ++ expectedRange: "s4:c0.c50,c140-s4", ++ }, ++ { ++ sourceRange: 
"s5:c512.c550,c552.c1023-s5:c0.c550,c552.c1023", ++ targetRange: "s5:c512.c550,c553.c1023-s5:c0,c1,c4,c5,c6,c512.c550,c553.c1023", ++ expectedRange: "s5:c512.c550,c553.c1023-s5:c0,c1,c4.c6,c512.c550,c553.c1023", ++ }, ++ { ++ sourceRange: "s5:c512.c540,c542,c543,c552.c1023-s5:c0.c550,c552.c1023", ++ targetRange: "s5:c512.c550,c553.c1023-s5:c0,c1,c4,c5,c6,c512.c550,c553.c1023", ++ expectedRange: "s5:c512.c540,c542,c543,c553.c1023-s5:c0,c1,c4.c6,c512.c550,c553.c1023", ++ }, ++ { ++ sourceRange: "s5:c50.c100-s15:c0.c149", ++ targetRange: "s5:c512.c550,c552.c1023-s5:c0.c550,c552.c1023", ++ expectedRange: "s5-s5:c0.c149", ++ }, ++ { ++ sourceRange: "s5-s15", ++ targetRange: "s6-s7", ++ expectedRange: "s6-s7", ++ }, ++ { ++ sourceRange: "s5:c50.c100-s15:c0.c149", ++ targetRange: "s4-s4:c0.c1023", ++ expectedErr: ErrIncomparable, ++ }, ++ { ++ sourceRange: "s4-s4:c0.c1023", ++ targetRange: "s5:c50.c100-s15:c0.c149", ++ expectedErr: ErrIncomparable, ++ }, ++ { ++ sourceRange: "s4-s4:c0.c1023.c10000", ++ targetRange: "s5:c50.c100-s15:c0.c149", ++ expectedErr: strconv.ErrSyntax, ++ }, ++ { ++ sourceRange: "s4-s4:c0.c1023.c10000-s4", ++ targetRange: "s5:c50.c100-s15:c0.c149-s5", ++ expectedErr: strconv.ErrSyntax, ++ }, ++ { ++ sourceRange: "4-4", ++ targetRange: "s5:c50.c100-s15:c0.c149", ++ expectedErr: ErrLevelSyntax, ++ }, ++ { ++ sourceRange: "t4-t4", ++ targetRange: "s5:c50.c100-s15:c0.c149", ++ expectedErr: ErrLevelSyntax, ++ }, ++ { ++ sourceRange: "s5:x50.x100-s15:c0.c149", ++ targetRange: "s5:c50.c100-s15:c0.c149", ++ expectedErr: ErrLevelSyntax, ++ }, ++ } ++ ++ for _, tt := range tests { ++ got, err := CalculateGlbLub(tt.sourceRange, tt.targetRange) ++ if !errors.Is(err, tt.expectedErr) { ++ // Go 1.13 strconv errors are not unwrappable, ++ // so do that manually. ++ // TODO remove this once we stop supporting Go 1.13. 
++ var numErr *strconv.NumError ++ if errors.As(err, &numErr) && numErr.Err == tt.expectedErr { //nolint:errorlint // see above ++ continue ++ } ++ t.Fatalf("want %q got %q: src: %q tgt: %q", tt.expectedErr, err, tt.sourceRange, tt.targetRange) ++ } ++ ++ if got != tt.expectedRange { ++ t.Errorf("want %q got %q", tt.expectedRange, got) ++ } ++ } ++} ++ ++func TestContextWithLevel(t *testing.T) { ++ want := "bob:sysadm_r:sysadm_t:SystemLow-SystemHigh" ++ ++ goodDefaultBuff := ` ++foo_r:foo_t:s0 sysadm_r:sysadm_t:s0 ++staff_r:staff_t:s0 baz_r:baz_t:s0 sysadm_r:sysadm_t:s0 ++` ++ ++ verifier := func(con string) error { ++ if con != want { ++ return fmt.Errorf("invalid context %s", con) ++ } ++ ++ return nil ++ } ++ ++ tests := []struct { ++ name, userBuff, defaultBuff string ++ }{ ++ { ++ name: "match exists in user context file", ++ userBuff: `# COMMENT ++foo_r:foo_t:s0 sysadm_r:sysadm_t:s0 ++ ++staff_r:staff_t:s0 baz_r:baz_t:s0 sysadm_r:sysadm_t:s0 ++`, ++ defaultBuff: goodDefaultBuff, ++ }, ++ { ++ name: "match exists in default context file, but not in user file", ++ userBuff: `# COMMENT ++foo_r:foo_t:s0 sysadm_r:sysadm_t:s0 ++fake_r:fake_t:s0 baz_r:baz_t:s0 sysadm_r:sysadm_t:s0 ++`, ++ defaultBuff: goodDefaultBuff, ++ }, ++ } ++ ++ for _, tt := range tests { ++ t.Run(tt.name, func(t *testing.T) { ++ c := defaultSECtx{ ++ user: "bob", ++ level: "SystemLow-SystemHigh", ++ scon: "system_u:staff_r:staff_t:s0", ++ userRdr: bytes.NewBufferString(tt.userBuff), ++ defaultRdr: bytes.NewBufferString(tt.defaultBuff), ++ verifier: verifier, ++ } ++ ++ got, err := getDefaultContextFromReaders(&c) ++ if err != nil { ++ t.Fatalf("err should not exist but is: %v", err) ++ } ++ ++ if got != want { ++ t.Fatalf("got context: %q but expected %q", got, want) ++ } ++ }) ++ } ++ ++ t.Run("no match in user or default context files", func(t *testing.T) { ++ badUserBuff := "" ++ ++ badDefaultBuff := ` ++ foo_r:foo_t:s0 sysadm_r:sysadm_t:s0 ++ dne_r:dne_t:s0 baz_r:baz_t:s0 
sysadm_r:sysadm_t:s0 ++ ` ++ c := defaultSECtx{ ++ user: "bob", ++ level: "SystemLow-SystemHigh", ++ scon: "system_u:staff_r:staff_t:s0", ++ userRdr: bytes.NewBufferString(badUserBuff), ++ defaultRdr: bytes.NewBufferString(badDefaultBuff), ++ verifier: verifier, ++ } ++ ++ _, err := getDefaultContextFromReaders(&c) ++ if err == nil { ++ t.Fatalf("err was expected") ++ } ++ }) ++} ++ ++func BenchmarkChcon(b *testing.B) { ++ file, err := filepath.Abs(os.Args[0]) ++ if err != nil { ++ b.Fatalf("filepath.Abs: %v", err) ++ } ++ dir := filepath.Dir(file) ++ con, err := FileLabel(file) ++ if err != nil { ++ b.Fatalf("FileLabel(%q): %v", file, err) ++ } ++ b.Logf("Chcon(%q, %q)", dir, con) ++ b.ResetTimer() ++ for n := 0; n < b.N; n++ { ++ if err := Chcon(dir, con, true); err != nil { ++ b.Fatal(err) ++ } ++ } ++} ++ ++func BenchmarkCurrentLabel(b *testing.B) { ++ var ( ++ l string ++ err error ++ ) ++ for n := 0; n < b.N; n++ { ++ l, err = CurrentLabel() ++ if err != nil { ++ b.Fatal(err) ++ } ++ } ++ b.Log(l) ++} ++ ++func BenchmarkReadConfig(b *testing.B) { ++ str := "" ++ for n := 0; n < b.N; n++ { ++ str = readConfig(selinuxTypeTag) ++ } ++ b.Log(str) ++} ++ ++func BenchmarkLoadLabels(b *testing.B) { ++ for n := 0; n < b.N; n++ { ++ loadLabels() ++ } ++} +diff --git a/internal/third_party/selinux/go-selinux/selinux_stub.go b/internal/third_party/selinux/go-selinux/selinux_stub.go +new file mode 100644 +index 00000000..26792123 +--- /dev/null ++++ b/internal/third_party/selinux/go-selinux/selinux_stub.go +@@ -0,0 +1,159 @@ ++//go:build !linux ++// +build !linux ++ ++package selinux ++ ++func attrPath(string) string { ++ return "" ++} ++ ++func readConThreadSelf(string) (string, error) { ++ return "", nil ++} ++ ++func writeConThreadSelf(string, string) error { ++ return nil ++} ++ ++func setDisabled() {} ++ ++func getEnabled() bool { ++ return false ++} ++ ++func classIndex(string) (int, error) { ++ return -1, nil ++} ++ ++func setFileLabel(string, string) error { ++ 
return nil ++} ++ ++func lSetFileLabel(string, string) error { ++ return nil ++} ++ ++func fileLabel(string) (string, error) { ++ return "", nil ++} ++ ++func lFileLabel(string) (string, error) { ++ return "", nil ++} ++ ++func setFSCreateLabel(string) error { ++ return nil ++} ++ ++func fsCreateLabel() (string, error) { ++ return "", nil ++} ++ ++func currentLabel() (string, error) { ++ return "", nil ++} ++ ++func pidLabel(int) (string, error) { ++ return "", nil ++} ++ ++func execLabel() (string, error) { ++ return "", nil ++} ++ ++func canonicalizeContext(string) (string, error) { ++ return "", nil ++} ++ ++func computeCreateContext(string, string, string) (string, error) { ++ return "", nil ++} ++ ++func calculateGlbLub(string, string) (string, error) { ++ return "", nil ++} ++ ++func peerLabel(uintptr) (string, error) { ++ return "", nil ++} ++ ++func setKeyLabel(string) error { ++ return nil ++} ++ ++func keyLabel() (string, error) { ++ return "", nil ++} ++ ++func (c Context) get() string { ++ return "" ++} ++ ++func newContext(string) (Context, error) { ++ return Context{}, nil ++} ++ ++func clearLabels() { ++} ++ ++func reserveLabel(string) { ++} ++ ++func isMLSEnabled() bool { ++ return false ++} ++ ++func enforceMode() int { ++ return Disabled ++} ++ ++func setEnforceMode(int) error { ++ return nil ++} ++ ++func defaultEnforceMode() int { ++ return Disabled ++} ++ ++func releaseLabel(string) { ++} ++ ++func roFileLabel() string { ++ return "" ++} ++ ++func kvmContainerLabels() (string, string) { ++ return "", "" ++} ++ ++func initContainerLabels() (string, string) { ++ return "", "" ++} ++ ++func containerLabels() (string, string) { ++ return "", "" ++} ++ ++func securityCheckContext(string) error { ++ return nil ++} ++ ++func copyLevel(string, string) (string, error) { ++ return "", nil ++} ++ ++func chcon(string, string, bool) error { ++ return nil ++} ++ ++func dupSecOpt(string) ([]string, error) { ++ return nil, nil ++} ++ ++func 
getDefaultContextWithLevel(string, string, string) (string, error) { ++ return "", nil ++} ++ ++func label(_ string) string { ++ return "" ++} +diff --git a/internal/third_party/selinux/go-selinux/selinux_stub_test.go b/internal/third_party/selinux/go-selinux/selinux_stub_test.go +new file mode 100644 +index 00000000..19ea636a +--- /dev/null ++++ b/internal/third_party/selinux/go-selinux/selinux_stub_test.go +@@ -0,0 +1,127 @@ ++//go:build !linux ++// +build !linux ++ ++package selinux ++ ++import ( ++ "testing" ++) ++ ++const testLabel = "foobar" ++ ++func TestSELinuxStubs(t *testing.T) { ++ if GetEnabled() { ++ t.Error("SELinux enabled on non-linux.") ++ } ++ ++ tmpDir := t.TempDir() ++ if _, err := FileLabel(tmpDir); err != nil { ++ t.Error(err) ++ } ++ ++ if err := SetFileLabel(tmpDir, testLabel); err != nil { ++ t.Error(err) ++ } ++ ++ if _, err := LfileLabel(tmpDir); err != nil { ++ t.Error(err) ++ } ++ if err := LsetFileLabel(tmpDir, testLabel); err != nil { ++ t.Error(err) ++ } ++ ++ if err := SetFSCreateLabel(testLabel); err != nil { ++ t.Error(err) ++ } ++ ++ if _, err := FSCreateLabel(); err != nil { ++ t.Error(err) ++ } ++ if _, err := CurrentLabel(); err != nil { ++ t.Error(err) ++ } ++ ++ if _, err := PidLabel(0); err != nil { ++ t.Error(err) ++ } ++ ++ ClearLabels() ++ ++ ReserveLabel(testLabel) ++ ReleaseLabel(testLabel) ++ if _, err := DupSecOpt(testLabel); err != nil { ++ t.Error(err) ++ } ++ if v := DisableSecOpt(); len(v) != 1 || v[0] != "disable" { ++ t.Errorf(`expected "disabled", got %v`, v) ++ } ++ SetDisabled() ++ if enabled := GetEnabled(); enabled { ++ t.Error("Should not be enabled") ++ } ++ if err := SetExecLabel(testLabel); err != nil { ++ t.Error(err) ++ } ++ if err := SetTaskLabel(testLabel); err != nil { ++ t.Error(err) ++ } ++ if _, err := ExecLabel(); err != nil { ++ t.Error(err) ++ } ++ if _, err := CanonicalizeContext(testLabel); err != nil { ++ t.Error(err) ++ } ++ if _, err := ComputeCreateContext("foo", "bar", testLabel); err 
!= nil { ++ t.Error(err) ++ } ++ if err := SetSocketLabel(testLabel); err != nil { ++ t.Error(err) ++ } ++ if _, err := ClassIndex(testLabel); err != nil { ++ t.Error(err) ++ } ++ if _, err := SocketLabel(); err != nil { ++ t.Error(err) ++ } ++ if _, err := PeerLabel(0); err != nil { ++ t.Error(err) ++ } ++ if err := SetKeyLabel(testLabel); err != nil { ++ t.Error(err) ++ } ++ if _, err := KeyLabel(); err != nil { ++ t.Error(err) ++ } ++ if err := SetExecLabel(testLabel); err != nil { ++ t.Error(err) ++ } ++ if _, err := ExecLabel(); err != nil { ++ t.Error(err) ++ } ++ con, err := NewContext(testLabel) ++ if err != nil { ++ t.Error(err) ++ } ++ con.Get() ++ if err = SetEnforceMode(1); err != nil { ++ t.Error(err) ++ } ++ if v := DefaultEnforceMode(); v != Disabled { ++ t.Errorf("expected %d, got %d", Disabled, v) ++ } ++ if v := EnforceMode(); v != Disabled { ++ t.Errorf("expected %d, got %d", Disabled, v) ++ } ++ if v := ROFileLabel(); v != "" { ++ t.Errorf(`expected "", got %q`, v) ++ } ++ if processLbl, fileLbl := ContainerLabels(); processLbl != "" || fileLbl != "" { ++ t.Errorf(`expected fileLbl="", fileLbl="" got processLbl=%q, fileLbl=%q`, processLbl, fileLbl) ++ } ++ if err = SecurityCheckContext(testLabel); err != nil { ++ t.Error(err) ++ } ++ if _, err = CopyLevel("foo", "bar"); err != nil { ++ t.Error(err) ++ } ++} +diff --git a/internal/third_party/selinux/go-selinux/xattrs_linux.go b/internal/third_party/selinux/go-selinux/xattrs_linux.go +new file mode 100644 +index 00000000..559c8510 +--- /dev/null ++++ b/internal/third_party/selinux/go-selinux/xattrs_linux.go +@@ -0,0 +1,71 @@ ++package selinux ++ ++import ( ++ "golang.org/x/sys/unix" ++) ++ ++// lgetxattr returns a []byte slice containing the value of ++// an extended attribute attr set for path. 
++func lgetxattr(path, attr string) ([]byte, error) { ++ // Start with a 128 length byte array ++ dest := make([]byte, 128) ++ sz, errno := doLgetxattr(path, attr, dest) ++ for errno == unix.ERANGE { //nolint:errorlint // unix errors are bare ++ // Buffer too small, use zero-sized buffer to get the actual size ++ sz, errno = doLgetxattr(path, attr, []byte{}) ++ if errno != nil { ++ return nil, errno ++ } ++ ++ dest = make([]byte, sz) ++ sz, errno = doLgetxattr(path, attr, dest) ++ } ++ if errno != nil { ++ return nil, errno ++ } ++ ++ return dest[:sz], nil ++} ++ ++// doLgetxattr is a wrapper that retries on EINTR ++func doLgetxattr(path, attr string, dest []byte) (int, error) { ++ for { ++ sz, err := unix.Lgetxattr(path, attr, dest) ++ if err != unix.EINTR { ++ return sz, err ++ } ++ } ++} ++ ++// getxattr returns a []byte slice containing the value of ++// an extended attribute attr set for path. ++func getxattr(path, attr string) ([]byte, error) { ++ // Start with a 128 length byte array ++ dest := make([]byte, 128) ++ sz, errno := dogetxattr(path, attr, dest) ++ for errno == unix.ERANGE { //nolint:errorlint // unix errors are bare ++ // Buffer too small, use zero-sized buffer to get the actual size ++ sz, errno = dogetxattr(path, attr, []byte{}) ++ if errno != nil { ++ return nil, errno ++ } ++ ++ dest = make([]byte, sz) ++ sz, errno = dogetxattr(path, attr, dest) ++ } ++ if errno != nil { ++ return nil, errno ++ } ++ ++ return dest[:sz], nil ++} ++ ++// dogetxattr is a wrapper that retries on EINTR ++func dogetxattr(path, attr string, dest []byte) (int, error) { ++ for { ++ sz, err := unix.Getxattr(path, attr, dest) ++ if err != unix.EINTR { ++ return sz, err ++ } ++ } ++} +diff --git a/internal/third_party/selinux/go.mod b/internal/third_party/selinux/go.mod +new file mode 100644 +index 00000000..24d3261a +--- /dev/null ++++ b/internal/third_party/selinux/go.mod +@@ -0,0 +1,8 @@ ++module github.com/opencontainers/selinux ++ ++go 1.19 ++ ++require ( ++ 
github.com/cyphar/filepath-securejoin v0.5.0 ++ golang.org/x/sys v0.18.0 ++) +diff --git a/internal/third_party/selinux/go.sum b/internal/third_party/selinux/go.sum +new file mode 100644 +index 00000000..b9ae0987 +--- /dev/null ++++ b/internal/third_party/selinux/go.sum +@@ -0,0 +1,8 @@ ++github.com/cyphar/filepath-securejoin v0.5.0 h1:hIAhkRBMQ8nIeuVwcAoymp7MY4oherZdAxD+m0u9zaw= ++github.com/cyphar/filepath-securejoin v0.5.0/go.mod h1:Sdj7gXlvMcPZsbhwhQ33GguGLDGQL7h7bg04C/+u9jI= ++github.com/davecgh/go-spew v1.1.1 h1:vj9j/u1bqnvCEfJOwUhtlOARqs3+rkHYY13jYWTU97c= ++github.com/pmezard/go-difflib v1.0.0 h1:4DBwDE0NGyQoBHbLQYPwSUPoCMWR5BEzIk/f1lZbAQM= ++github.com/stretchr/testify v1.7.1 h1:5TQK59W5E3v0r2duFAb7P95B6hEeOyEnHRa8MjYSMTY= ++golang.org/x/sys v0.18.0 h1:DBdB3niSjOA/O0blCZBqDefyWNYveAYMNF1Wum0DYQ4= ++golang.org/x/sys v0.18.0/go.mod h1:/VUhepiaJMQUp4+oa/7Zr1D23ma6VTLIYjOOTFZPUcA= ++gopkg.in/yaml.v3 v3.0.1 h1:fxVm/GzAzEWqLHuvctI91KS9hhNmmWOoWu0XTYJS7CA= +diff --git a/internal/third_party/selinux/pkg/pwalk/README.md b/internal/third_party/selinux/pkg/pwalk/README.md +new file mode 100644 +index 00000000..a060ad36 +--- /dev/null ++++ b/internal/third_party/selinux/pkg/pwalk/README.md +@@ -0,0 +1,52 @@ ++## pwalk: parallel implementation of filepath.Walk ++ ++This is a wrapper for [filepath.Walk](https://pkg.go.dev/path/filepath?tab=doc#Walk) ++which may speed it up by calling multiple callback functions (WalkFunc) in parallel, ++utilizing goroutines. ++ ++By default, it utilizes 2\*runtime.NumCPU() goroutines for callbacks. ++This can be changed by using WalkN function which has the additional ++parameter, specifying the number of goroutines (concurrency). ++ ++### pwalk vs pwalkdir ++ ++This package is deprecated in favor of ++[pwalkdir](https://pkg.go.dev/github.com/opencontainers/selinux/pkg/pwalkdir), ++which is faster, but requires at least Go 1.16. 
++ ++### Caveats ++ ++Please note the following limitations of this code: ++ ++* Unlike filepath.Walk, the order of calls is non-deterministic; ++ ++* Only primitive error handling is supported: ++ ++ * filepath.SkipDir is not supported; ++ ++ * ErrNotExist errors from filepath.Walk are silently ignored for any path ++ except the top directory (Walk argument); any other error is returned to ++ the caller of Walk; ++ ++ * no errors are ever passed to WalkFunc; ++ ++ * once any error is returned from any WalkFunc instance, no more new calls ++ to WalkFunc are made, and the error is returned to the caller of Walk; ++ ++ * if more than one walkFunc instance will return an error, only one ++ of such errors will be propagated and returned by Walk, others ++ will be silently discarded. ++ ++### Documentation ++ ++For the official documentation, see ++https://pkg.go.dev/github.com/opencontainers/selinux/pkg/pwalk?tab=doc ++ ++### Benchmarks ++ ++For a WalkFunc that consists solely of the return statement, this ++implementation is about 10% slower than the standard library's ++filepath.Walk. ++ ++Otherwise (if a WalkFunc is doing something) this is usually faster, ++except when the WalkN(..., 1) is used. +diff --git a/internal/third_party/selinux/pkg/pwalk/pwalk.go b/internal/third_party/selinux/pkg/pwalk/pwalk.go +new file mode 100644 +index 00000000..686c8bac +--- /dev/null ++++ b/internal/third_party/selinux/pkg/pwalk/pwalk.go +@@ -0,0 +1,131 @@ ++package pwalk ++ ++import ( ++ "errors" ++ "fmt" ++ "os" ++ "path/filepath" ++ "runtime" ++ "sync" ++) ++ ++// WalkFunc is the type of the function called by Walk to visit each ++// file or directory. It is an alias for [filepath.WalkFunc]. ++// ++// Deprecated: use [github.com/opencontainers/selinux/pkg/pwalkdir] and [fs.WalkDirFunc]. ++type WalkFunc = filepath.WalkFunc ++ ++// Walk is a wrapper for filepath.Walk which can call multiple walkFn ++// in parallel, allowing to handle each item concurrently. 
A maximum of ++// twice the runtime.NumCPU() walkFn will be called at any one time. ++// If you want to change the maximum, use WalkN instead. ++// ++// The order of calls is non-deterministic. ++// ++// Note that this implementation only supports primitive error handling: ++// ++// - no errors are ever passed to walkFn; ++// ++// - once a walkFn returns any error, all further processing stops ++// and the error is returned to the caller of Walk; ++// ++// - filepath.SkipDir is not supported; ++// ++// - if more than one walkFn instance will return an error, only one ++// of such errors will be propagated and returned by Walk, others ++// will be silently discarded. ++// ++// Deprecated: use [github.com/opencontainers/selinux/pkg/pwalkdir.Walk] ++func Walk(root string, walkFn WalkFunc) error { ++ return WalkN(root, walkFn, runtime.NumCPU()*2) ++} ++ ++// WalkN is a wrapper for filepath.Walk which can call multiple walkFn ++// in parallel, allowing to handle each item concurrently. A maximum of ++// num walkFn will be called at any one time. ++// ++// Please see Walk documentation for caveats of using this function. ++// ++// Deprecated: use [github.com/opencontainers/selinux/pkg/pwalkdir.WalkN] ++func WalkN(root string, walkFn WalkFunc, num int) error { ++ // make sure limit is sensible ++ if num < 1 { ++ return fmt.Errorf("walk(%q): num must be > 0", root) ++ } ++ ++ files := make(chan *walkArgs, 2*num) ++ errCh := make(chan error, 1) // get the first error, ignore others ++ ++ // Start walking a tree asap ++ var ( ++ err error ++ wg sync.WaitGroup ++ ++ rootLen = len(root) ++ rootEntry *walkArgs ++ ) ++ wg.Add(1) ++ go func() { ++ err = filepath.Walk(root, func(p string, info os.FileInfo, err error) error { ++ if err != nil { ++ // Walking a file tree can race with removal, ++ // so ignore ENOENT, except for root. ++ // https://github.com/opencontainers/selinux/issues/199. 
++ if errors.Is(err, os.ErrNotExist) && len(p) != rootLen { ++ return nil ++ } ++ ++ close(files) ++ return err ++ } ++ if len(p) == rootLen { ++ // Root entry is processed separately below. ++ rootEntry = &walkArgs{path: p, info: &info} ++ return nil ++ } ++ // add a file to the queue unless a callback sent an error ++ select { ++ case e := <-errCh: ++ close(files) ++ return e ++ default: ++ files <- &walkArgs{path: p, info: &info} ++ return nil ++ } ++ }) ++ if err == nil { ++ close(files) ++ } ++ wg.Done() ++ }() ++ ++ wg.Add(num) ++ for i := 0; i < num; i++ { ++ go func() { ++ for file := range files { ++ if e := walkFn(file.path, *file.info, nil); e != nil { ++ select { ++ case errCh <- e: // sent ok ++ default: // buffer full ++ } ++ } ++ } ++ wg.Done() ++ }() ++ } ++ ++ wg.Wait() ++ ++ if err == nil { ++ err = walkFn(rootEntry.path, *rootEntry.info, nil) ++ } ++ ++ return err ++} ++ ++// walkArgs holds the arguments that were passed to the Walk or WalkN ++// functions. ++type walkArgs struct { ++ info *os.FileInfo ++ path string ++} +diff --git a/internal/third_party/selinux/pkg/pwalk/pwalk_test.go b/internal/third_party/selinux/pkg/pwalk/pwalk_test.go +new file mode 100644 +index 00000000..9cca3b6b +--- /dev/null ++++ b/internal/third_party/selinux/pkg/pwalk/pwalk_test.go +@@ -0,0 +1,236 @@ ++package pwalk ++ ++import ( ++ "errors" ++ "math/rand" ++ "os" ++ "path/filepath" ++ "runtime" ++ "sync/atomic" ++ "testing" ++ "time" ++) ++ ++func TestWalk(t *testing.T) { ++ var ac atomic.Uint32 ++ concurrency := runtime.NumCPU() * 2 ++ ++ dir, total := prepareTestSet(t, 3, 2, 1) ++ ++ err := WalkN(dir, ++ func(_ string, _ os.FileInfo, _ error) error { ++ ac.Add(1) ++ return nil ++ }, ++ concurrency) ++ if err != nil { ++ t.Errorf("Walk failed: %v", err) ++ } ++ count := ac.Load() ++ if count != total { ++ t.Errorf("File count mismatch: found %d, expected %d", count, total) ++ } ++ ++ t.Logf("concurrency: %d, files found: %d", concurrency, count) ++} ++ ++func 
TestWalkTopLevelErrNotExistNotIgnored(t *testing.T) { ++ if WalkN("non-existent-directory", cbEmpty, 8) == nil { ++ t.Fatal("expected ErrNotExist, got nil") ++ } ++} ++ ++// https://github.com/opencontainers/selinux/issues/199 ++func TestWalkRaceWithRemoval(t *testing.T) { ++ var ac atomic.Uint32 ++ concurrency := runtime.NumCPU() * 2 ++ // This test is still on a best-effort basis, meaning it can still pass ++ // when there is a bug in the code, but the larger the test set is, the ++ // higher the probability that this test fails (without a fix). ++ // ++ // With this set (4, 5, 6), and the fix commented out, it fails ++ // 100 out of 100 runs on my machine. ++ dir, total := prepareTestSet(t, 4, 5, 6) ++ ++ // Race walk with removal. ++ go os.RemoveAll(dir) ++ err := WalkN(dir, ++ func(_ string, _ os.FileInfo, _ error) error { ++ ac.Add(1) ++ return nil ++ }, ++ concurrency) ++ count := int(ac.Load()) ++ t.Logf("found %d of %d files", count, total) ++ if err != nil { ++ t.Fatalf("expected nil, got %v", err) ++ } ++} ++ ++func TestWalkDirManyErrors(t *testing.T) { ++ var ac atomic.Uint32 ++ ++ dir, total := prepareTestSet(t, 3, 3, 2) ++ ++ maxFiles := total / 2 ++ e42 := errors.New("42") ++ err := Walk(dir, ++ func(_ string, _ os.FileInfo, _ error) error { ++ if ac.Add(1) > maxFiles { ++ return e42 ++ } ++ return nil ++ }) ++ count := ac.Load() ++ t.Logf("found %d of %d files", count, total) ++ ++ if err == nil { ++ t.Errorf("Walk succeeded, but error is expected") ++ if count != total { ++ t.Errorf("File count mismatch: found %d, expected %d", count, total) ++ } ++ } ++} ++ ++func makeManyDirs(prefix string, levels, dirs, files int) (count uint32, err error) { ++ for d := 0; d < dirs; d++ { ++ var dir string ++ dir, err = os.MkdirTemp(prefix, "d-") ++ if err != nil { ++ return count, err ++ } ++ count++ ++ for f := 0; f < files; f++ { ++ var fi *os.File ++ fi, err = os.CreateTemp(dir, "f-") ++ if err != nil { ++ return count, err ++ } ++ _ = fi.Close() ++ count++ 
++ } ++ if levels == 0 { ++ continue ++ } ++ var c uint32 ++ if c, err = makeManyDirs(dir, levels-1, dirs, files); err != nil { ++ return count, err ++ } ++ count += c ++ } ++ ++ return count, err ++} ++ ++// prepareTestSet() creates a directory tree of shallow files, ++// to be used for testing or benchmarking. ++// ++// Total dirs: dirs^levels + dirs^(levels-1) + ... + dirs^1 ++// Total files: total_dirs * files ++func prepareTestSet(tb testing.TB, levels, dirs, files int) (dir string, total uint32) { ++ tb.Helper() ++ var err error ++ ++ dir = tb.TempDir() ++ total, err = makeManyDirs(dir, levels, dirs, files) ++ if err != nil { ++ tb.Fatal(err) ++ } ++ total++ // this dir ++ ++ return dir, total ++} ++ ++type walkerFunc func(root string, walkFn WalkFunc) error ++ ++func genWalkN(n int) walkerFunc { ++ return func(root string, walkFn WalkFunc) error { ++ return WalkN(root, walkFn, n) ++ } ++} ++ ++func BenchmarkWalk(b *testing.B) { ++ const ( ++ levels = 5 // how deep ++ dirs = 3 // dirs on each levels ++ files = 8 // files on each levels ++ ) ++ ++ benchmarks := []struct { ++ walk filepath.WalkFunc ++ name string ++ }{ ++ {name: "Empty", walk: cbEmpty}, ++ {name: "ReadFile", walk: cbReadFile}, ++ {name: "ChownChmod", walk: cbChownChmod}, ++ {name: "RandomSleep", walk: cbRandomSleep}, ++ } ++ ++ walkers := []struct { ++ walker walkerFunc ++ name string ++ }{ ++ {name: "filepath.Walk", walker: filepath.Walk}, ++ {name: "pwalk.Walk", walker: Walk}, ++ // test WalkN with various values of N ++ {name: "pwalk.Walk1", walker: genWalkN(1)}, ++ {name: "pwalk.Walk2", walker: genWalkN(2)}, ++ {name: "pwalk.Walk4", walker: genWalkN(4)}, ++ {name: "pwalk.Walk8", walker: genWalkN(8)}, ++ {name: "pwalk.Walk16", walker: genWalkN(16)}, ++ {name: "pwalk.Walk32", walker: genWalkN(32)}, ++ {name: "pwalk.Walk64", walker: genWalkN(64)}, ++ {name: "pwalk.Walk128", walker: genWalkN(128)}, ++ {name: "pwalk.Walk256", walker: genWalkN(256)}, ++ } ++ ++ dir, total := prepareTestSet(b, 
levels, dirs, files) ++ b.Logf("dataset: %d levels x %d dirs x %d files, total entries: %d", levels, dirs, files, total) ++ ++ for _, bm := range benchmarks { ++ for _, w := range walkers { ++ walker := w.walker ++ walkFn := bm.walk ++ // preheat ++ if err := w.walker(dir, bm.walk); err != nil { ++ b.Errorf("walk failed: %v", err) ++ } ++ // benchmark ++ b.Run(bm.name+"/"+w.name, func(b *testing.B) { ++ for i := 0; i < b.N; i++ { ++ if err := walker(dir, walkFn); err != nil { ++ b.Errorf("walk failed: %v", err) ++ } ++ } ++ }) ++ } ++ } ++} ++ ++func cbEmpty(_ string, _ os.FileInfo, _ error) error { ++ return nil ++} ++ ++func cbChownChmod(path string, info os.FileInfo, _ error) error { ++ _ = os.Chown(path, 0, 0) ++ mode := os.FileMode(0o644) ++ if info.Mode().IsDir() { ++ mode = os.FileMode(0o755) ++ } ++ _ = os.Chmod(path, mode) ++ ++ return nil ++} ++ ++func cbReadFile(path string, info os.FileInfo, _ error) error { ++ var err error ++ if info.Mode().IsRegular() { ++ _, err = os.ReadFile(path) ++ } ++ return err ++} ++ ++func cbRandomSleep(_ string, _ os.FileInfo, _ error) error { ++ time.Sleep(time.Duration(rand.Intn(500)) * time.Microsecond) //nolint:gosec // ignore G404: Use of weak random number generator ++ return nil ++} +diff --git a/internal/third_party/selinux/pkg/pwalkdir/README.md b/internal/third_party/selinux/pkg/pwalkdir/README.md +new file mode 100644 +index 00000000..b827e7dd +--- /dev/null ++++ b/internal/third_party/selinux/pkg/pwalkdir/README.md +@@ -0,0 +1,56 @@ ++## pwalkdir: parallel implementation of filepath.WalkDir ++ ++This is a wrapper for [filepath.WalkDir](https://pkg.go.dev/path/filepath#WalkDir) ++which may speed it up by calling multiple callback functions (WalkDirFunc) ++in parallel, utilizing goroutines. ++ ++By default, it utilizes 2\*runtime.NumCPU() goroutines for callbacks. ++This can be changed by using WalkN function which has the additional ++parameter, specifying the number of goroutines (concurrency). 
++ ++### pwalk vs pwalkdir ++ ++This package is very similar to ++[pwalk](https://pkg.go.dev/github.com/opencontainers/selinux/pkg/pwalk), ++but utilizes `filepath.WalkDir` (added in Go 1.16), which does not call stat(2) ++on every entry and is therefore faster (up to 3x, depending on usage scenario). ++ ++Users who are OK with requiring Go 1.16+ should switch to this ++implementation. ++ ++### Caveats ++ ++Please note the following limitations of this code: ++ ++* Unlike filepath.WalkDir, the order of calls is non-deterministic; ++ ++* Only primitive error handling is supported: ++ ++ * fs.SkipDir is not supported; ++ ++ * ErrNotExist errors from filepath.WalkDir are silently ignored for any path ++ except the top directory (WalkDir argument); any other error is returned to ++ the caller of WalkDir; ++ ++ * once any error is returned from any WalkDirFunc instance, no more calls ++ to WalkDirFunc are made, and the error is returned to the caller of WalkDir; ++ ++ * if more than one WalkDirFunc instance returns an error, only one ++ of such errors will be propagated to and returned by WalkDir, others ++ will be silently discarded. ++ ++### Documentation ++ ++For the official documentation, see ++https://pkg.go.dev/github.com/opencontainers/selinux/pkg/pwalkdir ++ ++### Benchmarks ++ ++For a WalkDirFunc that consists solely of the return statement, this ++implementation is about 15% slower than the standard library's ++filepath.WalkDir. ++ ++Otherwise (if a WalkDirFunc is actually doing something) this is usually ++faster, except when WalkN(..., 1) is used. Run `go test -bench .` ++to see how different operations can benefit from it, as well as how the ++level of parallelism affects the speed.
+diff --git a/internal/third_party/selinux/pkg/pwalkdir/pwalkdir.go b/internal/third_party/selinux/pkg/pwalkdir/pwalkdir.go +new file mode 100644 +index 00000000..5d2d09a2 +--- /dev/null ++++ b/internal/third_party/selinux/pkg/pwalkdir/pwalkdir.go +@@ -0,0 +1,123 @@ ++//go:build go1.16 ++// +build go1.16 ++ ++package pwalkdir ++ ++import ( ++ "errors" ++ "fmt" ++ "io/fs" ++ "path/filepath" ++ "runtime" ++ "sync" ++) ++ ++// Walk is a wrapper for filepath.WalkDir which can call multiple walkFn ++// in parallel, allowing to handle each item concurrently. A maximum of ++// twice the runtime.NumCPU() walkFn will be called at any one time. ++// If you want to change the maximum, use WalkN instead. ++// ++// The order of calls is non-deterministic. ++// ++// Note that this implementation only supports primitive error handling: ++// ++// - no errors are ever passed to walkFn; ++// ++// - once a walkFn returns any error, all further processing stops ++// and the error is returned to the caller of Walk; ++// ++// - filepath.SkipDir is not supported; ++// ++// - if more than one walkFn instance will return an error, only one ++// of such errors will be propagated and returned by Walk, others ++// will be silently discarded. ++func Walk(root string, walkFn fs.WalkDirFunc) error { ++ return WalkN(root, walkFn, runtime.NumCPU()*2) ++} ++ ++// WalkN is a wrapper for filepath.WalkDir which can call multiple walkFn ++// in parallel, allowing to handle each item concurrently. A maximum of ++// num walkFn will be called at any one time. ++// ++// Please see Walk documentation for caveats of using this function. ++func WalkN(root string, walkFn fs.WalkDirFunc, num int) error { ++ // make sure limit is sensible ++ if num < 1 { ++ return fmt.Errorf("walk(%q): num must be > 0", root) ++ } ++ ++ files := make(chan *walkArgs, 2*num) ++ errCh := make(chan error, 1) // Get the first error, ignore others. ++ ++ // Start walking a tree asap. 
++ var ( ++ err error ++ wg sync.WaitGroup ++ ++ rootLen = len(root) ++ rootEntry *walkArgs ++ ) ++ wg.Add(1) ++ go func() { ++ err = filepath.WalkDir(root, func(p string, entry fs.DirEntry, err error) error { ++ if err != nil { ++ // Walking a file tree can race with removal, ++ // so ignore ENOENT, except for root. ++ // https://github.com/opencontainers/selinux/issues/199. ++ if errors.Is(err, fs.ErrNotExist) && len(p) != rootLen { ++ return nil ++ } ++ close(files) ++ return err ++ } ++ if len(p) == rootLen { ++ // Root entry is processed separately below. ++ rootEntry = &walkArgs{path: p, entry: entry} ++ return nil ++ } ++ // Add a file to the queue unless a callback sent an error. ++ select { ++ case e := <-errCh: ++ close(files) ++ return e ++ default: ++ files <- &walkArgs{path: p, entry: entry} ++ return nil ++ } ++ }) ++ if err == nil { ++ close(files) ++ } ++ wg.Done() ++ }() ++ ++ wg.Add(num) ++ for i := 0; i < num; i++ { ++ go func() { ++ for file := range files { ++ if e := walkFn(file.path, file.entry, nil); e != nil { ++ select { ++ case errCh <- e: // sent ok ++ default: // buffer full ++ } ++ } ++ } ++ wg.Done() ++ }() ++ } ++ ++ wg.Wait() ++ ++ if err == nil { ++ err = walkFn(rootEntry.path, rootEntry.entry, nil) ++ } ++ ++ return err ++} ++ ++// walkArgs holds the arguments that were passed to the Walk or WalkN ++// functions. 
++type walkArgs struct { ++ entry fs.DirEntry ++ path string ++} +diff --git a/internal/third_party/selinux/pkg/pwalkdir/pwalkdir_test.go b/internal/third_party/selinux/pkg/pwalkdir/pwalkdir_test.go +new file mode 100644 +index 00000000..e66a80d1 +--- /dev/null ++++ b/internal/third_party/selinux/pkg/pwalkdir/pwalkdir_test.go +@@ -0,0 +1,239 @@ ++//go:build go1.16 ++// +build go1.16 ++ ++package pwalkdir ++ ++import ( ++ "errors" ++ "io/fs" ++ "math/rand" ++ "os" ++ "path/filepath" ++ "runtime" ++ "sync/atomic" ++ "testing" ++ "time" ++) ++ ++func TestWalkDir(t *testing.T) { ++ var ac atomic.Uint32 ++ concurrency := runtime.NumCPU() * 2 ++ dir, total := prepareTestSet(t, 3, 2, 1) ++ ++ err := WalkN(dir, ++ func(_ string, _ fs.DirEntry, _ error) error { ++ ac.Add(1) ++ return nil ++ }, ++ concurrency) ++ if err != nil { ++ t.Errorf("Walk failed: %v", err) ++ } ++ count := ac.Load() ++ if count != total { ++ t.Errorf("File count mismatch: found %d, expected %d", count, total) ++ } ++ ++ t.Logf("concurrency: %d, files found: %d", concurrency, count) ++} ++ ++func TestWalkDirTopLevelErrNotExistNotIgnored(t *testing.T) { ++ err := WalkN("non-existent-directory", cbEmpty, 8) ++ if err == nil { ++ t.Fatal("expected ErrNotExist, got nil") ++ } ++} ++ ++// https://github.com/opencontainers/selinux/issues/199 ++func TestWalkDirRaceWithRemoval(t *testing.T) { ++ var ac atomic.Uint32 ++ concurrency := runtime.NumCPU() * 2 ++ // This test is still on a best-effort basis, meaning it can still pass ++ // when there is a bug in the code, but the larger the test set is, the ++ // higher the probability that this test fails (without a fix). ++ // ++ // With this set (4, 5, 6), and the fix commented out, it fails ++ // about 90 out of 100 runs on my machine. ++ dir, total := prepareTestSet(t, 4, 5, 6) ++ ++ // Make walk race with removal. 
++ go os.RemoveAll(dir) ++ err := WalkN(dir, ++ func(_ string, _ fs.DirEntry, _ error) error { ++ ac.Add(1) ++ return nil ++ }, ++ concurrency) ++ count := ac.Load() ++ t.Logf("found %d of %d files", count, total) ++ if err != nil { ++ t.Fatalf("expected nil, got %v", err) ++ } ++} ++ ++func TestWalkDirManyErrors(t *testing.T) { ++ var ac atomic.Uint32 ++ dir, total := prepareTestSet(t, 3, 3, 2) ++ ++ maxFiles := total / 2 ++ e42 := errors.New("42") ++ err := Walk(dir, ++ func(_ string, _ fs.DirEntry, _ error) error { ++ if ac.Add(1) > maxFiles { ++ return e42 ++ } ++ return nil ++ }) ++ count := ac.Load() ++ t.Logf("found %d of %d files", count, total) ++ ++ if err == nil { ++ t.Error("Walk succeeded, but error is expected") ++ if count != total { ++ t.Errorf("File count mismatch: found %d, expected %d", count, total) ++ } ++ } ++} ++ ++func makeManyDirs(prefix string, levels, dirs, files int) (count uint32, err error) { ++ for d := 0; d < dirs; d++ { ++ var dir string ++ dir, err = os.MkdirTemp(prefix, "d-") ++ if err != nil { ++ return count, err ++ } ++ count++ ++ for f := 0; f < files; f++ { ++ var fi *os.File ++ fi, err = os.CreateTemp(dir, "f-") ++ if err != nil { ++ return count, err ++ } ++ fi.Close() ++ count++ ++ } ++ if levels == 0 { ++ continue ++ } ++ var c uint32 ++ if c, err = makeManyDirs(dir, levels-1, dirs, files); err != nil { ++ return count, err ++ } ++ count += c ++ } ++ ++ return count, err ++} ++ ++// prepareTestSet() creates a directory tree of shallow files, ++// to be used for testing or benchmarking. ++// ++// Total dirs: dirs^levels + dirs^(levels-1) + ... 
+ dirs^1 ++// Total files: total_dirs * files ++func prepareTestSet(tb testing.TB, levels, dirs, files int) (dir string, total uint32) { ++ tb.Helper() ++ var err error ++ ++ dir = tb.TempDir() ++ total, err = makeManyDirs(dir, levels, dirs, files) ++ if err != nil { ++ tb.Fatal(err) ++ } ++ total++ // this dir ++ ++ return dir, total ++} ++ ++type walkerFunc func(root string, walkFn fs.WalkDirFunc) error ++ ++func genWalkN(n int) walkerFunc { ++ return func(root string, walkFn fs.WalkDirFunc) error { ++ return WalkN(root, walkFn, n) ++ } ++} ++ ++func BenchmarkWalk(b *testing.B) { ++ const ( ++ levels = 5 // how deep ++ dirs = 3 // dirs on each levels ++ files = 8 // files on each levels ++ ) ++ ++ benchmarks := []struct { ++ walk fs.WalkDirFunc ++ name string ++ }{ ++ {name: "Empty", walk: cbEmpty}, ++ {name: "ReadFile", walk: cbReadFile}, ++ {name: "ChownChmod", walk: cbChownChmod}, ++ {name: "RandomSleep", walk: cbRandomSleep}, ++ } ++ ++ walkers := []struct { ++ walker walkerFunc ++ name string ++ }{ ++ {name: "filepath.WalkDir", walker: filepath.WalkDir}, ++ {name: "pwalkdir.Walk", walker: Walk}, ++ // test WalkN with various values of N ++ {name: "pwalkdir.Walk1", walker: genWalkN(1)}, ++ {name: "pwalkdir.Walk2", walker: genWalkN(2)}, ++ {name: "pwalkdir.Walk4", walker: genWalkN(4)}, ++ {name: "pwalkdir.Walk8", walker: genWalkN(8)}, ++ {name: "pwalkdir.Walk16", walker: genWalkN(16)}, ++ {name: "pwalkdir.Walk32", walker: genWalkN(32)}, ++ {name: "pwalkdir.Walk64", walker: genWalkN(64)}, ++ {name: "pwalkdir.Walk128", walker: genWalkN(128)}, ++ {name: "pwalkdir.Walk256", walker: genWalkN(256)}, ++ } ++ ++ dir, total := prepareTestSet(b, levels, dirs, files) ++ b.Logf("dataset: %d levels x %d dirs x %d files, total entries: %d", levels, dirs, files, total) ++ ++ for _, bm := range benchmarks { ++ for _, w := range walkers { ++ walker := w.walker ++ walkFn := bm.walk ++ // preheat ++ if err := w.walker(dir, bm.walk); err != nil { ++ b.Errorf("walk failed: %v", 
err) ++ } ++ // benchmark ++ b.Run(bm.name+"/"+w.name, func(b *testing.B) { ++ for i := 0; i < b.N; i++ { ++ if err := walker(dir, walkFn); err != nil { ++ b.Errorf("walk failed: %v", err) ++ } ++ } ++ }) ++ } ++ } ++} ++ ++func cbEmpty(_ string, _ fs.DirEntry, _ error) error { ++ return nil ++} ++ ++func cbChownChmod(path string, e fs.DirEntry, _ error) error { ++ _ = os.Chown(path, 0, 0) ++ mode := os.FileMode(0o644) ++ if e.IsDir() { ++ mode = os.FileMode(0o755) ++ } ++ _ = os.Chmod(path, mode) ++ ++ return nil ++} ++ ++func cbReadFile(path string, e fs.DirEntry, _ error) error { ++ var err error ++ if e.Type().IsRegular() { ++ _, err = os.ReadFile(path) ++ } ++ return err ++} ++ ++func cbRandomSleep(_ string, _ fs.DirEntry, _ error) error { ++ time.Sleep(time.Duration(rand.Intn(500)) * time.Microsecond) //nolint:gosec // ignore G404: Use of weak random number generator ++ return nil ++} +diff --git a/libcontainer/apparmor/apparmor_linux.go b/libcontainer/apparmor/apparmor_linux.go +index 17d36ed1..a3a8e932 100644 +--- a/libcontainer/apparmor/apparmor_linux.go ++++ b/libcontainer/apparmor/apparmor_linux.go +@@ -6,6 +6,9 @@ import ( + "os" + "sync" + ++ "golang.org/x/sys/unix" ++ ++ "github.com/opencontainers/runc/internal/pathrs" + "github.com/opencontainers/runc/libcontainer/utils" + ) + +@@ -36,19 +39,13 @@ func setProcAttr(attr, value string) error { + // Under AppArmor you can only change your own attr, so there's no reason + // to not use /proc/thread-self/ (instead of /proc//, like libapparmor + // does). 
+- attrPath, closer := utils.ProcThreadSelf(attrSubPath) +- defer closer() +- +- f, err := os.OpenFile(attrPath, os.O_WRONLY, 0) ++ f, closer, err := pathrs.ProcThreadSelfOpen(attrSubPath, unix.O_WRONLY|unix.O_CLOEXEC) + if err != nil { + return err + } ++ defer closer() + defer f.Close() + +- if err := utils.EnsureProcHandle(f); err != nil { +- return err +- } +- + _, err = f.WriteString(value) + return err + } +diff --git a/libcontainer/console_linux.go b/libcontainer/console_linux.go +index e506853e..c93151bc 100644 +--- a/libcontainer/console_linux.go ++++ b/libcontainer/console_linux.go +@@ -1,43 +1,164 @@ + package libcontainer + + import ( ++ "errors" ++ "fmt" + "os" ++ "runtime" + ++ "github.com/containerd/console" + "golang.org/x/sys/unix" ++ ++ "github.com/opencontainers/runc/internal/linux" ++ "github.com/opencontainers/runc/internal/pathrs" ++ "github.com/opencontainers/runc/internal/sys" ++ "github.com/opencontainers/runc/libcontainer/utils" + ) + +-// mount initializes the console inside the rootfs mounting with the specified mount label +-// and applying the correct ownership of the console. +-func mountConsole(slavePath string) error { +- f, err := os.Create("/dev/console") +- if err != nil && !os.IsExist(err) { +- return err ++// checkPtmxHandle checks that the given file handle points to a real ++// /dev/pts/ptmx device inode on a real devpts mount. We cannot (trivially) ++// check that it is *the* /dev/pts for the container itself, but this is good ++// enough. 
++func checkPtmxHandle(ptmx *os.File) error { ++ //nolint:revive,staticcheck,nolintlint // ignore "don't use ALL_CAPS" warning // nolintlint is needed to work around the different lint configs ++ const ( ++ PTMX_MAJOR = 5 // from TTYAUX_MAJOR in ++ PTMX_MINOR = 2 // from mknod_ptmx in fs/devpts/inode.c ++ PTMX_INO = 2 // from mknod_ptmx in fs/devpts/inode.c ++ ) ++ return sys.VerifyInode(ptmx, func(stat *unix.Stat_t, statfs *unix.Statfs_t) error { ++ if statfs.Type != unix.DEVPTS_SUPER_MAGIC { ++ return fmt.Errorf("ptmx handle is not on a real devpts mount: super magic is %#x", statfs.Type) ++ } ++ if stat.Ino != PTMX_INO { ++ return fmt.Errorf("ptmx handle has wrong inode number: %v", stat.Ino) ++ } ++ if stat.Mode&unix.S_IFMT != unix.S_IFCHR || stat.Rdev != unix.Mkdev(PTMX_MAJOR, PTMX_MINOR) { ++ return fmt.Errorf("ptmx handle is not a real char ptmx device: ftype %#x %d:%d", ++ stat.Mode&unix.S_IFMT, unix.Major(stat.Rdev), unix.Minor(stat.Rdev)) ++ } ++ return nil ++ }) ++} ++ ++func isPtyNoIoctlError(err error) bool { ++ // The kernel converts -ENOIOCTLCMD to -ENOTTY automatically, but handle ++ // -EINVAL just in case (which some drivers do, include pty). ++ return errors.Is(err, unix.EINVAL) || errors.Is(err, unix.ENOTTY) ++} ++ ++func getPtyPeer(pty console.Console, unsafePeerPath string, flags int) (*os.File, error) { ++ peer, err := linux.GetPtyPeer(pty.Fd(), unsafePeerPath, flags) ++ if err == nil || !isPtyNoIoctlError(err) { ++ return peer, err + } +- if f != nil { +- // Ensure permission bits (can be different because of umask). +- if err := f.Chmod(0o666); err != nil { +- return err ++ ++ // On pre-TIOCGPTPEER kernels (Linux < 4.13), we need to fallback to using ++ // the /dev/pts/$n path generated using TIOCGPTN. We can do some validation ++ // that the inode is correct because the Unix-98 pty has a consistent ++ // numbering scheme for the device number of the peer. 
++ ++ peerNum, err := unix.IoctlGetUint32(int(pty.Fd()), unix.TIOCGPTN) ++ if err != nil { ++ return nil, fmt.Errorf("get peer number of pty: %w", err) ++ } ++ //nolint:revive,staticcheck,nolintlint // ignore "don't use ALL_CAPS" warning // nolintlint is needed to work around the different lint configs ++ const ( ++ UNIX98_PTY_SLAVE_MAJOR = 136 // from ++ ) ++ wantPeerDev := unix.Mkdev(UNIX98_PTY_SLAVE_MAJOR, peerNum) ++ ++ // Use O_PATH to avoid opening a bad inode before we validate it. ++ peerHandle, err := os.OpenFile(unsafePeerPath, unix.O_PATH|unix.O_CLOEXEC, 0) ++ if err != nil { ++ return nil, err ++ } ++ defer peerHandle.Close() ++ ++ if err := sys.VerifyInode(peerHandle, func(stat *unix.Stat_t, statfs *unix.Statfs_t) error { ++ if statfs.Type != unix.DEVPTS_SUPER_MAGIC { ++ return fmt.Errorf("pty peer handle is not on a real devpts mount: super magic is %#x", statfs.Type) ++ } ++ if stat.Mode&unix.S_IFMT != unix.S_IFCHR || stat.Rdev != wantPeerDev { ++ return fmt.Errorf("pty peer handle is not the real char device for pty %d: ftype %#x %d:%d", ++ peerNum, stat.Mode&unix.S_IFMT, unix.Major(stat.Rdev), unix.Minor(stat.Rdev)) + } +- f.Close() ++ return nil ++ }); err != nil { ++ return nil, err + } +- return mount(slavePath, "/dev/console", "bind", unix.MS_BIND, "") ++ ++ return pathrs.Reopen(peerHandle, flags) + } + +-// dupStdio opens the slavePath for the console and dups the fds to the current +-// processes stdio, fd 0,1,2. +-func dupStdio(slavePath string) error { +- fd, err := unix.Open(slavePath, unix.O_RDWR, 0) ++// safeAllocPty returns a new (ptmx, peer pty) allocation for use inside a ++// container. ++func safeAllocPty() (pty console.Console, peer *os.File, Err error) { ++ // TODO: Use openat2(RESOLVE_NO_SYMLINKS|RESOLVE_NO_XDEV). 
++ ptmxHandle, err := os.OpenFile("/dev/pts/ptmx", unix.O_PATH|unix.O_NOFOLLOW|unix.O_CLOEXEC, 0) ++ if err != nil { ++ return nil, nil, err ++ } ++ defer ptmxHandle.Close() ++ ++ if err := checkPtmxHandle(ptmxHandle); err != nil { ++ return nil, nil, fmt.Errorf("verify ptmx handle: %w", err) ++ } ++ ++ ptyFile, err := pathrs.Reopen(ptmxHandle, unix.O_RDWR|unix.O_NOCTTY) ++ if err != nil { ++ return nil, nil, fmt.Errorf("reopen ptmx to get new pty pair: %w", err) ++ } ++ // On success, the ownership is transferred to pty. ++ defer func() { ++ if Err != nil { ++ _ = ptyFile.Close() ++ } ++ }() ++ ++ pty, unsafePeerPath, err := console.NewPtyFromFile(ptyFile) + if err != nil { +- return &os.PathError{ +- Op: "open", +- Path: slavePath, +- Err: err, ++ return nil, nil, err ++ } ++ defer func() { ++ if Err != nil { ++ _ = pty.Close() + } ++ }() ++ ++ peer, err = getPtyPeer(pty, unsafePeerPath, unix.O_RDWR|unix.O_NOCTTY) ++ if err != nil { ++ return nil, nil, fmt.Errorf("failed to get peer end of newly-allocated console: %w", err) ++ } ++ return pty, peer, nil ++} ++ ++// mountConsole bind-mounts the provided pty on top of /dev/console so programs ++// that operate on /dev/console operate on the correct container pty. ++func mountConsole(peerPty *os.File) error { ++ console, err := os.OpenFile("/dev/console", unix.O_NOFOLLOW|unix.O_CREAT|unix.O_CLOEXEC, 0o666) ++ if err != nil { ++ return fmt.Errorf("create /dev/console mount target: %w", err) + } ++ defer console.Close() ++ ++ dstFd, closer := utils.ProcThreadSelfFd(console.Fd()) ++ defer closer() ++ ++ mntSrc := &mountSource{ ++ Type: mountSourcePlain, ++ file: peerPty, ++ } ++ return mountViaFds(peerPty.Name(), mntSrc, "/dev/console", dstFd, "bind", unix.MS_BIND, "") ++} ++ ++// dupStdio replaces stdio with the given peerPty. 
++func dupStdio(peerPty *os.File) error { + for _, i := range []int{0, 1, 2} { +- if err := unix.Dup3(fd, i, 0); err != nil { ++ if err := unix.Dup3(int(peerPty.Fd()), i, 0); err != nil { + return err + } + } ++ runtime.KeepAlive(peerPty) + return nil + } +diff --git a/libcontainer/criu_linux.go b/libcontainer/criu_linux.go +index 4c6ae714..18d7b906 100644 +--- a/libcontainer/criu_linux.go ++++ b/libcontainer/criu_linux.go +@@ -519,34 +519,9 @@ func (c *Container) restoreNetwork(req *criurpc.CriuReq, criuOpts *CriuOpts) { + } + } + +-// makeCriuRestoreMountpoints makes the actual mountpoints for the +-// restore using CRIU. This function is inspired from the code in +-// rootfs_linux.go. +-func (c *Container) makeCriuRestoreMountpoints(m *configs.Mount) error { +- if m.Device == "cgroup" { +- // No mount point(s) need to be created: +- // +- // * for v1, mount points are saved by CRIU because +- // /sys/fs/cgroup is a tmpfs mount +- // +- // * for v2, /sys/fs/cgroup is a real mount, but +- // the mountpoint appears as soon as /sys is mounted +- return nil +- } +- // TODO: pass srcFD? Not sure if criu is impacted by issue #2484. +- me := mountEntry{Mount: m} +- // For all other filesystems, just make the target. +- if _, err := createMountpoint(c.config.Rootfs, me); err != nil { +- return fmt.Errorf("create criu restore mountpoint for %s mount: %w", me.Destination, err) +- } +- return nil +-} +- +-// isPathInPrefixList is a small function for CRIU restore to make sure +-// mountpoints, which are on a tmpfs, are not created in the roofs. 
+-func isPathInPrefixList(path string, prefix []string) bool { +- for _, p := range prefix { +- if strings.HasPrefix(path, p+"/") { ++func isOnTmpfs(path string, mounts []*configs.Mount) bool { ++ for _, m := range mounts { ++ if m.Device == "tmpfs" && strings.HasPrefix(path, m.Destination+"/") { + return true + } + } +@@ -560,17 +535,6 @@ func isPathInPrefixList(path string, prefix []string) bool { + // This function also creates missing mountpoints as long as they + // are not on top of a tmpfs, as CRIU will restore tmpfs content anyway. + func (c *Container) prepareCriuRestoreMounts(mounts []*configs.Mount) error { +- // First get a list of a all tmpfs mounts +- tmpfs := []string{} +- for _, m := range mounts { +- switch m.Device { +- case "tmpfs": +- tmpfs = append(tmpfs, m.Destination) +- } +- } +- // Now go through all mounts and create the mountpoints +- // if the mountpoints are not on a tmpfs, as CRIU will +- // restore the complete tmpfs content from its checkpoint. + umounts := []string{} + defer func() { + for _, u := range umounts { +@@ -586,28 +550,51 @@ func (c *Container) prepareCriuRestoreMounts(mounts []*configs.Mount) error { + }) + } + }() ++ // Now go through all mounts and create the required mountpoints. + for _, m := range mounts { +- if !isPathInPrefixList(m.Destination, tmpfs) { +- if err := c.makeCriuRestoreMountpoints(m); err != nil { ++ // No cgroup mount point(s) need to be created: ++ // * for v1, mount points are saved by CRIU because ++ // /sys/fs/cgroup is a tmpfs mount; ++ // * for v2, /sys/fs/cgroup is a real mount, but ++ // the mountpoint appears as soon as /sys is mounted. ++ if m.Device == "cgroup" { ++ continue ++ } ++ // If the mountpoint is on a tmpfs, skip it as CRIU will ++ // restore the complete tmpfs content from its checkpoint. 
++ if isOnTmpfs(m.Destination, mounts) { ++ continue ++ } ++ me := mountEntry{Mount: m} ++ if err := me.createOpenMountpoint(c.config.Rootfs); err != nil { ++ return fmt.Errorf("create criu restore mountpoint for %s mount: %w", me.Destination, err) ++ } ++ if me.dstFile != nil { ++ defer me.dstFile.Close() ++ } ++ // If the mount point is a bind mount, we need to mount ++ // it now so that runc can create the necessary mount ++ // points for mounts in bind mounts. ++ // This also happens during initial container creation. ++ // Without this CRIU restore will fail ++ // See: https://github.com/opencontainers/runc/issues/2748 ++ // It is also not necessary to order the mount points ++ // because during initial container creation mounts are ++ // set up in the order they are configured. ++ if m.Device == "bind" { ++ if err := utils.WithProcfdFile(me.dstFile, func(dstFd string) error { ++ return mountViaFds(m.Source, nil, m.Destination, dstFd, "", unix.MS_BIND|unix.MS_REC, "") ++ }); err != nil { + return err + } +- // If the mount point is a bind mount, we need to mount +- // it now so that runc can create the necessary mount +- // points for mounts in bind mounts. +- // This also happens during initial container creation. +- // Without this CRIU restore will fail +- // See: https://github.com/opencontainers/runc/issues/2748 +- // It is also not necessary to order the mount points +- // because during initial container creation mounts are +- // set up in the order they are configured. 
+- if m.Device == "bind" { +- if err := utils.WithProcfd(c.config.Rootfs, m.Destination, func(dstFd string) error { +- return mountViaFds(m.Source, nil, m.Destination, dstFd, "", unix.MS_BIND|unix.MS_REC, "") +- }); err != nil { +- return err +- } +- umounts = append(umounts, m.Destination) +- } ++ umounts = append(umounts, m.Destination) ++ } ++ if me.dstFile != nil { ++ // As this is being done in a loop, the defer earlier will be ++ // delayed until all mountpoints are handled -- for a config with ++ // many mountpoints this could result in a lot of open files. So we ++ // opportunistically close the file as well as deferring it. ++ _ = me.dstFile.Close() + } + } + return nil +diff --git a/libcontainer/dmz/cloned_binary_linux.go b/libcontainer/dmz/cloned_binary_linux.go +index 1c034e4e..9d392760 100644 +--- a/libcontainer/dmz/cloned_binary_linux.go ++++ b/libcontainer/dmz/cloned_binary_linux.go +@@ -10,6 +10,7 @@ import ( + "github.com/sirupsen/logrus" + "golang.org/x/sys/unix" + ++ "github.com/opencontainers/runc/internal/pathrs" + "github.com/opencontainers/runc/libcontainer/system" + ) + +@@ -67,7 +68,7 @@ func sealFile(f **os.File) error { + // When sealing an O_TMPFILE-style descriptor we need to + // re-open the path as O_PATH to clear the existing write + // handle we have. 
+- opath, err := os.OpenFile(fmt.Sprintf("/proc/self/fd/%d", (*f).Fd()), unix.O_PATH|unix.O_CLOEXEC, 0) ++ opath, err := pathrs.Reopen(*f, unix.O_PATH|unix.O_CLOEXEC) + if err != nil { + return fmt.Errorf("reopen tmpfile: %w", err) + } +diff --git a/libcontainer/init_linux.go b/libcontainer/init_linux.go +index eddbfba6..ee402095 100644 +--- a/libcontainer/init_linux.go ++++ b/libcontainer/init_linux.go +@@ -5,6 +5,7 @@ import ( + "encoding/json" + "errors" + "fmt" ++ "io" + "net" + "os" + "path/filepath" +@@ -21,6 +22,7 @@ import ( + "github.com/vishvananda/netlink" + "golang.org/x/sys/unix" + ++ "github.com/opencontainers/runc/internal/pathrs" + "github.com/opencontainers/runc/libcontainer/capabilities" + "github.com/opencontainers/runc/libcontainer/cgroups" + "github.com/opencontainers/runc/libcontainer/configs" +@@ -379,12 +381,13 @@ func setupConsole(socket *os.File, config *initConfig, mount bool) error { + // the UID owner of the console to be the user the process will run as (so + // they can actually control their console). + +- pty, slavePath, err := console.NewPty() ++ pty, peerPty, err := safeAllocPty() + if err != nil { + return err + } + // After we return from here, we don't need the console anymore. + defer pty.Close() ++ defer peerPty.Close() + + if config.ConsoleHeight != 0 && config.ConsoleWidth != 0 { + err = pty.Resize(console.WinSize{ +@@ -398,7 +401,7 @@ func setupConsole(socket *os.File, config *initConfig, mount bool) error { + + // Mount the console inside our rootfs. + if mount { +- if err := mountConsole(slavePath); err != nil { ++ if err := mountConsole(peerPty); err != nil { + return err + } + } +@@ -409,7 +412,7 @@ func setupConsole(socket *os.File, config *initConfig, mount bool) error { + runtime.KeepAlive(pty) + + // Now, dup over all the things. 
+- return dupStdio(slavePath) ++ return dupStdio(peerPty) + } + + // syncParentReady sends to the given pipe a JSON payload which indicates that +@@ -511,7 +514,12 @@ func setupUser(config *initConfig) error { + // We don't need to use /proc/thread-self here because setgroups is a + // per-userns file and thus is global to all threads in a thread-group. + // This lets us avoid having to do runtime.LockOSThread. +- setgroups, err := os.ReadFile("/proc/self/setgroups") ++ var setgroups []byte ++ setgroupsFile, err := pathrs.ProcSelfOpen("setgroups", unix.O_RDONLY) ++ if err == nil { ++ setgroups, err = io.ReadAll(setgroupsFile) ++ _ = setgroupsFile.Close() ++ } + if err != nil && !os.IsNotExist(err) { + return err + } +@@ -555,19 +563,16 @@ func setupUser(config *initConfig) error { + // The ownership needs to match because it is created outside of the container and needs to be + // localized. + func fixStdioPermissions(u *user.ExecUser) error { +- var null unix.Stat_t +- if err := unix.Stat("/dev/null", &null); err != nil { +- return &os.PathError{Op: "stat", Path: "/dev/null", Err: err} +- } + for _, file := range []*os.File{os.Stdin, os.Stdout, os.Stderr} { + var s unix.Stat_t + if err := unix.Fstat(int(file.Fd()), &s); err != nil { + return &os.PathError{Op: "fstat", Path: file.Name(), Err: err} + } + +- // Skip chown if uid is already the one we want or any of the STDIO descriptors +- // were redirected to /dev/null. +- if int(s.Uid) == u.Uid || s.Rdev == null.Rdev { ++ // Skip chown if: ++ // - uid is already the one we want, or ++ // - fd is opened to /dev/null. 
++ if int(s.Uid) == u.Uid || isDevNull(&s) { + continue + } + +diff --git a/libcontainer/integration/exec_test.go b/libcontainer/integration/exec_test.go +index e8a2dc53..c0fbd101 100644 +--- a/libcontainer/integration/exec_test.go ++++ b/libcontainer/integration/exec_test.go +@@ -14,12 +14,13 @@ import ( + "syscall" + "testing" + ++ "github.com/opencontainers/runc/internal/linux" ++ "github.com/opencontainers/runc/internal/pathrs" + "github.com/opencontainers/runc/libcontainer" + "github.com/opencontainers/runc/libcontainer/cgroups" + "github.com/opencontainers/runc/libcontainer/cgroups/systemd" + "github.com/opencontainers/runc/libcontainer/configs" + "github.com/opencontainers/runc/libcontainer/internal/userns" +- "github.com/opencontainers/runc/libcontainer/utils" + "github.com/opencontainers/runtime-spec/specs-go" + + "golang.org/x/sys/unix" +@@ -1695,11 +1696,9 @@ func TestFdLeaksSystemd(t *testing.T) { + } + + func fdList(t *testing.T) []string { +- procSelfFd, closer := utils.ProcThreadSelf("fd") +- defer closer() +- +- fdDir, err := os.Open(procSelfFd) ++ fdDir, closer, err := pathrs.ProcThreadSelfOpen("fd/", unix.O_DIRECTORY|unix.O_CLOEXEC) + ok(t, err) ++ defer closer() + defer fdDir.Close() + + fds, err := fdDir.Readdirnames(-1) +@@ -1738,8 +1737,10 @@ func testFdLeaks(t *testing.T, systemd bool) { + + count := 0 + +- procSelfFd, closer := utils.ProcThreadSelf("fd/") ++ procSelfFd, closer, err := pathrs.ProcThreadSelfOpen("fd/", unix.O_DIRECTORY|unix.O_CLOEXEC) ++ ok(t, err) + defer closer() ++ defer procSelfFd.Close() + + next_fd: + for _, fd1 := range fds1 { +@@ -1748,7 +1749,7 @@ next_fd: + continue next_fd + } + } +- dst, _ := os.Readlink(filepath.Join(procSelfFd, fd1)) ++ dst, _ := linux.Readlinkat(procSelfFd, fd1) + for _, ex := range excludedPaths { + if ex == dst { + continue next_fd +diff --git a/libcontainer/rootfs_linux.go b/libcontainer/rootfs_linux.go +index f7cd95dd..377642c9 100644 +--- a/libcontainer/rootfs_linux.go ++++ 
b/libcontainer/rootfs_linux.go +@@ -5,14 +5,15 @@ import ( + "errors" + "fmt" + "os" +- "path" + "path/filepath" ++ "runtime" + "strconv" + "strings" + "syscall" + "time" + + securejoin "github.com/cyphar/filepath-securejoin" ++ "github.com/cyphar/filepath-securejoin/pathrs-lite/procfs" + "github.com/moby/sys/mountinfo" + "github.com/moby/sys/userns" + "github.com/mrunalp/fileutils" +@@ -21,6 +22,8 @@ import ( + "github.com/sirupsen/logrus" + "golang.org/x/sys/unix" + ++ "github.com/opencontainers/runc/internal/pathrs" ++ "github.com/opencontainers/runc/internal/sys" + "github.com/opencontainers/runc/libcontainer/cgroups" + "github.com/opencontainers/runc/libcontainer/cgroups/fs2" + "github.com/opencontainers/runc/libcontainer/configs" +@@ -43,6 +46,7 @@ type mountConfig struct { + type mountEntry struct { + *configs.Mount + srcFile *mountSource ++ dstFile *os.File + } + + // srcName is only meant for error messages, it returns a "friendly" name. +@@ -281,8 +285,8 @@ func cleanupTmp(tmpdir string) { + _ = os.RemoveAll(tmpdir) + } + +-func mountCgroupV1(m *configs.Mount, c *mountConfig) error { +- binds, err := getCgroupMounts(m) ++func mountCgroupV1(m mountEntry, c *mountConfig) error { ++ binds, err := getCgroupMounts(m.Mount) + if err != nil { + return err + } +@@ -313,7 +317,7 @@ func mountCgroupV1(m *configs.Mount, c *mountConfig) error { + // inside the tmpfs, so we don't want to resolve symlinks). 
+ subsystemPath := filepath.Join(c.root, b.Destination) + subsystemName := filepath.Base(b.Destination) +- if err := utils.MkdirAllInRoot(c.root, subsystemPath, 0o755); err != nil { ++ if err := pathrs.MkdirAllInRoot(c.root, subsystemPath, 0o755); err != nil { + return err + } + if err := utils.WithProcfd(c.root, b.Destination, func(dstFd string) error { +@@ -352,8 +356,8 @@ func mountCgroupV1(m *configs.Mount, c *mountConfig) error { + return nil + } + +-func mountCgroupV2(m *configs.Mount, c *mountConfig) error { +- err := utils.WithProcfd(c.root, m.Destination, func(dstFd string) error { ++func mountCgroupV2(m mountEntry, c *mountConfig) error { ++ err := utils.WithProcfdFile(m.dstFile, func(dstFd string) error { + return mountViaFds(m.Source, nil, m.Destination, dstFd, "cgroup2", uintptr(m.Flags), m.Data) + }) + if err == nil || !(errors.Is(err, unix.EPERM) || errors.Is(err, unix.EBUSY)) { +@@ -382,14 +386,14 @@ func mountCgroupV2(m *configs.Mount, c *mountConfig) error { + // + // Mask `/sys/fs/cgroup` to ensure it is read-only, even when `/sys` is mounted + // with `rbind,ro` (`runc spec --rootless` produces `rbind,ro` for `/sys`). +- err = utils.WithProcfd(c.root, m.Destination, func(procfd string) error { +- return maskPath(procfd, c.label) ++ err = utils.WithProcfdFile(m.dstFile, func(procfd string) error { ++ return maskPaths([]string{procfd}, c.label) + }) + } + return err + } + +-func doTmpfsCopyUp(m mountEntry, rootfs, mountLabel string) (Err error) { ++func doTmpfsCopyUp(m mountEntry, mountLabel string) (Err error) { + // Set up a scratch dir for the tmpfs on the host. + tmpdir, err := prepareTmp("/tmp") + if err != nil { +@@ -402,13 +406,19 @@ func doTmpfsCopyUp(m mountEntry, rootfs, mountLabel string) (Err error) { + } + defer os.RemoveAll(tmpDir) + +- // Configure the *host* tmpdir as if it's the container mount. We change +- // m.Destination since we are going to mount *on the host*. 
+- oldDest := m.Destination +- m.Destination = tmpDir +- err = mountPropagate(m, "/", mountLabel) +- m.Destination = oldDest ++ tmpDirFile, err := os.OpenFile(tmpDir, unix.O_DIRECTORY|unix.O_CLOEXEC, 0) + if err != nil { ++ return fmt.Errorf("tmpcopyup: %w", err) ++ } ++ defer tmpDirFile.Close() ++ ++ // Configure the *host* tmpdir as if it's the container mount. We change ++ // m.dstFile since we are going to mount *on the host*. ++ hostMount := mountEntry{ ++ Mount: m.Mount, ++ dstFile: tmpDirFile, ++ } ++ if err := hostMount.mountPropagate("/", mountLabel); err != nil { + return err + } + defer func() { +@@ -419,7 +429,7 @@ func doTmpfsCopyUp(m mountEntry, rootfs, mountLabel string) (Err error) { + } + }() + +- return utils.WithProcfd(rootfs, m.Destination, func(dstFd string) (Err error) { ++ return utils.WithProcfdFile(m.dstFile, func(dstFd string) (Err error) { + // Copy the container data to the host tmpdir. We append "/" to force + // CopyDirectory to resolve the symlink rather than trying to copy the + // symlink itself. 
+@@ -481,72 +491,76 @@ func statfsToMountFlags(st unix.Statfs_t) int { + + var errRootfsToFile = errors.New("config tries to change rootfs to file") + +-func createMountpoint(rootfs string, m mountEntry) (string, error) { +- dest, err := securejoin.SecureJoin(rootfs, m.Destination) ++func (m *mountEntry) createOpenMountpoint(rootfs string) (Err error) { ++ unsafePath := utils.StripRoot(rootfs, m.Destination) ++ dstFile, err := pathrs.OpenInRoot(rootfs, unsafePath, unix.O_PATH) ++ defer func() { ++ if dstFile != nil && Err != nil { ++ _ = dstFile.Close() ++ } ++ }() + if err != nil { +- return "", err +- } +- if err := checkProcMount(rootfs, dest, m); err != nil { +- return "", fmt.Errorf("check proc-safety of %s mount: %w", m.Destination, err) +- } ++ if !errors.Is(err, unix.ENOENT) { ++ return fmt.Errorf("lookup mountpoint target: %w", err) ++ } + +- switch m.Device { +- case "bind": +- fi, _, err := m.srcStat() +- if err != nil { +- // Error out if the source of a bind mount does not exist as we +- // will be unable to bind anything to it. +- return "", err +- } +- // If the original source is not a directory, make the target a file. +- if !fi.IsDir() { +- // Make sure we aren't tricked into trying to make the root a file. +- if rootfs == dest { +- return "", fmt.Errorf("%w: file bind mount over rootfs", errRootfsToFile) +- } +- // Make the parent directory. +- destDir, destBase := filepath.Split(dest) +- destDirFd, err := utils.MkdirAllInRootOpen(rootfs, destDir, 0o755) ++ // If the mountpoint doesn't already exist, we want to create a mountpoint ++ // that makes sense for the source. For file bind-mounts this is an empty ++ // file, for everything else it's a directory. ++ dstIsFile := false ++ if m.Device == "bind" { ++ fi, _, err := m.srcStat() + if err != nil { +- return "", fmt.Errorf("make parent dir of file bind-mount: %w", err) +- } +- defer destDirFd.Close() +- // Make the target file. 
We want to avoid opening any file that is +- // already there because it could be a "bad" file like an invalid +- // device or hung tty that might cause a DoS, so we use mknodat. +- // destBase does not contain any "/" components, and mknodat does +- // not follow trailing symlinks, so we can safely just call mknodat +- // here. +- if err := unix.Mknodat(int(destDirFd.Fd()), destBase, unix.S_IFREG|0o644, 0); err != nil { +- // If we get EEXIST, there was already an inode there and +- // we can consider that a success. +- if !errors.Is(err, unix.EEXIST) { +- err = &os.PathError{Op: "mknod regular file", Path: dest, Err: err} +- return "", fmt.Errorf("create target of file bind-mount: %w", err) +- } ++ // Error out if the source of a bind mount does not exist as we ++ // will be unable to bind anything to it. ++ return err + } +- // Nothing left to do. +- return dest, nil ++ dstIsFile = !fi.IsDir() + } + +- case "tmpfs": +- // If the original target exists, copy the mode for the tmpfs mount. +- if stat, err := os.Stat(dest); err == nil { +- dt := fmt.Sprintf("mode=%04o", syscallMode(stat.Mode())) +- if m.Data != "" { +- dt = dt + "," + m.Data +- } +- m.Data = dt ++ if dstIsFile { ++ dstFile, err = pathrs.CreateInRoot(rootfs, unsafePath, unix.O_CREAT|unix.O_EXCL|unix.O_NOFOLLOW, 0o644) ++ } else { ++ dstFile, err = pathrs.MkdirAllInRootOpen(rootfs, unsafePath, 0o755) ++ } ++ if err != nil { ++ return fmt.Errorf("make mountpoint %q: %w", m.Destination, err) ++ } ++ } + +- // Nothing left to do. +- return dest, nil ++ if m.Device == "tmpfs" { ++ // If the original target exists, copy the mode for the tmpfs mount. 
++ stat, err := dstFile.Stat() ++ if err != nil { ++ return fmt.Errorf("check tmpfs source mode: %w", err) + } ++ dt := fmt.Sprintf("mode=%04o", syscallMode(stat.Mode())) ++ if m.Data != "" { ++ dt = dt + "," + m.Data ++ } ++ m.Data = dt + } + +- if err := utils.MkdirAllInRoot(rootfs, dest, 0o755); err != nil { +- return "", err ++ dstFullPath, err := procfs.ProcSelfFdReadlink(dstFile) ++ if err != nil { ++ return fmt.Errorf("get mount destination real path: %w", err) ++ } ++ if !pathrs.IsLexicallyInRoot(rootfs, dstFullPath) { ++ return fmt.Errorf("mountpoint %q is outside of rootfs %q", dstFullPath, rootfs) ++ } ++ if relPath, err := filepath.Rel(rootfs, dstFullPath); err != nil { ++ return fmt.Errorf("get relative path of %q: %w", dstFullPath, err) ++ } else if relPath == "." { ++ return fmt.Errorf("mountpoint %q is on the top of rootfs %q", dstFullPath, rootfs) + } +- return dest, nil ++ // TODO: Make checkProcMount use dstFile directly to avoid the need to ++ // operate on paths here. ++ if err := checkProcMount(rootfs, dstFullPath, *m); err != nil { ++ return fmt.Errorf("check proc-safety of %s mount: %w", m.Destination, err) ++ } ++ // Update mountEntry. ++ m.dstFile = dstFile ++ return nil + } + + func mountToRootfs(c *mountConfig, m mountEntry) error { +@@ -562,7 +576,7 @@ func mountToRootfs(c *mountConfig, m mountEntry) error { + // TODO: This won't be necessary once we switch to libpathrs and we can + // stop all of these symlink-exchange attacks. + dest := filepath.Clean(m.Destination) +- if !utils.IsLexicallyInRoot(rootfs, dest) { ++ if !pathrs.IsLexicallyInRoot(rootfs, dest) { + // Do not use securejoin as it resolves symlinks. 
+ dest = filepath.Join(rootfs, dest) + } +@@ -576,36 +590,47 @@ func mountToRootfs(c *mountConfig, m mountEntry) error { + } else if !fi.IsDir() { + return fmt.Errorf("filesystem %q must be mounted on ordinary directory", m.Device) + } +- if err := utils.MkdirAllInRoot(rootfs, dest, 0o755); err != nil { ++ dstFile, err := pathrs.MkdirAllInRootOpen(rootfs, dest, 0o755) ++ if err != nil { + return err + } +- // Selinux kernels do not support labeling of /proc or /sys. +- return mountPropagate(m, rootfs, "") ++ defer dstFile.Close() ++ // "proc" and "sys" mounts need special handling (without resolving the ++ // destination) to avoid attacks. ++ m.dstFile = dstFile ++ return m.mountPropagate(rootfs, "") + } + +- dest, err := createMountpoint(rootfs, m) +- if err != nil { ++ mountLabel := c.label ++ if err := m.createOpenMountpoint(rootfs); err != nil { + return fmt.Errorf("create mountpoint for %s mount: %w", m.Destination, err) + } +- mountLabel := c.label ++ defer func() { ++ if m.dstFile != nil { ++ _ = m.dstFile.Close() ++ m.dstFile = nil ++ } ++ }() + + switch m.Device { + case "mqueue": +- if err := mountPropagate(m, rootfs, ""); err != nil { ++ if err := m.mountPropagate(rootfs, ""); err != nil { + return err + } +- return label.SetFileLabel(dest, mountLabel) ++ return utils.WithProcfdFile(m.dstFile, func(dstFd string) error { ++ return label.SetFileLabel(dstFd, mountLabel) ++ }) + case "tmpfs": ++ var err error + if m.Extensions&configs.EXT_COPYUP == configs.EXT_COPYUP { +- err = doTmpfsCopyUp(m, rootfs, mountLabel) ++ err = doTmpfsCopyUp(m, mountLabel) + } else { +- err = mountPropagate(m, rootfs, mountLabel) ++ err = m.mountPropagate(rootfs, mountLabel) + } +- + return err + case "bind": + // open_tree()-related shenanigans are all handled in mountViaFds. 
+- if err := mountPropagate(m, rootfs, mountLabel); err != nil { ++ if err := m.mountPropagate(rootfs, mountLabel); err != nil { + return err + } + +@@ -619,7 +644,7 @@ func mountToRootfs(c *mountConfig, m mountEntry) error { + // contrast to mount(8)'s current behaviour, but is what users probably + // expect. See . + if m.Flags & ^(unix.MS_BIND|unix.MS_REC|unix.MS_REMOUNT) != 0 || m.ClearedFlags != 0 { +- if err := utils.WithProcfd(rootfs, m.Destination, func(dstFd string) error { ++ if err := utils.WithProcfdFile(m.dstFile, func(dstFd string) error { + flags := m.Flags | unix.MS_BIND | unix.MS_REMOUNT + // The runtime-spec says we SHOULD map to the relevant mount(8) + // behaviour. However, it's not clear whether we want the +@@ -712,14 +737,14 @@ func mountToRootfs(c *mountConfig, m mountEntry) error { + return err + } + } +- return setRecAttr(m.Mount, rootfs) ++ return setRecAttr(m) + case "cgroup": + if cgroups.IsCgroup2UnifiedMode() { +- return mountCgroupV2(m.Mount, c) ++ return mountCgroupV2(m, c) + } +- return mountCgroupV1(m.Mount, c) ++ return mountCgroupV1(m, c) + default: +- return mountPropagate(m, rootfs, mountLabel) ++ return m.mountPropagate(rootfs, mountLabel) + } + } + +@@ -867,20 +892,20 @@ func setupDevSymlinks(rootfs string) error { + // needs to be called after we chroot/pivot into the container's rootfs so that any + // symlinks are resolved locally. 
+ func reOpenDevNull() error { +- var stat, devNullStat unix.Stat_t + file, err := os.OpenFile("/dev/null", os.O_RDWR, 0) + if err != nil { + return err + } +- defer file.Close() //nolint: errcheck +- if err := unix.Fstat(int(file.Fd()), &devNullStat); err != nil { +- return &os.PathError{Op: "fstat", Path: file.Name(), Err: err} ++ defer file.Close() ++ if err := verifyDevNull(file); err != nil { ++ return fmt.Errorf("can't reopen /dev/null: %w", err) + } + for fd := 0; fd < 3; fd++ { ++ var stat unix.Stat_t + if err := unix.Fstat(fd, &stat); err != nil { + return &os.PathError{Op: "fstat", Path: "fd " + strconv.Itoa(fd), Err: err} + } +- if stat.Rdev == devNullStat.Rdev { ++ if isDevNull(&stat) { + // Close and re-open the fd. + if err := unix.Dup3(int(file.Fd()), fd, 0); err != nil { + return &os.PathError{ +@@ -913,16 +938,15 @@ func createDevices(config *configs.Config) error { + return nil + } + +-func bindMountDeviceNode(rootfs, dest string, node *devices.Device) error { +- f, err := os.Create(dest) +- if err != nil && !os.IsExist(err) { +- return err +- } +- if f != nil { +- _ = f.Close() ++func bindMountDeviceNode(destDir *os.File, destName string, node *devices.Device) error { ++ dstFile, err := utils.Openat(destDir, destName, unix.O_CREAT|unix.O_NOFOLLOW|unix.O_CLOEXEC, 0o000) ++ if err != nil { ++ return fmt.Errorf("create device inode %s: %w", node.Path, err) + } +- return utils.WithProcfd(rootfs, dest, func(dstFd string) error { +- return mountViaFds(node.Path, nil, dest, dstFd, "bind", unix.MS_BIND, "") ++ defer dstFile.Close() ++ ++ return utils.WithProcfdFile(dstFile, func(dstFd string) error { ++ return mountViaFds(node.Path, nil, dstFile.Name(), dstFd, "bind", unix.MS_BIND, "") + }) + } + +@@ -932,31 +956,33 @@ func createDeviceNode(rootfs string, node *devices.Device, bind bool) error { + // The node only exists for cgroup reasons, ignore it here. 
+ return nil + } +- dest, err := securejoin.SecureJoin(rootfs, node.Path) ++ destPath, err := securejoin.SecureJoin(rootfs, node.Path) + if err != nil { + return err + } +- if dest == rootfs { ++ if destPath == rootfs { + return fmt.Errorf("%w: mknod over rootfs", errRootfsToFile) + } +- if err := utils.MkdirAllInRoot(rootfs, filepath.Dir(dest), 0o755); err != nil { +- return err ++ destDirPath, destName := filepath.Split(destPath) ++ destDir, err := pathrs.MkdirAllInRootOpen(rootfs, destDirPath, 0o755) ++ if err != nil { ++ return fmt.Errorf("mkdir parent of device inode %q: %w", node.Path, err) + } + if bind { +- return bindMountDeviceNode(rootfs, dest, node) ++ return bindMountDeviceNode(destDir, destName, node) + } +- if err := mknodDevice(dest, node); err != nil { ++ if err := mknodDevice(destDir, destName, node); err != nil { + if errors.Is(err, os.ErrExist) { + return nil + } else if errors.Is(err, os.ErrPermission) { +- return bindMountDeviceNode(rootfs, dest, node) ++ return bindMountDeviceNode(destDir, destName, node) + } + return err + } + return nil + } + +-func mknodDevice(dest string, node *devices.Device) error { ++func mknodDevice(destDir *os.File, destName string, node *devices.Device) error { + fileMode := node.FileMode + switch node.Type { + case devices.BlockDevice: +@@ -972,14 +998,44 @@ func mknodDevice(dest string, node *devices.Device) error { + if err != nil { + return err + } +- if err := unix.Mknod(dest, uint32(fileMode), int(dev)); err != nil { +- return &os.PathError{Op: "mknod", Path: dest, Err: err} ++ if err := unix.Mknodat(int(destDir.Fd()), destName, uint32(fileMode), int(dev)); err != nil { ++ return &os.PathError{Op: "mknodat", Path: filepath.Join(destDir.Name(), destName), Err: err} + } +- // Ensure permission bits (can be different because of umask). +- if err := os.Chmod(dest, fileMode); err != nil { ++ ++ // Get a handle and verify that it matches the expected inode type and ++ // major:minor before we operate on it. 
++ devFile, err := utils.Openat(destDir, destName, unix.O_NOFOLLOW|unix.O_PATH, 0) ++ if err != nil { ++ return fmt.Errorf("open new %c device inode %s: %w", node.Type, node.Path, err) ++ } ++ defer devFile.Close() ++ ++ if err := sys.VerifyInode(devFile, func(stat *unix.Stat_t, _ *unix.Statfs_t) error { ++ if stat.Mode&unix.S_IFMT != uint32(fileMode)&unix.S_IFMT { ++ return fmt.Errorf("new %c device inode %s has incorrect ftype: %#x doesn't match expected %#v", ++ node.Type, node.Path, ++ stat.Mode&unix.S_IFMT, fileMode&unix.S_IFMT) ++ } ++ if stat.Rdev != dev { ++ return fmt.Errorf("new %c device inode %s has incorrect major:minor: %d:%d doesn't match expected %d:%d", ++ node.Type, node.Path, ++ unix.Major(stat.Rdev), unix.Minor(stat.Rdev), ++ unix.Major(dev), unix.Minor(dev)) ++ } ++ return nil ++ }); err != nil { + return err + } +- return os.Chown(dest, int(node.Uid), int(node.Gid)) ++ ++ // Ensure permission bits (can be different because of umask). ++ if err := sys.FchmodFile(devFile, uint32(fileMode)); err != nil { ++ return fmt.Errorf("update new %c device inode %s file mode: %w", node.Type, node.Path, err) ++ } ++ if err := sys.FchownFile(devFile, int(node.Uid), int(node.Gid)); err != nil { ++ return fmt.Errorf("update new %c device inode %s owner: %w", node.Type, node.Path, err) ++ } ++ runtime.KeepAlive(devFile) ++ return nil + } + + // rootfsParentMountPrivate ensures rootfs parent mount is private. 
+@@ -1233,31 +1289,111 @@ func remountReadonly(m *configs.Mount) error { + return fmt.Errorf("unable to mount %s as readonly max retries reached", dest) + } + +-// maskPath masks the top of the specified path inside a container to avoid ++func isDevNull(st *unix.Stat_t) bool { ++ return st.Mode&unix.S_IFMT == unix.S_IFCHR && st.Rdev == unix.Mkdev(1, 3) ++} ++ ++func verifyDevNull(f *os.File) error { ++ return sys.VerifyInode(f, func(st *unix.Stat_t, _ *unix.Statfs_t) error { ++ if !isDevNull(st) { ++ return errors.New("container's /dev/null is invalid") ++ } ++ return nil ++ }) ++} ++ ++// maskPaths masks the top of the specified paths inside a container to avoid + // security issues from processes reading information from non-namespace aware + // mounts ( proc/kcore ). + // For files, maskPath bind mounts /dev/null over the top of the specified path. + // For directories, maskPath mounts read-only tmpfs over the top of the specified path. +-func maskPath(path string, mountLabel string) error { +- if err := mount("/dev/null", path, "", unix.MS_BIND, ""); err != nil && !errors.Is(err, os.ErrNotExist) { +- if errors.Is(err, unix.ENOTDIR) { +- return mount("tmpfs", path, "tmpfs", unix.MS_RDONLY, label.FormatMountLabel("", mountLabel)) ++func maskPaths(paths []string, mountLabel string) error { ++ devNull, err := os.OpenFile("/dev/null", unix.O_PATH, 0) ++ if err != nil { ++ return fmt.Errorf("can't mask paths: %w", err) ++ } ++ defer devNull.Close() ++ if err := verifyDevNull(devNull); err != nil { ++ return fmt.Errorf("can't mask paths: %w", err) ++ } ++ devNullSrc := &mountSource{Type: mountSourcePlain, file: devNull} ++ procSelfFd, closer := utils.ProcThreadSelf("fd/") ++ defer closer() ++ ++ for _, path := range paths { ++ // Open the target path; skip if it doesn't exist. 
++ dstFh, err := os.OpenFile(path, unix.O_PATH|unix.O_CLOEXEC, 0) ++ if err != nil { ++ if errors.Is(err, os.ErrNotExist) { ++ continue ++ } ++ return fmt.Errorf("can't mask path %q: %w", path, err) ++ } ++ st, err := dstFh.Stat() ++ if err != nil { ++ dstFh.Close() ++ return fmt.Errorf("can't mask path %q: %w", path, err) ++ } ++ var dstType string ++ if st.IsDir() { ++ // Destination is a directory: bind mount a ro tmpfs over it. ++ dstType = "dir" ++ err = mount("tmpfs", path, "tmpfs", unix.MS_RDONLY, label.FormatMountLabel("", mountLabel)) ++ } else { ++ // Destination is a file: mount it to /dev/null. ++ dstType = "path" ++ dstFd := filepath.Join(procSelfFd, strconv.Itoa(int(dstFh.Fd()))) ++ err = mountViaFds("", devNullSrc, path, dstFd, "", unix.MS_BIND, "") ++ } ++ dstFh.Close() ++ if err != nil { ++ return fmt.Errorf("can't mask %s %q: %w", dstType, path, err) + } +- return err + } ++ + return nil + } + +-// writeSystemProperty writes the value to a path under /proc/sys as determined from the key. +-// For e.g. net.ipv4.ip_forward translated to /proc/sys/net/ipv4/ip_forward. 
+-func writeSystemProperty(key, value string) error { +- keyPath := strings.Replace(key, ".", "/", -1) +- return os.WriteFile(path.Join("/proc/sys", keyPath), []byte(value), 0o644) ++func reopenAfterMount(rootfs string, f *os.File, flags int) (_ *os.File, Err error) { ++ fullPath, err := procfs.ProcSelfFdReadlink(f) ++ if err != nil { ++ return nil, fmt.Errorf("get full path: %w", err) ++ } ++ if !pathrs.IsLexicallyInRoot(rootfs, fullPath) { ++ return nil, fmt.Errorf("mountpoint %q is outside of rootfs %q", fullPath, rootfs) ++ } ++ unsafePath := utils.StripRoot(rootfs, fullPath) ++ reopened, err := pathrs.OpenInRoot(rootfs, unsafePath, flags) ++ if err != nil { ++ return nil, fmt.Errorf("re-open mountpoint %q: %w", unsafePath, err) ++ } ++ defer func() { ++ if Err != nil { ++ _ = reopened.Close() ++ } ++ }() ++ ++ // NOTE: The best we can do here is confirm that the new mountpoint handle ++ // matches the original target handle, but an attacker could've swapped a ++ // different path to replace it. In the worst case this could result in us ++ // applying later vfsmount flags onto the wrong mount. ++ // ++ // This is far from ideal, but the only way of doing this in a race-free ++ // way is to switch the new mount API (move_mount(2) does not require this ++ // re-opening step, and thus no such races are possible). ++ reopenedFullPath, err := procfs.ProcSelfFdReadlink(reopened) ++ if err != nil { ++ return nil, fmt.Errorf("check full path of re-opened mountpoint: %w", err) ++ } ++ if reopenedFullPath != fullPath { ++ return nil, fmt.Errorf("mountpoint %q was moved while re-opening", unsafePath) ++ } ++ return reopened, nil + } + + // Do the mount operation followed by additional mounts required to take care + // of propagation flags. This will always be scoped inside the container rootfs. 
+-func mountPropagate(m mountEntry, rootfs string, mountLabel string) error { ++func (m *mountEntry) mountPropagate(rootfs string, mountLabel string) error { + var ( + data = label.FormatMountLabel(m.Data, mountLabel) + flags = m.Flags +@@ -1270,19 +1406,30 @@ func mountPropagate(m mountEntry, rootfs string, mountLabel string) error { + flags &= ^unix.MS_RDONLY + } + +- // Because the destination is inside a container path which might be +- // mutating underneath us, we verify that we are actually going to mount +- // inside the container with WithProcfd() -- mounting through a procfd +- // mounts on the target. +- if err := utils.WithProcfd(rootfs, m.Destination, func(dstFd string) error { ++ if err := utils.WithProcfdFile(m.dstFile, func(dstFd string) error { + return mountViaFds(m.Source, m.srcFile, m.Destination, dstFd, m.Device, uintptr(flags), data) + }); err != nil { + return err + } ++ ++ // We need to re-open the mountpoint after doing the mount, in order for us ++ // to operate on the new mount we just created. However, we cannot use ++ // pathrs.Reopen because we need to re-resolve from the parent directory to ++ // get a new handle to the top mount. ++ // ++ // TODO: Use move_mount(2) on newer kernels so that this is no longer ++ // necessary on modern systems. ++ newDstFile, err := reopenAfterMount(rootfs, m.dstFile, unix.O_PATH) ++ if err != nil { ++ return fmt.Errorf("reopen mountpoint after mount: %w", err) ++ } ++ _ = m.dstFile.Close() ++ m.dstFile = newDstFile ++ + // We have to apply mount propagation flags in a separate WithProcfd() call + // because the previous call invalidates the passed procfd -- the mount + // target needs to be re-opened. 
+- if err := utils.WithProcfd(rootfs, m.Destination, func(dstFd string) error { ++ if err := utils.WithProcfdFile(m.dstFile, func(dstFd string) error { + for _, pflag := range m.PropagationFlags { + if err := mountViaFds("", nil, m.Destination, dstFd, "", uintptr(pflag), ""); err != nil { + return err +@@ -1295,11 +1442,11 @@ func mountPropagate(m mountEntry, rootfs string, mountLabel string) error { + return nil + } + +-func setRecAttr(m *configs.Mount, rootfs string) error { ++func setRecAttr(m mountEntry) error { + if m.RecAttr == nil { + return nil + } +- return utils.WithProcfd(rootfs, m.Destination, func(procfd string) error { ++ return utils.WithProcfdFile(m.dstFile, func(procfd string) error { + return unix.MountSetattr(-1, procfd, unix.AT_RECURSIVE, m.RecAttr) + }) + } +diff --git a/libcontainer/standard_init_linux.go b/libcontainer/standard_init_linux.go +index 9f7fa45d..6a46eff7 100644 +--- a/libcontainer/standard_init_linux.go ++++ b/libcontainer/standard_init_linux.go +@@ -11,6 +11,8 @@ import ( + "github.com/sirupsen/logrus" + "golang.org/x/sys/unix" + ++ "github.com/opencontainers/runc/internal/pathrs" ++ "github.com/opencontainers/runc/internal/sys" + "github.com/opencontainers/runc/libcontainer/apparmor" + "github.com/opencontainers/runc/libcontainer/configs" + "github.com/opencontainers/runc/libcontainer/keys" +@@ -130,20 +132,17 @@ func (l *linuxStandardInit) Init() error { + return fmt.Errorf("unable to apply apparmor profile: %w", err) + } + +- for key, value := range l.config.Config.Sysctl { +- if err := writeSystemProperty(key, value); err != nil { +- return err +- } ++ if err := sys.WriteSysctls(l.config.Config.Sysctl); err != nil { ++ return err + } + for _, path := range l.config.Config.ReadonlyPaths { + if err := readonlyPath(path); err != nil { + return fmt.Errorf("can't make %q read-only: %w", path, err) + } + } +- for _, path := range l.config.Config.MaskPaths { +- if err := maskPath(path, l.config.Config.MountLabel); err != nil { +- 
return fmt.Errorf("can't mask path %s: %w", path, err) +- } ++ ++ if err := maskPaths(l.config.Config.MaskPaths, l.config.Config.MountLabel); err != nil { ++ return err + } + pdeath, err := system.GetParentDeathSignal() + if err != nil { +@@ -244,19 +243,17 @@ func (l *linuxStandardInit) Init() error { + return fmt.Errorf("close log pipe: %w", err) + } + +- fifoPath, closer := utils.ProcThreadSelfFd(l.fifoFile.Fd()) +- defer closer() +- + // Wait for the FIFO to be opened on the other side before exec-ing the + // user process. We open it through /proc/self/fd/$fd, because the fd that + // was given to us was an O_PATH fd to the fifo itself. Linux allows us to + // re-open an O_PATH fd through /proc. +- fd, err := unix.Open(fifoPath, unix.O_WRONLY|unix.O_CLOEXEC, 0) ++ fifoFile, err := pathrs.Reopen(l.fifoFile, unix.O_WRONLY|unix.O_CLOEXEC) + if err != nil { +- return &os.PathError{Op: "open exec fifo", Path: fifoPath, Err: err} ++ return fmt.Errorf("reopen exec fifo: %w", err) + } +- if _, err := unix.Write(fd, []byte("0")); err != nil { +- return &os.PathError{Op: "write exec fifo", Path: fifoPath, Err: err} ++ defer fifoFile.Close() ++ if _, err := fifoFile.Write([]byte("0")); err != nil { ++ return &os.PathError{Op: "write exec fifo", Path: fifoFile.Name(), Err: err} + } + + // Close the O_PATH fifofd fd before exec because the kernel resets +@@ -265,6 +262,7 @@ func (l *linuxStandardInit) Init() error { + // N.B. the core issue itself (passing dirfds to the host filesystem) has + // since been resolved. 
+ // https://github.com/torvalds/linux/blob/v4.9/fs/exec.c#L1290-L1318 ++ _ = fifoFile.Close() + _ = l.fifoFile.Close() + + s := l.config.SpecState +diff --git a/libcontainer/system/linux.go b/libcontainer/system/linux.go +index 7bbf92a3..da3dbf53 100644 +--- a/libcontainer/system/linux.go ++++ b/libcontainer/system/linux.go +@@ -214,3 +214,23 @@ func SetLinuxPersonality(personality int) error { + } + return nil + } ++ ++// GetPtyPeer is a wrapper for ioctl(TIOCGPTPEER). ++func GetPtyPeer(ptyFd uintptr, unsafePeerPath string, flags int) (*os.File, error) { ++ // Make sure O_NOCTTY is always set -- otherwise runc might accidentally ++ // gain it as a controlling terminal. O_CLOEXEC also needs to be set to ++ // make sure we don't leak the handle either. ++ flags |= unix.O_NOCTTY | unix.O_CLOEXEC ++ ++ // There is no nice wrapper for this kind of ioctl in unix. ++ peerFd, _, errno := unix.Syscall( ++ unix.SYS_IOCTL, ++ ptyFd, ++ uintptr(unix.TIOCGPTPEER), ++ uintptr(flags), ++ ) ++ if errno != 0 { ++ return nil, os.NewSyscallError("ioctl TIOCGPTPEER", errno) ++ } ++ return os.NewFile(peerFd, unsafePeerPath), nil ++} +diff --git a/libcontainer/system/proc.go b/libcontainer/system/proc.go +index 774443ec..34850dd8 100644 +--- a/libcontainer/system/proc.go ++++ b/libcontainer/system/proc.go +@@ -2,10 +2,12 @@ package system + + import ( + "fmt" ++ "io" + "os" +- "path/filepath" + "strconv" + "strings" ++ ++ "github.com/opencontainers/runc/internal/pathrs" + ) + + // State is the status of a process. +@@ -66,8 +68,16 @@ type Stat_t struct { + } + + // Stat returns a Stat_t instance for the specified process. 
+-func Stat(pid int) (stat Stat_t, err error) { +- bytes, err := os.ReadFile(filepath.Join("/proc", strconv.Itoa(pid), "stat")) ++func Stat(pid int) (Stat_t, error) { ++ var stat Stat_t ++ ++ statFile, err := pathrs.ProcPidOpen(pid, "stat", os.O_RDONLY) ++ if err != nil { ++ return stat, err ++ } ++ defer statFile.Close() ++ ++ bytes, err := io.ReadAll(statFile) + if err != nil { + return stat, err + } +diff --git a/libcontainer/utils/utils.go b/libcontainer/utils/utils.go +index db420ea6..3e008bd4 100644 +--- a/libcontainer/utils/utils.go ++++ b/libcontainer/utils/utils.go +@@ -65,11 +65,11 @@ func CleanPath(path string) string { + return filepath.Clean(path) + } + +-// stripRoot returns the passed path, stripping the root path if it was ++// StripRoot returns the passed path, stripping the root path if it was + // (lexicially) inside it. Note that both passed paths will always be treated + // as absolute, and the returned path will also always be absolute. In + // addition, the paths are cleaned before stripping the root. +-func stripRoot(root, path string) string { ++func StripRoot(root, path string) string { + // Make the paths clean and absolute. 
+ root, path = CleanPath("/"+root), CleanPath("/"+path) + switch { +diff --git a/libcontainer/utils/utils_test.go b/libcontainer/utils/utils_test.go +index 06c042f5..4b5fd833 100644 +--- a/libcontainer/utils/utils_test.go ++++ b/libcontainer/utils/utils_test.go +@@ -131,9 +131,9 @@ func TestStripRoot(t *testing.T) { + {"/foo/bar", "foo/bar/baz/beef", "/baz/beef"}, + {"foo/bar", "foo/bar/baz/beets", "/baz/beets"}, + } { +- got := stripRoot(test.root, test.path) ++ got := StripRoot(test.root, test.path) + if got != test.out { +- t.Errorf("stripRoot(%q, %q) -- got %q, expected %q", test.root, test.path, got, test.out) ++ t.Errorf("StripRoot(%q, %q) -- got %q, expected %q", test.root, test.path, got, test.out) + } + } + } +diff --git a/libcontainer/utils/utils_unix.go b/libcontainer/utils/utils_unix.go +index 8f179b6a..638878d7 100644 +--- a/libcontainer/utils/utils_unix.go ++++ b/libcontainer/utils/utils_unix.go +@@ -9,27 +9,15 @@ import ( + "path/filepath" + "runtime" + "strconv" +- "strings" + "sync" + _ "unsafe" // for go:linkname + + securejoin "github.com/cyphar/filepath-securejoin" ++ "github.com/opencontainers/runc/internal/pathrs" + "github.com/sirupsen/logrus" + "golang.org/x/sys/unix" + ) + +-// EnsureProcHandle returns whether or not the given file handle is on procfs. +-func EnsureProcHandle(fh *os.File) error { +- var buf unix.Statfs_t +- if err := unix.Fstatfs(int(fh.Fd()), &buf); err != nil { +- return fmt.Errorf("ensure %s is on procfs: %w", fh.Name(), err) +- } +- if buf.Type != unix.PROC_SUPER_MAGIC { +- return fmt.Errorf("%s is not on procfs", fh.Name()) +- } +- return nil +-} +- + var ( + haveCloseRangeCloexecBool bool + haveCloseRangeCloexecOnce sync.Once +@@ -59,19 +47,13 @@ type fdFunc func(fd int) + // fdRangeFrom calls the passed fdFunc for each file descriptor that is open in + // the current process. 
+ func fdRangeFrom(minFd int, fn fdFunc) error { +- procSelfFd, closer := ProcThreadSelf("fd") +- defer closer() +- +- fdDir, err := os.Open(procSelfFd) ++ fdDir, closer, err := pathrs.ProcThreadSelfOpen("fd/", unix.O_DIRECTORY|unix.O_CLOEXEC) + if err != nil { +- return err ++ return fmt.Errorf("get handle to /proc/thread-self/fd: %w", err) + } ++ defer closer() + defer fdDir.Close() + +- if err := EnsureProcHandle(fdDir); err != nil { +- return err +- } +- + fdList, err := fdDir.Readdirnames(-1) + if err != nil { + return err +@@ -164,8 +146,8 @@ func NewSockPair(name string) (parent, child *os.File, err error) { + // the passed closure (the file handle will be freed once the closure returns). + func WithProcfd(root, unsafePath string, fn func(procfd string) error) error { + // Remove the root then forcefully resolve inside the root. +- unsafePath = stripRoot(root, unsafePath) +- path, err := securejoin.SecureJoin(root, unsafePath) ++ unsafePath = StripRoot(root, unsafePath) ++ fullPath, err := securejoin.SecureJoin(root, unsafePath) + if err != nil { + return fmt.Errorf("resolving path inside rootfs failed: %w", err) + } +@@ -174,7 +156,7 @@ func WithProcfd(root, unsafePath string, fn func(procfd string) error) error { + defer closer() + + // Open the target path. +- fh, err := os.OpenFile(path, unix.O_PATH|unix.O_CLOEXEC, 0) ++ fh, err := os.OpenFile(fullPath, unix.O_PATH|unix.O_CLOEXEC, 0) + if err != nil { + return fmt.Errorf("open o_path procfd: %w", err) + } +@@ -184,13 +166,24 @@ func WithProcfd(root, unsafePath string, fn func(procfd string) error) error { + // Double-check the path is the one we expected. 
+ if realpath, err := os.Readlink(procfd); err != nil { + return fmt.Errorf("procfd verification failed: %w", err) +- } else if realpath != path { ++ } else if realpath != fullPath { + return fmt.Errorf("possibly malicious path detected -- refusing to operate on %s", realpath) + } + + return fn(procfd) + } + ++// WithProcfdFile is a very minimal wrapper around [ProcThreadSelfFd], intended ++// to make migrating from [WithProcfd] and [WithProcfdPath] usage easier. The ++// caller is responsible for making sure that the provided file handle is ++// actually safe to operate on. ++func WithProcfdFile(file *os.File, fn func(procfd string) error) error { ++ fdpath, closer := ProcThreadSelfFd(file.Fd()) ++ defer closer() ++ ++ return fn(fdpath) ++} ++ + type ProcThreadSelfCloser func() + + var ( +@@ -262,88 +255,6 @@ func ProcThreadSelfFd(fd uintptr) (string, ProcThreadSelfCloser) { + return ProcThreadSelf("fd/" + strconv.FormatUint(uint64(fd), 10)) + } + +-// IsLexicallyInRoot is shorthand for strings.HasPrefix(path+"/", root+"/"), +-// but properly handling the case where path or root are "/". +-// +-// NOTE: The return value only make sense if the path doesn't contain "..". +-func IsLexicallyInRoot(root, path string) bool { +- if root != "/" { +- root += "/" +- } +- if path != "/" { +- path += "/" +- } +- return strings.HasPrefix(path, root) +-} +- +-// MkdirAllInRootOpen attempts to make +-// +-// path, _ := securejoin.SecureJoin(root, unsafePath) +-// os.MkdirAll(path, mode) +-// os.Open(path) +-// +-// safer against attacks where components in the path are changed between +-// SecureJoin returning and MkdirAll (or Open) being called. In particular, we +-// try to detect any symlink components in the path while we are doing the +-// MkdirAll. 
+-// +-// NOTE: If unsafePath is a subpath of root, we assume that you have already +-// called SecureJoin and so we use the provided path verbatim without resolving +-// any symlinks (this is done in a way that avoids symlink-exchange races). +-// This means that the path also must not contain ".." elements, otherwise an +-// error will occur. +-// +-// This uses securejoin.MkdirAllHandle under the hood, but it has special +-// handling if unsafePath has already been scoped within the rootfs (this is +-// needed for a lot of runc callers and fixing this would require reworking a +-// lot of path logic). +-func MkdirAllInRootOpen(root, unsafePath string, mode os.FileMode) (_ *os.File, Err error) { +- // If the path is already "within" the root, get the path relative to the +- // root and use that as the unsafe path. This is necessary because a lot of +- // MkdirAllInRootOpen callers have already done SecureJoin, and refactoring +- // all of them to stop using these SecureJoin'd paths would require a fair +- // amount of work. +- // TODO(cyphar): Do the refactor to libpathrs once it's ready. +- if IsLexicallyInRoot(root, unsafePath) { +- subPath, err := filepath.Rel(root, unsafePath) +- if err != nil { +- return nil, err +- } +- unsafePath = subPath +- } +- +- // Check for any silly mode bits. +- if mode&^0o7777 != 0 { +- return nil, fmt.Errorf("tried to include non-mode bits in MkdirAll mode: 0o%.3o", mode) +- } +- // Linux (and thus os.MkdirAll) silently ignores the suid and sgid bits if +- // passed. While it would make sense to return an error in that case (since +- // the user has asked for a mode that won't be applied), for compatibility +- // reasons we have to ignore these bits. 
+- if ignoredBits := mode &^ 0o1777; ignoredBits != 0 { +- logrus.Warnf("MkdirAll called with no-op mode bits that are ignored by Linux: 0o%.3o", ignoredBits) +- mode &= 0o1777 +- } +- +- rootDir, err := os.OpenFile(root, unix.O_DIRECTORY|unix.O_CLOEXEC, 0) +- if err != nil { +- return nil, fmt.Errorf("open root handle: %w", err) +- } +- defer rootDir.Close() +- +- return securejoin.MkdirAllHandle(rootDir, unsafePath, mode) +-} +- +-// MkdirAllInRoot is a wrapper around MkdirAllInRootOpen which closes the +-// returned handle, for callers that don't need to use it. +-func MkdirAllInRoot(root, unsafePath string, mode os.FileMode) error { +- f, err := MkdirAllInRootOpen(root, unsafePath, mode) +- if err == nil { +- _ = f.Close() +- } +- return err +-} +- + // Openat is a Go-friendly openat(2) wrapper. + func Openat(dir *os.File, path string, flags int, mode uint32) (*os.File, error) { + dirFd := unix.AT_FDCWD +diff --git a/utils_linux.go b/utils_linux.go +index 013dbcf4..0657faf5 100644 +--- a/utils_linux.go ++++ b/utils_linux.go +@@ -15,6 +15,7 @@ import ( + "github.com/urfave/cli" + "golang.org/x/sys/unix" + ++ "github.com/opencontainers/runc/internal/pathrs" + "github.com/opencontainers/runc/libcontainer" + "github.com/opencontainers/runc/libcontainer/configs" + "github.com/opencontainers/runc/libcontainer/specconv" +@@ -234,10 +235,14 @@ func (r *runner) run(config *specs.Process) (int, error) { + process.ExtraFiles = append(process.ExtraFiles, r.listenFDs...) 
+ } + baseFd := 3 + len(process.ExtraFiles) +- procSelfFd, closer := utils.ProcThreadSelf("fd/") ++ procSelfFd, closer, err := pathrs.ProcThreadSelfOpen("fd/", unix.O_DIRECTORY|unix.O_CLOEXEC) ++ if err != nil { ++ return -1, err ++ } + defer closer() ++ defer procSelfFd.Close() + for i := baseFd; i < baseFd+r.preserveFDs; i++ { +- _, err = os.Stat(filepath.Join(procSelfFd, strconv.Itoa(i))) ++ err := unix.Faccessat(int(procSelfFd.Fd()), strconv.Itoa(i), unix.F_OK, 0) + if err != nil { + return -1, fmt.Errorf("unable to stat preserved-fd %d (of %d): %w", i-baseFd, r.preserveFDs, err) + } +diff --git a/vendor/github.com/containerd/console/console_other.go b/vendor/github.com/containerd/console/console_other.go +index 933dfadd..968c5771 100644 +--- a/vendor/github.com/containerd/console/console_other.go ++++ b/vendor/github.com/containerd/console/console_other.go +@@ -1,5 +1,5 @@ +-//go:build !darwin && !freebsd && !linux && !netbsd && !openbsd && !solaris && !windows && !zos +-// +build !darwin,!freebsd,!linux,!netbsd,!openbsd,!solaris,!windows,!zos ++//go:build !darwin && !freebsd && !linux && !netbsd && !openbsd && !windows && !zos ++// +build !darwin,!freebsd,!linux,!netbsd,!openbsd,!windows,!zos + + /* + Copyright The containerd Authors. +diff --git a/vendor/github.com/containerd/console/console_unix.go b/vendor/github.com/containerd/console/console_unix.go +index 161f5d12..aa4c6962 100644 +--- a/vendor/github.com/containerd/console/console_unix.go ++++ b/vendor/github.com/containerd/console/console_unix.go +@@ -31,6 +31,15 @@ func NewPty() (Console, string, error) { + if err != nil { + return nil, "", err + } ++ return NewPtyFromFile(f) ++} ++ ++// NewPtyFromFile creates a new pty pair, just like [NewPty] except that the ++// provided [os.File] is used as the master rather than automatically creating ++// a new master from /dev/ptmx. 
The ownership of [os.File] is passed to the ++// returned [Console], so the caller must be careful to not call Close on the ++// underlying file. ++func NewPtyFromFile(f File) (Console, string, error) { + slave, err := ptsname(f) + if err != nil { + return nil, "", err +diff --git a/vendor/github.com/containerd/console/tc_darwin.go b/vendor/github.com/containerd/console/tc_darwin.go +index 78715458..77c695a4 100644 +--- a/vendor/github.com/containerd/console/tc_darwin.go ++++ b/vendor/github.com/containerd/console/tc_darwin.go +@@ -18,7 +18,6 @@ package console + + import ( + "fmt" +- "os" + + "golang.org/x/sys/unix" + ) +@@ -30,12 +29,12 @@ const ( + + // unlockpt unlocks the slave pseudoterminal device corresponding to the master pseudoterminal referred to by f. + // unlockpt should be called before opening the slave side of a pty. +-func unlockpt(f *os.File) error { ++func unlockpt(f File) error { + return unix.IoctlSetPointerInt(int(f.Fd()), unix.TIOCPTYUNLK, 0) + } + + // ptsname retrieves the name of the first available pts for the given master. +-func ptsname(f *os.File) (string, error) { ++func ptsname(f File) (string, error) { + n, err := unix.IoctlGetInt(int(f.Fd()), unix.TIOCPTYGNAME) + if err != nil { + return "", err +diff --git a/vendor/github.com/containerd/console/tc_freebsd_cgo.go b/vendor/github.com/containerd/console/tc_freebsd_cgo.go +index 33282579..627f7d55 100644 +--- a/vendor/github.com/containerd/console/tc_freebsd_cgo.go ++++ b/vendor/github.com/containerd/console/tc_freebsd_cgo.go +@@ -21,7 +21,6 @@ package console + + import ( + "fmt" +- "os" + + "golang.org/x/sys/unix" + ) +@@ -39,7 +38,7 @@ const ( + + // unlockpt unlocks the slave pseudoterminal device corresponding to the master pseudoterminal referred to by f. + // unlockpt should be called before opening the slave side of a pty. 
+-func unlockpt(f *os.File) error { ++func unlockpt(f File) error { + fd := C.int(f.Fd()) + if _, err := C.unlockpt(fd); err != nil { + C.close(fd) +@@ -49,7 +48,7 @@ func unlockpt(f *os.File) error { + } + + // ptsname retrieves the name of the first available pts for the given master. +-func ptsname(f *os.File) (string, error) { ++func ptsname(f File) (string, error) { + n, err := unix.IoctlGetInt(int(f.Fd()), unix.TIOCGPTN) + if err != nil { + return "", err +diff --git a/vendor/github.com/containerd/console/tc_freebsd_nocgo.go b/vendor/github.com/containerd/console/tc_freebsd_nocgo.go +index 18a9b9cb..434ba46e 100644 +--- a/vendor/github.com/containerd/console/tc_freebsd_nocgo.go ++++ b/vendor/github.com/containerd/console/tc_freebsd_nocgo.go +@@ -21,7 +21,6 @@ package console + + import ( + "fmt" +- "os" + + "golang.org/x/sys/unix" + ) +@@ -42,12 +41,12 @@ const ( + + // unlockpt unlocks the slave pseudoterminal device corresponding to the master pseudoterminal referred to by f. + // unlockpt should be called before opening the slave side of a pty. +-func unlockpt(f *os.File) error { ++func unlockpt(f File) error { + panic("unlockpt() support requires cgo.") + } + + // ptsname retrieves the name of the first available pts for the given master. +-func ptsname(f *os.File) (string, error) { ++func ptsname(f File) (string, error) { + n, err := unix.IoctlGetInt(int(f.Fd()), unix.TIOCGPTN) + if err != nil { + return "", err +diff --git a/vendor/github.com/containerd/console/tc_linux.go b/vendor/github.com/containerd/console/tc_linux.go +index 7d552ea4..e98dc022 100644 +--- a/vendor/github.com/containerd/console/tc_linux.go ++++ b/vendor/github.com/containerd/console/tc_linux.go +@@ -18,7 +18,6 @@ package console + + import ( + "fmt" +- "os" + "unsafe" + + "golang.org/x/sys/unix" +@@ -31,7 +30,7 @@ const ( + + // unlockpt unlocks the slave pseudoterminal device corresponding to the master pseudoterminal referred to by f. 
+ // unlockpt should be called before opening the slave side of a pty. +-func unlockpt(f *os.File) error { ++func unlockpt(f File) error { + var u int32 + // XXX do not use unix.IoctlSetPointerInt here, see commit dbd69c59b81. + if _, _, err := unix.Syscall(unix.SYS_IOCTL, f.Fd(), unix.TIOCSPTLCK, uintptr(unsafe.Pointer(&u))); err != 0 { +@@ -41,7 +40,7 @@ func unlockpt(f *os.File) error { + } + + // ptsname retrieves the name of the first available pts for the given master. +-func ptsname(f *os.File) (string, error) { ++func ptsname(f File) (string, error) { + var u uint32 + // XXX do not use unix.IoctlGetInt here, see commit dbd69c59b81. + if _, _, err := unix.Syscall(unix.SYS_IOCTL, f.Fd(), unix.TIOCGPTN, uintptr(unsafe.Pointer(&u))); err != 0 { +diff --git a/vendor/github.com/containerd/console/tc_netbsd.go b/vendor/github.com/containerd/console/tc_netbsd.go +index 71227aef..73cf4397 100644 +--- a/vendor/github.com/containerd/console/tc_netbsd.go ++++ b/vendor/github.com/containerd/console/tc_netbsd.go +@@ -18,7 +18,6 @@ package console + + import ( + "bytes" +- "os" + + "golang.org/x/sys/unix" + ) +@@ -31,12 +30,12 @@ const ( + // unlockpt unlocks the slave pseudoterminal device corresponding to the master pseudoterminal referred to by f. + // unlockpt should be called before opening the slave side of a pty. + // This does not exist on NetBSD, it does not allocate controlling terminals on open +-func unlockpt(f *os.File) error { ++func unlockpt(f File) error { + return nil + } + + // ptsname retrieves the name of the first available pts for the given master. 
+-func ptsname(f *os.File) (string, error) { ++func ptsname(f File) (string, error) { + ptm, err := unix.IoctlGetPtmget(int(f.Fd()), unix.TIOCPTSNAME) + if err != nil { + return "", err +diff --git a/vendor/github.com/containerd/console/tc_openbsd_cgo.go b/vendor/github.com/containerd/console/tc_openbsd_cgo.go +index 0e76f6cc..46f4250c 100644 +--- a/vendor/github.com/containerd/console/tc_openbsd_cgo.go ++++ b/vendor/github.com/containerd/console/tc_openbsd_cgo.go +@@ -20,8 +20,6 @@ + package console + + import ( +- "os" +- + "golang.org/x/sys/unix" + ) + +@@ -34,7 +32,7 @@ const ( + ) + + // ptsname retrieves the name of the first available pts for the given master. +-func ptsname(f *os.File) (string, error) { ++func ptsname(f File) (string, error) { + ptspath, err := C.ptsname(C.int(f.Fd())) + if err != nil { + return "", err +@@ -44,7 +42,7 @@ func ptsname(f *os.File) (string, error) { + + // unlockpt unlocks the slave pseudoterminal device corresponding to the master pseudoterminal referred to by f. + // unlockpt should be called before opening the slave side of a pty. 
+-func unlockpt(f *os.File) error { ++func unlockpt(f File) error { + if _, err := C.grantpt(C.int(f.Fd())); err != nil { + return err + } +diff --git a/vendor/github.com/containerd/console/tc_openbsd_nocgo.go b/vendor/github.com/containerd/console/tc_openbsd_nocgo.go +index dca92418..a8f9f6c2 100644 +--- a/vendor/github.com/containerd/console/tc_openbsd_nocgo.go ++++ b/vendor/github.com/containerd/console/tc_openbsd_nocgo.go +@@ -29,8 +29,6 @@ + package console + + import ( +- "os" +- + "golang.org/x/sys/unix" + ) + +@@ -39,10 +37,10 @@ const ( + cmdTcSet = unix.TIOCSETA + ) + +-func ptsname(f *os.File) (string, error) { ++func ptsname(f File) (string, error) { + panic("ptsname() support requires cgo.") + } + +-func unlockpt(f *os.File) error { ++func unlockpt(f File) error { + panic("unlockpt() support requires cgo.") + } +diff --git a/vendor/github.com/containerd/console/tc_zos.go b/vendor/github.com/containerd/console/tc_zos.go +index fc90ba5f..23b0bd28 100644 +--- a/vendor/github.com/containerd/console/tc_zos.go ++++ b/vendor/github.com/containerd/console/tc_zos.go +@@ -17,7 +17,6 @@ + package console + + import ( +- "os" + "strings" + + "golang.org/x/sys/unix" +@@ -29,11 +28,11 @@ const ( + ) + + // unlockpt is a no-op on zos. +-func unlockpt(_ *os.File) error { ++func unlockpt(File) error { + return nil + } + + // ptsname retrieves the name of the first available pts for the given master. 
+-func ptsname(f *os.File) (string, error) { ++func ptsname(f File) (string, error) { + return "/dev/ttyp" + strings.TrimPrefix(f.Name(), "/dev/ptyp"), nil + } +diff --git a/vendor/github.com/cyphar/filepath-securejoin/.golangci.yml b/vendor/github.com/cyphar/filepath-securejoin/.golangci.yml +new file mode 100644 +index 00000000..e965034e +--- /dev/null ++++ b/vendor/github.com/cyphar/filepath-securejoin/.golangci.yml +@@ -0,0 +1,56 @@ ++# SPDX-License-Identifier: MPL-2.0 ++ ++# Copyright (C) 2025 Aleksa Sarai ++# Copyright (C) 2025 SUSE LLC ++# ++# This Source Code Form is subject to the terms of the Mozilla Public ++# License, v. 2.0. If a copy of the MPL was not distributed with this ++# file, You can obtain one at https://mozilla.org/MPL/2.0/. ++ ++version: "2" ++ ++linters: ++ enable: ++ - asasalint ++ - asciicheck ++ - containedctx ++ - contextcheck ++ - errcheck ++ - errorlint ++ - exhaustive ++ - forcetypeassert ++ - godot ++ - goprintffuncname ++ - govet ++ - importas ++ - ineffassign ++ - makezero ++ - misspell ++ - musttag ++ - nilerr ++ - nilnesserr ++ - nilnil ++ - noctx ++ - prealloc ++ - revive ++ - staticcheck ++ - testifylint ++ - unconvert ++ - unparam ++ - unused ++ - usetesting ++ settings: ++ govet: ++ enable: ++ - nilness ++ testifylint: ++ enable-all: true ++ ++formatters: ++ enable: ++ - gofumpt ++ - goimports ++ settings: ++ goimports: ++ local-prefixes: ++ - github.com/cyphar/filepath-securejoin +diff --git a/vendor/github.com/cyphar/filepath-securejoin/CHANGELOG.md b/vendor/github.com/cyphar/filepath-securejoin/CHANGELOG.md +index ca0e3c62..6862467c 100644 +--- a/vendor/github.com/cyphar/filepath-securejoin/CHANGELOG.md ++++ b/vendor/github.com/cyphar/filepath-securejoin/CHANGELOG.md +@@ -6,6 +6,122 @@ and this project adheres to [Semantic Versioning](http://semver.org/). + + ## [Unreleased] ## + ++## [0.5.0] - 2025-09-26 ## ++ ++> Let the past die. Kill it if you have to. 
++ ++> **NOTE**: With this release, some parts of ++> `github.com/cyphar/filepath-securejoin` are now licensed under the Mozilla ++> Public License (version 2). Please see [COPYING.md][] as well as the ++> license header in each file for more details. ++ ++[COPYING.md]: ./COPYING.md ++ ++### Breaking ### ++- The new API introduced in the [0.3.0][] release has been moved to a new ++ subpackage called `pathrs-lite`. This was primarily done to better indicate ++ the split between the new and old APIs, as well as indicate to users the ++ purpose of this subpackage (it is a less complete version of [libpathrs][]). ++ ++ We have added some wrappers to the top-level package to ease the transition, ++ but those are deprecated and will be removed in the next minor release of ++ filepath-securejoin. Users should update their import paths. ++ ++ This new subpackage has also been relicensed under the Mozilla Public License ++ (version 2), please see [COPYING.md][] for more details. ++ ++### Added ### ++- Most of the key bits of the safe `procfs` API have now been exported and are ++ available in `github.com/cyphar/filepath-securejoin/pathrs-lite/procfs`. At ++ the moment this primarily consists of a new `procfs.Handle` API: ++ ++ * `OpenProcRoot` returns a new handle to `/proc`, endeavouring to make it ++ safe if possible (`subset=pid` to protect against mistaken write attacks ++ and leaks, as well as using `fsopen(2)` to avoid racing mount attacks). ++ ++ `OpenUnsafeProcRoot` returns a handle without attempting to create one ++ with `subset=pid`, which makes it more dangerous to leak. Most users ++ should use `OpenProcRoot` (even if you need to use `ProcRoot` as the base ++ of an operation, as filepath-securejoin will internally open a handle when ++ necessary). ++ ++ * The `(*procfs.Handle).Open*` family of methods lets you get a safe ++ `O_PATH` handle to subpaths within `/proc` for certain subpaths.
++ ++ For `OpenThreadSelf`, the returned `ProcThreadSelfCloser` needs to be ++ called after you completely finish using the handle (this is necessary ++ because Go is multi-threaded and `ProcThreadSelf` references ++ `/proc/thread-self` which may disappear if we do not ++ `runtime.LockOSThread` -- `ProcThreadSelfCloser` is currently equivalent ++ to `runtime.UnlockOSThread`). ++ ++ Note that you cannot open any `procfs` symlinks (most notably magic-links) ++ using this API. At the moment, filepath-securejoin does not support this ++ feature (but [libpathrs][] does). ++ ++ * `ProcSelfFdReadlink` lets you get the in-kernel path representation of a ++ file descriptor (think `readlink("/proc/self/fd/...")`), except that we ++ verify that there aren't any tricky overmounts that could fool the ++ process. ++ ++ Please be aware that the returned string is simply a snapshot at that ++ particular moment, and an attacker could move the file being pointed to. ++ In addition, complex namespace configurations could result in non-sensical ++ or confusing paths to be returned. The value received from this function ++ should only be used as secondary verification of some security property, ++ not as proof that a particular handle has a particular path. ++ ++ The procfs handle used internally by the API is the same as the rest of ++ `filepath-securejoin` (for privileged programs this is usually a private ++ in-process `procfs` instance created with `fsopen(2)`). ++ ++ As before, this is intended as a stop-gap before users migrate to ++ [libpathrs][], which provides a far more extensive safe `procfs` API and is ++ generally more robust. 
++ ++- Previously, the hardened procfs implementation (used internally within ++ `Reopen` and `Open(at)InRoot`) only protected against overmount attacks on ++ systems with `openat2(2)` (Linux 5.6) or systems with `fsopen(2)` or ++ `open_tree(2)` (Linux 5.2) and programs with privileges to use them (with ++ some caveats about locked mounts that probably affect very few users). For ++ other users, an attacker with the ability to create malicious mounts (on most ++ systems, a sysadmin) could trick you into operating on files you didn't ++ expect. This attack only really makes sense in the context of container ++ runtime implementations. ++ ++ This was considered a reasonable trade-off, as the long-term intention was to ++ get all users to just switch to [libpathrs][] if they wanted to use the safe ++ `procfs` API (which had more extensive protections, and is what these new ++ protections in `filepath-securejoin` are based on). However, as the API ++ is now being exported it seems unwise to advertise the API as "safe" if we do ++ not protect against known attacks. ++ ++ The procfs API is now more protected against attackers on systems lacking the ++ aforementioned protections. However, the most comprehensive of these ++ protections effectively rely on [`statx(STATX_MNT_ID)`][statx.2] (Linux 5.8). ++ On older kernel versions, there is no effective protection (there is some ++ minimal protection against non-`procfs` filesystem components but a ++ sufficiently clever attacker can work around those). In addition, ++ `STATX_MNT_ID` is vulnerable to mount ID reuse attacks by sufficiently ++ motivated and privileged attackers -- this problem is mitigated with ++ `STATX_MNT_ID_UNIQUE` (Linux 6.8) but that raises the minimum kernel version ++ for more protection. 
++ ++ The fact that these protections are quite limited despite needing a fair bit ++ of extra code to handle was one of the primary reasons we did not initially ++ implement this in `filepath-securejoin` ([libpathrs][] supports all of this, ++ of course). ++ ++### Fixed ### ++- RHEL 8 kernels have backports of `fsopen(2)` but in some testing we've found ++ that it has very bad (and very difficult to debug) performance issues, and so ++ we will explicitly refuse to use `fsopen(2)` if the running kernel version is ++ pre-5.2 and will instead fallback to `open("/proc")`. ++ ++[CVE-2024-21626]: https://github.com/opencontainers/runc/security/advisories/GHSA-xr7r-f8xq-vfvv ++[libpathrs]: https://github.com/cyphar/libpathrs ++[statx.2]: https://www.man7.org/linux/man-pages/man2/statx.2.html ++ + ## [0.4.1] - 2025-01-28 ## + + ### Fixed ### +@@ -173,7 +289,7 @@ and this project adheres to [Semantic Versioning](http://semver.org/). + safe to start migrating to as we have extensive tests ensuring they behave + correctly and are safe against various races and other attacks. + +-[libpathrs]: https://github.com/openSUSE/libpathrs ++[libpathrs]: https://github.com/cyphar/libpathrs + [open.2]: https://www.man7.org/linux/man-pages/man2/open.2.html + + ## [0.2.5] - 2024-05-03 ## +@@ -238,7 +354,8 @@ This is our first release of `github.com/cyphar/filepath-securejoin`, + containing a full implementation with a coverage of 93.5% (the only missing + cases are the error cases, which are hard to mocktest at the moment). 
+ +-[Unreleased]: https://github.com/cyphar/filepath-securejoin/compare/v0.4.1...HEAD ++[Unreleased]: https://github.com/cyphar/filepath-securejoin/compare/v0.5.0...HEAD ++[0.5.0]: https://github.com/cyphar/filepath-securejoin/compare/v0.4.1...v0.5.0 + [0.4.1]: https://github.com/cyphar/filepath-securejoin/compare/v0.4.0...v0.4.1 + [0.4.0]: https://github.com/cyphar/filepath-securejoin/compare/v0.3.6...v0.4.0 + [0.3.6]: https://github.com/cyphar/filepath-securejoin/compare/v0.3.5...v0.3.6 +diff --git a/vendor/github.com/cyphar/filepath-securejoin/COPYING.md b/vendor/github.com/cyphar/filepath-securejoin/COPYING.md +new file mode 100644 +index 00000000..520e822b +--- /dev/null ++++ b/vendor/github.com/cyphar/filepath-securejoin/COPYING.md +@@ -0,0 +1,447 @@ ++## COPYING ## ++ ++`SPDX-License-Identifier: BSD-3-Clause AND MPL-2.0` ++ ++This project is made up of code licensed under different licenses. Which code ++you use will have an impact on whether only one or both licenses apply to your ++usage of this library. ++ ++Note that **each file** in this project individually has a code comment at the ++start describing the license of that particular file -- this is the most ++accurate license information of this project; in case there is any conflict ++between this document and the comment at the start of a file, the comment shall ++take precedence. The only purpose of this document is to work around [a known ++technical limitation of pkg.go.dev's license checking tool when dealing with ++non-trivial project licenses][go75067]. 
++ ++[go75067]: https://go.dev/issue/75067 ++ ++### `BSD-3-Clause` ### ++ ++At time of writing, the following files and directories are licensed under the ++BSD-3-Clause license: ++ ++ * `doc.go` ++ * `join*.go` ++ * `vfs.go` ++ * `internal/consts/*.go` ++ * `pathrs-lite/internal/gocompat/*.go` ++ * `pathrs-lite/internal/kernelversion/*.go` ++ ++The text of the BSD-3-Clause license used by this project is the following (the ++text is also available from the [`LICENSE.BSD`](./LICENSE.BSD) file): ++ ++``` ++Copyright (C) 2014-2015 Docker Inc & Go Authors. All rights reserved. ++Copyright (C) 2017-2024 SUSE LLC. All rights reserved. ++ ++Redistribution and use in source and binary forms, with or without ++modification, are permitted provided that the following conditions are ++met: ++ ++ * Redistributions of source code must retain the above copyright ++notice, this list of conditions and the following disclaimer. ++ * Redistributions in binary form must reproduce the above ++copyright notice, this list of conditions and the following disclaimer ++in the documentation and/or other materials provided with the ++distribution. ++ * Neither the name of Google Inc. nor the names of its ++contributors may be used to endorse or promote products derived from ++this software without specific prior written permission. ++ ++THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS ++"AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT ++LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR ++A PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT ++OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, ++SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT ++LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, ++DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY ++THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT ++(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE ++OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. ++``` ++ ++### `MPL-2.0` ### ++ ++All other files (unless otherwise marked) are licensed under the Mozilla Public ++License (version 2.0). ++ ++The text of the Mozilla Public License (version 2.0) is the following (the text ++is also available from the [`LICENSE.MPL-2.0`](./LICENSE.MPL-2.0) file): ++ ++``` ++Mozilla Public License Version 2.0 ++================================== ++ ++1. Definitions ++-------------- ++ ++1.1. "Contributor" ++ means each individual or legal entity that creates, contributes to ++ the creation of, or owns Covered Software. ++ ++1.2. "Contributor Version" ++ means the combination of the Contributions of others (if any) used ++ by a Contributor and that particular Contributor's Contribution. ++ ++1.3. "Contribution" ++ means Covered Software of a particular Contributor. ++ ++1.4. "Covered Software" ++ means Source Code Form to which the initial Contributor has attached ++ the notice in Exhibit A, the Executable Form of such Source Code ++ Form, and Modifications of such Source Code Form, in each case ++ including portions thereof. ++ ++1.5. "Incompatible With Secondary Licenses" ++ means ++ ++ (a) that the initial Contributor has attached the notice described ++ in Exhibit B to the Covered Software; or ++ ++ (b) that the Covered Software was made available under the terms of ++ version 1.1 or earlier of the License, but not also under the ++ terms of a Secondary License. ++ ++1.6. 
"Executable Form" ++ means any form of the work other than Source Code Form. ++ ++1.7. "Larger Work" ++ means a work that combines Covered Software with other material, in ++ a separate file or files, that is not Covered Software. ++ ++1.8. "License" ++ means this document. ++ ++1.9. "Licensable" ++ means having the right to grant, to the maximum extent possible, ++ whether at the time of the initial grant or subsequently, any and ++ all of the rights conveyed by this License. ++ ++1.10. "Modifications" ++ means any of the following: ++ ++ (a) any file in Source Code Form that results from an addition to, ++ deletion from, or modification of the contents of Covered ++ Software; or ++ ++ (b) any new file in Source Code Form that contains any Covered ++ Software. ++ ++1.11. "Patent Claims" of a Contributor ++ means any patent claim(s), including without limitation, method, ++ process, and apparatus claims, in any patent Licensable by such ++ Contributor that would be infringed, but for the grant of the ++ License, by the making, using, selling, offering for sale, having ++ made, import, or transfer of either its Contributions or its ++ Contributor Version. ++ ++1.12. "Secondary License" ++ means either the GNU General Public License, Version 2.0, the GNU ++ Lesser General Public License, Version 2.1, the GNU Affero General ++ Public License, Version 3.0, or any later versions of those ++ licenses. ++ ++1.13. "Source Code Form" ++ means the form of the work preferred for making modifications. ++ ++1.14. "You" (or "Your") ++ means an individual or a legal entity exercising rights under this ++ License. For legal entities, "You" includes any entity that ++ controls, is controlled by, or is under common control with You. 
For ++ purposes of this definition, "control" means (a) the power, direct ++ or indirect, to cause the direction or management of such entity, ++ whether by contract or otherwise, or (b) ownership of more than ++ fifty percent (50%) of the outstanding shares or beneficial ++ ownership of such entity. ++ ++2. License Grants and Conditions ++-------------------------------- ++ ++2.1. Grants ++ ++Each Contributor hereby grants You a world-wide, royalty-free, ++non-exclusive license: ++ ++(a) under intellectual property rights (other than patent or trademark) ++ Licensable by such Contributor to use, reproduce, make available, ++ modify, display, perform, distribute, and otherwise exploit its ++ Contributions, either on an unmodified basis, with Modifications, or ++ as part of a Larger Work; and ++ ++(b) under Patent Claims of such Contributor to make, use, sell, offer ++ for sale, have made, import, and otherwise transfer either its ++ Contributions or its Contributor Version. ++ ++2.2. Effective Date ++ ++The licenses granted in Section 2.1 with respect to any Contribution ++become effective for each Contribution on the date the Contributor first ++distributes such Contribution. ++ ++2.3. Limitations on Grant Scope ++ ++The licenses granted in this Section 2 are the only rights granted under ++this License. No additional rights or licenses will be implied from the ++distribution or licensing of Covered Software under this License. ++Notwithstanding Section 2.1(b) above, no patent license is granted by a ++Contributor: ++ ++(a) for any code that a Contributor has removed from Covered Software; ++ or ++ ++(b) for infringements caused by: (i) Your and any other third party's ++ modifications of Covered Software, or (ii) the combination of its ++ Contributions with other software (except as part of its Contributor ++ Version); or ++ ++(c) under Patent Claims infringed by Covered Software in the absence of ++ its Contributions. 
++ ++This License does not grant any rights in the trademarks, service marks, ++or logos of any Contributor (except as may be necessary to comply with ++the notice requirements in Section 3.4). ++ ++2.4. Subsequent Licenses ++ ++No Contributor makes additional grants as a result of Your choice to ++distribute the Covered Software under a subsequent version of this ++License (see Section 10.2) or under the terms of a Secondary License (if ++permitted under the terms of Section 3.3). ++ ++2.5. Representation ++ ++Each Contributor represents that the Contributor believes its ++Contributions are its original creation(s) or it has sufficient rights ++to grant the rights to its Contributions conveyed by this License. ++ ++2.6. Fair Use ++ ++This License is not intended to limit any rights You have under ++applicable copyright doctrines of fair use, fair dealing, or other ++equivalents. ++ ++2.7. Conditions ++ ++Sections 3.1, 3.2, 3.3, and 3.4 are conditions of the licenses granted ++in Section 2.1. ++ ++3. Responsibilities ++------------------- ++ ++3.1. Distribution of Source Form ++ ++All distribution of Covered Software in Source Code Form, including any ++Modifications that You create or to which You contribute, must be under ++the terms of this License. You must inform recipients that the Source ++Code Form of the Covered Software is governed by the terms of this ++License, and how they can obtain a copy of this License. You may not ++attempt to alter or restrict the recipients' rights in the Source Code ++Form. ++ ++3.2. 
Distribution of Executable Form ++ ++If You distribute Covered Software in Executable Form then: ++ ++(a) such Covered Software must also be made available in Source Code ++ Form, as described in Section 3.1, and You must inform recipients of ++ the Executable Form how they can obtain a copy of such Source Code ++ Form by reasonable means in a timely manner, at a charge no more ++ than the cost of distribution to the recipient; and ++ ++(b) You may distribute such Executable Form under the terms of this ++ License, or sublicense it under different terms, provided that the ++ license for the Executable Form does not attempt to limit or alter ++ the recipients' rights in the Source Code Form under this License. ++ ++3.3. Distribution of a Larger Work ++ ++You may create and distribute a Larger Work under terms of Your choice, ++provided that You also comply with the requirements of this License for ++the Covered Software. If the Larger Work is a combination of Covered ++Software with a work governed by one or more Secondary Licenses, and the ++Covered Software is not Incompatible With Secondary Licenses, this ++License permits You to additionally distribute such Covered Software ++under the terms of such Secondary License(s), so that the recipient of ++the Larger Work may, at their option, further distribute the Covered ++Software under the terms of either this License or such Secondary ++License(s). ++ ++3.4. Notices ++ ++You may not remove or alter the substance of any license notices ++(including copyright notices, patent notices, disclaimers of warranty, ++or limitations of liability) contained within the Source Code Form of ++the Covered Software, except that You may alter any license notices to ++the extent required to remedy known factual inaccuracies. ++ ++3.5. Application of Additional Terms ++ ++You may choose to offer, and to charge a fee for, warranty, support, ++indemnity or liability obligations to one or more recipients of Covered ++Software. 
However, You may do so only on Your own behalf, and not on ++behalf of any Contributor. You must make it absolutely clear that any ++such warranty, support, indemnity, or liability obligation is offered by ++You alone, and You hereby agree to indemnify every Contributor for any ++liability incurred by such Contributor as a result of warranty, support, ++indemnity or liability terms You offer. You may include additional ++disclaimers of warranty and limitations of liability specific to any ++jurisdiction. ++ ++4. Inability to Comply Due to Statute or Regulation ++--------------------------------------------------- ++ ++If it is impossible for You to comply with any of the terms of this ++License with respect to some or all of the Covered Software due to ++statute, judicial order, or regulation then You must: (a) comply with ++the terms of this License to the maximum extent possible; and (b) ++describe the limitations and the code they affect. Such description must ++be placed in a text file included with all distributions of the Covered ++Software under this License. Except to the extent prohibited by statute ++or regulation, such description must be sufficiently detailed for a ++recipient of ordinary skill to be able to understand it. ++ ++5. Termination ++-------------- ++ ++5.1. The rights granted under this License will terminate automatically ++if You fail to comply with any of its terms. However, if You become ++compliant, then the rights granted under this License from a particular ++Contributor are reinstated (a) provisionally, unless and until such ++Contributor explicitly and finally terminates Your grants, and (b) on an ++ongoing basis, if such Contributor fails to notify You of the ++non-compliance by some reasonable means prior to 60 days after You have ++come back into compliance. 
Moreover, Your grants from a particular ++Contributor are reinstated on an ongoing basis if such Contributor ++notifies You of the non-compliance by some reasonable means, this is the ++first time You have received notice of non-compliance with this License ++from such Contributor, and You become compliant prior to 30 days after ++Your receipt of the notice. ++ ++5.2. If You initiate litigation against any entity by asserting a patent ++infringement claim (excluding declaratory judgment actions, ++counter-claims, and cross-claims) alleging that a Contributor Version ++directly or indirectly infringes any patent, then the rights granted to ++You by any and all Contributors for the Covered Software under Section ++2.1 of this License shall terminate. ++ ++5.3. In the event of termination under Sections 5.1 or 5.2 above, all ++end user license agreements (excluding distributors and resellers) which ++have been validly granted by You or Your distributors under this License ++prior to termination shall survive termination. ++ ++************************************************************************ ++* * ++* 6. Disclaimer of Warranty * ++* ------------------------- * ++* * ++* Covered Software is provided under this License on an "as is" * ++* basis, without warranty of any kind, either expressed, implied, or * ++* statutory, including, without limitation, warranties that the * ++* Covered Software is free of defects, merchantable, fit for a * ++* particular purpose or non-infringing. The entire risk as to the * ++* quality and performance of the Covered Software is with You. * ++* Should any Covered Software prove defective in any respect, You * ++* (not any Contributor) assume the cost of any necessary servicing, * ++* repair, or correction. This disclaimer of warranty constitutes an * ++* essential part of this License. No use of any Covered Software is * ++* authorized under this License except under this disclaimer. 
* ++* * ++************************************************************************ ++ ++************************************************************************ ++* * ++* 7. Limitation of Liability * ++* -------------------------- * ++* * ++* Under no circumstances and under no legal theory, whether tort * ++* (including negligence), contract, or otherwise, shall any * ++* Contributor, or anyone who distributes Covered Software as * ++* permitted above, be liable to You for any direct, indirect, * ++* special, incidental, or consequential damages of any character * ++* including, without limitation, damages for lost profits, loss of * ++* goodwill, work stoppage, computer failure or malfunction, or any * ++* and all other commercial damages or losses, even if such party * ++* shall have been informed of the possibility of such damages. This * ++* limitation of liability shall not apply to liability for death or * ++* personal injury resulting from such party's negligence to the * ++* extent applicable law prohibits such limitation. Some * ++* jurisdictions do not allow the exclusion or limitation of * ++* incidental or consequential damages, so this exclusion and * ++* limitation may not apply to You. * ++* * ++************************************************************************ ++ ++8. Litigation ++------------- ++ ++Any litigation relating to this License may be brought only in the ++courts of a jurisdiction where the defendant maintains its principal ++place of business and such litigation shall be governed by laws of that ++jurisdiction, without reference to its conflict-of-law provisions. ++Nothing in this Section shall prevent a party's ability to bring ++cross-claims or counter-claims. ++ ++9. Miscellaneous ++---------------- ++ ++This License represents the complete agreement concerning the subject ++matter hereof. 
If any provision of this License is held to be ++unenforceable, such provision shall be reformed only to the extent ++necessary to make it enforceable. Any law or regulation which provides ++that the language of a contract shall be construed against the drafter ++shall not be used to construe this License against a Contributor. ++ ++10. Versions of the License ++--------------------------- ++ ++10.1. New Versions ++ ++Mozilla Foundation is the license steward. Except as provided in Section ++10.3, no one other than the license steward has the right to modify or ++publish new versions of this License. Each version will be given a ++distinguishing version number. ++ ++10.2. Effect of New Versions ++ ++You may distribute the Covered Software under the terms of the version ++of the License under which You originally received the Covered Software, ++or under the terms of any subsequent version published by the license ++steward. ++ ++10.3. Modified Versions ++ ++If you create software not governed by this License, and you want to ++create a new license for such software, you may create and use a ++modified version of this License if you rename the license and remove ++any references to the name of the license steward (except to note that ++such modified license differs from this License). ++ ++10.4. Distributing Source Code Form that is Incompatible With Secondary ++Licenses ++ ++If You choose to distribute Source Code Form that is Incompatible With ++Secondary Licenses under the terms of this version of the License, the ++notice described in Exhibit B of this License must be attached. ++ ++Exhibit A - Source Code Form License Notice ++------------------------------------------- ++ ++ This Source Code Form is subject to the terms of the Mozilla Public ++ License, v. 2.0. If a copy of the MPL was not distributed with this ++ file, You can obtain one at https://mozilla.org/MPL/2.0/. 
++ ++If it is not possible or desirable to put the notice in a particular ++file, then You may include the notice in a location (such as a LICENSE ++file in a relevant directory) where a recipient would be likely to look ++for such a notice. ++ ++You may add additional accurate notices of copyright ownership. ++ ++Exhibit B - "Incompatible With Secondary Licenses" Notice ++--------------------------------------------------------- ++ ++ This Source Code Form is "Incompatible With Secondary Licenses", as ++ defined by the Mozilla Public License, v. 2.0. ++``` +diff --git a/vendor/github.com/cyphar/filepath-securejoin/LICENSE b/vendor/github.com/cyphar/filepath-securejoin/LICENSE.BSD +similarity index 100% +rename from vendor/github.com/cyphar/filepath-securejoin/LICENSE +rename to vendor/github.com/cyphar/filepath-securejoin/LICENSE.BSD +diff --git a/vendor/github.com/cyphar/filepath-securejoin/LICENSE.MPL-2.0 b/vendor/github.com/cyphar/filepath-securejoin/LICENSE.MPL-2.0 +new file mode 100644 +index 00000000..d0a1fa14 +--- /dev/null ++++ b/vendor/github.com/cyphar/filepath-securejoin/LICENSE.MPL-2.0 +@@ -0,0 +1,373 @@ ++Mozilla Public License Version 2.0 ++================================== ++ ++1. Definitions ++-------------- ++ ++1.1. "Contributor" ++ means each individual or legal entity that creates, contributes to ++ the creation of, or owns Covered Software. ++ ++1.2. "Contributor Version" ++ means the combination of the Contributions of others (if any) used ++ by a Contributor and that particular Contributor's Contribution. ++ ++1.3. "Contribution" ++ means Covered Software of a particular Contributor. ++ ++1.4. "Covered Software" ++ means Source Code Form to which the initial Contributor has attached ++ the notice in Exhibit A, the Executable Form of such Source Code ++ Form, and Modifications of such Source Code Form, in each case ++ including portions thereof. ++ ++1.5. 
"Incompatible With Secondary Licenses" ++ means ++ ++ (a) that the initial Contributor has attached the notice described ++ in Exhibit B to the Covered Software; or ++ ++ (b) that the Covered Software was made available under the terms of ++ version 1.1 or earlier of the License, but not also under the ++ terms of a Secondary License. ++ ++1.6. "Executable Form" ++ means any form of the work other than Source Code Form. ++ ++1.7. "Larger Work" ++ means a work that combines Covered Software with other material, in ++ a separate file or files, that is not Covered Software. ++ ++1.8. "License" ++ means this document. ++ ++1.9. "Licensable" ++ means having the right to grant, to the maximum extent possible, ++ whether at the time of the initial grant or subsequently, any and ++ all of the rights conveyed by this License. ++ ++1.10. "Modifications" ++ means any of the following: ++ ++ (a) any file in Source Code Form that results from an addition to, ++ deletion from, or modification of the contents of Covered ++ Software; or ++ ++ (b) any new file in Source Code Form that contains any Covered ++ Software. ++ ++1.11. "Patent Claims" of a Contributor ++ means any patent claim(s), including without limitation, method, ++ process, and apparatus claims, in any patent Licensable by such ++ Contributor that would be infringed, but for the grant of the ++ License, by the making, using, selling, offering for sale, having ++ made, import, or transfer of either its Contributions or its ++ Contributor Version. ++ ++1.12. "Secondary License" ++ means either the GNU General Public License, Version 2.0, the GNU ++ Lesser General Public License, Version 2.1, the GNU Affero General ++ Public License, Version 3.0, or any later versions of those ++ licenses. ++ ++1.13. "Source Code Form" ++ means the form of the work preferred for making modifications. ++ ++1.14. "You" (or "Your") ++ means an individual or a legal entity exercising rights under this ++ License. 
For legal entities, "You" includes any entity that ++ controls, is controlled by, or is under common control with You. For ++ purposes of this definition, "control" means (a) the power, direct ++ or indirect, to cause the direction or management of such entity, ++ whether by contract or otherwise, or (b) ownership of more than ++ fifty percent (50%) of the outstanding shares or beneficial ++ ownership of such entity. ++ ++2. License Grants and Conditions ++-------------------------------- ++ ++2.1. Grants ++ ++Each Contributor hereby grants You a world-wide, royalty-free, ++non-exclusive license: ++ ++(a) under intellectual property rights (other than patent or trademark) ++ Licensable by such Contributor to use, reproduce, make available, ++ modify, display, perform, distribute, and otherwise exploit its ++ Contributions, either on an unmodified basis, with Modifications, or ++ as part of a Larger Work; and ++ ++(b) under Patent Claims of such Contributor to make, use, sell, offer ++ for sale, have made, import, and otherwise transfer either its ++ Contributions or its Contributor Version. ++ ++2.2. Effective Date ++ ++The licenses granted in Section 2.1 with respect to any Contribution ++become effective for each Contribution on the date the Contributor first ++distributes such Contribution. ++ ++2.3. Limitations on Grant Scope ++ ++The licenses granted in this Section 2 are the only rights granted under ++this License. No additional rights or licenses will be implied from the ++distribution or licensing of Covered Software under this License. 
++Notwithstanding Section 2.1(b) above, no patent license is granted by a ++Contributor: ++ ++(a) for any code that a Contributor has removed from Covered Software; ++ or ++ ++(b) for infringements caused by: (i) Your and any other third party's ++ modifications of Covered Software, or (ii) the combination of its ++ Contributions with other software (except as part of its Contributor ++ Version); or ++ ++(c) under Patent Claims infringed by Covered Software in the absence of ++ its Contributions. ++ ++This License does not grant any rights in the trademarks, service marks, ++or logos of any Contributor (except as may be necessary to comply with ++the notice requirements in Section 3.4). ++ ++2.4. Subsequent Licenses ++ ++No Contributor makes additional grants as a result of Your choice to ++distribute the Covered Software under a subsequent version of this ++License (see Section 10.2) or under the terms of a Secondary License (if ++permitted under the terms of Section 3.3). ++ ++2.5. Representation ++ ++Each Contributor represents that the Contributor believes its ++Contributions are its original creation(s) or it has sufficient rights ++to grant the rights to its Contributions conveyed by this License. ++ ++2.6. Fair Use ++ ++This License is not intended to limit any rights You have under ++applicable copyright doctrines of fair use, fair dealing, or other ++equivalents. ++ ++2.7. Conditions ++ ++Sections 3.1, 3.2, 3.3, and 3.4 are conditions of the licenses granted ++in Section 2.1. ++ ++3. Responsibilities ++------------------- ++ ++3.1. Distribution of Source Form ++ ++All distribution of Covered Software in Source Code Form, including any ++Modifications that You create or to which You contribute, must be under ++the terms of this License. You must inform recipients that the Source ++Code Form of the Covered Software is governed by the terms of this ++License, and how they can obtain a copy of this License. 
You may not ++attempt to alter or restrict the recipients' rights in the Source Code ++Form. ++ ++3.2. Distribution of Executable Form ++ ++If You distribute Covered Software in Executable Form then: ++ ++(a) such Covered Software must also be made available in Source Code ++ Form, as described in Section 3.1, and You must inform recipients of ++ the Executable Form how they can obtain a copy of such Source Code ++ Form by reasonable means in a timely manner, at a charge no more ++ than the cost of distribution to the recipient; and ++ ++(b) You may distribute such Executable Form under the terms of this ++ License, or sublicense it under different terms, provided that the ++ license for the Executable Form does not attempt to limit or alter ++ the recipients' rights in the Source Code Form under this License. ++ ++3.3. Distribution of a Larger Work ++ ++You may create and distribute a Larger Work under terms of Your choice, ++provided that You also comply with the requirements of this License for ++the Covered Software. If the Larger Work is a combination of Covered ++Software with a work governed by one or more Secondary Licenses, and the ++Covered Software is not Incompatible With Secondary Licenses, this ++License permits You to additionally distribute such Covered Software ++under the terms of such Secondary License(s), so that the recipient of ++the Larger Work may, at their option, further distribute the Covered ++Software under the terms of either this License or such Secondary ++License(s). ++ ++3.4. Notices ++ ++You may not remove or alter the substance of any license notices ++(including copyright notices, patent notices, disclaimers of warranty, ++or limitations of liability) contained within the Source Code Form of ++the Covered Software, except that You may alter any license notices to ++the extent required to remedy known factual inaccuracies. ++ ++3.5. 
Application of Additional Terms ++ ++You may choose to offer, and to charge a fee for, warranty, support, ++indemnity or liability obligations to one or more recipients of Covered ++Software. However, You may do so only on Your own behalf, and not on ++behalf of any Contributor. You must make it absolutely clear that any ++such warranty, support, indemnity, or liability obligation is offered by ++You alone, and You hereby agree to indemnify every Contributor for any ++liability incurred by such Contributor as a result of warranty, support, ++indemnity or liability terms You offer. You may include additional ++disclaimers of warranty and limitations of liability specific to any ++jurisdiction. ++ ++4. Inability to Comply Due to Statute or Regulation ++--------------------------------------------------- ++ ++If it is impossible for You to comply with any of the terms of this ++License with respect to some or all of the Covered Software due to ++statute, judicial order, or regulation then You must: (a) comply with ++the terms of this License to the maximum extent possible; and (b) ++describe the limitations and the code they affect. Such description must ++be placed in a text file included with all distributions of the Covered ++Software under this License. Except to the extent prohibited by statute ++or regulation, such description must be sufficiently detailed for a ++recipient of ordinary skill to be able to understand it. ++ ++5. Termination ++-------------- ++ ++5.1. The rights granted under this License will terminate automatically ++if You fail to comply with any of its terms. 
However, if You become ++compliant, then the rights granted under this License from a particular ++Contributor are reinstated (a) provisionally, unless and until such ++Contributor explicitly and finally terminates Your grants, and (b) on an ++ongoing basis, if such Contributor fails to notify You of the ++non-compliance by some reasonable means prior to 60 days after You have ++come back into compliance. Moreover, Your grants from a particular ++Contributor are reinstated on an ongoing basis if such Contributor ++notifies You of the non-compliance by some reasonable means, this is the ++first time You have received notice of non-compliance with this License ++from such Contributor, and You become compliant prior to 30 days after ++Your receipt of the notice. ++ ++5.2. If You initiate litigation against any entity by asserting a patent ++infringement claim (excluding declaratory judgment actions, ++counter-claims, and cross-claims) alleging that a Contributor Version ++directly or indirectly infringes any patent, then the rights granted to ++You by any and all Contributors for the Covered Software under Section ++2.1 of this License shall terminate. ++ ++5.3. In the event of termination under Sections 5.1 or 5.2 above, all ++end user license agreements (excluding distributors and resellers) which ++have been validly granted by You or Your distributors under this License ++prior to termination shall survive termination. ++ ++************************************************************************ ++* * ++* 6. Disclaimer of Warranty * ++* ------------------------- * ++* * ++* Covered Software is provided under this License on an "as is" * ++* basis, without warranty of any kind, either expressed, implied, or * ++* statutory, including, without limitation, warranties that the * ++* Covered Software is free of defects, merchantable, fit for a * ++* particular purpose or non-infringing. 
The entire risk as to the * ++* quality and performance of the Covered Software is with You. * ++* Should any Covered Software prove defective in any respect, You * ++* (not any Contributor) assume the cost of any necessary servicing, * ++* repair, or correction. This disclaimer of warranty constitutes an * ++* essential part of this License. No use of any Covered Software is * ++* authorized under this License except under this disclaimer. * ++* * ++************************************************************************ ++ ++************************************************************************ ++* * ++* 7. Limitation of Liability * ++* -------------------------- * ++* * ++* Under no circumstances and under no legal theory, whether tort * ++* (including negligence), contract, or otherwise, shall any * ++* Contributor, or anyone who distributes Covered Software as * ++* permitted above, be liable to You for any direct, indirect, * ++* special, incidental, or consequential damages of any character * ++* including, without limitation, damages for lost profits, loss of * ++* goodwill, work stoppage, computer failure or malfunction, or any * ++* and all other commercial damages or losses, even if such party * ++* shall have been informed of the possibility of such damages. This * ++* limitation of liability shall not apply to liability for death or * ++* personal injury resulting from such party's negligence to the * ++* extent applicable law prohibits such limitation. Some * ++* jurisdictions do not allow the exclusion or limitation of * ++* incidental or consequential damages, so this exclusion and * ++* limitation may not apply to You. * ++* * ++************************************************************************ ++ ++8. 
Litigation ++------------- ++ ++Any litigation relating to this License may be brought only in the ++courts of a jurisdiction where the defendant maintains its principal ++place of business and such litigation shall be governed by laws of that ++jurisdiction, without reference to its conflict-of-law provisions. ++Nothing in this Section shall prevent a party's ability to bring ++cross-claims or counter-claims. ++ ++9. Miscellaneous ++---------------- ++ ++This License represents the complete agreement concerning the subject ++matter hereof. If any provision of this License is held to be ++unenforceable, such provision shall be reformed only to the extent ++necessary to make it enforceable. Any law or regulation which provides ++that the language of a contract shall be construed against the drafter ++shall not be used to construe this License against a Contributor. ++ ++10. Versions of the License ++--------------------------- ++ ++10.1. New Versions ++ ++Mozilla Foundation is the license steward. Except as provided in Section ++10.3, no one other than the license steward has the right to modify or ++publish new versions of this License. Each version will be given a ++distinguishing version number. ++ ++10.2. Effect of New Versions ++ ++You may distribute the Covered Software under the terms of the version ++of the License under which You originally received the Covered Software, ++or under the terms of any subsequent version published by the license ++steward. ++ ++10.3. Modified Versions ++ ++If you create software not governed by this License, and you want to ++create a new license for such software, you may create and use a ++modified version of this License if you rename the license and remove ++any references to the name of the license steward (except to note that ++such modified license differs from this License). ++ ++10.4. 
Distributing Source Code Form that is Incompatible With Secondary ++Licenses ++ ++If You choose to distribute Source Code Form that is Incompatible With ++Secondary Licenses under the terms of this version of the License, the ++notice described in Exhibit B of this License must be attached. ++ ++Exhibit A - Source Code Form License Notice ++------------------------------------------- ++ ++ This Source Code Form is subject to the terms of the Mozilla Public ++ License, v. 2.0. If a copy of the MPL was not distributed with this ++ file, You can obtain one at https://mozilla.org/MPL/2.0/. ++ ++If it is not possible or desirable to put the notice in a particular ++file, then You may include the notice in a location (such as a LICENSE ++file in a relevant directory) where a recipient would be likely to look ++for such a notice. ++ ++You may add additional accurate notices of copyright ownership. ++ ++Exhibit B - "Incompatible With Secondary Licenses" Notice ++--------------------------------------------------------- ++ ++ This Source Code Form is "Incompatible With Secondary Licenses", as ++ defined by the Mozilla Public License, v. 2.0. +diff --git a/vendor/github.com/cyphar/filepath-securejoin/README.md b/vendor/github.com/cyphar/filepath-securejoin/README.md +index eaeb53fc..6673abfc 100644 +--- a/vendor/github.com/cyphar/filepath-securejoin/README.md ++++ b/vendor/github.com/cyphar/filepath-securejoin/README.md +@@ -67,7 +67,8 @@ func SecureJoin(root, unsafePath string) (string, error) { + [libpathrs]: https://github.com/openSUSE/libpathrs + [go#20126]: https://github.com/golang/go/issues/20126 + +-### New API ### ++### New API ### ++[#new-api]: #new-api + + While we recommend users switch to [libpathrs][libpathrs] as soon as it has a + stable release, some methods implemented by libpathrs have been ported to this +@@ -165,5 +166,19 @@ after `MkdirAll`). 
+ + ### License ### + +-The license of this project is the same as Go, which is a BSD 3-clause license +-available in the `LICENSE` file. ++`SPDX-License-Identifier: BSD-3-Clause AND MPL-2.0` ++ ++Some of the code in this project is derived from Go, and is licensed under a ++BSD 3-clause license (available in `LICENSE.BSD`). Other files (many of which ++are derived from [libpathrs][libpathrs]) are licensed under the Mozilla Public ++License version 2.0 (available in `LICENSE.MPL-2.0`). If you are using the ++["New API" described above][#new-api], you are probably using code from files ++released under this license. ++ ++Every source file in this project has a copyright header describing its ++license. Please check the license headers of each file to see what license ++applies to it. ++ ++See [COPYING.md](./COPYING.md) for some more details. ++ ++[umoci]: https://github.com/opencontainers/umoci +diff --git a/vendor/github.com/cyphar/filepath-securejoin/VERSION b/vendor/github.com/cyphar/filepath-securejoin/VERSION +index 267577d4..8f0916f7 100644 +--- a/vendor/github.com/cyphar/filepath-securejoin/VERSION ++++ b/vendor/github.com/cyphar/filepath-securejoin/VERSION +@@ -1 +1 @@ +-0.4.1 ++0.5.0 +diff --git a/vendor/github.com/cyphar/filepath-securejoin/codecov.yml b/vendor/github.com/cyphar/filepath-securejoin/codecov.yml +new file mode 100644 +index 00000000..ff284dbf +--- /dev/null ++++ b/vendor/github.com/cyphar/filepath-securejoin/codecov.yml +@@ -0,0 +1,29 @@ ++# SPDX-License-Identifier: MPL-2.0 ++ ++# Copyright (C) 2025 Aleksa Sarai ++# Copyright (C) 2025 SUSE LLC ++# ++# This Source Code Form is subject to the terms of the Mozilla Public ++# License, v. 2.0. If a copy of the MPL was not distributed with this ++# file, You can obtain one at https://mozilla.org/MPL/2.0/. 
++ ++comment: ++ layout: "condensed_header, reach, diff, components, condensed_files, condensed_footer" ++ require_changes: true ++ branches: ++ - main ++ ++coverage: ++ range: 60..100 ++ status: ++ project: ++ default: ++ target: 85% ++ threshold: 0% ++ patch: ++ default: ++ target: auto ++ informational: true ++ ++github_checks: ++ annotations: false +diff --git a/vendor/github.com/cyphar/filepath-securejoin/deprecated_linux.go b/vendor/github.com/cyphar/filepath-securejoin/deprecated_linux.go +new file mode 100644 +index 00000000..3e427b16 +--- /dev/null ++++ b/vendor/github.com/cyphar/filepath-securejoin/deprecated_linux.go +@@ -0,0 +1,48 @@ ++// SPDX-License-Identifier: MPL-2.0 ++ ++//go:build linux ++ ++// Copyright (C) 2024-2025 Aleksa Sarai ++// Copyright (C) 2024-2025 SUSE LLC ++// ++// This Source Code Form is subject to the terms of the Mozilla Public ++// License, v. 2.0. If a copy of the MPL was not distributed with this ++// file, You can obtain one at https://mozilla.org/MPL/2.0/. ++ ++package securejoin ++ ++import ( ++ "github.com/cyphar/filepath-securejoin/pathrs-lite" ++) ++ ++var ( ++ // MkdirAll is a wrapper around [pathrs.MkdirAll]. ++ // ++ // Deprecated: You should use [pathrs.MkdirAll] directly instead. This ++ // wrapper will be removed in filepath-securejoin v0.6. ++ MkdirAll = pathrs.MkdirAll ++ ++ // MkdirAllHandle is a wrapper around [pathrs.MkdirAllHandle]. ++ // ++ // Deprecated: You should use [pathrs.MkdirAllHandle] directly instead. ++ // This wrapper will be removed in filepath-securejoin v0.6. ++ MkdirAllHandle = pathrs.MkdirAllHandle ++ ++ // OpenInRoot is a wrapper around [pathrs.OpenInRoot]. ++ // ++ // Deprecated: You should use [pathrs.OpenInRoot] directly instead. This ++ // wrapper will be removed in filepath-securejoin v0.6. ++ OpenInRoot = pathrs.OpenInRoot ++ ++ // OpenatInRoot is a wrapper around [pathrs.OpenatInRoot]. ++ // ++ // Deprecated: You should use [pathrs.OpenatInRoot] directly instead. 
This ++ // wrapper will be removed in filepath-securejoin v0.6. ++ OpenatInRoot = pathrs.OpenatInRoot ++ ++ // Reopen is a wrapper around [pathrs.Reopen]. ++ // ++ // Deprecated: You should use [pathrs.Reopen] directly instead. This ++ // wrapper will be removed in filepath-securejoin v0.6. ++ Reopen = pathrs.Reopen ++) +diff --git a/vendor/github.com/cyphar/filepath-securejoin/doc.go b/vendor/github.com/cyphar/filepath-securejoin/doc.go +index 1ec7d065..1438fc9c 100644 +--- a/vendor/github.com/cyphar/filepath-securejoin/doc.go ++++ b/vendor/github.com/cyphar/filepath-securejoin/doc.go +@@ -1,3 +1,5 @@ ++// SPDX-License-Identifier: BSD-3-Clause ++ + // Copyright (C) 2014-2015 Docker Inc & Go Authors. All rights reserved. + // Copyright (C) 2017-2024 SUSE LLC. All rights reserved. + // Use of this source code is governed by a BSD-style +@@ -14,14 +16,13 @@ + // **not** safe against race conditions where an attacker changes the + // filesystem after (or during) the [SecureJoin] operation. + // +-// The new API is made up of [OpenInRoot] and [MkdirAll] (and derived +-// functions). These are safe against racing attackers and have several other +-// protections that are not provided by the legacy API. There are many more +-// operations that most programs expect to be able to do safely, but we do not +-// provide explicit support for them because we want to encourage users to +-// switch to [libpathrs](https://github.com/openSUSE/libpathrs) which is a +-// cross-language next-generation library that is entirely designed around +-// operating on paths safely. ++// The new API is available in the [pathrs-lite] subpackage, and provides ++// protections against racing attackers as well as several other key ++// protections against attacks often seen by container runtimes. As the name ++// suggests, [pathrs-lite] is a stripped down (pure Go) reimplementation of ++// [libpathrs].
The main APIs provided are [OpenInRoot], [MkdirAll], and ++// [procfs.Handle] -- other APIs are not planned to be ported. The long-term ++// goal is for users to migrate to [libpathrs] which is more fully-featured. + // + // securejoin has been used by several container runtimes (Docker, runc, + // Kubernetes, etc) for quite a few years as a de-facto standard for operating +@@ -31,9 +32,16 @@ + // API as soon as possible (or even better, switch to libpathrs). + // + // This project was initially intended to be included in the Go standard +-// library, but [it was rejected](https://go.dev/issue/20126). There is now a +-// [new Go proposal](https://go.dev/issue/67002) for a safe path resolution API +-// that shares some of the goals of filepath-securejoin. However, that design +-// is intended to work like `openat2(RESOLVE_BENEATH)` which does not fit the +-// usecase of container runtimes and most system tools. ++// library, but it was rejected (see https://go.dev/issue/20126). Much later, ++// [os.Root] was added to the Go stdlib that shares some of the goals of ++// filepath-securejoin. However, its design is intended to work like ++// openat2(RESOLVE_BENEATH) which does not fit the usecase of container ++// runtimes and most system tools. 
++// ++// [pathrs-lite]: https://pkg.go.dev/github.com/cyphar/filepath-securejoin/pathrs-lite ++// [libpathrs]: https://github.com/openSUSE/libpathrs ++// [OpenInRoot]: https://pkg.go.dev/github.com/cyphar/filepath-securejoin/pathrs-lite#OpenInRoot ++// [MkdirAll]: https://pkg.go.dev/github.com/cyphar/filepath-securejoin/pathrs-lite#MkdirAll ++// [procfs.Handle]: https://pkg.go.dev/github.com/cyphar/filepath-securejoin/pathrs-lite/procfs#Handle ++// [os.Root]: https://pkg.go.dev/os#Root + package securejoin +diff --git a/vendor/github.com/cyphar/filepath-securejoin/gocompat_generics_go121.go b/vendor/github.com/cyphar/filepath-securejoin/gocompat_generics_go121.go +deleted file mode 100644 +index ddd6fa9a..00000000 +--- a/vendor/github.com/cyphar/filepath-securejoin/gocompat_generics_go121.go ++++ /dev/null +@@ -1,32 +0,0 @@ +-//go:build linux && go1.21 +- +-// Copyright (C) 2024 SUSE LLC. All rights reserved. +-// Use of this source code is governed by a BSD-style +-// license that can be found in the LICENSE file.
+- +-package securejoin +- +-import ( +- "slices" +- "sync" +-) +- +-func slices_DeleteFunc[S ~[]E, E any](slice S, delFn func(E) bool) S { +- return slices.DeleteFunc(slice, delFn) +-} +- +-func slices_Contains[S ~[]E, E comparable](slice S, val E) bool { +- return slices.Contains(slice, val) +-} +- +-func slices_Clone[S ~[]E, E any](slice S) S { +- return slices.Clone(slice) +-} +- +-func sync_OnceValue[T any](f func() T) func() T { +- return sync.OnceValue(f) +-} +- +-func sync_OnceValues[T1, T2 any](f func() (T1, T2)) func() (T1, T2) { +- return sync.OnceValues(f) +-} +diff --git a/vendor/github.com/cyphar/filepath-securejoin/gocompat_generics_unsupported.go b/vendor/github.com/cyphar/filepath-securejoin/gocompat_generics_unsupported.go +deleted file mode 100644 +index f1e6fe7e..00000000 +--- a/vendor/github.com/cyphar/filepath-securejoin/gocompat_generics_unsupported.go ++++ /dev/null +@@ -1,124 +0,0 @@ +-//go:build linux && !go1.21 +- +-// Copyright (C) 2024 SUSE LLC. All rights reserved. +-// Use of this source code is governed by a BSD-style +-// license that can be found in the LICENSE file. +- +-package securejoin +- +-import ( +- "sync" +-) +- +-// These are very minimal implementations of functions that appear in Go 1.21's +-// stdlib, included so that we can build on older Go versions. Most are +-// borrowed directly from the stdlib, and a few are modified to be "obviously +-// correct" without needing to copy too many other helpers. +- +-// clearSlice is equivalent to the builtin clear from Go 1.21. +-// Copied from the Go 1.24 stdlib implementation. +-func clearSlice[S ~[]E, E any](slice S) { +- var zero E +- for i := range slice { +- slice[i] = zero +- } +-} +- +-// Copied from the Go 1.24 stdlib implementation. +-func slices_IndexFunc[S ~[]E, E any](s S, f func(E) bool) int { +- for i := range s { +- if f(s[i]) { +- return i +- } +- } +- return -1 +-} +- +-// Copied from the Go 1.24 stdlib implementation. 
+-func slices_DeleteFunc[S ~[]E, E any](s S, del func(E) bool) S { +- i := slices_IndexFunc(s, del) +- if i == -1 { +- return s +- } +- // Don't start copying elements until we find one to delete. +- for j := i + 1; j < len(s); j++ { +- if v := s[j]; !del(v) { +- s[i] = v +- i++ +- } +- } +- clearSlice(s[i:]) // zero/nil out the obsolete elements, for GC +- return s[:i] +-} +- +-// Similar to the stdlib slices.Contains, except that we don't have +-// slices.Index so we need to use slices.IndexFunc for this non-Func helper. +-func slices_Contains[S ~[]E, E comparable](s S, v E) bool { +- return slices_IndexFunc(s, func(e E) bool { return e == v }) >= 0 +-} +- +-// Copied from the Go 1.24 stdlib implementation. +-func slices_Clone[S ~[]E, E any](s S) S { +- // Preserve nil in case it matters. +- if s == nil { +- return nil +- } +- return append(S([]E{}), s...) +-} +- +-// Copied from the Go 1.24 stdlib implementation. +-func sync_OnceValue[T any](f func() T) func() T { +- var ( +- once sync.Once +- valid bool +- p any +- result T +- ) +- g := func() { +- defer func() { +- p = recover() +- if !valid { +- panic(p) +- } +- }() +- result = f() +- f = nil +- valid = true +- } +- return func() T { +- once.Do(g) +- if !valid { +- panic(p) +- } +- return result +- } +-} +- +-// Copied from the Go 1.24 stdlib implementation. 
+-func sync_OnceValues[T1, T2 any](f func() (T1, T2)) func() (T1, T2) { +- var ( +- once sync.Once +- valid bool +- p any +- r1 T1 +- r2 T2 +- ) +- g := func() { +- defer func() { +- p = recover() +- if !valid { +- panic(p) +- } +- }() +- r1, r2 = f() +- f = nil +- valid = true +- } +- return func() (T1, T2) { +- once.Do(g) +- if !valid { +- panic(p) +- } +- return r1, r2 +- } +-} +diff --git a/vendor/github.com/cyphar/filepath-securejoin/internal/consts/consts.go b/vendor/github.com/cyphar/filepath-securejoin/internal/consts/consts.go +new file mode 100644 +index 00000000..c69c4da9 +--- /dev/null ++++ b/vendor/github.com/cyphar/filepath-securejoin/internal/consts/consts.go +@@ -0,0 +1,15 @@ ++// SPDX-License-Identifier: BSD-3-Clause ++ ++// Copyright (C) 2014-2015 Docker Inc & Go Authors. All rights reserved. ++// Copyright (C) 2017-2025 SUSE LLC. All rights reserved. ++// Use of this source code is governed by a BSD-style ++// license that can be found in the LICENSE file. ++ ++// Package consts contains the definitions of internal constants used ++// throughout filepath-securejoin. ++package consts ++ ++// MaxSymlinkLimit is the maximum number of symlinks that can be encountered ++// during a single lookup before returning -ELOOP. At time of writing, Linux ++// has an internal limit of 40. ++const MaxSymlinkLimit = 255 +diff --git a/vendor/github.com/cyphar/filepath-securejoin/join.go b/vendor/github.com/cyphar/filepath-securejoin/join.go +index e6634d47..199c1d83 100644 +--- a/vendor/github.com/cyphar/filepath-securejoin/join.go ++++ b/vendor/github.com/cyphar/filepath-securejoin/join.go +@@ -1,3 +1,5 @@ ++// SPDX-License-Identifier: BSD-3-Clause ++ + // Copyright (C) 2014-2015 Docker Inc & Go Authors. All rights reserved. + // Copyright (C) 2017-2025 SUSE LLC. All rights reserved. 
+ // Use of this source code is governed by a BSD-style +@@ -11,9 +13,9 @@ import ( + "path/filepath" + "strings" + "syscall" +-) + +-const maxSymlinkLimit = 255 ++ "github.com/cyphar/filepath-securejoin/internal/consts" ++) + + // IsNotExist tells you if err is an error that implies that either the path + // accessed does not exist (or path components don't exist). This is +@@ -49,12 +51,13 @@ func hasDotDot(path string) bool { + return strings.Contains("/"+path+"/", "/../") + } + +-// SecureJoinVFS joins the two given path components (similar to [filepath.Join]) except +-// that the returned path is guaranteed to be scoped inside the provided root +-// path (when evaluated). Any symbolic links in the path are evaluated with the +-// given root treated as the root of the filesystem, similar to a chroot. The +-// filesystem state is evaluated through the given [VFS] interface (if nil, the +-// standard [os].* family of functions are used). ++// SecureJoinVFS joins the two given path components (similar to ++// [filepath.Join]) except that the returned path is guaranteed to be scoped ++// inside the provided root path (when evaluated). Any symbolic links in the ++// path are evaluated with the given root treated as the root of the ++// filesystem, similar to a chroot. The filesystem state is evaluated through ++// the given [VFS] interface (if nil, the standard [os].* family of functions ++// are used). + // + // Note that the guarantees provided by this function only apply if the path + // components in the returned string are not modified (in other words are not +@@ -78,7 +81,7 @@ func hasDotDot(path string) bool { + // fully resolved using [filepath.EvalSymlinks] or otherwise constructed to + // avoid containing symlink components. Of course, the root also *must not* be + // attacker-controlled. 
+-func SecureJoinVFS(root, unsafePath string, vfs VFS) (string, error) { ++func SecureJoinVFS(root, unsafePath string, vfs VFS) (string, error) { //nolint:revive // name is part of public API + // The root path must not contain ".." components, otherwise when we join + // the subpath we will end up with a weird path. We could work around this + // in other ways but users shouldn't be giving us non-lexical root paths in +@@ -138,7 +141,7 @@ func SecureJoinVFS(root, unsafePath string, vfs VFS) (string, error) { + // It's a symlink, so get its contents and expand it by prepending it + // to the yet-unparsed path. + linksWalked++ +- if linksWalked > maxSymlinkLimit { ++ if linksWalked > consts.MaxSymlinkLimit { + return "", &os.PathError{Op: "SecureJoin", Path: root + string(filepath.Separator) + unsafePath, Err: syscall.ELOOP} + } + +diff --git a/vendor/github.com/cyphar/filepath-securejoin/openat2_linux.go b/vendor/github.com/cyphar/filepath-securejoin/openat2_linux.go +deleted file mode 100644 +index f7a13e69..00000000 +--- a/vendor/github.com/cyphar/filepath-securejoin/openat2_linux.go ++++ /dev/null +@@ -1,127 +0,0 @@ +-//go:build linux +- +-// Copyright (C) 2024 SUSE LLC. All rights reserved. +-// Use of this source code is governed by a BSD-style +-// license that can be found in the LICENSE file. +- +-package securejoin +- +-import ( +- "errors" +- "fmt" +- "os" +- "path/filepath" +- "strings" +- +- "golang.org/x/sys/unix" +-) +- +-var hasOpenat2 = sync_OnceValue(func() bool { +- fd, err := unix.Openat2(unix.AT_FDCWD, ".", &unix.OpenHow{ +- Flags: unix.O_PATH | unix.O_CLOEXEC, +- Resolve: unix.RESOLVE_NO_SYMLINKS | unix.RESOLVE_IN_ROOT, +- }) +- if err != nil { +- return false +- } +- _ = unix.Close(fd) +- return true +-}) +- +-func scopedLookupShouldRetry(how *unix.OpenHow, err error) bool { +- // RESOLVE_IN_ROOT (and RESOLVE_BENEATH) can return -EAGAIN if we resolve +- // ".." while a mount or rename occurs anywhere on the system. 
This could +- // happen spuriously, or as the result of an attacker trying to mess with +- // us during lookup. +- // +- // In addition, scoped lookups have a "safety check" at the end of +- // complete_walk which will return -EXDEV if the final path is not in the +- // root. +- return how.Resolve&(unix.RESOLVE_IN_ROOT|unix.RESOLVE_BENEATH) != 0 && +- (errors.Is(err, unix.EAGAIN) || errors.Is(err, unix.EXDEV)) +-} +- +-const scopedLookupMaxRetries = 10 +- +-func openat2File(dir *os.File, path string, how *unix.OpenHow) (*os.File, error) { +- fullPath := dir.Name() + "/" + path +- // Make sure we always set O_CLOEXEC. +- how.Flags |= unix.O_CLOEXEC +- var tries int +- for tries < scopedLookupMaxRetries { +- fd, err := unix.Openat2(int(dir.Fd()), path, how) +- if err != nil { +- if scopedLookupShouldRetry(how, err) { +- // We retry a couple of times to avoid the spurious errors, and +- // if we are being attacked then returning -EAGAIN is the best +- // we can do. +- tries++ +- continue +- } +- return nil, &os.PathError{Op: "openat2", Path: fullPath, Err: err} +- } +- // If we are using RESOLVE_IN_ROOT, the name we generated may be wrong. +- // NOTE: The procRoot code MUST NOT use RESOLVE_IN_ROOT, otherwise +- // you'll get infinite recursion here. 
+- if how.Resolve&unix.RESOLVE_IN_ROOT == unix.RESOLVE_IN_ROOT { +- if actualPath, err := rawProcSelfFdReadlink(fd); err == nil { +- fullPath = actualPath +- } +- } +- return os.NewFile(uintptr(fd), fullPath), nil +- } +- return nil, &os.PathError{Op: "openat2", Path: fullPath, Err: errPossibleAttack} +-} +- +-func lookupOpenat2(root *os.File, unsafePath string, partial bool) (*os.File, string, error) { +- if !partial { +- file, err := openat2File(root, unsafePath, &unix.OpenHow{ +- Flags: unix.O_PATH | unix.O_CLOEXEC, +- Resolve: unix.RESOLVE_IN_ROOT | unix.RESOLVE_NO_MAGICLINKS, +- }) +- return file, "", err +- } +- return partialLookupOpenat2(root, unsafePath) +-} +- +-// partialLookupOpenat2 is an alternative implementation of +-// partialLookupInRoot, using openat2(RESOLVE_IN_ROOT) to more safely get a +-// handle to the deepest existing child of the requested path within the root. +-func partialLookupOpenat2(root *os.File, unsafePath string) (*os.File, string, error) { +- // TODO: Implement this as a git-bisect-like binary search. +- +- unsafePath = filepath.ToSlash(unsafePath) // noop +- endIdx := len(unsafePath) +- var lastError error +- for endIdx > 0 { +- subpath := unsafePath[:endIdx] +- +- handle, err := openat2File(root, subpath, &unix.OpenHow{ +- Flags: unix.O_PATH | unix.O_CLOEXEC, +- Resolve: unix.RESOLVE_IN_ROOT | unix.RESOLVE_NO_MAGICLINKS, +- }) +- if err == nil { +- // Jump over the slash if we have a non-"" remainingPath. +- if endIdx < len(unsafePath) { +- endIdx += 1 +- } +- // We found a subpath! +- return handle, unsafePath[endIdx:], lastError +- } +- if errors.Is(err, unix.ENOENT) || errors.Is(err, unix.ENOTDIR) { +- // That path doesn't exist, let's try the next directory up. +- endIdx = strings.LastIndexByte(subpath, '/') +- lastError = err +- continue +- } +- return nil, "", fmt.Errorf("open subpath: %w", err) +- } +- // If we couldn't open anything, the whole subpath is missing. 
Return a +- // copy of the root fd so that the caller doesn't close this one by +- // accident. +- rootClone, err := dupFile(root) +- if err != nil { +- return nil, "", err +- } +- return rootClone, unsafePath, lastError +-} +diff --git a/vendor/github.com/cyphar/filepath-securejoin/openat_linux.go b/vendor/github.com/cyphar/filepath-securejoin/openat_linux.go +deleted file mode 100644 +index 949fb5f2..00000000 +--- a/vendor/github.com/cyphar/filepath-securejoin/openat_linux.go ++++ /dev/null +@@ -1,59 +0,0 @@ +-//go:build linux +- +-// Copyright (C) 2024 SUSE LLC. All rights reserved. +-// Use of this source code is governed by a BSD-style +-// license that can be found in the LICENSE file. +- +-package securejoin +- +-import ( +- "os" +- "path/filepath" +- +- "golang.org/x/sys/unix" +-) +- +-func dupFile(f *os.File) (*os.File, error) { +- fd, err := unix.FcntlInt(f.Fd(), unix.F_DUPFD_CLOEXEC, 0) +- if err != nil { +- return nil, os.NewSyscallError("fcntl(F_DUPFD_CLOEXEC)", err) +- } +- return os.NewFile(uintptr(fd), f.Name()), nil +-} +- +-func openatFile(dir *os.File, path string, flags int, mode int) (*os.File, error) { +- // Make sure we always set O_CLOEXEC. +- flags |= unix.O_CLOEXEC +- fd, err := unix.Openat(int(dir.Fd()), path, flags, uint32(mode)) +- if err != nil { +- return nil, &os.PathError{Op: "openat", Path: dir.Name() + "/" + path, Err: err} +- } +- // All of the paths we use with openatFile(2) are guaranteed to be +- // lexically safe, so we can use path.Join here. 
+- fullPath := filepath.Join(dir.Name(), path) +- return os.NewFile(uintptr(fd), fullPath), nil +-} +- +-func fstatatFile(dir *os.File, path string, flags int) (unix.Stat_t, error) { +- var stat unix.Stat_t +- if err := unix.Fstatat(int(dir.Fd()), path, &stat, flags); err != nil { +- return stat, &os.PathError{Op: "fstatat", Path: dir.Name() + "/" + path, Err: err} +- } +- return stat, nil +-} +- +-func readlinkatFile(dir *os.File, path string) (string, error) { +- size := 4096 +- for { +- linkBuf := make([]byte, size) +- n, err := unix.Readlinkat(int(dir.Fd()), path, linkBuf) +- if err != nil { +- return "", &os.PathError{Op: "readlinkat", Path: dir.Name() + "/" + path, Err: err} +- } +- if n != size { +- return string(linkBuf[:n]), nil +- } +- // Possible truncation, resize the buffer. +- size *= 2 +- } +-} +diff --git a/vendor/github.com/cyphar/filepath-securejoin/pathrs-lite/README.md b/vendor/github.com/cyphar/filepath-securejoin/pathrs-lite/README.md +new file mode 100644 +index 00000000..1be727e7 +--- /dev/null ++++ b/vendor/github.com/cyphar/filepath-securejoin/pathrs-lite/README.md +@@ -0,0 +1,33 @@ ++## `pathrs-lite` ## ++ ++`github.com/cyphar/filepath-securejoin/pathrs-lite` provides a minimal **pure ++Go** implementation of the core bits of [libpathrs][]. This is not intended to ++be a complete replacement for libpathrs, instead it is mainly intended to be ++useful as a transition tool for existing Go projects. ++ ++The long-term plan for `pathrs-lite` is to provide a build tag that will cause ++all `pathrs-lite` operations to call into libpathrs directly, thus removing ++code duplication for projects that wish to make use of libpathrs (and providing ++the ability for software packagers to opt-in to libpathrs support without ++needing to patch upstream). ++ ++[libpathrs]: https://github.com/cyphar/libpathrs ++ ++### License ### ++ ++Most of this subpackage is licensed under the Mozilla Public License (version ++2.0). 
For more information, see the top-level [COPYING.md][] and ++[LICENSE.MPL-2.0][] files, as well as the individual license headers for each ++file. ++ ++``` ++Copyright (C) 2024-2025 Aleksa Sarai ++Copyright (C) 2024-2025 SUSE LLC ++ ++This Source Code Form is subject to the terms of the Mozilla Public ++License, v. 2.0. If a copy of the MPL was not distributed with this ++file, You can obtain one at https://mozilla.org/MPL/2.0/. ++``` ++ ++[COPYING.md]: ../COPYING.md ++[LICENSE.MPL-2.0]: ../LICENSE.MPL-2.0 +diff --git a/vendor/github.com/cyphar/filepath-securejoin/pathrs-lite/doc.go b/vendor/github.com/cyphar/filepath-securejoin/pathrs-lite/doc.go +new file mode 100644 +index 00000000..d3d74517 +--- /dev/null ++++ b/vendor/github.com/cyphar/filepath-securejoin/pathrs-lite/doc.go +@@ -0,0 +1,14 @@ ++// SPDX-License-Identifier: MPL-2.0 ++ ++//go:build linux ++ ++// Copyright (C) 2024-2025 Aleksa Sarai ++// Copyright (C) 2024-2025 SUSE LLC ++// ++// This Source Code Form is subject to the terms of the Mozilla Public ++// License, v. 2.0. If a copy of the MPL was not distributed with this ++// file, You can obtain one at https://mozilla.org/MPL/2.0/. ++ ++// Package pathrs (pathrs-lite) is a less complete pure Go implementation of ++// some of the APIs provided by [libpathrs]. ++package pathrs +diff --git a/vendor/github.com/cyphar/filepath-securejoin/pathrs-lite/internal/assert/assert.go b/vendor/github.com/cyphar/filepath-securejoin/pathrs-lite/internal/assert/assert.go +new file mode 100644 +index 00000000..595dfbf1 +--- /dev/null ++++ b/vendor/github.com/cyphar/filepath-securejoin/pathrs-lite/internal/assert/assert.go +@@ -0,0 +1,30 @@ ++// SPDX-License-Identifier: MPL-2.0 ++ ++// Copyright (C) 2025 Aleksa Sarai ++// Copyright (C) 2025 SUSE LLC ++// ++// This Source Code Form is subject to the terms of the Mozilla Public ++// License, v. 2.0. If a copy of the MPL was not distributed with this ++// file, You can obtain one at https://mozilla.org/MPL/2.0/. 
++ ++// Package assert provides some basic assertion helpers for Go. ++package assert ++ ++import ( ++ "fmt" ++) ++ ++// Assert panics if the predicate is false with the provided argument. ++func Assert(predicate bool, msg any) { ++ if !predicate { ++ panic(msg) ++ } ++} ++ ++// Assertf panics if the predicate is false and formats the message using the ++// same formatting as [fmt.Printf]. ++// ++// [fmt.Printf]: https://pkg.go.dev/fmt#Printf ++func Assertf(predicate bool, fmtMsg string, args ...any) { ++ Assert(predicate, fmt.Sprintf(fmtMsg, args...)) ++} +diff --git a/vendor/github.com/cyphar/filepath-securejoin/pathrs-lite/internal/errors.go b/vendor/github.com/cyphar/filepath-securejoin/pathrs-lite/internal/errors.go +new file mode 100644 +index 00000000..c26e440e +--- /dev/null ++++ b/vendor/github.com/cyphar/filepath-securejoin/pathrs-lite/internal/errors.go +@@ -0,0 +1,30 @@ ++// SPDX-License-Identifier: MPL-2.0 ++ ++// Copyright (C) 2024-2025 Aleksa Sarai ++// Copyright (C) 2024-2025 SUSE LLC ++// ++// This Source Code Form is subject to the terms of the Mozilla Public ++// License, v. 2.0. If a copy of the MPL was not distributed with this ++// file, You can obtain one at https://mozilla.org/MPL/2.0/. ++ ++// Package internal contains unexported common code for filepath-securejoin. ++package internal ++ ++import ( ++ "errors" ++) ++ ++var ( ++ // ErrPossibleAttack indicates that some attack was detected. ++ ErrPossibleAttack = errors.New("possible attack detected") ++ ++ // ErrPossibleBreakout indicates that during an operation we ended up in a ++ // state that could be a breakout but we detected it. ++ ErrPossibleBreakout = errors.New("possible breakout detected") ++ ++ // ErrInvalidDirectory indicates an unlinked directory. ++ ErrInvalidDirectory = errors.New("wandered into deleted directory") ++ ++ // ErrDeletedInode indicates an unlinked file (non-directory). 
++ ErrDeletedInode = errors.New("cannot verify path of deleted inode") ++) +diff --git a/vendor/github.com/cyphar/filepath-securejoin/pathrs-lite/internal/fd/at_linux.go b/vendor/github.com/cyphar/filepath-securejoin/pathrs-lite/internal/fd/at_linux.go +new file mode 100644 +index 00000000..09105491 +--- /dev/null ++++ b/vendor/github.com/cyphar/filepath-securejoin/pathrs-lite/internal/fd/at_linux.go +@@ -0,0 +1,148 @@ ++// SPDX-License-Identifier: MPL-2.0 ++ ++//go:build linux ++ ++// Copyright (C) 2024-2025 Aleksa Sarai ++// Copyright (C) 2024-2025 SUSE LLC ++// ++// This Source Code Form is subject to the terms of the Mozilla Public ++// License, v. 2.0. If a copy of the MPL was not distributed with this ++// file, You can obtain one at https://mozilla.org/MPL/2.0/. ++ ++package fd ++ ++import ( ++ "fmt" ++ "os" ++ "path/filepath" ++ "runtime" ++ ++ "golang.org/x/sys/unix" ++ ++ "github.com/cyphar/filepath-securejoin/pathrs-lite/internal/gocompat" ++) ++ ++// prepareAt returns -EBADF (an invalid fd) if dir is nil, otherwise using ++// the dir.Fd(). We use -EBADF because in filepath-securejoin we generally ++// don't want to allow relative-to-cwd paths. The returned path is an ++// *informational* string that describes a reasonable pathname for the given ++// *at(2) arguments. You must not use the full path for any actual filesystem ++// operations. ++func prepareAt(dir Fd, path string) (dirFd int, unsafeUnmaskedPath string) { ++ dirFd, dirPath := -int(unix.EBADF), "." ++ if dir != nil { ++ dirFd, dirPath = int(dir.Fd()), dir.Name() ++ } ++ if !filepath.IsAbs(path) { ++ // only prepend the dirfd path for relative paths ++ path = dirPath + "/" + path ++ } ++ // NOTE: If path is "." or "", the returned path won't be filepath.Clean, ++ // but that's okay since this path is either used for errors (in which case ++ // a trailing "/" or "/." is important information) or will be ++ // filepath.Clean'd later (in the case of fd.Openat).
++ return dirFd, path ++} ++ ++// Openat is an [Fd]-based wrapper around unix.Openat. ++func Openat(dir Fd, path string, flags int, mode int) (*os.File, error) { //nolint:unparam // wrapper func ++ dirFd, fullPath := prepareAt(dir, path) ++ // Make sure we always set O_CLOEXEC. ++ flags |= unix.O_CLOEXEC ++ fd, err := unix.Openat(dirFd, path, flags, uint32(mode)) ++ if err != nil { ++ return nil, &os.PathError{Op: "openat", Path: fullPath, Err: err} ++ } ++ runtime.KeepAlive(dir) ++ // openat is only used with lexically-safe paths so we can use ++ // filepath.Clean here, and also the path itself is not going to be used ++ // for actual path operations. ++ fullPath = filepath.Clean(fullPath) ++ return os.NewFile(uintptr(fd), fullPath), nil ++} ++ ++// Fstatat is an [Fd]-based wrapper around unix.Fstatat. ++func Fstatat(dir Fd, path string, flags int) (unix.Stat_t, error) { ++ dirFd, fullPath := prepareAt(dir, path) ++ var stat unix.Stat_t ++ if err := unix.Fstatat(dirFd, path, &stat, flags); err != nil { ++ return stat, &os.PathError{Op: "fstatat", Path: fullPath, Err: err} ++ } ++ runtime.KeepAlive(dir) ++ return stat, nil ++} ++ ++// Faccessat is an [Fd]-based wrapper around unix.Faccessat. ++func Faccessat(dir Fd, path string, mode uint32, flags int) error { ++ dirFd, fullPath := prepareAt(dir, path) ++ err := unix.Faccessat(dirFd, path, mode, flags) ++ if err != nil { ++ err = &os.PathError{Op: "faccessat", Path: fullPath, Err: err} ++ } ++ runtime.KeepAlive(dir) ++ return err ++} ++ ++// Readlinkat is an [Fd]-based wrapper around unix.Readlinkat. 
++func Readlinkat(dir Fd, path string) (string, error) { ++ dirFd, fullPath := prepareAt(dir, path) ++ size := 4096 ++ for { ++ linkBuf := make([]byte, size) ++ n, err := unix.Readlinkat(dirFd, path, linkBuf) ++ if err != nil { ++ return "", &os.PathError{Op: "readlinkat", Path: fullPath, Err: err} ++ } ++ runtime.KeepAlive(dir) ++ if n != size { ++ return string(linkBuf[:n]), nil ++ } ++ // Possible truncation, resize the buffer. ++ size *= 2 ++ } ++} ++ ++const ( ++ // STATX_MNT_ID_UNIQUE is provided in golang.org/x/sys@v0.20.0, but in order to ++ // avoid bumping the requirement for a single constant we can just define it ++ // ourselves. ++ _STATX_MNT_ID_UNIQUE = 0x4000 //nolint:revive // unix.* name ++ ++ // We don't care which mount ID we get. The kernel will give us the unique ++ // one if it is supported. If the kernel doesn't support ++ // STATX_MNT_ID_UNIQUE, the bit is ignored and the returned request mask ++ // will only contain STATX_MNT_ID (if supported). ++ wantStatxMntMask = _STATX_MNT_ID_UNIQUE | unix.STATX_MNT_ID ++) ++ ++var hasStatxMountID = gocompat.SyncOnceValue(func() bool { ++ var stx unix.Statx_t ++ err := unix.Statx(-int(unix.EBADF), "/", 0, wantStatxMntMask, &stx) ++ return err == nil && stx.Mask&wantStatxMntMask != 0 ++}) ++ ++// GetMountID gets the mount identifier associated with the fd and path ++// combination. It is effectively a wrapper around fetching ++// STATX_MNT_ID{,_UNIQUE} with unix.Statx, but with a fallback to 0 if the ++// kernel doesn't support the feature. ++func GetMountID(dir Fd, path string) (uint64, error) { ++ // If we don't have statx(STATX_MNT_ID*) support, we can't do anything. 
++ if !hasStatxMountID() { ++ return 0, nil ++ } ++ ++ dirFd, fullPath := prepareAt(dir, path) ++ ++ var stx unix.Statx_t ++ err := unix.Statx(dirFd, path, unix.AT_EMPTY_PATH|unix.AT_SYMLINK_NOFOLLOW, wantStatxMntMask, &stx) ++ if stx.Mask&wantStatxMntMask == 0 { ++ // It's not a kernel limitation, for some reason we couldn't get a ++ // mount ID. Assume it's some kind of attack. ++ err = fmt.Errorf("could not get mount id: %w", err) ++ } ++ if err != nil { ++ return 0, &os.PathError{Op: "statx(STATX_MNT_ID_...)", Path: fullPath, Err: err} ++ } ++ runtime.KeepAlive(dir) ++ return stx.Mnt_id, nil ++} +diff --git a/vendor/github.com/cyphar/filepath-securejoin/pathrs-lite/internal/fd/fd.go b/vendor/github.com/cyphar/filepath-securejoin/pathrs-lite/internal/fd/fd.go +new file mode 100644 +index 00000000..d2206a38 +--- /dev/null ++++ b/vendor/github.com/cyphar/filepath-securejoin/pathrs-lite/internal/fd/fd.go +@@ -0,0 +1,55 @@ ++// SPDX-License-Identifier: MPL-2.0 ++ ++// Copyright (C) 2025 Aleksa Sarai ++// Copyright (C) 2025 SUSE LLC ++// ++// This Source Code Form is subject to the terms of the Mozilla Public ++// License, v. 2.0. If a copy of the MPL was not distributed with this ++// file, You can obtain one at https://mozilla.org/MPL/2.0/. ++ ++// Package fd provides a drop-in interface-based replacement of [*os.File] that ++// allows for things like noop-Close wrappers to be used. ++// ++// [*os.File]: https://pkg.go.dev/os#File ++package fd ++ ++import ( ++ "io" ++ "os" ++) ++ ++// Fd is an interface that mirrors most of the API of [*os.File], allowing you ++// to create wrappers that can be used in place of [*os.File]. ++// ++// [*os.File]: https://pkg.go.dev/os#File ++type Fd interface { ++ io.Closer ++ Name() string ++ Fd() uintptr ++} ++ ++// Compile-time interface checks. 
++var ( ++ _ Fd = (*os.File)(nil) ++ _ Fd = noClose{} ++) ++ ++type noClose struct{ inner Fd } ++ ++func (f noClose) Name() string { return f.inner.Name() } ++func (f noClose) Fd() uintptr { return f.inner.Fd() } ++ ++func (f noClose) Close() error { return nil } ++ ++// NopCloser returns an [*os.File]-like object where the [Close] method is now ++// a no-op. ++// ++// Note that for [*os.File] and similar objects, the Go garbage collector will ++// still call [Close] on the underlying file unless you use ++// [runtime.SetFinalizer] to disable this behaviour. This is up to the caller ++// to do (if necessary). ++// ++// [*os.File]: https://pkg.go.dev/os#File ++// [Close]: https://pkg.go.dev/io#Closer ++// [runtime.SetFinalizer]: https://pkg.go.dev/runtime#SetFinalizer ++func NopCloser(f Fd) Fd { return noClose{inner: f} } +diff --git a/vendor/github.com/cyphar/filepath-securejoin/pathrs-lite/internal/fd/fd_linux.go b/vendor/github.com/cyphar/filepath-securejoin/pathrs-lite/internal/fd/fd_linux.go +new file mode 100644 +index 00000000..e1ec3c0b +--- /dev/null ++++ b/vendor/github.com/cyphar/filepath-securejoin/pathrs-lite/internal/fd/fd_linux.go +@@ -0,0 +1,78 @@ ++// SPDX-License-Identifier: MPL-2.0 ++ ++//go:build linux ++ ++// Copyright (C) 2024-2025 Aleksa Sarai ++// Copyright (C) 2024-2025 SUSE LLC ++// ++// This Source Code Form is subject to the terms of the Mozilla Public ++// License, v. 2.0. If a copy of the MPL was not distributed with this ++// file, You can obtain one at https://mozilla.org/MPL/2.0/. ++ ++package fd ++ ++import ( ++ "fmt" ++ "os" ++ "runtime" ++ ++ "golang.org/x/sys/unix" ++ ++ "github.com/cyphar/filepath-securejoin/pathrs-lite/internal" ++) ++ ++// DupWithName creates a new file descriptor referencing the same underlying ++// file, but with the provided name instead of fd.Name(). 
++func DupWithName(fd Fd, name string) (*os.File, error) { ++ fd2, err := unix.FcntlInt(fd.Fd(), unix.F_DUPFD_CLOEXEC, 0) ++ if err != nil { ++ return nil, os.NewSyscallError("fcntl(F_DUPFD_CLOEXEC)", err) ++ } ++ runtime.KeepAlive(fd) ++ return os.NewFile(uintptr(fd2), name), nil ++} ++ ++// Dup creates a new file description referencing the same underlying file. ++func Dup(fd Fd) (*os.File, error) { ++ return DupWithName(fd, fd.Name()) ++} ++ ++// Fstat is an [Fd]-based wrapper around unix.Fstat. ++func Fstat(fd Fd) (unix.Stat_t, error) { ++ var stat unix.Stat_t ++ if err := unix.Fstat(int(fd.Fd()), &stat); err != nil { ++ return stat, &os.PathError{Op: "fstat", Path: fd.Name(), Err: err} ++ } ++ runtime.KeepAlive(fd) ++ return stat, nil ++} ++ ++// Fstatfs is an [Fd]-based wrapper around unix.Fstatfs. ++func Fstatfs(fd Fd) (unix.Statfs_t, error) { ++ var statfs unix.Statfs_t ++ if err := unix.Fstatfs(int(fd.Fd()), &statfs); err != nil { ++ return statfs, &os.PathError{Op: "fstatfs", Path: fd.Name(), Err: err} ++ } ++ runtime.KeepAlive(fd) ++ return statfs, nil ++} ++ ++// IsDeadInode detects whether the file has been unlinked from a filesystem and ++// is thus a "dead inode" from the kernel's perspective. ++func IsDeadInode(file Fd) error { ++ // If the nlink of a file drops to 0, there is an attacker deleting ++ // directories during our walk, which could result in weird /proc values. ++ // It's better to error out in this case. 
++ stat, err := Fstat(file) ++ if err != nil { ++ return fmt.Errorf("check for dead inode: %w", err) ++ } ++ if stat.Nlink == 0 { ++ err := internal.ErrDeletedInode ++ if stat.Mode&unix.S_IFMT == unix.S_IFDIR { ++ err = internal.ErrInvalidDirectory ++ } ++ return fmt.Errorf("%w %q", err, file.Name()) ++ } ++ return nil ++} +diff --git a/vendor/github.com/cyphar/filepath-securejoin/pathrs-lite/internal/fd/mount_linux.go b/vendor/github.com/cyphar/filepath-securejoin/pathrs-lite/internal/fd/mount_linux.go +new file mode 100644 +index 00000000..77549c7a +--- /dev/null ++++ b/vendor/github.com/cyphar/filepath-securejoin/pathrs-lite/internal/fd/mount_linux.go +@@ -0,0 +1,54 @@ ++// SPDX-License-Identifier: MPL-2.0 ++ ++//go:build linux ++ ++// Copyright (C) 2024-2025 Aleksa Sarai ++// Copyright (C) 2024-2025 SUSE LLC ++// ++// This Source Code Form is subject to the terms of the Mozilla Public ++// License, v. 2.0. If a copy of the MPL was not distributed with this ++// file, You can obtain one at https://mozilla.org/MPL/2.0/. ++ ++package fd ++ ++import ( ++ "os" ++ "runtime" ++ ++ "golang.org/x/sys/unix" ++) ++ ++// Fsopen is an [Fd]-based wrapper around unix.Fsopen. ++func Fsopen(fsName string, flags int) (*os.File, error) { ++ // Make sure we always set O_CLOEXEC. ++ flags |= unix.FSOPEN_CLOEXEC ++ fd, err := unix.Fsopen(fsName, flags) ++ if err != nil { ++ return nil, os.NewSyscallError("fsopen "+fsName, err) ++ } ++ return os.NewFile(uintptr(fd), "fscontext:"+fsName), nil ++} ++ ++// Fsmount is an [Fd]-based wrapper around unix.Fsmount. ++func Fsmount(ctx Fd, flags, mountAttrs int) (*os.File, error) { ++ // Make sure we always set O_CLOEXEC. ++ flags |= unix.FSMOUNT_CLOEXEC ++ fd, err := unix.Fsmount(int(ctx.Fd()), flags, mountAttrs) ++ if err != nil { ++ return nil, os.NewSyscallError("fsmount "+ctx.Name(), err) ++ } ++ return os.NewFile(uintptr(fd), "fsmount:"+ctx.Name()), nil ++} ++ ++// OpenTree is an [Fd]-based wrapper around unix.OpenTree. 
++func OpenTree(dir Fd, path string, flags uint) (*os.File, error) { ++ dirFd, fullPath := prepareAt(dir, path) ++ // Make sure we always set O_CLOEXEC. ++ flags |= unix.OPEN_TREE_CLOEXEC ++ fd, err := unix.OpenTree(dirFd, path, flags) ++ if err != nil { ++ return nil, &os.PathError{Op: "open_tree", Path: fullPath, Err: err} ++ } ++ runtime.KeepAlive(dir) ++ return os.NewFile(uintptr(fd), fullPath), nil ++} +diff --git a/vendor/github.com/cyphar/filepath-securejoin/pathrs-lite/internal/fd/openat2_linux.go b/vendor/github.com/cyphar/filepath-securejoin/pathrs-lite/internal/fd/openat2_linux.go +new file mode 100644 +index 00000000..23053083 +--- /dev/null ++++ b/vendor/github.com/cyphar/filepath-securejoin/pathrs-lite/internal/fd/openat2_linux.go +@@ -0,0 +1,62 @@ ++// SPDX-License-Identifier: MPL-2.0 ++ ++//go:build linux ++ ++// Copyright (C) 2024-2025 Aleksa Sarai ++// Copyright (C) 2024-2025 SUSE LLC ++// ++// This Source Code Form is subject to the terms of the Mozilla Public ++// License, v. 2.0. If a copy of the MPL was not distributed with this ++// file, You can obtain one at https://mozilla.org/MPL/2.0/. ++ ++package fd ++ ++import ( ++ "errors" ++ "os" ++ "runtime" ++ ++ "golang.org/x/sys/unix" ++ ++ "github.com/cyphar/filepath-securejoin/pathrs-lite/internal" ++) ++ ++func scopedLookupShouldRetry(how *unix.OpenHow, err error) bool { ++ // RESOLVE_IN_ROOT (and RESOLVE_BENEATH) can return -EAGAIN if we resolve ++ // ".." while a mount or rename occurs anywhere on the system. This could ++ // happen spuriously, or as the result of an attacker trying to mess with ++ // us during lookup. ++ // ++ // In addition, scoped lookups have a "safety check" at the end of ++ // complete_walk which will return -EXDEV if the final path is not in the ++ // root. 
++ return how.Resolve&(unix.RESOLVE_IN_ROOT|unix.RESOLVE_BENEATH) != 0 && ++ (errors.Is(err, unix.EAGAIN) || errors.Is(err, unix.EXDEV)) ++} ++ ++const scopedLookupMaxRetries = 32 ++ ++// Openat2 is an [Fd]-based wrapper around unix.Openat2, but with some retry ++// logic in case of EAGAIN errors. ++func Openat2(dir Fd, path string, how *unix.OpenHow) (*os.File, error) { ++ dirFd, fullPath := prepareAt(dir, path) ++ // Make sure we always set O_CLOEXEC. ++ how.Flags |= unix.O_CLOEXEC ++ var tries int ++ for tries < scopedLookupMaxRetries { ++ fd, err := unix.Openat2(dirFd, path, how) ++ if err != nil { ++ if scopedLookupShouldRetry(how, err) { ++ // We retry a couple of times to avoid the spurious errors, and ++ // if we are being attacked then returning -EAGAIN is the best ++ // we can do. ++ tries++ ++ continue ++ } ++ return nil, &os.PathError{Op: "openat2", Path: fullPath, Err: err} ++ } ++ runtime.KeepAlive(dir) ++ return os.NewFile(uintptr(fd), fullPath), nil ++ } ++ return nil, &os.PathError{Op: "openat2", Path: fullPath, Err: internal.ErrPossibleAttack} ++} +diff --git a/vendor/github.com/cyphar/filepath-securejoin/pathrs-lite/internal/gocompat/README.md b/vendor/github.com/cyphar/filepath-securejoin/pathrs-lite/internal/gocompat/README.md +new file mode 100644 +index 00000000..5dcb6ae0 +--- /dev/null ++++ b/vendor/github.com/cyphar/filepath-securejoin/pathrs-lite/internal/gocompat/README.md +@@ -0,0 +1,10 @@ ++## gocompat ## ++ ++This directory contains backports of stdlib functions from later Go versions so ++the filepath-securejoin can continue to be used by projects that are stuck with ++Go 1.18 support. Note that often filepath-securejoin is added in security ++patches for old releases, so avoiding the need to bump Go compiler requirements ++is a huge plus to downstreams. ++ ++The source code is licensed under the same license as the Go stdlib. See the ++source files for the precise license information. 
+diff --git a/vendor/github.com/cyphar/filepath-securejoin/pathrs-lite/internal/gocompat/doc.go b/vendor/github.com/cyphar/filepath-securejoin/pathrs-lite/internal/gocompat/doc.go +new file mode 100644 +index 00000000..4b1803f5 +--- /dev/null ++++ b/vendor/github.com/cyphar/filepath-securejoin/pathrs-lite/internal/gocompat/doc.go +@@ -0,0 +1,13 @@ ++// SPDX-License-Identifier: BSD-3-Clause ++//go:build linux && go1.20 ++ ++// Copyright (C) 2025 SUSE LLC. All rights reserved. ++// Use of this source code is governed by a BSD-style ++// license that can be found in the LICENSE file. ++ ++// Package gocompat includes compatibility shims (backported from future Go ++// stdlib versions) to permit filepath-securejoin to be used with older Go ++// versions (often filepath-securejoin is added in security patches for old ++// releases, so avoiding the need to bump Go compiler requirements is a huge ++// plus to downstreams). ++package gocompat +diff --git a/vendor/github.com/cyphar/filepath-securejoin/gocompat_errors_go120.go b/vendor/github.com/cyphar/filepath-securejoin/pathrs-lite/internal/gocompat/gocompat_errors_go120.go +similarity index 69% +rename from vendor/github.com/cyphar/filepath-securejoin/gocompat_errors_go120.go +rename to vendor/github.com/cyphar/filepath-securejoin/pathrs-lite/internal/gocompat/gocompat_errors_go120.go +index 42452bbf..4a114bd3 100644 +--- a/vendor/github.com/cyphar/filepath-securejoin/gocompat_errors_go120.go ++++ b/vendor/github.com/cyphar/filepath-securejoin/pathrs-lite/internal/gocompat/gocompat_errors_go120.go +@@ -1,18 +1,19 @@ ++// SPDX-License-Identifier: BSD-3-Clause + //go:build linux && go1.20 + + // Copyright (C) 2024 SUSE LLC. All rights reserved. + // Use of this source code is governed by a BSD-style + // license that can be found in the LICENSE file. 
+ +-package securejoin ++package gocompat + + import ( + "fmt" + ) + +-// wrapBaseError is a helper that is equivalent to fmt.Errorf("%w: %w"), except ++// WrapBaseError is a helper that is equivalent to fmt.Errorf("%w: %w"), except + // that on pre-1.20 Go versions only errors.Is() works properly (errors.Unwrap) + // is only guaranteed to give you baseErr. +-func wrapBaseError(baseErr, extraErr error) error { ++func WrapBaseError(baseErr, extraErr error) error { + return fmt.Errorf("%w: %w", extraErr, baseErr) + } +diff --git a/vendor/github.com/cyphar/filepath-securejoin/gocompat_errors_unsupported.go b/vendor/github.com/cyphar/filepath-securejoin/pathrs-lite/internal/gocompat/gocompat_errors_unsupported.go +similarity index 80% +rename from vendor/github.com/cyphar/filepath-securejoin/gocompat_errors_unsupported.go +rename to vendor/github.com/cyphar/filepath-securejoin/pathrs-lite/internal/gocompat/gocompat_errors_unsupported.go +index e7adca3f..3061016a 100644 +--- a/vendor/github.com/cyphar/filepath-securejoin/gocompat_errors_unsupported.go ++++ b/vendor/github.com/cyphar/filepath-securejoin/pathrs-lite/internal/gocompat/gocompat_errors_unsupported.go +@@ -1,10 +1,12 @@ ++// SPDX-License-Identifier: BSD-3-Clause ++ + //go:build linux && !go1.20 + + // Copyright (C) 2024 SUSE LLC. All rights reserved. + // Use of this source code is governed by a BSD-style + // license that can be found in the LICENSE file. + +-package securejoin ++package gocompat + + import ( + "fmt" +@@ -27,10 +29,10 @@ func (err wrappedError) Error() string { + return fmt.Sprintf("%v: %v", err.isError, err.inner) + } + +-// wrapBaseError is a helper that is equivalent to fmt.Errorf("%w: %w"), except ++// WrapBaseError is a helper that is equivalent to fmt.Errorf("%w: %w"), except + // that on pre-1.20 Go versions only errors.Is() works properly (errors.Unwrap) + // is only guaranteed to give you baseErr. 
+-func wrapBaseError(baseErr, extraErr error) error { ++func WrapBaseError(baseErr, extraErr error) error { + return wrappedError{ + inner: baseErr, + isError: extraErr, +diff --git a/vendor/github.com/cyphar/filepath-securejoin/pathrs-lite/internal/gocompat/gocompat_generics_go121.go b/vendor/github.com/cyphar/filepath-securejoin/pathrs-lite/internal/gocompat/gocompat_generics_go121.go +new file mode 100644 +index 00000000..d4a93818 +--- /dev/null ++++ b/vendor/github.com/cyphar/filepath-securejoin/pathrs-lite/internal/gocompat/gocompat_generics_go121.go +@@ -0,0 +1,53 @@ ++// SPDX-License-Identifier: BSD-3-Clause ++ ++//go:build linux && go1.21 ++ ++// Copyright (C) 2024-2025 SUSE LLC. All rights reserved. ++// Use of this source code is governed by a BSD-style ++// license that can be found in the LICENSE file. ++ ++package gocompat ++ ++import ( ++ "cmp" ++ "slices" ++ "sync" ++) ++ ++// SlicesDeleteFunc is equivalent to Go 1.21's slices.DeleteFunc. ++func SlicesDeleteFunc[S ~[]E, E any](slice S, delFn func(E) bool) S { ++ return slices.DeleteFunc(slice, delFn) ++} ++ ++// SlicesContains is equivalent to Go 1.21's slices.Contains. ++func SlicesContains[S ~[]E, E comparable](slice S, val E) bool { ++ return slices.Contains(slice, val) ++} ++ ++// SlicesClone is equivalent to Go 1.21's slices.Clone. ++func SlicesClone[S ~[]E, E any](slice S) S { ++ return slices.Clone(slice) ++} ++ ++// SyncOnceValue is equivalent to Go 1.21's sync.OnceValue. ++func SyncOnceValue[T any](f func() T) func() T { ++ return sync.OnceValue(f) ++} ++ ++// SyncOnceValues is equivalent to Go 1.21's sync.OnceValues. ++func SyncOnceValues[T1, T2 any](f func() (T1, T2)) func() (T1, T2) { ++ return sync.OnceValues(f) ++} ++ ++// CmpOrdered is equivalent to Go 1.21's cmp.Ordered generic type definition. ++type CmpOrdered = cmp.Ordered ++ ++// CmpCompare is equivalent to Go 1.21's cmp.Compare. 
++func CmpCompare[T CmpOrdered](x, y T) int { ++ return cmp.Compare(x, y) ++} ++ ++// Max2 is equivalent to Go 1.21's max builtin (but only for two parameters). ++func Max2[T CmpOrdered](x, y T) T { ++ return max(x, y) ++} +diff --git a/vendor/github.com/cyphar/filepath-securejoin/pathrs-lite/internal/gocompat/gocompat_generics_unsupported.go b/vendor/github.com/cyphar/filepath-securejoin/pathrs-lite/internal/gocompat/gocompat_generics_unsupported.go +new file mode 100644 +index 00000000..0ea6218a +--- /dev/null ++++ b/vendor/github.com/cyphar/filepath-securejoin/pathrs-lite/internal/gocompat/gocompat_generics_unsupported.go +@@ -0,0 +1,187 @@ ++// SPDX-License-Identifier: BSD-3-Clause ++ ++//go:build linux && !go1.21 ++ ++// Copyright (C) 2021, 2022 The Go Authors. All rights reserved. ++// Copyright (C) 2024-2025 SUSE LLC. All rights reserved. ++// Use of this source code is governed by a BSD-style ++// license that can be found in the LICENSE.BSD file. ++ ++package gocompat ++ ++import ( ++ "sync" ++) ++ ++// These are very minimal implementations of functions that appear in Go 1.21's ++// stdlib, included so that we can build on older Go versions. Most are ++// borrowed directly from the stdlib, and a few are modified to be "obviously ++// correct" without needing to copy too many other helpers. ++ ++// clearSlice is equivalent to Go 1.21's builtin clear. ++// Copied from the Go 1.24 stdlib implementation. ++func clearSlice[S ~[]E, E any](slice S) { ++ var zero E ++ for i := range slice { ++ slice[i] = zero ++ } ++} ++ ++// slicesIndexFunc is equivalent to Go 1.21's slices.IndexFunc. ++// Copied from the Go 1.24 stdlib implementation. ++func slicesIndexFunc[S ~[]E, E any](s S, f func(E) bool) int { ++ for i := range s { ++ if f(s[i]) { ++ return i ++ } ++ } ++ return -1 ++} ++ ++// SlicesDeleteFunc is equivalent to Go 1.21's slices.DeleteFunc. ++// Copied from the Go 1.24 stdlib implementation. 
++func SlicesDeleteFunc[S ~[]E, E any](s S, del func(E) bool) S { ++ i := slicesIndexFunc(s, del) ++ if i == -1 { ++ return s ++ } ++ // Don't start copying elements until we find one to delete. ++ for j := i + 1; j < len(s); j++ { ++ if v := s[j]; !del(v) { ++ s[i] = v ++ i++ ++ } ++ } ++ clearSlice(s[i:]) // zero/nil out the obsolete elements, for GC ++ return s[:i] ++} ++ ++// SlicesContains is equivalent to Go 1.21's slices.Contains. ++// Similar to the stdlib slices.Contains, except that we don't have ++// slices.Index so we need to use slices.IndexFunc for this non-Func helper. ++func SlicesContains[S ~[]E, E comparable](s S, v E) bool { ++ return slicesIndexFunc(s, func(e E) bool { return e == v }) >= 0 ++} ++ ++// SlicesClone is equivalent to Go 1.21's slices.Clone. ++// Copied from the Go 1.24 stdlib implementation. ++func SlicesClone[S ~[]E, E any](s S) S { ++ // Preserve nil in case it matters. ++ if s == nil { ++ return nil ++ } ++ return append(S([]E{}), s...) ++} ++ ++// SyncOnceValue is equivalent to Go 1.21's sync.OnceValue. ++// Copied from the Go 1.25 stdlib implementation. ++func SyncOnceValue[T any](f func() T) func() T { ++ // Use a struct so that there's a single heap allocation. ++ d := struct { ++ f func() T ++ once sync.Once ++ valid bool ++ p any ++ result T ++ }{ ++ f: f, ++ } ++ return func() T { ++ d.once.Do(func() { ++ defer func() { ++ d.f = nil ++ d.p = recover() ++ if !d.valid { ++ panic(d.p) ++ } ++ }() ++ d.result = d.f() ++ d.valid = true ++ }) ++ if !d.valid { ++ panic(d.p) ++ } ++ return d.result ++ } ++} ++ ++// SyncOnceValues is equivalent to Go 1.21's sync.OnceValues. ++// Copied from the Go 1.25 stdlib implementation. ++func SyncOnceValues[T1, T2 any](f func() (T1, T2)) func() (T1, T2) { ++ // Use a struct so that there's a single heap allocation. 
++ d := struct { ++ f func() (T1, T2) ++ once sync.Once ++ valid bool ++ p any ++ r1 T1 ++ r2 T2 ++ }{ ++ f: f, ++ } ++ return func() (T1, T2) { ++ d.once.Do(func() { ++ defer func() { ++ d.f = nil ++ d.p = recover() ++ if !d.valid { ++ panic(d.p) ++ } ++ }() ++ d.r1, d.r2 = d.f() ++ d.valid = true ++ }) ++ if !d.valid { ++ panic(d.p) ++ } ++ return d.r1, d.r2 ++ } ++} ++ ++// CmpOrdered is equivalent to Go 1.21's cmp.Ordered generic type definition. ++// Copied from the Go 1.25 stdlib implementation. ++type CmpOrdered interface { ++ ~int | ~int8 | ~int16 | ~int32 | ~int64 | ++ ~uint | ~uint8 | ~uint16 | ~uint32 | ~uint64 | ~uintptr | ++ ~float32 | ~float64 | ++ ~string ++} ++ ++// isNaN reports whether x is a NaN without requiring the math package. ++// This will always return false if T is not floating-point. ++// Copied from the Go 1.25 stdlib implementation. ++func isNaN[T CmpOrdered](x T) bool { ++ return x != x ++} ++ ++// CmpCompare is equivalent to Go 1.21's cmp.Compare. ++// Copied from the Go 1.25 stdlib implementation. ++func CmpCompare[T CmpOrdered](x, y T) int { ++ xNaN := isNaN(x) ++ yNaN := isNaN(y) ++ if xNaN { ++ if yNaN { ++ return 0 ++ } ++ return -1 ++ } ++ if yNaN { ++ return +1 ++ } ++ if x < y { ++ return -1 ++ } ++ if x > y { ++ return +1 ++ } ++ return 0 ++} ++ ++// Max2 is equivalent to Go 1.21's max builtin for two parameters. ++func Max2[T CmpOrdered](x, y T) T { ++ m := x ++ if y > m { ++ m = y ++ } ++ return m ++} +diff --git a/vendor/github.com/cyphar/filepath-securejoin/pathrs-lite/internal/kernelversion/kernel_linux.go b/vendor/github.com/cyphar/filepath-securejoin/pathrs-lite/internal/kernelversion/kernel_linux.go +new file mode 100644 +index 00000000..cb6de418 +--- /dev/null ++++ b/vendor/github.com/cyphar/filepath-securejoin/pathrs-lite/internal/kernelversion/kernel_linux.go +@@ -0,0 +1,123 @@ ++// SPDX-License-Identifier: BSD-3-Clause ++ ++// Copyright (C) 2022 The Go Authors. All rights reserved. 
++// Copyright (C) 2025 SUSE LLC. All rights reserved. ++// Use of this source code is governed by a BSD-style ++// license that can be found in the LICENSE.BSD file. ++ ++// The parsing logic is very loosely based on the Go stdlib's ++// src/internal/syscall/unix/kernel_version_linux.go but with an API that looks ++// a bit like runc's libcontainer/system/kernelversion. ++// ++// TODO(cyphar): This API has been copied around to a lot of different projects ++// (Docker, containerd, runc, and now filepath-securejoin) -- maybe we should ++// put it in a separate project? ++ ++// Package kernelversion provides a simple mechanism for checking whether the ++// running kernel is at least as new as some baseline kernel version. This is ++// often useful when checking for features that would be too complicated to ++// test support for (or in cases where we know that some kernel features in ++// backport-heavy kernels are broken and need to be avoided). ++package kernelversion ++ ++import ( ++ "bytes" ++ "errors" ++ "fmt" ++ "strconv" ++ "strings" ++ ++ "golang.org/x/sys/unix" ++ ++ "github.com/cyphar/filepath-securejoin/pathrs-lite/internal/gocompat" ++) ++ ++// KernelVersion is a numeric representation of the key numerical elements of a ++// kernel version (for instance, "4.1.2-default-1" would be represented as ++// KernelVersion{4, 1, 2}). ++type KernelVersion []uint64 ++ ++func (kver KernelVersion) String() string { ++ var str strings.Builder ++ for idx, elem := range kver { ++ if idx != 0 { ++ _, _ = str.WriteRune('.') ++ } ++ _, _ = str.WriteString(strconv.FormatUint(elem, 10)) ++ } ++ return str.String() ++} ++ ++var errInvalidKernelVersion = errors.New("invalid kernel version") ++ ++// parseKernelVersion parses a string and creates a KernelVersion based on it. 
++func parseKernelVersion(kverStr string) (KernelVersion, error) { ++ kver := make(KernelVersion, 1, 3) ++ for idx, ch := range kverStr { ++ if '0' <= ch && ch <= '9' { ++ v := &kver[len(kver)-1] ++ *v = (*v * 10) + uint64(ch-'0') ++ } else { ++ if idx == 0 || kverStr[idx-1] < '0' || '9' < kverStr[idx-1] { ++ // "." must be preceded by a digit while in version section ++ return nil, fmt.Errorf("%w %q: kernel version has dot(s) followed by non-digit in version section", errInvalidKernelVersion, kverStr) ++ } ++ if ch != '.' { ++ break ++ } ++ kver = append(kver, 0) ++ } ++ } ++ if len(kver) < 2 { ++ return nil, fmt.Errorf("%w %q: kernel versions must contain at least two components", errInvalidKernelVersion, kverStr) ++ } ++ return kver, nil ++} ++ ++// getKernelVersion gets the current kernel version. ++var getKernelVersion = gocompat.SyncOnceValues(func() (KernelVersion, error) { ++ var uts unix.Utsname ++ if err := unix.Uname(&uts); err != nil { ++ return nil, err ++ } ++ // Remove the \x00 from the release. ++ release := uts.Release[:] ++ return parseKernelVersion(string(release[:bytes.IndexByte(release, 0)])) ++}) ++ ++// GreaterEqualThan returns true if the the host kernel version is greater than ++// or equal to the provided [KernelVersion]. When doing this comparison, any ++// non-numerical suffixes of the host kernel version are ignored. ++// ++// If the number of components provided is not equal to the number of numerical ++// components of the host kernel version, any missing components are treated as ++// 0. This means that GreaterEqualThan(KernelVersion{4}) will be treated the ++// same as GreaterEqualThan(KernelVersion{4, 0, 0, ..., 0, 0}), and that if the ++// host kernel version is "4" then GreaterEqualThan(KernelVersion{4, 1}) will ++// return false (because the host version will be treated as "4.0"). 
++func GreaterEqualThan(wantKver KernelVersion) (bool, error) { ++ hostKver, err := getKernelVersion() ++ if err != nil { ++ return false, err ++ } ++ ++ // Pad out the kernel version lengths to match one another. ++ cmpLen := gocompat.Max2(len(hostKver), len(wantKver)) ++ hostKver = append(hostKver, make(KernelVersion, cmpLen-len(hostKver))...) ++ wantKver = append(wantKver, make(KernelVersion, cmpLen-len(wantKver))...) ++ ++ for i := 0; i < cmpLen; i++ { ++ switch gocompat.CmpCompare(hostKver[i], wantKver[i]) { ++ case -1: ++ // host < want ++ return false, nil ++ case +1: ++ // host > want ++ return true, nil ++ case 0: ++ continue ++ } ++ } ++ // equal version values ++ return true, nil ++} +diff --git a/vendor/github.com/cyphar/filepath-securejoin/pathrs-lite/internal/linux/doc.go b/vendor/github.com/cyphar/filepath-securejoin/pathrs-lite/internal/linux/doc.go +new file mode 100644 +index 00000000..4635714f +--- /dev/null ++++ b/vendor/github.com/cyphar/filepath-securejoin/pathrs-lite/internal/linux/doc.go +@@ -0,0 +1,12 @@ ++// SPDX-License-Identifier: MPL-2.0 ++ ++// Copyright (C) 2024-2025 Aleksa Sarai ++// Copyright (C) 2024-2025 SUSE LLC ++// ++// This Source Code Form is subject to the terms of the Mozilla Public ++// License, v. 2.0. If a copy of the MPL was not distributed with this ++// file, You can obtain one at https://mozilla.org/MPL/2.0/. ++ ++// Package linux returns information about what features are supported on the ++// running kernel. 
++package linux +diff --git a/vendor/github.com/cyphar/filepath-securejoin/pathrs-lite/internal/linux/mount_linux.go b/vendor/github.com/cyphar/filepath-securejoin/pathrs-lite/internal/linux/mount_linux.go +new file mode 100644 +index 00000000..b29905bf +--- /dev/null ++++ b/vendor/github.com/cyphar/filepath-securejoin/pathrs-lite/internal/linux/mount_linux.go +@@ -0,0 +1,47 @@ ++// SPDX-License-Identifier: MPL-2.0 ++ ++//go:build linux ++ ++// Copyright (C) 2024-2025 Aleksa Sarai ++// Copyright (C) 2024-2025 SUSE LLC ++// ++// This Source Code Form is subject to the terms of the Mozilla Public ++// License, v. 2.0. If a copy of the MPL was not distributed with this ++// file, You can obtain one at https://mozilla.org/MPL/2.0/. ++ ++package linux ++ ++import ( ++ "golang.org/x/sys/unix" ++ ++ "github.com/cyphar/filepath-securejoin/pathrs-lite/internal/gocompat" ++ "github.com/cyphar/filepath-securejoin/pathrs-lite/internal/kernelversion" ++) ++ ++// HasNewMountAPI returns whether the new fsopen(2) mount API is supported on ++// the running kernel. ++var HasNewMountAPI = gocompat.SyncOnceValue(func() bool { ++ // All of the pieces of the new mount API we use (fsopen, fsconfig, ++ // fsmount, open_tree) were added together in Linux 5.2[1,2], so we can ++ // just check for one of the syscalls and the others should also be ++ // available. ++ // ++ // Just try to use open_tree(2) to open a file without OPEN_TREE_CLONE. ++ // This is equivalent to openat(2), but tells us if open_tree is ++ // available (and thus all of the other basic new mount API syscalls). ++ // open_tree(2) is most light-weight syscall to test here. ++ // ++ // [1]: merge commit 400913252d09 ++ // [2]: ++ fd, err := unix.OpenTree(-int(unix.EBADF), "/", unix.OPEN_TREE_CLOEXEC) ++ if err != nil { ++ return false ++ } ++ _ = unix.Close(fd) ++ ++ // RHEL 8 has a backport of fsopen(2) that appears to have some very ++ // difficult to debug performance pathology. 
As such, it seems prudent to ++ // simply reject pre-5.2 kernels. ++ isNotBackport, _ := kernelversion.GreaterEqualThan(kernelversion.KernelVersion{5, 2}) ++ return isNotBackport ++}) +diff --git a/vendor/github.com/cyphar/filepath-securejoin/pathrs-lite/internal/linux/openat2_linux.go b/vendor/github.com/cyphar/filepath-securejoin/pathrs-lite/internal/linux/openat2_linux.go +new file mode 100644 +index 00000000..399609dc +--- /dev/null ++++ b/vendor/github.com/cyphar/filepath-securejoin/pathrs-lite/internal/linux/openat2_linux.go +@@ -0,0 +1,31 @@ ++// SPDX-License-Identifier: MPL-2.0 ++ ++//go:build linux ++ ++// Copyright (C) 2024-2025 Aleksa Sarai ++// Copyright (C) 2024-2025 SUSE LLC ++// ++// This Source Code Form is subject to the terms of the Mozilla Public ++// License, v. 2.0. If a copy of the MPL was not distributed with this ++// file, You can obtain one at https://mozilla.org/MPL/2.0/. ++ ++package linux ++ ++import ( ++ "golang.org/x/sys/unix" ++ ++ "github.com/cyphar/filepath-securejoin/pathrs-lite/internal/gocompat" ++) ++ ++// HasOpenat2 returns whether openat2(2) is supported on the running kernel. 
++var HasOpenat2 = gocompat.SyncOnceValue(func() bool { ++ fd, err := unix.Openat2(unix.AT_FDCWD, ".", &unix.OpenHow{ ++ Flags: unix.O_PATH | unix.O_CLOEXEC, ++ Resolve: unix.RESOLVE_NO_SYMLINKS | unix.RESOLVE_IN_ROOT, ++ }) ++ if err != nil { ++ return false ++ } ++ _ = unix.Close(fd) ++ return true ++}) +diff --git a/vendor/github.com/cyphar/filepath-securejoin/pathrs-lite/internal/procfs/procfs_linux.go b/vendor/github.com/cyphar/filepath-securejoin/pathrs-lite/internal/procfs/procfs_linux.go +new file mode 100644 +index 00000000..21e0a62e +--- /dev/null ++++ b/vendor/github.com/cyphar/filepath-securejoin/pathrs-lite/internal/procfs/procfs_linux.go +@@ -0,0 +1,544 @@ ++// SPDX-License-Identifier: MPL-2.0 ++ ++//go:build linux ++ ++// Copyright (C) 2024-2025 Aleksa Sarai ++// Copyright (C) 2024-2025 SUSE LLC ++// ++// This Source Code Form is subject to the terms of the Mozilla Public ++// License, v. 2.0. If a copy of the MPL was not distributed with this ++// file, You can obtain one at https://mozilla.org/MPL/2.0/. ++ ++// Package procfs provides a safe API for operating on /proc on Linux. Note ++// that this is the *internal* procfs API, mainy needed due to Go's ++// restrictions on cyclic dependencies and its incredibly minimal visibility ++// system without making a separate internal/ package. ++package procfs ++ ++import ( ++ "errors" ++ "fmt" ++ "io" ++ "os" ++ "runtime" ++ "strconv" ++ ++ "golang.org/x/sys/unix" ++ ++ "github.com/cyphar/filepath-securejoin/pathrs-lite/internal" ++ "github.com/cyphar/filepath-securejoin/pathrs-lite/internal/assert" ++ "github.com/cyphar/filepath-securejoin/pathrs-lite/internal/fd" ++ "github.com/cyphar/filepath-securejoin/pathrs-lite/internal/gocompat" ++ "github.com/cyphar/filepath-securejoin/pathrs-lite/internal/linux" ++) ++ ++// The kernel guarantees that the root inode of a procfs mount has an ++// f_type of PROC_SUPER_MAGIC and st_ino of PROC_ROOT_INO. 
++const ( ++ procSuperMagic = 0x9fa0 // PROC_SUPER_MAGIC ++ procRootIno = 1 // PROC_ROOT_INO ++) ++ ++// verifyProcHandle checks that the handle is from a procfs filesystem. ++// Contrast this to [verifyProcRoot], which also verifies that the handle is ++// the root of a procfs mount. ++func verifyProcHandle(procHandle fd.Fd) error { ++ if statfs, err := fd.Fstatfs(procHandle); err != nil { ++ return err ++ } else if statfs.Type != procSuperMagic { ++ return fmt.Errorf("%w: incorrect procfs root filesystem type 0x%x", errUnsafeProcfs, statfs.Type) ++ } ++ return nil ++} ++ ++// verifyProcRoot verifies that the handle is the root of a procfs filesystem. ++// Contrast this to [verifyProcHandle], which only verifies if the handle is ++// some file on procfs (regardless of what file it is). ++func verifyProcRoot(procRoot fd.Fd) error { ++ if err := verifyProcHandle(procRoot); err != nil { ++ return err ++ } ++ if stat, err := fd.Fstat(procRoot); err != nil { ++ return err ++ } else if stat.Ino != procRootIno { ++ return fmt.Errorf("%w: incorrect procfs root inode number %d", errUnsafeProcfs, stat.Ino) ++ } ++ return nil ++} ++ ++type procfsFeatures struct { ++ // hasSubsetPid was added in Linux 5.8, along with hidepid=ptraceable (and ++ // string-based hidepid= values). Before this patchset, it was not really ++ // safe to try to modify procfs superblock flags because the superblock was ++ // shared -- so if this feature is not available, **you should not set any ++ // superblock flags**. 
++ // ++ // 6814ef2d992a ("proc: add option to mount only a pids subset") ++ // fa10fed30f25 ("proc: allow to mount many instances of proc in one pid namespace") ++ // 24a71ce5c47f ("proc: instantiate only pids that we can ptrace on 'hidepid=4' mount option") ++ // 1c6c4d112e81 ("proc: use human-readable values for hidepid") ++ // 9ff7258575d5 ("Merge branch 'proc-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/ebiederm/user-namespace") ++ hasSubsetPid bool ++} ++ ++var getProcfsFeatures = gocompat.SyncOnceValue(func() procfsFeatures { ++ if !linux.HasNewMountAPI() { ++ return procfsFeatures{} ++ } ++ procfsCtx, err := fd.Fsopen("proc", unix.FSOPEN_CLOEXEC) ++ if err != nil { ++ return procfsFeatures{} ++ } ++ defer procfsCtx.Close() //nolint:errcheck // close failures aren't critical here ++ ++ return procfsFeatures{ ++ hasSubsetPid: unix.FsconfigSetString(int(procfsCtx.Fd()), "subset", "pid") == nil, ++ } ++}) ++ ++func newPrivateProcMount(subset bool) (_ *Handle, Err error) { ++ procfsCtx, err := fd.Fsopen("proc", unix.FSOPEN_CLOEXEC) ++ if err != nil { ++ return nil, err ++ } ++ defer procfsCtx.Close() //nolint:errcheck // close failures aren't critical here ++ ++ if subset && getProcfsFeatures().hasSubsetPid { ++ // Try to configure hidepid=ptraceable,subset=pid if possible, but ++ // ignore errors. ++ _ = unix.FsconfigSetString(int(procfsCtx.Fd()), "hidepid", "ptraceable") ++ _ = unix.FsconfigSetString(int(procfsCtx.Fd()), "subset", "pid") ++ } ++ ++ // Get an actual handle. ++ if err := unix.FsconfigCreate(int(procfsCtx.Fd())); err != nil { ++ return nil, os.NewSyscallError("fsconfig create procfs", err) ++ } ++ // TODO: Output any information from the fscontext log to debug logs. 
++ procRoot, err := fd.Fsmount(procfsCtx, unix.FSMOUNT_CLOEXEC, unix.MS_NODEV|unix.MS_NOEXEC|unix.MS_NOSUID) ++ if err != nil { ++ return nil, err ++ } ++ defer func() { ++ if Err != nil { ++ _ = procRoot.Close() ++ } ++ }() ++ return newHandle(procRoot) ++} ++ ++func clonePrivateProcMount() (_ *Handle, Err error) { ++ // Try to make a clone without using AT_RECURSIVE if we can. If this works, ++ // we can be sure there are no over-mounts and so if the root is valid then ++ // we're golden. Otherwise, we have to deal with over-mounts. ++ procRoot, err := fd.OpenTree(nil, "/proc", unix.OPEN_TREE_CLONE) ++ if err != nil || hookForcePrivateProcRootOpenTreeAtRecursive(procRoot) { ++ procRoot, err = fd.OpenTree(nil, "/proc", unix.OPEN_TREE_CLONE|unix.AT_RECURSIVE) ++ } ++ if err != nil { ++ return nil, fmt.Errorf("creating a detached procfs clone: %w", err) ++ } ++ defer func() { ++ if Err != nil { ++ _ = procRoot.Close() ++ } ++ }() ++ return newHandle(procRoot) ++} ++ ++func privateProcRoot(subset bool) (*Handle, error) { ++ if !linux.HasNewMountAPI() || hookForceGetProcRootUnsafe() { ++ return nil, fmt.Errorf("new mount api: %w", unix.ENOTSUP) ++ } ++ // Try to create a new procfs mount from scratch if we can. This ensures we ++ // can get a procfs mount even if /proc is fake (for whatever reason). ++ procRoot, err := newPrivateProcMount(subset) ++ if err != nil || hookForcePrivateProcRootOpenTree(procRoot) { ++ // Try to clone /proc then... ++ procRoot, err = clonePrivateProcMount() ++ } ++ return procRoot, err ++} ++ ++func unsafeHostProcRoot() (_ *Handle, Err error) { ++ procRoot, err := os.OpenFile("/proc", unix.O_PATH|unix.O_NOFOLLOW|unix.O_DIRECTORY|unix.O_CLOEXEC, 0) ++ if err != nil { ++ return nil, err ++ } ++ defer func() { ++ if Err != nil { ++ _ = procRoot.Close() ++ } ++ }() ++ return newHandle(procRoot) ++} ++ ++// Handle is a wrapper around an *os.File handle to "/proc", which can be used ++// to do further procfs-related operations in a safe way. 
++type Handle struct { ++ Inner fd.Fd ++ // Does this handle have subset=pid set? ++ isSubset bool ++} ++ ++func newHandle(procRoot fd.Fd) (*Handle, error) { ++ if err := verifyProcRoot(procRoot); err != nil { ++ // This is only used in methods that ++ _ = procRoot.Close() ++ return nil, err ++ } ++ proc := &Handle{Inner: procRoot} ++ // With subset=pid we can be sure that /proc/uptime will not exist. ++ if err := fd.Faccessat(proc.Inner, "uptime", unix.F_OK, unix.AT_SYMLINK_NOFOLLOW); err != nil { ++ proc.isSubset = errors.Is(err, os.ErrNotExist) ++ } ++ return proc, nil ++} ++ ++// Close closes the underlying file for the Handle. ++func (proc *Handle) Close() error { return proc.Inner.Close() } ++ ++var getCachedProcRoot = gocompat.SyncOnceValue(func() *Handle { ++ procRoot, err := getProcRoot(true) ++ if err != nil { ++ return nil // just don't cache if we see an error ++ } ++ if !procRoot.isSubset { ++ return nil // we only cache verified subset=pid handles ++ } ++ ++ // Disarm (*Handle).Close() to stop someone from accidentally closing ++ // the global handle. ++ procRoot.Inner = fd.NopCloser(procRoot.Inner) ++ return procRoot ++}) ++ ++// OpenProcRoot tries to open a "safer" handle to "/proc". ++func OpenProcRoot() (*Handle, error) { ++ if proc := getCachedProcRoot(); proc != nil { ++ return proc, nil ++ } ++ return getProcRoot(true) ++} ++ ++// OpenUnsafeProcRoot opens a handle to "/proc" without any overmounts or ++// masked paths (but also without "subset=pid"). ++func OpenUnsafeProcRoot() (*Handle, error) { return getProcRoot(false) } ++ ++func getProcRoot(subset bool) (*Handle, error) { ++ proc, err := privateProcRoot(subset) ++ if err != nil { ++ // Fall back to using a /proc handle if making a private mount failed. ++ // If we have openat2, at least we can avoid some kinds of over-mount ++ // attacks, but without openat2 there's not much we can do. 
++ proc, err = unsafeHostProcRoot() ++ } ++ return proc, err ++} ++ ++var hasProcThreadSelf = gocompat.SyncOnceValue(func() bool { ++ return unix.Access("/proc/thread-self/", unix.F_OK) == nil ++}) ++ ++var errUnsafeProcfs = errors.New("unsafe procfs detected") ++ ++// lookup is a very minimal wrapper around [procfsLookupInRoot] which is ++// intended to be called from the external API. ++func (proc *Handle) lookup(subpath string) (*os.File, error) { ++ handle, err := procfsLookupInRoot(proc.Inner, subpath) ++ if err != nil { ++ return nil, err ++ } ++ return handle, nil ++} ++ ++// procfsBase is an enum indicating the prefix of a subpath in operations ++// involving [Handle]s. ++type procfsBase string ++ ++const ( ++ // ProcRoot refers to the root of the procfs (i.e., "/proc/"). ++ ProcRoot procfsBase = "/proc" ++ // ProcSelf refers to the current process' subdirectory (i.e., ++ // "/proc/self/"). ++ ProcSelf procfsBase = "/proc/self" ++ // ProcThreadSelf refers to the current thread's subdirectory (i.e., ++ // "/proc/thread-self/"). In multi-threaded programs (i.e., all Go ++ // programs) where one thread has a different CLONE_FS, it is possible for ++ // "/proc/self" to point the wrong thread and so "/proc/thread-self" may be ++ // necessary. Note that on pre-3.17 kernels, "/proc/thread-self" doesn't ++ // exist and so a fallback will be used in that case. ++ ProcThreadSelf procfsBase = "/proc/thread-self" ++ // TODO: Switch to an interface setup so we can have a more type-safe ++ // version of ProcPid and remove the need to worry about invalid string ++ // values. ++) ++ ++// prefix returns a prefix that can be used with the given [Handle]. 
++func (base procfsBase) prefix(proc *Handle) (string, error) { ++ switch base { ++ case ProcRoot: ++ return ".", nil ++ case ProcSelf: ++ return "self", nil ++ case ProcThreadSelf: ++ threadSelf := "thread-self" ++ if !hasProcThreadSelf() || hookForceProcSelfTask() { ++ // Pre-3.17 kernels don't have /proc/thread-self, so do it ++ // manually. ++ threadSelf = "self/task/" + strconv.Itoa(unix.Gettid()) ++ if err := fd.Faccessat(proc.Inner, threadSelf, unix.F_OK, unix.AT_SYMLINK_NOFOLLOW); err != nil || hookForceProcSelf() { ++ // In this case, we running in a pid namespace that doesn't ++ // match the /proc mount we have. This can happen inside runc. ++ // ++ // Unfortunately, there is no nice way to get the correct TID ++ // to use here because of the age of the kernel, so we have to ++ // just use /proc/self and hope that it works. ++ threadSelf = "self" ++ } ++ } ++ return threadSelf, nil ++ } ++ return "", fmt.Errorf("invalid procfs base %q", base) ++} ++ ++// ProcThreadSelfCloser is a callback that needs to be called when you are done ++// operating on an [os.File] fetched using [ProcThreadSelf]. ++// ++// [os.File]: https://pkg.go.dev/os#File ++type ProcThreadSelfCloser func() ++ ++// open is the core lookup operation for [Handle]. It returns a handle to ++// "/proc//". If the returned [ProcThreadSelfCloser] is non-nil, ++// you should call it after you are done interacting with the returned handle. ++// ++// In general you should use prefer to use the other helpers, as they remove ++// the need to interact with [procfsBase] and do not return a nil ++// [ProcThreadSelfCloser] for [procfsBase] values other than [ProcThreadSelf] ++// where it is necessary. 
++func (proc *Handle) open(base procfsBase, subpath string) (_ *os.File, closer ProcThreadSelfCloser, Err error) { ++ prefix, err := base.prefix(proc) ++ if err != nil { ++ return nil, nil, err ++ } ++ subpath = prefix + "/" + subpath ++ ++ switch base { ++ case ProcRoot: ++ file, err := proc.lookup(subpath) ++ if errors.Is(err, os.ErrNotExist) { ++ // The Handle handle in use might be a subset=pid one, which will ++ // result in spurious errors. In this case, just open a temporary ++ // unmasked procfs handle for this operation. ++ proc, err2 := OpenUnsafeProcRoot() // !subset=pid ++ if err2 != nil { ++ return nil, nil, err ++ } ++ defer proc.Close() //nolint:errcheck // close failures aren't critical here ++ ++ file, err = proc.lookup(subpath) ++ } ++ return file, nil, err ++ ++ case ProcSelf: ++ file, err := proc.lookup(subpath) ++ return file, nil, err ++ ++ case ProcThreadSelf: ++ // We need to lock our thread until the caller is done with the handle ++ // because between getting the handle and using it we could get ++ // interrupted by the Go runtime and hit the case where the underlying ++ // thread is swapped out and the original thread is killed, resulting ++ // in pull-your-hair-out-hard-to-debug issues in the caller. ++ runtime.LockOSThread() ++ defer func() { ++ if Err != nil { ++ runtime.UnlockOSThread() ++ closer = nil ++ } ++ }() ++ ++ file, err := proc.lookup(subpath) ++ return file, runtime.UnlockOSThread, err ++ } ++ // should never be reached ++ return nil, nil, fmt.Errorf("[internal error] invalid procfs base %q", base) ++} ++ ++// OpenThreadSelf returns a handle to "/proc/thread-self/" (or an ++// equivalent handle on older kernels where "/proc/thread-self" doesn't exist). ++// Once finished with the handle, you must call the returned closer function ++// (runtime.UnlockOSThread). You must not pass the returned *os.File to other ++// Go threads or use the handle after calling the closer. 
++func (proc *Handle) OpenThreadSelf(subpath string) (_ *os.File, _ ProcThreadSelfCloser, Err error) { ++ return proc.open(ProcThreadSelf, subpath) ++} ++ ++// OpenSelf returns a handle to /proc/self/. ++func (proc *Handle) OpenSelf(subpath string) (*os.File, error) { ++ file, closer, err := proc.open(ProcSelf, subpath) ++ assert.Assert(closer == nil, "closer for ProcSelf must be nil") ++ return file, err ++} ++ ++// OpenRoot returns a handle to /proc/. ++func (proc *Handle) OpenRoot(subpath string) (*os.File, error) { ++ file, closer, err := proc.open(ProcRoot, subpath) ++ assert.Assert(closer == nil, "closer for ProcRoot must be nil") ++ return file, err ++} ++ ++// OpenPid returns a handle to /proc/$pid/ (pid can be a pid or tid). ++// This is mainly intended for usage when operating on other processes. ++func (proc *Handle) OpenPid(pid int, subpath string) (*os.File, error) { ++ return proc.OpenRoot(strconv.Itoa(pid) + "/" + subpath) ++} ++ ++// checkSubpathOvermount checks if the dirfd and path combination is on the ++// same mount as the given root. ++func checkSubpathOvermount(root, dir fd.Fd, path string) error { ++ // Get the mntID of our procfs handle. ++ expectedMountID, err := fd.GetMountID(root, "") ++ if err != nil { ++ return fmt.Errorf("get root mount id: %w", err) ++ } ++ // Get the mntID of the target magic-link. ++ gotMountID, err := fd.GetMountID(dir, path) ++ if err != nil { ++ return fmt.Errorf("get subpath mount id: %w", err) ++ } ++ // As long as the directory mount is alive, even with wrapping mount IDs, ++ // we would expect to see a different mount ID here. (Of course, if we're ++ // using unsafeHostProcRoot() then an attaker could change this after we ++ // did this check.) 
++ if expectedMountID != gotMountID { ++ return fmt.Errorf("%w: subpath %s/%s has an overmount obscuring the real path (mount ids do not match %d != %d)", ++ errUnsafeProcfs, dir.Name(), path, expectedMountID, gotMountID) ++ } ++ return nil ++} ++ ++// Readlink performs a readlink operation on "/proc//" in a way ++// that should be free from race attacks. This is most commonly used to get the ++// real path of a file by looking at "/proc/self/fd/$n", with the same safety ++// protections as [Open] (as well as some additional checks against ++// overmounts). ++func (proc *Handle) Readlink(base procfsBase, subpath string) (string, error) { ++ link, closer, err := proc.open(base, subpath) ++ if closer != nil { ++ defer closer() ++ } ++ if err != nil { ++ return "", fmt.Errorf("get safe %s/%s handle: %w", base, subpath, err) ++ } ++ defer link.Close() //nolint:errcheck // close failures aren't critical here ++ ++ // Try to detect if there is a mount on top of the magic-link. This should ++ // be safe in general (a mount on top of the path afterwards would not ++ // affect the handle itself) and will definitely be safe if we are using ++ // privateProcRoot() (at least since Linux 5.12[1], when anonymous mount ++ // namespaces were completely isolated from external mounts including mount ++ // propagation events). ++ // ++ // [1]: Linux commit ee2e3f50629f ("mount: fix mounting of detached mounts ++ // onto targets that reside on shared mounts"). ++ if err := checkSubpathOvermount(proc.Inner, link, ""); err != nil { ++ return "", fmt.Errorf("check safety of %s/%s magiclink: %w", base, subpath, err) ++ } ++ ++ // readlinkat implies AT_EMPTY_PATH since Linux 2.6.39. See Linux commit ++ // 65cfc6722361 ("readlinkat(), fchownat() and fstatat() with empty ++ // relative pathnames"). ++ return fd.Readlinkat(link, "") ++} ++ ++// ProcSelfFdReadlink gets the real path of the given file by looking at ++// readlink(/proc/thread-self/fd/$n). 
++// ++// This is just a wrapper around [Handle.Readlink]. ++func ProcSelfFdReadlink(fd fd.Fd) (string, error) { ++ procRoot, err := OpenProcRoot() // subset=pid ++ if err != nil { ++ return "", err ++ } ++ defer procRoot.Close() //nolint:errcheck // close failures aren't critical here ++ ++ fdPath := "fd/" + strconv.Itoa(int(fd.Fd())) ++ return procRoot.Readlink(ProcThreadSelf, fdPath) ++} ++ ++// CheckProcSelfFdPath returns whether the given file handle matches the ++// expected path. (This is inherently racy.) ++func CheckProcSelfFdPath(path string, file fd.Fd) error { ++ if err := fd.IsDeadInode(file); err != nil { ++ return err ++ } ++ actualPath, err := ProcSelfFdReadlink(file) ++ if err != nil { ++ return fmt.Errorf("get path of handle: %w", err) ++ } ++ if actualPath != path { ++ return fmt.Errorf("%w: handle path %q doesn't match expected path %q", internal.ErrPossibleBreakout, actualPath, path) ++ } ++ return nil ++} ++ ++// ReopenFd takes an existing file descriptor and "re-opens" it through ++// /proc/thread-self/fd/. This allows for O_PATH file descriptors to be ++// upgraded to regular file descriptors, as well as changing the open mode of a ++// regular file descriptor. Some filesystems have unique handling of open(2) ++// which make this incredibly useful (such as /dev/ptmx). ++func ReopenFd(handle fd.Fd, flags int) (*os.File, error) { ++ procRoot, err := OpenProcRoot() // subset=pid ++ if err != nil { ++ return nil, err ++ } ++ defer procRoot.Close() //nolint:errcheck // close failures aren't critical here ++ ++ // We can't operate on /proc/thread-self/fd/$n directly when doing a ++ // re-open, so we need to open /proc/thread-self/fd and then open a single ++ // final component. 
++ procFdDir, closer, err := procRoot.OpenThreadSelf("fd/") ++ if err != nil { ++ return nil, fmt.Errorf("get safe /proc/thread-self/fd handle: %w", err) ++ } ++ defer procFdDir.Close() //nolint:errcheck // close failures aren't critical here ++ defer closer() ++ ++ // Try to detect if there is a mount on top of the magic-link we are about ++ // to open. If we are using unsafeHostProcRoot(), this could change after ++ // we check it (and there's nothing we can do about that) but for ++ // privateProcRoot() this should be guaranteed to be safe (at least since ++ // Linux 5.12[1], when anonymous mount namespaces were completely isolated ++ // from external mounts including mount propagation events). ++ // ++ // [1]: Linux commit ee2e3f50629f ("mount: fix mounting of detached mounts ++ // onto targets that reside on shared mounts"). ++ fdStr := strconv.Itoa(int(handle.Fd())) ++ if err := checkSubpathOvermount(procRoot.Inner, procFdDir, fdStr); err != nil { ++ return nil, fmt.Errorf("check safety of /proc/thread-self/fd/%s magiclink: %w", fdStr, err) ++ } ++ ++ flags |= unix.O_CLOEXEC ++ // Rather than just wrapping fd.Openat, open-code it so we can copy ++ // handle.Name(). ++ reopenFd, err := unix.Openat(int(procFdDir.Fd()), fdStr, flags, 0) ++ if err != nil { ++ return nil, fmt.Errorf("reopen fd %d: %w", handle.Fd(), err) ++ } ++ return os.NewFile(uintptr(reopenFd), handle.Name()), nil ++} ++ ++// Test hooks used in the procfs tests to verify that the fallback logic works. ++// See testing_mocks_linux_test.go and procfs_linux_test.go for more details. 
++var ( ++ hookForcePrivateProcRootOpenTree = hookDummyFile ++ hookForcePrivateProcRootOpenTreeAtRecursive = hookDummyFile ++ hookForceGetProcRootUnsafe = hookDummy ++ ++ hookForceProcSelfTask = hookDummy ++ hookForceProcSelf = hookDummy ++) ++ ++func hookDummy() bool { return false } ++func hookDummyFile(_ io.Closer) bool { return false } +diff --git a/vendor/github.com/cyphar/filepath-securejoin/pathrs-lite/internal/procfs/procfs_lookup_linux.go b/vendor/github.com/cyphar/filepath-securejoin/pathrs-lite/internal/procfs/procfs_lookup_linux.go +new file mode 100644 +index 00000000..1ad1f18e +--- /dev/null ++++ b/vendor/github.com/cyphar/filepath-securejoin/pathrs-lite/internal/procfs/procfs_lookup_linux.go +@@ -0,0 +1,222 @@ ++// SPDX-License-Identifier: MPL-2.0 ++ ++//go:build linux ++ ++// Copyright (C) 2024-2025 Aleksa Sarai ++// Copyright (C) 2024-2025 SUSE LLC ++// ++// This Source Code Form is subject to the terms of the Mozilla Public ++// License, v. 2.0. If a copy of the MPL was not distributed with this ++// file, You can obtain one at https://mozilla.org/MPL/2.0/. ++ ++// This code is adapted to be a minimal version of the libpathrs proc resolver ++// . ++// As we only need O_PATH|O_NOFOLLOW support, this is not too much to port. ++ ++package procfs ++ ++import ( ++ "fmt" ++ "os" ++ "path" ++ "path/filepath" ++ "strings" ++ ++ "golang.org/x/sys/unix" ++ ++ "github.com/cyphar/filepath-securejoin/internal/consts" ++ "github.com/cyphar/filepath-securejoin/pathrs-lite/internal" ++ "github.com/cyphar/filepath-securejoin/pathrs-lite/internal/fd" ++ "github.com/cyphar/filepath-securejoin/pathrs-lite/internal/gocompat" ++ "github.com/cyphar/filepath-securejoin/pathrs-lite/internal/linux" ++) ++ ++// procfsLookupInRoot is a stripped down version of completeLookupInRoot, ++// entirely designed to support the very small set of features necessary to ++// make procfs handling work. 
Unlike completeLookupInRoot, we always have ++// O_PATH|O_NOFOLLOW behaviour for trailing symlinks. ++// ++// The main restrictions are: ++// ++// - ".." is not supported (as it requires either os.Root-style replays, ++// which is more bug-prone; or procfs verification, which is not possible ++// due to re-entrancy issues). ++// - Absolute symlinks for the same reason (and all absolute symlinks in ++// procfs are magic-links, which we want to skip anyway). ++// - If statx is supported (checkSymlinkOvermount), any mount-point crossings ++// (which is the main attack of concern against /proc). ++// - Partial lookups are not supported, so the symlink stack is not needed. ++// - Trailing slash special handling is not necessary in most cases (if we ++// operating on procfs, it's usually with programmer-controlled strings ++// that will then be re-opened), so we skip it since whatever re-opens it ++// can deal with it. It's a creature comfort anyway. ++// ++// If the system supports openat2(), this is implemented using equivalent flags ++// (RESOLVE_BENEATH | RESOLVE_NO_XDEV | RESOLVE_NO_MAGICLINKS). ++func procfsLookupInRoot(procRoot fd.Fd, unsafePath string) (Handle *os.File, _ error) { ++ unsafePath = filepath.ToSlash(unsafePath) // noop ++ ++ // Make sure that an empty unsafe path still returns something sane, even ++ // with openat2 (which doesn't have AT_EMPTY_PATH semantics yet). ++ if unsafePath == "" { ++ unsafePath = "." ++ } ++ ++ // This is already checked by getProcRoot, but make sure here since the ++ // core security of this lookup is based on this assumption. ++ if err := verifyProcRoot(procRoot); err != nil { ++ return nil, err ++ } ++ ++ if linux.HasOpenat2() { ++ // We prefer being able to use RESOLVE_NO_XDEV if we can, to be ++ // absolutely sure we are operating on a clean /proc handle that ++ // doesn't have any cheeky overmounts that could trick us (including ++ // symlink mounts on top of /proc/thread-self). 
RESOLVE_BENEATH isn't ++ // strictly needed, but just use it since we have it. ++ // ++ // NOTE: /proc/self is technically a magic-link (the contents of the ++ // symlink are generated dynamically), but it doesn't use ++ // nd_jump_link() so RESOLVE_NO_MAGICLINKS allows it. ++ // ++ // TODO: It would be nice to have RESOLVE_NO_DOTDOT, purely for ++ // self-consistency with the backup O_PATH resolver. ++ handle, err := fd.Openat2(procRoot, unsafePath, &unix.OpenHow{ ++ Flags: unix.O_PATH | unix.O_NOFOLLOW | unix.O_CLOEXEC, ++ Resolve: unix.RESOLVE_BENEATH | unix.RESOLVE_NO_XDEV | unix.RESOLVE_NO_MAGICLINKS, ++ }) ++ if err != nil { ++ // TODO: Once we bump the minimum Go version to 1.20, we can use ++ // multiple %w verbs for this wrapping. For now we need to use a ++ // compatibility shim for older Go versions. ++ // err = fmt.Errorf("%w: %w", errUnsafeProcfs, err) ++ return nil, gocompat.WrapBaseError(err, errUnsafeProcfs) ++ } ++ return handle, nil ++ } ++ ++ // To mirror openat2(RESOLVE_BENEATH), we need to return an error if the ++ // path is absolute. ++ if path.IsAbs(unsafePath) { ++ return nil, fmt.Errorf("%w: cannot resolve absolute paths in procfs resolver", internal.ErrPossibleBreakout) ++ } ++ ++ currentDir, err := fd.Dup(procRoot) ++ if err != nil { ++ return nil, fmt.Errorf("clone root fd: %w", err) ++ } ++ defer func() { ++ // If a handle is not returned, close the internal handle. ++ if Handle == nil { ++ _ = currentDir.Close() ++ } ++ }() ++ ++ var ( ++ linksWalked int ++ currentPath string ++ remainingPath = unsafePath ++ ) ++ for remainingPath != "" { ++ // Get the next path component. ++ var part string ++ if i := strings.IndexByte(remainingPath, '/'); i == -1 { ++ part, remainingPath = remainingPath, "" ++ } else { ++ part, remainingPath = remainingPath[:i], remainingPath[i+1:] ++ } ++ if part == "" { ++ // no-op component, but treat it the same as "." ++ part = "." ++ } ++ if part == ".." 
{ ++ // not permitted ++ return nil, fmt.Errorf("%w: cannot walk into '..' in procfs resolver", internal.ErrPossibleBreakout) ++ } ++ ++ // Apply the component lexically to the path we are building. ++ // currentPath does not contain any symlinks, and we are lexically ++ // dealing with a single component, so it's okay to do a filepath.Clean ++ // here. (Not to mention that ".." isn't allowed.) ++ nextPath := path.Join("/", currentPath, part) ++ // If we logically hit the root, just clone the root rather than ++ // opening the part and doing all of the other checks. ++ if nextPath == "/" { ++ // Jump to root. ++ rootClone, err := fd.Dup(procRoot) ++ if err != nil { ++ return nil, fmt.Errorf("clone root fd: %w", err) ++ } ++ _ = currentDir.Close() ++ currentDir = rootClone ++ currentPath = nextPath ++ continue ++ } ++ ++ // Try to open the next component. ++ nextDir, err := fd.Openat(currentDir, part, unix.O_PATH|unix.O_NOFOLLOW|unix.O_CLOEXEC, 0) ++ if err != nil { ++ return nil, err ++ } ++ ++ // Make sure we are still on procfs and haven't crossed mounts. ++ if err := verifyProcHandle(nextDir); err != nil { ++ _ = nextDir.Close() ++ return nil, fmt.Errorf("check %q component is on procfs: %w", part, err) ++ } ++ if err := checkSubpathOvermount(procRoot, nextDir, ""); err != nil { ++ _ = nextDir.Close() ++ return nil, fmt.Errorf("check %q component is not overmounted: %w", part, err) ++ } ++ ++ // We are emulating O_PATH|O_NOFOLLOW, so we only need to traverse into ++ // trailing symlinks if we are not the final component. Otherwise we ++ // can just return the currentDir. ++ if remainingPath != "" { ++ st, err := nextDir.Stat() ++ if err != nil { ++ _ = nextDir.Close() ++ return nil, fmt.Errorf("stat component %q: %w", part, err) ++ } ++ ++ if st.Mode()&os.ModeType == os.ModeSymlink { ++ // readlinkat implies AT_EMPTY_PATH since Linux 2.6.39. See ++ // Linux commit 65cfc6722361 ("readlinkat(), fchownat() and ++ // fstatat() with empty relative pathnames"). 
++ linkDest, err := fd.Readlinkat(nextDir, "") ++ // We don't need the handle anymore. ++ _ = nextDir.Close() ++ if err != nil { ++ return nil, err ++ } ++ ++ linksWalked++ ++ if linksWalked > consts.MaxSymlinkLimit { ++ return nil, &os.PathError{Op: "securejoin.procfsLookupInRoot", Path: "/proc/" + unsafePath, Err: unix.ELOOP} ++ } ++ ++ // Update our logical remaining path. ++ remainingPath = linkDest + "/" + remainingPath ++ // Absolute symlinks are probably magiclinks, we reject them. ++ if path.IsAbs(linkDest) { ++ return nil, fmt.Errorf("%w: cannot jump to / in procfs resolver -- possible magiclink", internal.ErrPossibleBreakout) ++ } ++ continue ++ } ++ } ++ ++ // Walk into the next component. ++ _ = currentDir.Close() ++ currentDir = nextDir ++ currentPath = nextPath ++ } ++ ++ // One final sanity-check. ++ if err := verifyProcHandle(currentDir); err != nil { ++ return nil, fmt.Errorf("check final handle is on procfs: %w", err) ++ } ++ if err := checkSubpathOvermount(procRoot, currentDir, ""); err != nil { ++ return nil, fmt.Errorf("check final handle is not overmounted: %w", err) ++ } ++ return currentDir, nil ++} +diff --git a/vendor/github.com/cyphar/filepath-securejoin/lookup_linux.go b/vendor/github.com/cyphar/filepath-securejoin/pathrs-lite/lookup_linux.go +similarity index 86% +rename from vendor/github.com/cyphar/filepath-securejoin/lookup_linux.go +rename to vendor/github.com/cyphar/filepath-securejoin/pathrs-lite/lookup_linux.go +index be81e498..f47504e6 100644 +--- a/vendor/github.com/cyphar/filepath-securejoin/lookup_linux.go ++++ b/vendor/github.com/cyphar/filepath-securejoin/pathrs-lite/lookup_linux.go +@@ -1,10 +1,15 @@ ++// SPDX-License-Identifier: MPL-2.0 ++ + //go:build linux + +-// Copyright (C) 2024 SUSE LLC. All rights reserved. +-// Use of this source code is governed by a BSD-style +-// license that can be found in the LICENSE file. 
++// Copyright (C) 2024-2025 Aleksa Sarai ++// Copyright (C) 2024-2025 SUSE LLC ++// ++// This Source Code Form is subject to the terms of the Mozilla Public ++// License, v. 2.0. If a copy of the MPL was not distributed with this ++// file, You can obtain one at https://mozilla.org/MPL/2.0/. + +-package securejoin ++package pathrs + + import ( + "errors" +@@ -15,6 +20,12 @@ import ( + "strings" + + "golang.org/x/sys/unix" ++ ++ "github.com/cyphar/filepath-securejoin/internal/consts" ++ "github.com/cyphar/filepath-securejoin/pathrs-lite/internal/fd" ++ "github.com/cyphar/filepath-securejoin/pathrs-lite/internal/gocompat" ++ "github.com/cyphar/filepath-securejoin/pathrs-lite/internal/linux" ++ "github.com/cyphar/filepath-securejoin/pathrs-lite/internal/procfs" + ) + + type symlinkStackEntry struct { +@@ -112,12 +123,12 @@ func (s *symlinkStack) push(dir *os.File, remainingPath, linkTarget string) erro + return nil + } + // Split the link target and clean up any "" parts. +- linkTargetParts := slices_DeleteFunc( ++ linkTargetParts := gocompat.SlicesDeleteFunc( + strings.Split(linkTarget, "/"), + func(part string) bool { return part == "" || part == "." }) + + // Copy the directory so the caller doesn't close our copy. +- dirCopy, err := dupFile(dir) ++ dirCopy, err := fd.Dup(dir) + if err != nil { + return err + } +@@ -159,11 +170,11 @@ func (s *symlinkStack) PopTopSymlink() (*os.File, string, bool) { + // within the provided root (a-la RESOLVE_IN_ROOT) and opens the final existing + // component of the requested path, returning a file handle to the final + // existing component and a string containing the remaining path components. 
+-func partialLookupInRoot(root *os.File, unsafePath string) (*os.File, string, error) { ++func partialLookupInRoot(root fd.Fd, unsafePath string) (*os.File, string, error) { + return lookupInRoot(root, unsafePath, true) + } + +-func completeLookupInRoot(root *os.File, unsafePath string) (*os.File, error) { ++func completeLookupInRoot(root fd.Fd, unsafePath string) (*os.File, error) { + handle, remainingPath, err := lookupInRoot(root, unsafePath, false) + if remainingPath != "" && err == nil { + // should never happen +@@ -174,7 +185,7 @@ func completeLookupInRoot(root *os.File, unsafePath string) (*os.File, error) { + return handle, err + } + +-func lookupInRoot(root *os.File, unsafePath string, partial bool) (Handle *os.File, _ string, _ error) { ++func lookupInRoot(root fd.Fd, unsafePath string, partial bool) (Handle *os.File, _ string, _ error) { + unsafePath = filepath.ToSlash(unsafePath) // noop + + // This is very similar to SecureJoin, except that we operate on the +@@ -182,20 +193,20 @@ func lookupInRoot(root *os.File, unsafePath string, partial bool) (Handle *os.Fi + // managed open, along with the remaining path components not opened. + + // Try to use openat2 if possible. +- if hasOpenat2() { ++ if linux.HasOpenat2() { + return lookupOpenat2(root, unsafePath, partial) + } + + // Get the "actual" root path from /proc/self/fd. This is necessary if the + // root is some magic-link like /proc/$pid/root, in which case we want to +- // make sure when we do checkProcSelfFdPath that we are using the correct +- // root path. +- logicalRootPath, err := procSelfFdReadlink(root) ++ // make sure when we do procfs.CheckProcSelfFdPath that we are using the ++ // correct root path. 
++ logicalRootPath, err := procfs.ProcSelfFdReadlink(root) + if err != nil { + return nil, "", fmt.Errorf("get real root path: %w", err) + } + +- currentDir, err := dupFile(root) ++ currentDir, err := fd.Dup(root) + if err != nil { + return nil, "", fmt.Errorf("clone root fd: %w", err) + } +@@ -260,7 +271,7 @@ func lookupInRoot(root *os.File, unsafePath string, partial bool) (Handle *os.Fi + return nil, "", fmt.Errorf("walking into root with part %q failed: %w", part, err) + } + // Jump to root. +- rootClone, err := dupFile(root) ++ rootClone, err := fd.Dup(root) + if err != nil { + return nil, "", fmt.Errorf("clone root fd: %w", err) + } +@@ -271,21 +282,21 @@ func lookupInRoot(root *os.File, unsafePath string, partial bool) (Handle *os.Fi + } + + // Try to open the next component. +- nextDir, err := openatFile(currentDir, part, unix.O_PATH|unix.O_NOFOLLOW|unix.O_CLOEXEC, 0) +- switch { +- case err == nil: ++ nextDir, err := fd.Openat(currentDir, part, unix.O_PATH|unix.O_NOFOLLOW|unix.O_CLOEXEC, 0) ++ switch err { ++ case nil: + st, err := nextDir.Stat() + if err != nil { + _ = nextDir.Close() + return nil, "", fmt.Errorf("stat component %q: %w", part, err) + } + +- switch st.Mode() & os.ModeType { ++ switch st.Mode() & os.ModeType { //nolint:exhaustive // just a glorified if statement + case os.ModeSymlink: + // readlinkat implies AT_EMPTY_PATH since Linux 2.6.39. See + // Linux commit 65cfc6722361 ("readlinkat(), fchownat() and + // fstatat() with empty relative pathnames"). +- linkDest, err := readlinkatFile(nextDir, "") ++ linkDest, err := fd.Readlinkat(nextDir, "") + // We don't need the handle anymore. 
+ _ = nextDir.Close() + if err != nil { +@@ -293,7 +304,7 @@ func lookupInRoot(root *os.File, unsafePath string, partial bool) (Handle *os.Fi + } + + linksWalked++ +- if linksWalked > maxSymlinkLimit { ++ if linksWalked > consts.MaxSymlinkLimit { + return nil, "", &os.PathError{Op: "securejoin.lookupInRoot", Path: logicalRootPath + "/" + unsafePath, Err: unix.ELOOP} + } + +@@ -307,7 +318,7 @@ func lookupInRoot(root *os.File, unsafePath string, partial bool) (Handle *os.Fi + // Absolute symlinks reset any work we've already done. + if path.IsAbs(linkDest) { + // Jump to root. +- rootClone, err := dupFile(root) ++ rootClone, err := fd.Dup(root) + if err != nil { + return nil, "", fmt.Errorf("clone root fd: %w", err) + } +@@ -335,12 +346,12 @@ func lookupInRoot(root *os.File, unsafePath string, partial bool) (Handle *os.Fi + // rename or mount on the system. + if part == ".." { + // Make sure the root hasn't moved. +- if err := checkProcSelfFdPath(logicalRootPath, root); err != nil { ++ if err := procfs.CheckProcSelfFdPath(logicalRootPath, root); err != nil { + return nil, "", fmt.Errorf("root path moved during lookup: %w", err) + } + // Make sure the path is what we expect. + fullPath := logicalRootPath + nextPath +- if err := checkProcSelfFdPath(fullPath, currentDir); err != nil { ++ if err := procfs.CheckProcSelfFdPath(fullPath, currentDir); err != nil { + return nil, "", fmt.Errorf("walking into %q had unexpected result: %w", part, err) + } + } +@@ -371,7 +382,7 @@ func lookupInRoot(root *os.File, unsafePath string, partial bool) (Handle *os.Fi + // context of openat2, a trailing slash and a trailing "/." are completely + // equivalent. 
+ if strings.HasSuffix(unsafePath, "/") { +- nextDir, err := openatFile(currentDir, ".", unix.O_PATH|unix.O_NOFOLLOW|unix.O_CLOEXEC, 0) ++ nextDir, err := fd.Openat(currentDir, ".", unix.O_PATH|unix.O_NOFOLLOW|unix.O_CLOEXEC, 0) + if err != nil { + if !partial { + _ = currentDir.Close() +diff --git a/vendor/github.com/cyphar/filepath-securejoin/mkdir_linux.go b/vendor/github.com/cyphar/filepath-securejoin/pathrs-lite/mkdir_linux.go +similarity index 86% +rename from vendor/github.com/cyphar/filepath-securejoin/mkdir_linux.go +rename to vendor/github.com/cyphar/filepath-securejoin/pathrs-lite/mkdir_linux.go +index a17ae3b0..f3c62b0d 100644 +--- a/vendor/github.com/cyphar/filepath-securejoin/mkdir_linux.go ++++ b/vendor/github.com/cyphar/filepath-securejoin/pathrs-lite/mkdir_linux.go +@@ -1,10 +1,15 @@ ++// SPDX-License-Identifier: MPL-2.0 ++ + //go:build linux + +-// Copyright (C) 2024 SUSE LLC. All rights reserved. +-// Use of this source code is governed by a BSD-style +-// license that can be found in the LICENSE file. ++// Copyright (C) 2024-2025 Aleksa Sarai ++// Copyright (C) 2024-2025 SUSE LLC ++// ++// This Source Code Form is subject to the terms of the Mozilla Public ++// License, v. 2.0. If a copy of the MPL was not distributed with this ++// file, You can obtain one at https://mozilla.org/MPL/2.0/. + +-package securejoin ++package pathrs + + import ( + "errors" +@@ -14,13 +19,14 @@ import ( + "strings" + + "golang.org/x/sys/unix" +-) + +-var ( +- errInvalidMode = errors.New("invalid permission mode") +- errPossibleAttack = errors.New("possible attack detected") ++ "github.com/cyphar/filepath-securejoin/pathrs-lite/internal/fd" ++ "github.com/cyphar/filepath-securejoin/pathrs-lite/internal/gocompat" ++ "github.com/cyphar/filepath-securejoin/pathrs-lite/internal/linux" + ) + ++var errInvalidMode = errors.New("invalid permission mode") ++ + // modePermExt is like os.ModePerm except that it also includes the set[ug]id + // and sticky bits. 
+ const modePermExt = os.ModePerm | os.ModeSetuid | os.ModeSetgid | os.ModeSticky +@@ -66,6 +72,8 @@ func toUnixMode(mode os.FileMode) (uint32, error) { + // a brand new lookup of unsafePath (such as with [SecureJoin] or openat2) after + // doing [MkdirAll]. If you intend to open the directory after creating it, you + // should use MkdirAllHandle. ++// ++// [SecureJoin]: https://pkg.go.dev/github.com/cyphar/filepath-securejoin#SecureJoin + func MkdirAllHandle(root *os.File, unsafePath string, mode os.FileMode) (_ *os.File, Err error) { + unixMode, err := toUnixMode(mode) + if err != nil { +@@ -102,7 +110,7 @@ func MkdirAllHandle(root *os.File, unsafePath string, mode os.FileMode) (_ *os.F + // + // This is mostly a quality-of-life check, because mkdir will simply fail + // later if the attacker deletes the tree after this check. +- if err := isDeadInode(currentDir); err != nil { ++ if err := fd.IsDeadInode(currentDir); err != nil { + return nil, fmt.Errorf("finding existing subpath of %q: %w", unsafePath, err) + } + +@@ -113,13 +121,13 @@ func MkdirAllHandle(root *os.File, unsafePath string, mode os.FileMode) (_ *os.F + return nil, fmt.Errorf("cannot create subdirectories in %q: %w", currentDir.Name(), unix.ENOTDIR) + } else if err != nil { + return nil, fmt.Errorf("re-opening handle to %q: %w", currentDir.Name(), err) +- } else { ++ } else { //nolint:revive // indent-error-flow lint doesn't make sense here + _ = currentDir.Close() + currentDir = reopenDir + } + + remainingParts := strings.Split(remainingPath, string(filepath.Separator)) +- if slices_Contains(remainingParts, "..") { ++ if gocompat.SlicesContains(remainingParts, "..") { + // The path contained ".." components after the end of the "real" + // components. We could try to safely resolve ".." 
here but that would + // add a bunch of extra logic for something that it's not clear even +@@ -150,12 +158,12 @@ func MkdirAllHandle(root *os.File, unsafePath string, mode os.FileMode) (_ *os.F + if err := unix.Mkdirat(int(currentDir.Fd()), part, unixMode); err != nil && !errors.Is(err, unix.EEXIST) { + err = &os.PathError{Op: "mkdirat", Path: currentDir.Name() + "/" + part, Err: err} + // Make the error a bit nicer if the directory is dead. +- if deadErr := isDeadInode(currentDir); deadErr != nil { ++ if deadErr := fd.IsDeadInode(currentDir); deadErr != nil { + // TODO: Once we bump the minimum Go version to 1.20, we can use + // multiple %w verbs for this wrapping. For now we need to use a + // compatibility shim for older Go versions. +- //err = fmt.Errorf("%w (%w)", err, deadErr) +- err = wrapBaseError(err, deadErr) ++ // err = fmt.Errorf("%w (%w)", err, deadErr) ++ err = gocompat.WrapBaseError(err, deadErr) + } + return nil, err + } +@@ -163,13 +171,13 @@ func MkdirAllHandle(root *os.File, unsafePath string, mode os.FileMode) (_ *os.F + // Get a handle to the next component. O_DIRECTORY means we don't need + // to use O_PATH. 
+ var nextDir *os.File +- if hasOpenat2() { +- nextDir, err = openat2File(currentDir, part, &unix.OpenHow{ ++ if linux.HasOpenat2() { ++ nextDir, err = openat2(currentDir, part, &unix.OpenHow{ + Flags: unix.O_NOFOLLOW | unix.O_DIRECTORY | unix.O_CLOEXEC, + Resolve: unix.RESOLVE_BENEATH | unix.RESOLVE_NO_SYMLINKS | unix.RESOLVE_NO_XDEV, + }) + } else { +- nextDir, err = openatFile(currentDir, part, unix.O_NOFOLLOW|unix.O_DIRECTORY|unix.O_CLOEXEC, 0) ++ nextDir, err = fd.Openat(currentDir, part, unix.O_NOFOLLOW|unix.O_DIRECTORY|unix.O_CLOEXEC, 0) + } + if err != nil { + return nil, err +@@ -220,12 +228,14 @@ func MkdirAllHandle(root *os.File, unsafePath string, mode os.FileMode) (_ *os.F + // If you plan to open the directory after you have created it or want to use + // an open directory handle as the root, you should use [MkdirAllHandle] instead. + // This function is a wrapper around [MkdirAllHandle]. ++// ++// [SecureJoin]: https://pkg.go.dev/github.com/cyphar/filepath-securejoin#SecureJoin + func MkdirAll(root, unsafePath string, mode os.FileMode) error { + rootDir, err := os.OpenFile(root, unix.O_PATH|unix.O_DIRECTORY|unix.O_CLOEXEC, 0) + if err != nil { + return err + } +- defer rootDir.Close() ++ defer rootDir.Close() //nolint:errcheck // close failures aren't critical here + + f, err := MkdirAllHandle(rootDir, unsafePath, mode) + if err != nil { +diff --git a/vendor/github.com/cyphar/filepath-securejoin/open_linux.go b/vendor/github.com/cyphar/filepath-securejoin/pathrs-lite/open_linux.go +similarity index 56% +rename from vendor/github.com/cyphar/filepath-securejoin/open_linux.go +rename to vendor/github.com/cyphar/filepath-securejoin/pathrs-lite/open_linux.go +index 230be73f..7492d8cf 100644 +--- a/vendor/github.com/cyphar/filepath-securejoin/open_linux.go ++++ b/vendor/github.com/cyphar/filepath-securejoin/pathrs-lite/open_linux.go +@@ -1,17 +1,22 @@ ++// SPDX-License-Identifier: MPL-2.0 ++ + //go:build linux + +-// Copyright (C) 2024 SUSE LLC. 
All rights reserved. +-// Use of this source code is governed by a BSD-style +-// license that can be found in the LICENSE file. ++// Copyright (C) 2024-2025 Aleksa Sarai ++// Copyright (C) 2024-2025 SUSE LLC ++// ++// This Source Code Form is subject to the terms of the Mozilla Public ++// License, v. 2.0. If a copy of the MPL was not distributed with this ++// file, You can obtain one at https://mozilla.org/MPL/2.0/. + +-package securejoin ++package pathrs + + import ( +- "fmt" + "os" +- "strconv" + + "golang.org/x/sys/unix" ++ ++ "github.com/cyphar/filepath-securejoin/pathrs-lite/internal/procfs" + ) + + // OpenatInRoot is equivalent to [OpenInRoot], except that the root is provided +@@ -40,12 +45,14 @@ func OpenatInRoot(root *os.File, unsafePath string) (*os.File, error) { + // disconnected TTY that could cause a DoS, or some other issue). In order to + // use the returned handle, you can "upgrade" it to a proper handle using + // [Reopen]. ++// ++// [SecureJoin]: https://pkg.go.dev/github.com/cyphar/filepath-securejoin#SecureJoin + func OpenInRoot(root, unsafePath string) (*os.File, error) { + rootDir, err := os.OpenFile(root, unix.O_PATH|unix.O_DIRECTORY|unix.O_CLOEXEC, 0) + if err != nil { + return nil, err + } +- defer rootDir.Close() ++ defer rootDir.Close() //nolint:errcheck // close failures aren't critical here + return OpenatInRoot(rootDir, unsafePath) + } + +@@ -63,41 +70,5 @@ func OpenInRoot(root, unsafePath string) (*os.File, error) { + // + // [CVE-2019-19921]: https://github.com/advisories/GHSA-fh74-hm69-rqjw + func Reopen(handle *os.File, flags int) (*os.File, error) { +- procRoot, err := getProcRoot() +- if err != nil { +- return nil, err +- } +- +- // We can't operate on /proc/thread-self/fd/$n directly when doing a +- // re-open, so we need to open /proc/thread-self/fd and then open a single +- // final component. 
+- procFdDir, closer, err := procThreadSelf(procRoot, "fd/") +- if err != nil { +- return nil, fmt.Errorf("get safe /proc/thread-self/fd handle: %w", err) +- } +- defer procFdDir.Close() +- defer closer() +- +- // Try to detect if there is a mount on top of the magic-link we are about +- // to open. If we are using unsafeHostProcRoot(), this could change after +- // we check it (and there's nothing we can do about that) but for +- // privateProcRoot() this should be guaranteed to be safe (at least since +- // Linux 5.12[1], when anonymous mount namespaces were completely isolated +- // from external mounts including mount propagation events). +- // +- // [1]: Linux commit ee2e3f50629f ("mount: fix mounting of detached mounts +- // onto targets that reside on shared mounts"). +- fdStr := strconv.Itoa(int(handle.Fd())) +- if err := checkSymlinkOvermount(procRoot, procFdDir, fdStr); err != nil { +- return nil, fmt.Errorf("check safety of /proc/thread-self/fd/%s magiclink: %w", fdStr, err) +- } +- +- flags |= unix.O_CLOEXEC +- // Rather than just wrapping openatFile, open-code it so we can copy +- // handle.Name(). +- reopenFd, err := unix.Openat(int(procFdDir.Fd()), fdStr, flags, 0) +- if err != nil { +- return nil, fmt.Errorf("reopen fd %d: %w", handle.Fd(), err) +- } +- return os.NewFile(uintptr(reopenFd), handle.Name()), nil ++ return procfs.ReopenFd(handle, flags) + } +diff --git a/vendor/github.com/cyphar/filepath-securejoin/pathrs-lite/openat2_linux.go b/vendor/github.com/cyphar/filepath-securejoin/pathrs-lite/openat2_linux.go +new file mode 100644 +index 00000000..937bc435 +--- /dev/null ++++ b/vendor/github.com/cyphar/filepath-securejoin/pathrs-lite/openat2_linux.go +@@ -0,0 +1,101 @@ ++// SPDX-License-Identifier: MPL-2.0 ++ ++//go:build linux ++ ++// Copyright (C) 2024-2025 Aleksa Sarai ++// Copyright (C) 2024-2025 SUSE LLC ++// ++// This Source Code Form is subject to the terms of the Mozilla Public ++// License, v. 2.0. 
If a copy of the MPL was not distributed with this ++// file, You can obtain one at https://mozilla.org/MPL/2.0/. ++ ++package pathrs ++ ++import ( ++ "errors" ++ "fmt" ++ "os" ++ "path/filepath" ++ "strings" ++ ++ "golang.org/x/sys/unix" ++ ++ "github.com/cyphar/filepath-securejoin/pathrs-lite/internal/fd" ++ "github.com/cyphar/filepath-securejoin/pathrs-lite/procfs" ++) ++ ++func openat2(dir fd.Fd, path string, how *unix.OpenHow) (*os.File, error) { ++ file, err := fd.Openat2(dir, path, how) ++ if err != nil { ++ return nil, err ++ } ++ // If we are using RESOLVE_IN_ROOT, the name we generated may be wrong. ++ if how.Resolve&unix.RESOLVE_IN_ROOT == unix.RESOLVE_IN_ROOT { ++ if actualPath, err := procfs.ProcSelfFdReadlink(file); err == nil { ++ // TODO: Ideally we would not need to dup the fd, but you cannot ++ // easily just swap an *os.File with one from the same fd ++ // (the GC will close the old one, and you cannot clear the ++ // finaliser easily because it is associated with an internal ++ // field of *os.File not *os.File itself). ++ newFile, err := fd.DupWithName(file, actualPath) ++ if err != nil { ++ return nil, err ++ } ++ file = newFile ++ } ++ } ++ return file, nil ++} ++ ++func lookupOpenat2(root fd.Fd, unsafePath string, partial bool) (*os.File, string, error) { ++ if !partial { ++ file, err := openat2(root, unsafePath, &unix.OpenHow{ ++ Flags: unix.O_PATH | unix.O_CLOEXEC, ++ Resolve: unix.RESOLVE_IN_ROOT | unix.RESOLVE_NO_MAGICLINKS, ++ }) ++ return file, "", err ++ } ++ return partialLookupOpenat2(root, unsafePath) ++} ++ ++// partialLookupOpenat2 is an alternative implementation of ++// partialLookupInRoot, using openat2(RESOLVE_IN_ROOT) to more safely get a ++// handle to the deepest existing child of the requested path within the root. ++func partialLookupOpenat2(root fd.Fd, unsafePath string) (*os.File, string, error) { ++ // TODO: Implement this as a git-bisect-like binary search. 
++ ++ unsafePath = filepath.ToSlash(unsafePath) // noop ++ endIdx := len(unsafePath) ++ var lastError error ++ for endIdx > 0 { ++ subpath := unsafePath[:endIdx] ++ ++ handle, err := openat2(root, subpath, &unix.OpenHow{ ++ Flags: unix.O_PATH | unix.O_CLOEXEC, ++ Resolve: unix.RESOLVE_IN_ROOT | unix.RESOLVE_NO_MAGICLINKS, ++ }) ++ if err == nil { ++ // Jump over the slash if we have a non-"" remainingPath. ++ if endIdx < len(unsafePath) { ++ endIdx++ ++ } ++ // We found a subpath! ++ return handle, unsafePath[endIdx:], lastError ++ } ++ if errors.Is(err, unix.ENOENT) || errors.Is(err, unix.ENOTDIR) { ++ // That path doesn't exist, let's try the next directory up. ++ endIdx = strings.LastIndexByte(subpath, '/') ++ lastError = err ++ continue ++ } ++ return nil, "", fmt.Errorf("open subpath: %w", err) ++ } ++ // If we couldn't open anything, the whole subpath is missing. Return a ++ // copy of the root fd so that the caller doesn't close this one by ++ // accident. ++ rootClone, err := fd.Dup(root) ++ if err != nil { ++ return nil, "", err ++ } ++ return rootClone, unsafePath, lastError ++} +diff --git a/vendor/github.com/cyphar/filepath-securejoin/pathrs-lite/procfs/procfs_linux.go b/vendor/github.com/cyphar/filepath-securejoin/pathrs-lite/procfs/procfs_linux.go +new file mode 100644 +index 00000000..ec187a41 +--- /dev/null ++++ b/vendor/github.com/cyphar/filepath-securejoin/pathrs-lite/procfs/procfs_linux.go +@@ -0,0 +1,157 @@ ++// SPDX-License-Identifier: MPL-2.0 ++ ++//go:build linux ++ ++// Copyright (C) 2024-2025 Aleksa Sarai ++// Copyright (C) 2024-2025 SUSE LLC ++// ++// This Source Code Form is subject to the terms of the Mozilla Public ++// License, v. 2.0. If a copy of the MPL was not distributed with this ++// file, You can obtain one at https://mozilla.org/MPL/2.0/. ++ ++// Package procfs provides a safe API for operating on /proc on Linux. 
++package procfs ++ ++import ( ++ "os" ++ ++ "github.com/cyphar/filepath-securejoin/pathrs-lite/internal/procfs" ++) ++ ++// This package mostly just wraps internal/procfs APIs. This is necessary ++// because we are forced to export some things from internal/procfs in order to ++// avoid some dependency cycle issues, but we don't want users to see or use ++// them. ++ ++// ProcThreadSelfCloser is a callback that needs to be called when you are done ++// operating on an [os.File] fetched using [Handle.OpenThreadSelf]. ++// ++// [os.File]: https://pkg.go.dev/os#File ++type ProcThreadSelfCloser = procfs.ProcThreadSelfCloser ++ ++// Handle is a wrapper around an *os.File handle to "/proc", which can be used ++// to do further procfs-related operations in a safe way. ++type Handle struct { ++ inner *procfs.Handle ++} ++ ++// Close close the resources associated with this [Handle]. Note that if this ++// [Handle] was created with [OpenProcRoot], on some kernels the underlying ++// procfs handle is cached and so this Close operation may be a no-op. However, ++// you should always call Close on [Handle]s once you are done with them. ++func (proc *Handle) Close() error { return proc.inner.Close() } ++ ++// OpenProcRoot tries to open a "safer" handle to "/proc" (i.e., one with the ++// "subset=pid" mount option applied, available from Linux 5.8). Unless you ++// plan to do many [Handle.OpenRoot] operations, users should prefer to use ++// this over [OpenUnsafeProcRoot] which is far more dangerous to keep open. ++// ++// If a safe handle cannot be opened, OpenProcRoot will fall back to opening a ++// regular "/proc" handle. ++// ++// Note that using [Handle.OpenRoot] will still work with handles returned by ++// this function. If a subpath cannot be operated on with a safe "/proc" ++// handle, then [OpenUnsafeProcRoot] will be called internally and a temporary ++// unsafe handle will be used. 
++func OpenProcRoot() (*Handle, error) { ++ proc, err := procfs.OpenProcRoot() ++ if err != nil { ++ return nil, err ++ } ++ return &Handle{inner: proc}, nil ++} ++ ++// OpenUnsafeProcRoot opens a handle to "/proc" without any overmounts or ++// masked paths. You must be extremely careful to make sure this handle is ++// never leaked to a container and that you program cannot be tricked into ++// writing to arbitrary paths within it. ++// ++// This is not necessary if you just wish to use [Handle.OpenRoot], as handles ++// returned by [OpenProcRoot] will fall back to using a *temporary* unsafe ++// handle in that case. You should only really use this if you need to do many ++// operations with [Handle.OpenRoot] and the performance overhead of making ++// many procfs handles is an issue. If you do use OpenUnsafeProcRoot, you ++// should make sure to close the handle as soon as possible to avoid ++// known-fd-number attacks. ++func OpenUnsafeProcRoot() (*Handle, error) { ++ proc, err := procfs.OpenUnsafeProcRoot() ++ if err != nil { ++ return nil, err ++ } ++ return &Handle{inner: proc}, nil ++} ++ ++// OpenThreadSelf returns a handle to "/proc/thread-self/" (or an ++// equivalent handle on older kernels where "/proc/thread-self" doesn't exist). ++// Once finished with the handle, you must call the returned closer function ++// ([runtime.UnlockOSThread]). You must not pass the returned *os.File to other ++// Go threads or use the handle after calling the closer. ++// ++// [runtime.UnlockOSThread]: https://pkg.go.dev/runtime#UnlockOSThread ++func (proc *Handle) OpenThreadSelf(subpath string) (*os.File, ProcThreadSelfCloser, error) { ++ return proc.inner.OpenThreadSelf(subpath) ++} ++ ++// OpenSelf returns a handle to /proc/self/. ++// ++// Note that in Go programs with non-homogenous threads, this may result in ++// spurious errors. 
If you are monkeying around with APIs that are ++// thread-specific, you probably want to use [Handle.OpenThreadSelf] instead ++// which will guarantee that the handle refers to the same thread as the caller ++// is executing on. ++func (proc *Handle) OpenSelf(subpath string) (*os.File, error) { ++ return proc.inner.OpenSelf(subpath) ++} ++ ++// OpenRoot returns a handle to /proc/. ++// ++// You should only use this when you need to operate on global procfs files ++// (such as sysctls in /proc/sys). Unlike [Handle.OpenThreadSelf], ++// [Handle.OpenSelf], and [Handle.OpenPid], the procfs handle used internally ++// for this operation will never use "subset=pid", which makes it a more juicy ++// target for [CVE-2024-21626]-style attacks (and doing something like opening ++// a directory with OpenRoot effectively leaks [OpenUnsafeProcRoot] as long as ++// the file descriptor is open). ++// ++// [CVE-2024-21626]: https://github.com/opencontainers/runc/security/advisories/GHSA-xr7r-f8xq-vfvv ++func (proc *Handle) OpenRoot(subpath string) (*os.File, error) { ++ return proc.inner.OpenRoot(subpath) ++} ++ ++// OpenPid returns a handle to /proc/$pid/ (pid can be a pid or tid). ++// This is mainly intended for usage when operating on other processes. ++// ++// You should not use this for the current thread, as special handling is ++// needed for /proc/thread-self (or /proc/self/task/) when dealing with ++// goroutine scheduling -- use [Handle.OpenThreadSelf] instead. ++// ++// To refer to the current thread-group, you should use prefer ++// [Handle.OpenSelf] to passing os.Getpid as the pid argument. ++func (proc *Handle) OpenPid(pid int, subpath string) (*os.File, error) { ++ return proc.inner.OpenPid(pid, subpath) ++} ++ ++// ProcSelfFdReadlink gets the real path of the given file by looking at ++// /proc/self/fd/ with [readlink]. 
It is effectively just shorthand for ++// something along the lines of: ++// ++// proc, err := procfs.OpenProcRoot() ++// if err != nil { ++// return err ++// } ++// link, err := proc.OpenThreadSelf(fmt.Sprintf("fd/%d", f.Fd())) ++// if err != nil { ++// return err ++// } ++// defer link.Close() ++// var buf [4096]byte ++// n, err := unix.Readlinkat(int(link.Fd()), "", buf[:]) ++// if err != nil { ++// return err ++// } ++// pathname := buf[:n] ++// ++// [readlink]: https://pkg.go.dev/golang.org/x/sys/unix#Readlinkat ++func ProcSelfFdReadlink(f *os.File) (string, error) { ++ return procfs.ProcSelfFdReadlink(f) ++} +diff --git a/vendor/github.com/cyphar/filepath-securejoin/procfs_linux.go b/vendor/github.com/cyphar/filepath-securejoin/procfs_linux.go +deleted file mode 100644 +index 809a579c..00000000 +--- a/vendor/github.com/cyphar/filepath-securejoin/procfs_linux.go ++++ /dev/null +@@ -1,452 +0,0 @@ +-//go:build linux +- +-// Copyright (C) 2024 SUSE LLC. All rights reserved. +-// Use of this source code is governed by a BSD-style +-// license that can be found in the LICENSE file. +- +-package securejoin +- +-import ( +- "errors" +- "fmt" +- "os" +- "runtime" +- "strconv" +- +- "golang.org/x/sys/unix" +-) +- +-func fstat(f *os.File) (unix.Stat_t, error) { +- var stat unix.Stat_t +- if err := unix.Fstat(int(f.Fd()), &stat); err != nil { +- return stat, &os.PathError{Op: "fstat", Path: f.Name(), Err: err} +- } +- return stat, nil +-} +- +-func fstatfs(f *os.File) (unix.Statfs_t, error) { +- var statfs unix.Statfs_t +- if err := unix.Fstatfs(int(f.Fd()), &statfs); err != nil { +- return statfs, &os.PathError{Op: "fstatfs", Path: f.Name(), Err: err} +- } +- return statfs, nil +-} +- +-// The kernel guarantees that the root inode of a procfs mount has an +-// f_type of PROC_SUPER_MAGIC and st_ino of PROC_ROOT_INO. 
+-const ( +- procSuperMagic = 0x9fa0 // PROC_SUPER_MAGIC +- procRootIno = 1 // PROC_ROOT_INO +-) +- +-func verifyProcRoot(procRoot *os.File) error { +- if statfs, err := fstatfs(procRoot); err != nil { +- return err +- } else if statfs.Type != procSuperMagic { +- return fmt.Errorf("%w: incorrect procfs root filesystem type 0x%x", errUnsafeProcfs, statfs.Type) +- } +- if stat, err := fstat(procRoot); err != nil { +- return err +- } else if stat.Ino != procRootIno { +- return fmt.Errorf("%w: incorrect procfs root inode number %d", errUnsafeProcfs, stat.Ino) +- } +- return nil +-} +- +-var hasNewMountApi = sync_OnceValue(func() bool { +- // All of the pieces of the new mount API we use (fsopen, fsconfig, +- // fsmount, open_tree) were added together in Linux 5.1[1,2], so we can +- // just check for one of the syscalls and the others should also be +- // available. +- // +- // Just try to use open_tree(2) to open a file without OPEN_TREE_CLONE. +- // This is equivalent to openat(2), but tells us if open_tree is +- // available (and thus all of the other basic new mount API syscalls). +- // open_tree(2) is most light-weight syscall to test here. +- // +- // [1]: merge commit 400913252d09 +- // [2]: +- fd, err := unix.OpenTree(-int(unix.EBADF), "/", unix.OPEN_TREE_CLOEXEC) +- if err != nil { +- return false +- } +- _ = unix.Close(fd) +- return true +-}) +- +-func fsopen(fsName string, flags int) (*os.File, error) { +- // Make sure we always set O_CLOEXEC. +- flags |= unix.FSOPEN_CLOEXEC +- fd, err := unix.Fsopen(fsName, flags) +- if err != nil { +- return nil, os.NewSyscallError("fsopen "+fsName, err) +- } +- return os.NewFile(uintptr(fd), "fscontext:"+fsName), nil +-} +- +-func fsmount(ctx *os.File, flags, mountAttrs int) (*os.File, error) { +- // Make sure we always set O_CLOEXEC. 
+- flags |= unix.FSMOUNT_CLOEXEC +- fd, err := unix.Fsmount(int(ctx.Fd()), flags, mountAttrs) +- if err != nil { +- return nil, os.NewSyscallError("fsmount "+ctx.Name(), err) +- } +- return os.NewFile(uintptr(fd), "fsmount:"+ctx.Name()), nil +-} +- +-func newPrivateProcMount() (*os.File, error) { +- procfsCtx, err := fsopen("proc", unix.FSOPEN_CLOEXEC) +- if err != nil { +- return nil, err +- } +- defer procfsCtx.Close() +- +- // Try to configure hidepid=ptraceable,subset=pid if possible, but ignore errors. +- _ = unix.FsconfigSetString(int(procfsCtx.Fd()), "hidepid", "ptraceable") +- _ = unix.FsconfigSetString(int(procfsCtx.Fd()), "subset", "pid") +- +- // Get an actual handle. +- if err := unix.FsconfigCreate(int(procfsCtx.Fd())); err != nil { +- return nil, os.NewSyscallError("fsconfig create procfs", err) +- } +- return fsmount(procfsCtx, unix.FSMOUNT_CLOEXEC, unix.MS_RDONLY|unix.MS_NODEV|unix.MS_NOEXEC|unix.MS_NOSUID) +-} +- +-func openTree(dir *os.File, path string, flags uint) (*os.File, error) { +- dirFd := -int(unix.EBADF) +- dirName := "." +- if dir != nil { +- dirFd = int(dir.Fd()) +- dirName = dir.Name() +- } +- // Make sure we always set O_CLOEXEC. +- flags |= unix.OPEN_TREE_CLOEXEC +- fd, err := unix.OpenTree(dirFd, path, flags) +- if err != nil { +- return nil, &os.PathError{Op: "open_tree", Path: path, Err: err} +- } +- return os.NewFile(uintptr(fd), dirName+"/"+path), nil +-} +- +-func clonePrivateProcMount() (_ *os.File, Err error) { +- // Try to make a clone without using AT_RECURSIVE if we can. If this works, +- // we can be sure there are no over-mounts and so if the root is valid then +- // we're golden. Otherwise, we have to deal with over-mounts. 
+- procfsHandle, err := openTree(nil, "/proc", unix.OPEN_TREE_CLONE) +- if err != nil || hookForcePrivateProcRootOpenTreeAtRecursive(procfsHandle) { +- procfsHandle, err = openTree(nil, "/proc", unix.OPEN_TREE_CLONE|unix.AT_RECURSIVE) +- } +- if err != nil { +- return nil, fmt.Errorf("creating a detached procfs clone: %w", err) +- } +- defer func() { +- if Err != nil { +- _ = procfsHandle.Close() +- } +- }() +- if err := verifyProcRoot(procfsHandle); err != nil { +- return nil, err +- } +- return procfsHandle, nil +-} +- +-func privateProcRoot() (*os.File, error) { +- if !hasNewMountApi() || hookForceGetProcRootUnsafe() { +- return nil, fmt.Errorf("new mount api: %w", unix.ENOTSUP) +- } +- // Try to create a new procfs mount from scratch if we can. This ensures we +- // can get a procfs mount even if /proc is fake (for whatever reason). +- procRoot, err := newPrivateProcMount() +- if err != nil || hookForcePrivateProcRootOpenTree(procRoot) { +- // Try to clone /proc then... +- procRoot, err = clonePrivateProcMount() +- } +- return procRoot, err +-} +- +-func unsafeHostProcRoot() (_ *os.File, Err error) { +- procRoot, err := os.OpenFile("/proc", unix.O_PATH|unix.O_NOFOLLOW|unix.O_DIRECTORY|unix.O_CLOEXEC, 0) +- if err != nil { +- return nil, err +- } +- defer func() { +- if Err != nil { +- _ = procRoot.Close() +- } +- }() +- if err := verifyProcRoot(procRoot); err != nil { +- return nil, err +- } +- return procRoot, nil +-} +- +-func doGetProcRoot() (*os.File, error) { +- procRoot, err := privateProcRoot() +- if err != nil { +- // Fall back to using a /proc handle if making a private mount failed. +- // If we have openat2, at least we can avoid some kinds of over-mount +- // attacks, but without openat2 there's not much we can do. 
+- procRoot, err = unsafeHostProcRoot() +- } +- return procRoot, err +-} +- +-var getProcRoot = sync_OnceValues(func() (*os.File, error) { +- return doGetProcRoot() +-}) +- +-var hasProcThreadSelf = sync_OnceValue(func() bool { +- return unix.Access("/proc/thread-self/", unix.F_OK) == nil +-}) +- +-var errUnsafeProcfs = errors.New("unsafe procfs detected") +- +-type procThreadSelfCloser func() +- +-// procThreadSelf returns a handle to /proc/thread-self/ (or an +-// equivalent handle on older kernels where /proc/thread-self doesn't exist). +-// Once finished with the handle, you must call the returned closer function +-// (runtime.UnlockOSThread). You must not pass the returned *os.File to other +-// Go threads or use the handle after calling the closer. +-// +-// This is similar to ProcThreadSelf from runc, but with extra hardening +-// applied and using *os.File. +-func procThreadSelf(procRoot *os.File, subpath string) (_ *os.File, _ procThreadSelfCloser, Err error) { +- // We need to lock our thread until the caller is done with the handle +- // because between getting the handle and using it we could get interrupted +- // by the Go runtime and hit the case where the underlying thread is +- // swapped out and the original thread is killed, resulting in +- // pull-your-hair-out-hard-to-debug issues in the caller. +- runtime.LockOSThread() +- defer func() { +- if Err != nil { +- runtime.UnlockOSThread() +- } +- }() +- +- // Figure out what prefix we want to use. +- threadSelf := "thread-self/" +- if !hasProcThreadSelf() || hookForceProcSelfTask() { +- /// Pre-3.17 kernels don't have /proc/thread-self, so do it manually. +- threadSelf = "self/task/" + strconv.Itoa(unix.Gettid()) + "/" +- if _, err := fstatatFile(procRoot, threadSelf, unix.AT_SYMLINK_NOFOLLOW); err != nil || hookForceProcSelf() { +- // In this case, we running in a pid namespace that doesn't match +- // the /proc mount we have. This can happen inside runc. 
+- // +- // Unfortunately, there is no nice way to get the correct TID to +- // use here because of the age of the kernel, so we have to just +- // use /proc/self and hope that it works. +- threadSelf = "self/" +- } +- } +- +- // Grab the handle. +- var ( +- handle *os.File +- err error +- ) +- if hasOpenat2() { +- // We prefer being able to use RESOLVE_NO_XDEV if we can, to be +- // absolutely sure we are operating on a clean /proc handle that +- // doesn't have any cheeky overmounts that could trick us (including +- // symlink mounts on top of /proc/thread-self). RESOLVE_BENEATH isn't +- // strictly needed, but just use it since we have it. +- // +- // NOTE: /proc/self is technically a magic-link (the contents of the +- // symlink are generated dynamically), but it doesn't use +- // nd_jump_link() so RESOLVE_NO_MAGICLINKS allows it. +- // +- // NOTE: We MUST NOT use RESOLVE_IN_ROOT here, as openat2File uses +- // procSelfFdReadlink to clean up the returned f.Name() if we use +- // RESOLVE_IN_ROOT (which would lead to an infinite recursion). +- handle, err = openat2File(procRoot, threadSelf+subpath, &unix.OpenHow{ +- Flags: unix.O_PATH | unix.O_NOFOLLOW | unix.O_CLOEXEC, +- Resolve: unix.RESOLVE_BENEATH | unix.RESOLVE_NO_XDEV | unix.RESOLVE_NO_MAGICLINKS, +- }) +- if err != nil { +- // TODO: Once we bump the minimum Go version to 1.20, we can use +- // multiple %w verbs for this wrapping. For now we need to use a +- // compatibility shim for older Go versions. +- //err = fmt.Errorf("%w: %w", errUnsafeProcfs, err) +- return nil, nil, wrapBaseError(err, errUnsafeProcfs) +- } +- } else { +- handle, err = openatFile(procRoot, threadSelf+subpath, unix.O_PATH|unix.O_NOFOLLOW|unix.O_CLOEXEC, 0) +- if err != nil { +- // TODO: Once we bump the minimum Go version to 1.20, we can use +- // multiple %w verbs for this wrapping. For now we need to use a +- // compatibility shim for older Go versions. 
+- //err = fmt.Errorf("%w: %w", errUnsafeProcfs, err) +- return nil, nil, wrapBaseError(err, errUnsafeProcfs) +- } +- defer func() { +- if Err != nil { +- _ = handle.Close() +- } +- }() +- // We can't detect bind-mounts of different parts of procfs on top of +- // /proc (a-la RESOLVE_NO_XDEV), but we can at least be sure that we +- // aren't on the wrong filesystem here. +- if statfs, err := fstatfs(handle); err != nil { +- return nil, nil, err +- } else if statfs.Type != procSuperMagic { +- return nil, nil, fmt.Errorf("%w: incorrect /proc/self/fd filesystem type 0x%x", errUnsafeProcfs, statfs.Type) +- } +- } +- return handle, runtime.UnlockOSThread, nil +-} +- +-// STATX_MNT_ID_UNIQUE is provided in golang.org/x/sys@v0.20.0, but in order to +-// avoid bumping the requirement for a single constant we can just define it +-// ourselves. +-const STATX_MNT_ID_UNIQUE = 0x4000 +- +-var hasStatxMountId = sync_OnceValue(func() bool { +- var ( +- stx unix.Statx_t +- // We don't care which mount ID we get. The kernel will give us the +- // unique one if it is supported. +- wantStxMask uint32 = STATX_MNT_ID_UNIQUE | unix.STATX_MNT_ID +- ) +- err := unix.Statx(-int(unix.EBADF), "/", 0, int(wantStxMask), &stx) +- return err == nil && stx.Mask&wantStxMask != 0 +-}) +- +-func getMountId(dir *os.File, path string) (uint64, error) { +- // If we don't have statx(STATX_MNT_ID*) support, we can't do anything. +- if !hasStatxMountId() { +- return 0, nil +- } +- +- var ( +- stx unix.Statx_t +- // We don't care which mount ID we get. The kernel will give us the +- // unique one if it is supported. +- wantStxMask uint32 = STATX_MNT_ID_UNIQUE | unix.STATX_MNT_ID +- ) +- +- err := unix.Statx(int(dir.Fd()), path, unix.AT_EMPTY_PATH|unix.AT_SYMLINK_NOFOLLOW, int(wantStxMask), &stx) +- if stx.Mask&wantStxMask == 0 { +- // It's not a kernel limitation, for some reason we couldn't get a +- // mount ID. Assume it's some kind of attack. 
+- err = fmt.Errorf("%w: could not get mount id", errUnsafeProcfs) +- } +- if err != nil { +- return 0, &os.PathError{Op: "statx(STATX_MNT_ID_...)", Path: dir.Name() + "/" + path, Err: err} +- } +- return stx.Mnt_id, nil +-} +- +-func checkSymlinkOvermount(procRoot *os.File, dir *os.File, path string) error { +- // Get the mntId of our procfs handle. +- expectedMountId, err := getMountId(procRoot, "") +- if err != nil { +- return err +- } +- // Get the mntId of the target magic-link. +- gotMountId, err := getMountId(dir, path) +- if err != nil { +- return err +- } +- // As long as the directory mount is alive, even with wrapping mount IDs, +- // we would expect to see a different mount ID here. (Of course, if we're +- // using unsafeHostProcRoot() then an attaker could change this after we +- // did this check.) +- if expectedMountId != gotMountId { +- return fmt.Errorf("%w: symlink %s/%s has an overmount obscuring the real link (mount ids do not match %d != %d)", errUnsafeProcfs, dir.Name(), path, expectedMountId, gotMountId) +- } +- return nil +-} +- +-func doRawProcSelfFdReadlink(procRoot *os.File, fd int) (string, error) { +- fdPath := fmt.Sprintf("fd/%d", fd) +- procFdLink, closer, err := procThreadSelf(procRoot, fdPath) +- if err != nil { +- return "", fmt.Errorf("get safe /proc/thread-self/%s handle: %w", fdPath, err) +- } +- defer procFdLink.Close() +- defer closer() +- +- // Try to detect if there is a mount on top of the magic-link. Since we use the handle directly +- // provide to the closure. If the closure uses the handle directly, this +- // should be safe in general (a mount on top of the path afterwards would +- // not affect the handle itself) and will definitely be safe if we are +- // using privateProcRoot() (at least since Linux 5.12[1], when anonymous +- // mount namespaces were completely isolated from external mounts including +- // mount propagation events). 
+- // +- // [1]: Linux commit ee2e3f50629f ("mount: fix mounting of detached mounts +- // onto targets that reside on shared mounts"). +- if err := checkSymlinkOvermount(procRoot, procFdLink, ""); err != nil { +- return "", fmt.Errorf("check safety of /proc/thread-self/fd/%d magiclink: %w", fd, err) +- } +- +- // readlinkat implies AT_EMPTY_PATH since Linux 2.6.39. See Linux commit +- // 65cfc6722361 ("readlinkat(), fchownat() and fstatat() with empty +- // relative pathnames"). +- return readlinkatFile(procFdLink, "") +-} +- +-func rawProcSelfFdReadlink(fd int) (string, error) { +- procRoot, err := getProcRoot() +- if err != nil { +- return "", err +- } +- return doRawProcSelfFdReadlink(procRoot, fd) +-} +- +-func procSelfFdReadlink(f *os.File) (string, error) { +- return rawProcSelfFdReadlink(int(f.Fd())) +-} +- +-var ( +- errPossibleBreakout = errors.New("possible breakout detected") +- errInvalidDirectory = errors.New("wandered into deleted directory") +- errDeletedInode = errors.New("cannot verify path of deleted inode") +-) +- +-func isDeadInode(file *os.File) error { +- // If the nlink of a file drops to 0, there is an attacker deleting +- // directories during our walk, which could result in weird /proc values. +- // It's better to error out in this case. 
+- stat, err := fstat(file) +- if err != nil { +- return fmt.Errorf("check for dead inode: %w", err) +- } +- if stat.Nlink == 0 { +- err := errDeletedInode +- if stat.Mode&unix.S_IFMT == unix.S_IFDIR { +- err = errInvalidDirectory +- } +- return fmt.Errorf("%w %q", err, file.Name()) +- } +- return nil +-} +- +-func checkProcSelfFdPath(path string, file *os.File) error { +- if err := isDeadInode(file); err != nil { +- return err +- } +- actualPath, err := procSelfFdReadlink(file) +- if err != nil { +- return fmt.Errorf("get path of handle: %w", err) +- } +- if actualPath != path { +- return fmt.Errorf("%w: handle path %q doesn't match expected path %q", errPossibleBreakout, actualPath, path) +- } +- return nil +-} +- +-// Test hooks used in the procfs tests to verify that the fallback logic works. +-// See testing_mocks_linux_test.go and procfs_linux_test.go for more details. +-var ( +- hookForcePrivateProcRootOpenTree = hookDummyFile +- hookForcePrivateProcRootOpenTreeAtRecursive = hookDummyFile +- hookForceGetProcRootUnsafe = hookDummy +- +- hookForceProcSelfTask = hookDummy +- hookForceProcSelf = hookDummy +-) +- +-func hookDummy() bool { return false } +-func hookDummyFile(_ *os.File) bool { return false } +diff --git a/vendor/github.com/cyphar/filepath-securejoin/vfs.go b/vendor/github.com/cyphar/filepath-securejoin/vfs.go +index 36373f8c..4d89a481 100644 +--- a/vendor/github.com/cyphar/filepath-securejoin/vfs.go ++++ b/vendor/github.com/cyphar/filepath-securejoin/vfs.go +@@ -1,3 +1,5 @@ ++// SPDX-License-Identifier: BSD-3-Clause ++ + // Copyright (C) 2017-2024 SUSE LLC. All rights reserved. + // Use of this source code is governed by a BSD-style + // license that can be found in the LICENSE file. 
+diff --git a/vendor/github.com/opencontainers/selinux/go-selinux/label/label.go b/vendor/github.com/opencontainers/selinux/go-selinux/label/label.go +index 07e0f77d..884a8b80 100644 +--- a/vendor/github.com/opencontainers/selinux/go-selinux/label/label.go ++++ b/vendor/github.com/opencontainers/selinux/go-selinux/label/label.go +@@ -6,78 +6,11 @@ import ( + "github.com/opencontainers/selinux/go-selinux" + ) + +-// Deprecated: use selinux.ROFileLabel +-var ROMountLabel = selinux.ROFileLabel +- +-// SetProcessLabel takes a process label and tells the kernel to assign the +-// label to the next program executed by the current process. +-// Deprecated: use selinux.SetExecLabel +-var SetProcessLabel = selinux.SetExecLabel +- +-// ProcessLabel returns the process label that the kernel will assign +-// to the next program executed by the current process. If "" is returned +-// this indicates that the default labeling will happen for the process. +-// Deprecated: use selinux.ExecLabel +-var ProcessLabel = selinux.ExecLabel +- +-// SetSocketLabel takes a process label and tells the kernel to assign the +-// label to the next socket that gets created +-// Deprecated: use selinux.SetSocketLabel +-var SetSocketLabel = selinux.SetSocketLabel +- +-// SocketLabel retrieves the current default socket label setting +-// Deprecated: use selinux.SocketLabel +-var SocketLabel = selinux.SocketLabel +- +-// SetKeyLabel takes a process label and tells the kernel to assign the +-// label to the next kernel keyring that gets created +-// Deprecated: use selinux.SetKeyLabel +-var SetKeyLabel = selinux.SetKeyLabel +- +-// KeyLabel retrieves the current default kernel keyring label setting +-// Deprecated: use selinux.KeyLabel +-var KeyLabel = selinux.KeyLabel +- +-// FileLabel returns the label for specified path +-// Deprecated: use selinux.FileLabel +-var FileLabel = selinux.FileLabel +- +-// PidLabel will return the label of the process running with the specified pid +-// Deprecated: use 
selinux.PidLabel +-var PidLabel = selinux.PidLabel +- + // Init initialises the labeling system + func Init() { + _ = selinux.GetEnabled() + } + +-// ClearLabels will clear all reserved labels +-// Deprecated: use selinux.ClearLabels +-var ClearLabels = selinux.ClearLabels +- +-// ReserveLabel will record the fact that the MCS label has already been used. +-// This will prevent InitLabels from using the MCS label in a newly created +-// container +-// Deprecated: use selinux.ReserveLabel +-func ReserveLabel(label string) error { +- selinux.ReserveLabel(label) +- return nil +-} +- +-// ReleaseLabel will remove the reservation of the MCS label. +-// This will allow InitLabels to use the MCS label in a newly created +-// containers +-// Deprecated: use selinux.ReleaseLabel +-func ReleaseLabel(label string) error { +- selinux.ReleaseLabel(label) +- return nil +-} +- +-// DupSecOpt takes a process label and returns security options that +-// can be used to set duplicate labels on future container processes +-// Deprecated: use selinux.DupSecOpt +-var DupSecOpt = selinux.DupSecOpt +- + // FormatMountLabel returns a string to be used by the mount command. Using + // the SELinux `context` mount option. Changing labels of files on mount + // points with this option can never be changed. 
+diff --git a/vendor/github.com/opencontainers/selinux/go-selinux/label/label_linux.go b/vendor/github.com/opencontainers/selinux/go-selinux/label/label_linux.go +index f61a5601..95f29e21 100644 +--- a/vendor/github.com/opencontainers/selinux/go-selinux/label/label_linux.go ++++ b/vendor/github.com/opencontainers/selinux/go-selinux/label/label_linux.go +@@ -18,7 +18,7 @@ var validOptions = map[string]bool{ + "level": true, + } + +-var ErrIncompatibleLabel = errors.New("Bad SELinux option z and Z can not be used together") ++var ErrIncompatibleLabel = errors.New("bad SELinux option: z and Z can not be used together") + + // InitLabels returns the process label and file labels to be used within + // the container. A list of options can be passed into this function to alter +@@ -52,11 +52,11 @@ func InitLabels(options []string) (plabel string, mlabel string, retErr error) { + return "", selinux.PrivContainerMountLabel(), nil + } + if i := strings.Index(opt, ":"); i == -1 { +- return "", "", fmt.Errorf("Bad label option %q, valid options 'disable' or \n'user, role, level, type, filetype' followed by ':' and a value", opt) ++ return "", "", fmt.Errorf("bad label option %q, valid options 'disable' or \n'user, role, level, type, filetype' followed by ':' and a value", opt) + } + con := strings.SplitN(opt, ":", 2) + if !validOptions[con[0]] { +- return "", "", fmt.Errorf("Bad label option %q, valid options 'disable, user, role, level, type, filetype'", con[0]) ++ return "", "", fmt.Errorf("bad label option %q, valid options 'disable, user, role, level, type, filetype'", con[0]) + } + if con[0] == "filetype" { + mcon["type"] = con[1] +@@ -79,12 +79,6 @@ func InitLabels(options []string) (plabel string, mlabel string, retErr error) { + return processLabel, mountLabel, nil + } + +-// Deprecated: The GenLabels function is only to be used during the transition +-// to the official API. Use InitLabels(strings.Fields(options)) instead. 
+-func GenLabels(options string) (string, string, error) { +- return InitLabels(strings.Fields(options)) +-} +- + // SetFileLabel modifies the "path" label to the specified file label + func SetFileLabel(path string, fileLabel string) error { + if !selinux.GetEnabled() || fileLabel == "" { +@@ -120,17 +114,9 @@ func Relabel(path string, fileLabel string, shared bool) error { + c["level"] = "s0" + fileLabel = c.Get() + } +- if err := selinux.Chcon(path, fileLabel, true); err != nil { +- return err +- } +- return nil ++ return selinux.Chcon(path, fileLabel, true) + } + +-// DisableSecOpt returns a security opt that can disable labeling +-// support for future container processes +-// Deprecated: use selinux.DisableSecOpt +-var DisableSecOpt = selinux.DisableSecOpt +- + // Validate checks that the label does not include unexpected options + func Validate(label string) error { + if strings.Contains(label, "z") && strings.Contains(label, "Z") { +diff --git a/vendor/github.com/opencontainers/selinux/go-selinux/label/label_stub.go b/vendor/github.com/opencontainers/selinux/go-selinux/label/label_stub.go +index f21c80c5..7a54afc5 100644 +--- a/vendor/github.com/opencontainers/selinux/go-selinux/label/label_stub.go ++++ b/vendor/github.com/opencontainers/selinux/go-selinux/label/label_stub.go +@@ -6,25 +6,19 @@ package label + // InitLabels returns the process label and file labels to be used within + // the container. A list of options can be passed into this function to alter + // the labels. +-func InitLabels(options []string) (string, string, error) { ++func InitLabels([]string) (string, string, error) { + return "", "", nil + } + +-// Deprecated: The GenLabels function is only to be used during the transition +-// to the official API. Use InitLabels(strings.Fields(options)) instead. 
+-func GenLabels(options string) (string, string, error) { +- return "", "", nil +-} +- +-func SetFileLabel(path string, fileLabel string) error { ++func SetFileLabel(string, string) error { + return nil + } + +-func SetFileCreateLabel(fileLabel string) error { ++func SetFileCreateLabel(string) error { + return nil + } + +-func Relabel(path string, fileLabel string, shared bool) error { ++func Relabel(string, string, bool) error { + return nil + } + +@@ -35,16 +29,16 @@ func DisableSecOpt() []string { + } + + // Validate checks that the label does not include unexpected options +-func Validate(label string) error { ++func Validate(string) error { + return nil + } + + // RelabelNeeded checks whether the user requested a relabel +-func RelabelNeeded(label string) bool { ++func RelabelNeeded(string) bool { + return false + } + + // IsShared checks that the label includes a "shared" mark +-func IsShared(label string) bool { ++func IsShared(string) bool { + return false + } +diff --git a/vendor/github.com/opencontainers/selinux/go-selinux/selinux.go b/vendor/github.com/opencontainers/selinux/go-selinux/selinux.go +index af058b84..15150d47 100644 +--- a/vendor/github.com/opencontainers/selinux/go-selinux/selinux.go ++++ b/vendor/github.com/opencontainers/selinux/go-selinux/selinux.go +@@ -41,6 +41,10 @@ var ( + // ErrVerifierNil is returned when a context verifier function is nil. + ErrVerifierNil = errors.New("verifier function is nil") + ++ // ErrNotTGLeader is returned by [SetKeyLabel] if the calling thread ++ // is not the thread group leader. 
++ ErrNotTGLeader = errors.New("calling thread is not the thread group leader") ++ + // CategoryRange allows the upper bound on the category range to be adjusted + CategoryRange = DefaultCategoryRange + +@@ -149,7 +153,7 @@ func CalculateGlbLub(sourceRange, targetRange string) (string, error) { + // of the program is finished to guarantee another goroutine does not migrate to the current + // thread before execution is complete. + func SetExecLabel(label string) error { +- return writeCon(attrPath("exec"), label) ++ return writeConThreadSelf("attr/exec", label) + } + + // SetTaskLabel sets the SELinux label for the current thread, or an error. +@@ -157,7 +161,7 @@ func SetExecLabel(label string) error { + // be wrapped in runtime.LockOSThread()/runtime.UnlockOSThread() to guarantee + // the current thread does not run in a new mislabeled thread. + func SetTaskLabel(label string) error { +- return writeCon(attrPath("current"), label) ++ return writeConThreadSelf("attr/current", label) + } + + // SetSocketLabel takes a process label and tells the kernel to assign the +@@ -166,12 +170,12 @@ func SetTaskLabel(label string) error { + // the socket is created to guarantee another goroutine does not migrate + // to the current thread before execution is complete. + func SetSocketLabel(label string) error { +- return writeCon(attrPath("sockcreate"), label) ++ return writeConThreadSelf("attr/sockcreate", label) + } + + // SocketLabel retrieves the current socket label setting + func SocketLabel() (string, error) { +- return readCon(attrPath("sockcreate")) ++ return readConThreadSelf("attr/sockcreate") + } + + // PeerLabel retrieves the label of the client on the other side of a socket +@@ -180,17 +184,21 @@ func PeerLabel(fd uintptr) (string, error) { + } + + // SetKeyLabel takes a process label and tells the kernel to assign the +-// label to the next kernel keyring that gets created. 
Calls to SetKeyLabel +-// should be wrapped in runtime.LockOSThread()/runtime.UnlockOSThread() until +-// the kernel keyring is created to guarantee another goroutine does not migrate +-// to the current thread before execution is complete. ++// label to the next kernel keyring that gets created. ++// ++// Calls to SetKeyLabel should be wrapped in ++// runtime.LockOSThread()/runtime.UnlockOSThread() until the kernel keyring is ++// created to guarantee another goroutine does not migrate to the current ++// thread before execution is complete. ++// ++// Only the thread group leader can set key label. + func SetKeyLabel(label string) error { + return setKeyLabel(label) + } + + // KeyLabel retrieves the current kernel keyring label setting + func KeyLabel() (string, error) { +- return readCon("/proc/self/attr/keycreate") ++ return keyLabel() + } + + // Get returns the Context as a string +diff --git a/vendor/github.com/opencontainers/selinux/go-selinux/selinux_linux.go b/vendor/github.com/opencontainers/selinux/go-selinux/selinux_linux.go +index f1e95977..70392d98 100644 +--- a/vendor/github.com/opencontainers/selinux/go-selinux/selinux_linux.go ++++ b/vendor/github.com/opencontainers/selinux/go-selinux/selinux_linux.go +@@ -17,8 +17,11 @@ import ( + "strings" + "sync" + +- "github.com/opencontainers/selinux/pkg/pwalkdir" ++ "github.com/cyphar/filepath-securejoin/pathrs-lite" ++ "github.com/cyphar/filepath-securejoin/pathrs-lite/procfs" + "golang.org/x/sys/unix" ++ ++ "github.com/opencontainers/selinux/pkg/pwalkdir" + ) + + const ( +@@ -45,7 +48,7 @@ type selinuxState struct { + + type level struct { + cats *big.Int +- sens uint ++ sens int + } + + type mlsRange struct { +@@ -73,10 +76,6 @@ var ( + mcsList: make(map[string]bool), + } + +- // for attrPath() +- attrPathOnce sync.Once +- haveThreadSelf bool +- + // for policyRoot() + policyRootOnce sync.Once + policyRootVal string +@@ -132,12 +131,13 @@ func verifySELinuxfsMount(mnt string) bool { + if err == nil { + 
break + } +- if err == unix.EAGAIN || err == unix.EINTR { //nolint:errorlint // unix errors are bare ++ if err == unix.EAGAIN || err == unix.EINTR { + continue + } + return false + } + ++ //#nosec G115 -- there is no overflow here. + if uint32(buf.Type) != uint32(unix.SELINUX_MAGIC) { + return false + } +@@ -255,48 +255,187 @@ func readConfig(target string) string { + return "" + } + +-func isProcHandle(fh *os.File) error { +- var buf unix.Statfs_t ++func readConFd(in *os.File) (string, error) { ++ data, err := io.ReadAll(in) ++ if err != nil { ++ return "", err ++ } ++ return string(bytes.TrimSuffix(data, []byte{0})), nil ++} + +- for { +- err := unix.Fstatfs(int(fh.Fd()), &buf) +- if err == nil { +- break +- } +- if err != unix.EINTR { //nolint:errorlint // unix errors are bare +- return &os.PathError{Op: "fstatfs", Path: fh.Name(), Err: err} +- } ++func writeConFd(out *os.File, val string) error { ++ var err error ++ if val != "" { ++ _, err = out.Write([]byte(val)) ++ } else { ++ _, err = out.Write(nil) + } +- if buf.Type != unix.PROC_SUPER_MAGIC { +- return fmt.Errorf("file %q is not on procfs", fh.Name()) ++ return err ++} ++ ++// openProcThreadSelf is a small wrapper around [OpenThreadSelf] and ++// [pathrs.Reopen] to make "one-shot opens" slightly more ergonomic. The ++// provided mode must be os.O_* flags to indicate what mode the returned file ++// should be opened with (flags like os.O_CREAT and os.O_EXCL are not ++// supported). ++// ++// If no error occurred, the returned handle is guaranteed to be exactly ++// /proc/thread-self/ with no tricky mounts or symlinks causing you to ++// operate on an unexpected path (with some caveats on pre-openat2 or ++// pre-fsopen kernels). 
++// ++// [OpenThreadSelf]: https://pkg.go.dev/github.com/cyphar/filepath-securejoin/pathrs-lite/procfs#Handle.OpenThreadSelf ++func openProcThreadSelf(subpath string, mode int) (*os.File, procfs.ProcThreadSelfCloser, error) { ++ if subpath == "" { ++ return nil, nil, ErrEmptyPath ++ } ++ ++ proc, err := procfs.OpenProcRoot() ++ if err != nil { ++ return nil, nil, err + } ++ defer proc.Close() + +- return nil +-} ++ handle, closer, err := proc.OpenThreadSelf(subpath) ++ if err != nil { ++ return nil, nil, fmt.Errorf("open /proc/thread-self/%s handle: %w", subpath, err) ++ } ++ defer handle.Close() // we will return a re-opened handle + +-func readCon(fpath string) (string, error) { +- if fpath == "" { +- return "", ErrEmptyPath ++ file, err := pathrs.Reopen(handle, mode) ++ if err != nil { ++ closer() ++ return nil, nil, fmt.Errorf("reopen /proc/thread-self/%s handle (%#x): %w", subpath, mode, err) + } ++ return file, closer, nil ++} + +- in, err := os.Open(fpath) ++// Read the contents of /proc/thread-self/. ++func readConThreadSelf(fpath string) (string, error) { ++ in, closer, err := openProcThreadSelf(fpath, os.O_RDONLY|unix.O_CLOEXEC) + if err != nil { + return "", err + } ++ defer closer() + defer in.Close() + +- if err := isProcHandle(in); err != nil { ++ return readConFd(in) ++} ++ ++// Write to /proc/thread-self/. ++func writeConThreadSelf(fpath, val string) error { ++ if val == "" { ++ if !getEnabled() { ++ return nil ++ } ++ } ++ ++ out, closer, err := openProcThreadSelf(fpath, os.O_WRONLY|unix.O_CLOEXEC) ++ if err != nil { ++ return err ++ } ++ defer closer() ++ defer out.Close() ++ ++ return writeConFd(out, val) ++} ++ ++// openProcSelf is a small wrapper around [OpenSelf] and [pathrs.Reopen] to ++// make "one-shot opens" slightly more ergonomic. The provided mode must be ++// os.O_* flags to indicate what mode the returned file should be opened with ++// (flags like os.O_CREAT and os.O_EXCL are not supported). 
++// ++// If no error occurred, the returned handle is guaranteed to be exactly ++// /proc/self/ with no tricky mounts or symlinks causing you to ++// operate on an unexpected path (with some caveats on pre-openat2 or ++// pre-fsopen kernels). ++// ++// [OpenSelf]: https://pkg.go.dev/github.com/cyphar/filepath-securejoin/pathrs-lite/procfs#Handle.OpenSelf ++func openProcSelf(subpath string, mode int) (*os.File, error) { ++ if subpath == "" { ++ return nil, ErrEmptyPath ++ } ++ ++ proc, err := procfs.OpenProcRoot() ++ if err != nil { ++ return nil, err ++ } ++ defer proc.Close() ++ ++ handle, err := proc.OpenSelf(subpath) ++ if err != nil { ++ return nil, fmt.Errorf("open /proc/self/%s handle: %w", subpath, err) ++ } ++ defer handle.Close() // we will return a re-opened handle ++ ++ file, err := pathrs.Reopen(handle, mode) ++ if err != nil { ++ return nil, fmt.Errorf("reopen /proc/self/%s handle (%#x): %w", subpath, mode, err) ++ } ++ return file, nil ++} ++ ++// Read the contents of /proc/self/. ++func readConSelf(fpath string) (string, error) { ++ in, err := openProcSelf(fpath, os.O_RDONLY|unix.O_CLOEXEC) ++ if err != nil { + return "", err + } ++ defer in.Close() ++ + return readConFd(in) + } + +-func readConFd(in *os.File) (string, error) { +- data, err := io.ReadAll(in) ++// Write to /proc/self/. ++func writeConSelf(fpath, val string) error { ++ if val == "" { ++ if !getEnabled() { ++ return nil ++ } ++ } ++ ++ out, err := openProcSelf(fpath, os.O_WRONLY|unix.O_CLOEXEC) + if err != nil { +- return "", err ++ return err + } +- return string(bytes.TrimSuffix(data, []byte{0})), nil ++ defer out.Close() ++ ++ return writeConFd(out, val) ++} ++ ++// openProcPid is a small wrapper around [OpenPid] and [pathrs.Reopen] to make ++// "one-shot opens" slightly more ergonomic. The provided mode must be os.O_* ++// flags to indicate what mode the returned file should be opened with (flags ++// like os.O_CREAT and os.O_EXCL are not supported). 
++// ++// If no error occurred, the returned handle is guaranteed to be exactly ++// /proc/self/ with no tricky mounts or symlinks causing you to ++// operate on an unexpected path (with some caveats on pre-openat2 or ++// pre-fsopen kernels). ++// ++// [OpenPid]: https://pkg.go.dev/github.com/cyphar/filepath-securejoin/pathrs-lite/procfs#Handle.OpenPid ++func openProcPid(pid int, subpath string, mode int) (*os.File, error) { ++ if subpath == "" { ++ return nil, ErrEmptyPath ++ } ++ ++ proc, err := procfs.OpenProcRoot() ++ if err != nil { ++ return nil, err ++ } ++ defer proc.Close() ++ ++ handle, err := proc.OpenPid(pid, subpath) ++ if err != nil { ++ return nil, fmt.Errorf("open /proc/%d/%s handle: %w", pid, subpath, err) ++ } ++ defer handle.Close() // we will return a re-opened handle ++ ++ file, err := pathrs.Reopen(handle, mode) ++ if err != nil { ++ return nil, fmt.Errorf("reopen /proc/%d/%s handle (%#x): %w", pid, subpath, mode, err) ++ } ++ return file, nil + } + + // classIndex returns the int index for an object class in the loaded policy, +@@ -328,8 +467,8 @@ func lSetFileLabel(fpath string, label string) error { + if err == nil { + break + } +- if err != unix.EINTR { //nolint:errorlint // unix errors are bare +- return &os.PathError{Op: "lsetxattr", Path: fpath, Err: err} ++ if err != unix.EINTR { ++ return &os.PathError{Op: fmt.Sprintf("lsetxattr(label=%s)", label), Path: fpath, Err: err} + } + } + +@@ -347,8 +486,8 @@ func setFileLabel(fpath string, label string) error { + if err == nil { + break + } +- if err != unix.EINTR { //nolint:errorlint // unix errors are bare +- return &os.PathError{Op: "setxattr", Path: fpath, Err: err} ++ if err != unix.EINTR { ++ return &os.PathError{Op: fmt.Sprintf("setxattr(label=%s)", label), Path: fpath, Err: err} + } + } + +@@ -392,78 +531,34 @@ func lFileLabel(fpath string) (string, error) { + } + + func setFSCreateLabel(label string) error { +- return writeCon(attrPath("fscreate"), label) ++ return 
writeConThreadSelf("attr/fscreate", label) + } + + // fsCreateLabel returns the default label the kernel which the kernel is using + // for file system objects created by this task. "" indicates default. + func fsCreateLabel() (string, error) { +- return readCon(attrPath("fscreate")) ++ return readConThreadSelf("attr/fscreate") + } + + // currentLabel returns the SELinux label of the current process thread, or an error. + func currentLabel() (string, error) { +- return readCon(attrPath("current")) ++ return readConThreadSelf("attr/current") + } + + // pidLabel returns the SELinux label of the given pid, or an error. + func pidLabel(pid int) (string, error) { +- return readCon(fmt.Sprintf("/proc/%d/attr/current", pid)) ++ it, err := openProcPid(pid, "attr/current", os.O_RDONLY|unix.O_CLOEXEC) ++ if err != nil { ++ return "", nil ++ } ++ defer it.Close() ++ return readConFd(it) + } + + // ExecLabel returns the SELinux label that the kernel will use for any programs + // that are executed by the current process thread, or an error. 
+ func execLabel() (string, error) { +- return readCon(attrPath("exec")) +-} +- +-func writeCon(fpath, val string) error { +- if fpath == "" { +- return ErrEmptyPath +- } +- if val == "" { +- if !getEnabled() { +- return nil +- } +- } +- +- out, err := os.OpenFile(fpath, os.O_WRONLY, 0) +- if err != nil { +- return err +- } +- defer out.Close() +- +- if err := isProcHandle(out); err != nil { +- return err +- } +- +- if val != "" { +- _, err = out.Write([]byte(val)) +- } else { +- _, err = out.Write(nil) +- } +- if err != nil { +- return err +- } +- return nil +-} +- +-func attrPath(attr string) string { +- // Linux >= 3.17 provides this +- const threadSelfPrefix = "/proc/thread-self/attr" +- +- attrPathOnce.Do(func() { +- st, err := os.Stat(threadSelfPrefix) +- if err == nil && st.Mode().IsDir() { +- haveThreadSelf = true +- } +- }) +- +- if haveThreadSelf { +- return filepath.Join(threadSelfPrefix, attr) +- } +- +- return filepath.Join("/proc/self/task", strconv.Itoa(unix.Gettid()), "attr", attr) ++ return readConThreadSelf("exec") + } + + // canonicalizeContext takes a context string and writes it to the kernel +@@ -501,14 +596,14 @@ func catsToBitset(cats string) (*big.Int, error) { + return nil, err + } + for i := catstart; i <= catend; i++ { +- bitset.SetBit(bitset, int(i), 1) ++ bitset.SetBit(bitset, i, 1) + } + } else { + cat, err := parseLevelItem(ranges[0], category) + if err != nil { + return nil, err + } +- bitset.SetBit(bitset, int(cat), 1) ++ bitset.SetBit(bitset, cat, 1) + } + } + +@@ -516,16 +611,17 @@ func catsToBitset(cats string) (*big.Int, error) { + } + + // parseLevelItem parses and verifies that a sensitivity or category are valid +-func parseLevelItem(s string, sep levelItem) (uint, error) { ++func parseLevelItem(s string, sep levelItem) (int, error) { + if len(s) < minSensLen || levelItem(s[0]) != sep { + return 0, ErrLevelSyntax + } +- val, err := strconv.ParseUint(s[1:], 10, 32) ++ const bitSize = 31 // Make sure the result fits into 
signed int32. ++ val, err := strconv.ParseUint(s[1:], 10, bitSize) + if err != nil { + return 0, err + } + +- return uint(val), nil ++ return int(val), nil + } + + // parseLevel fills a level from a string that contains +@@ -582,7 +678,8 @@ func bitsetToStr(c *big.Int) string { + var str string + + length := 0 +- for i := int(c.TrailingZeroBits()); i < c.BitLen(); i++ { ++ i0 := int(c.TrailingZeroBits()) //#nosec G115 -- don't expect TralingZeroBits to return values with highest bit set. ++ for i := i0; i < c.BitLen(); i++ { + if c.Bit(i) == 0 { + continue + } +@@ -622,7 +719,7 @@ func (l *level) equal(l2 *level) bool { + + // String returns an mlsRange as a string. + func (m mlsRange) String() string { +- low := "s" + strconv.Itoa(int(m.low.sens)) ++ low := "s" + strconv.Itoa(m.low.sens) + if m.low.cats != nil && m.low.cats.BitLen() > 0 { + low += ":" + bitsetToStr(m.low.cats) + } +@@ -631,7 +728,7 @@ func (m mlsRange) String() string { + return low + } + +- high := "s" + strconv.Itoa(int(m.high.sens)) ++ high := "s" + strconv.Itoa(m.high.sens) + if m.high.cats != nil && m.high.cats.BitLen() > 0 { + high += ":" + bitsetToStr(m.high.cats) + } +@@ -639,14 +736,16 @@ func (m mlsRange) String() string { + return low + "-" + high + } + +-func max(a, b uint) uint { ++// TODO: remove these in favor of built-in min/max ++// once we stop supporting Go < 1.21. 
++func maxInt(a, b int) int { + if a > b { + return a + } + return b + } + +-func min(a, b uint) uint { ++func minInt(a, b int) int { + if a < b { + return a + } +@@ -675,10 +774,10 @@ func calculateGlbLub(sourceRange, targetRange string) (string, error) { + outrange := &mlsRange{low: &level{}, high: &level{}} + + /* take the greatest of the low */ +- outrange.low.sens = max(s.low.sens, t.low.sens) ++ outrange.low.sens = maxInt(s.low.sens, t.low.sens) + + /* take the least of the high */ +- outrange.high.sens = min(s.high.sens, t.high.sens) ++ outrange.high.sens = minInt(s.high.sens, t.high.sens) + + /* find the intersecting categories */ + if s.low.cats != nil && t.low.cats != nil { +@@ -723,16 +822,29 @@ func peerLabel(fd uintptr) (string, error) { + // setKeyLabel takes a process label and tells the kernel to assign the + // label to the next kernel keyring that gets created + func setKeyLabel(label string) error { +- err := writeCon("/proc/self/attr/keycreate", label) ++ // Rather than using /proc/thread-self, we want to use /proc/self to ++ // operate on the thread-group leader. ++ err := writeConSelf("attr/keycreate", label) + if errors.Is(err, os.ErrNotExist) { + return nil + } + if label == "" && errors.Is(err, os.ErrPermission) { + return nil + } ++ if errors.Is(err, unix.EACCES) && unix.Getpid() != unix.Gettid() { ++ return ErrNotTGLeader ++ } + return err + } + ++// KeyLabel retrieves the current kernel keyring label setting for this ++// thread-group. ++func keyLabel() (string, error) { ++ // Rather than using /proc/thread-self, we want to use /proc/self to ++ // operate on the thread-group leader. ++ return readConSelf("attr/keycreate") ++} ++ + // get returns the Context as a string + func (c Context) get() string { + if l := c["level"]; l != "" { +@@ -808,8 +920,7 @@ func enforceMode() int { + // setEnforceMode sets the current SELinux mode Enforcing, Permissive. + // Disabled is not valid, since this needs to be set at boot time. 
+ func setEnforceMode(mode int) error { +- //nolint:gosec // ignore G306: permissions to be 0600 or less. +- return os.WriteFile(selinuxEnforcePath(), []byte(strconv.Itoa(mode)), 0o644) ++ return os.WriteFile(selinuxEnforcePath(), []byte(strconv.Itoa(mode)), 0) + } + + // defaultEnforceMode returns the systems default SELinux mode Enforcing, +@@ -1016,8 +1127,7 @@ func addMcs(processLabel, fileLabel string) (string, string) { + + // securityCheckContext validates that the SELinux label is understood by the kernel + func securityCheckContext(val string) error { +- //nolint:gosec // ignore G306: permissions to be 0600 or less. +- return os.WriteFile(filepath.Join(getSelinuxMountPoint(), "context"), []byte(val), 0o644) ++ return os.WriteFile(filepath.Join(getSelinuxMountPoint(), "context"), []byte(val), 0) + } + + // copyLevel returns a label with the MLS/MCS level from src label replaced on +@@ -1134,7 +1244,7 @@ func rchcon(fpath, label string) error { //revive:disable:cognitive-complexity + } + return pwalkdir.Walk(fpath, func(p string, _ fs.DirEntry, _ error) error { + if fastMode { +- if cLabel, err := lFileLabel(fpath); err == nil && cLabel == label { ++ if cLabel, err := lFileLabel(p); err == nil && cLabel == label { + return nil + } + } +diff --git a/vendor/github.com/opencontainers/selinux/go-selinux/selinux_stub.go b/vendor/github.com/opencontainers/selinux/go-selinux/selinux_stub.go +index bc3fd3b3..26792123 100644 +--- a/vendor/github.com/opencontainers/selinux/go-selinux/selinux_stub.go ++++ b/vendor/github.com/opencontainers/selinux/go-selinux/selinux_stub.go +@@ -7,11 +7,11 @@ func attrPath(string) string { + return "" + } + +-func readCon(fpath string) (string, error) { ++func readConThreadSelf(string) (string, error) { + return "", nil + } + +-func writeCon(string, string) error { ++func writeConThreadSelf(string, string) error { + return nil + } + +@@ -21,27 +21,27 @@ func getEnabled() bool { + return false + } + +-func classIndex(class string) (int, 
error) { ++func classIndex(string) (int, error) { + return -1, nil + } + +-func setFileLabel(fpath string, label string) error { ++func setFileLabel(string, string) error { + return nil + } + +-func lSetFileLabel(fpath string, label string) error { ++func lSetFileLabel(string, string) error { + return nil + } + +-func fileLabel(fpath string) (string, error) { ++func fileLabel(string) (string, error) { + return "", nil + } + +-func lFileLabel(fpath string) (string, error) { ++func lFileLabel(string) (string, error) { + return "", nil + } + +-func setFSCreateLabel(label string) error { ++func setFSCreateLabel(string) error { + return nil + } + +@@ -53,7 +53,7 @@ func currentLabel() (string, error) { + return "", nil + } + +-func pidLabel(pid int) (string, error) { ++func pidLabel(int) (string, error) { + return "", nil + } + +@@ -61,38 +61,42 @@ func execLabel() (string, error) { + return "", nil + } + +-func canonicalizeContext(val string) (string, error) { ++func canonicalizeContext(string) (string, error) { + return "", nil + } + +-func computeCreateContext(source string, target string, class string) (string, error) { ++func computeCreateContext(string, string, string) (string, error) { + return "", nil + } + +-func calculateGlbLub(sourceRange, targetRange string) (string, error) { ++func calculateGlbLub(string, string) (string, error) { + return "", nil + } + +-func peerLabel(fd uintptr) (string, error) { ++func peerLabel(uintptr) (string, error) { + return "", nil + } + +-func setKeyLabel(label string) error { ++func setKeyLabel(string) error { + return nil + } + ++func keyLabel() (string, error) { ++ return "", nil ++} ++ + func (c Context) get() string { + return "" + } + +-func newContext(label string) (Context, error) { ++func newContext(string) (Context, error) { + return Context{}, nil + } + + func clearLabels() { + } + +-func reserveLabel(label string) { ++func reserveLabel(string) { + } + + func isMLSEnabled() bool { +@@ -103,7 +107,7 @@ func 
enforceMode() int { + return Disabled + } + +-func setEnforceMode(mode int) error { ++func setEnforceMode(int) error { + return nil + } + +@@ -111,7 +115,7 @@ func defaultEnforceMode() int { + return Disabled + } + +-func releaseLabel(label string) { ++func releaseLabel(string) { + } + + func roFileLabel() string { +@@ -126,27 +130,27 @@ func initContainerLabels() (string, string) { + return "", "" + } + +-func containerLabels() (processLabel string, fileLabel string) { ++func containerLabels() (string, string) { + return "", "" + } + +-func securityCheckContext(val string) error { ++func securityCheckContext(string) error { + return nil + } + +-func copyLevel(src, dest string) (string, error) { ++func copyLevel(string, string) (string, error) { + return "", nil + } + +-func chcon(fpath string, label string, recurse bool) error { ++func chcon(string, string, bool) error { + return nil + } + +-func dupSecOpt(src string) ([]string, error) { ++func dupSecOpt(string) ([]string, error) { + return nil, nil + } + +-func getDefaultContextWithLevel(user, level, scon string) (string, error) { ++func getDefaultContextWithLevel(string, string, string) (string, error) { + return "", nil + } + +diff --git a/vendor/github.com/opencontainers/selinux/go-selinux/xattrs_linux.go b/vendor/github.com/opencontainers/selinux/go-selinux/xattrs_linux.go +index 9e473ca1..559c8510 100644 +--- a/vendor/github.com/opencontainers/selinux/go-selinux/xattrs_linux.go ++++ b/vendor/github.com/opencontainers/selinux/go-selinux/xattrs_linux.go +@@ -31,7 +31,7 @@ func lgetxattr(path, attr string) ([]byte, error) { + func doLgetxattr(path, attr string, dest []byte) (int, error) { + for { + sz, err := unix.Lgetxattr(path, attr, dest) +- if err != unix.EINTR { //nolint:errorlint // unix errors are bare ++ if err != unix.EINTR { + return sz, err + } + } +@@ -64,7 +64,7 @@ func getxattr(path, attr string) ([]byte, error) { + func dogetxattr(path, attr string, dest []byte) (int, error) { + for { + sz, err 
:= unix.Getxattr(path, attr, dest) +- if err != unix.EINTR { //nolint:errorlint // unix errors are bare ++ if err != unix.EINTR { + return sz, err + } + } +diff --git a/vendor/github.com/opencontainers/selinux/pkg/pwalkdir/README.md b/vendor/github.com/opencontainers/selinux/pkg/pwalkdir/README.md +index 068ac400..b827e7dd 100644 +--- a/vendor/github.com/opencontainers/selinux/pkg/pwalkdir/README.md ++++ b/vendor/github.com/opencontainers/selinux/pkg/pwalkdir/README.md +@@ -28,7 +28,9 @@ Please note the following limitations of this code: + + * fs.SkipDir is not supported; + +- * no errors are ever passed to WalkDirFunc; ++ * ErrNotExist errors from filepath.WalkDir are silently ignored for any path ++ except the top directory (WalkDir argument); any other error is returned to ++ the caller of WalkDir; + + * once any error is returned from any walkDirFunc instance, no more calls + to WalkDirFunc are made, and the error is returned to the caller of WalkDir; +@@ -51,4 +53,4 @@ filepath.WalkDir. + Otherwise (if a WalkDirFunc is actually doing something) this is usually + faster, except when the WalkDirN(..., 1) is used. Run `go test -bench .` + to see how different operations can benefit from it, as well as how the +-level of paralellism affects the speed. ++level of parallelism affects the speed. 
+diff --git a/vendor/github.com/opencontainers/selinux/pkg/pwalkdir/pwalkdir.go b/vendor/github.com/opencontainers/selinux/pkg/pwalkdir/pwalkdir.go +index 0f5d9f58..5d2d09a2 100644 +--- a/vendor/github.com/opencontainers/selinux/pkg/pwalkdir/pwalkdir.go ++++ b/vendor/github.com/opencontainers/selinux/pkg/pwalkdir/pwalkdir.go +@@ -4,6 +4,7 @@ + package pwalkdir + + import ( ++ "errors" + "fmt" + "io/fs" + "path/filepath" +@@ -60,6 +61,12 @@ func WalkN(root string, walkFn fs.WalkDirFunc, num int) error { + go func() { + err = filepath.WalkDir(root, func(p string, entry fs.DirEntry, err error) error { + if err != nil { ++ // Walking a file tree can race with removal, ++ // so ignore ENOENT, except for root. ++ // https://github.com/opencontainers/selinux/issues/199. ++ if errors.Is(err, fs.ErrNotExist) && len(p) != rootLen { ++ return nil ++ } + close(files) + return err + } +diff --git a/vendor/modules.txt b/vendor/modules.txt +index d5aeb5f2..4e7e0ef8 100644 +--- a/vendor/modules.txt ++++ b/vendor/modules.txt +@@ -15,7 +15,7 @@ github.com/cilium/ebpf/internal/sysenc + github.com/cilium/ebpf/internal/tracefs + github.com/cilium/ebpf/internal/unix + github.com/cilium/ebpf/link +-# github.com/containerd/console v1.0.4 ++# github.com/containerd/console v1.0.5 + ## explicit; go 1.13 + github.com/containerd/console + # github.com/coreos/go-systemd/v22 v22.5.0 +@@ -25,9 +25,19 @@ github.com/coreos/go-systemd/v22/dbus + # github.com/cpuguy83/go-md2man/v2 v2.0.2 + ## explicit; go 1.11 + github.com/cpuguy83/go-md2man/v2/md2man +-# github.com/cyphar/filepath-securejoin v0.4.1 ++# github.com/cyphar/filepath-securejoin v0.5.0 + ## explicit; go 1.18 + github.com/cyphar/filepath-securejoin ++github.com/cyphar/filepath-securejoin/internal/consts ++github.com/cyphar/filepath-securejoin/pathrs-lite ++github.com/cyphar/filepath-securejoin/pathrs-lite/internal ++github.com/cyphar/filepath-securejoin/pathrs-lite/internal/assert 
++github.com/cyphar/filepath-securejoin/pathrs-lite/internal/fd ++github.com/cyphar/filepath-securejoin/pathrs-lite/internal/gocompat ++github.com/cyphar/filepath-securejoin/pathrs-lite/internal/kernelversion ++github.com/cyphar/filepath-securejoin/pathrs-lite/internal/linux ++github.com/cyphar/filepath-securejoin/pathrs-lite/internal/procfs ++github.com/cyphar/filepath-securejoin/pathrs-lite/procfs + # github.com/docker/go-units v0.5.0 + ## explicit + github.com/docker/go-units +@@ -50,7 +60,7 @@ github.com/mrunalp/fileutils + ## explicit + github.com/opencontainers/runtime-spec/specs-go + github.com/opencontainers/runtime-spec/specs-go/features +-# github.com/opencontainers/selinux v1.11.0 ++# github.com/opencontainers/selinux v1.12.0 => ./internal/third_party/selinux + ## explicit; go 1.19 + github.com/opencontainers/selinux/go-selinux + github.com/opencontainers/selinux/go-selinux/label +@@ -116,3 +126,4 @@ google.golang.org/protobuf/reflect/protoreflect + google.golang.org/protobuf/reflect/protoregistry + google.golang.org/protobuf/runtime/protoiface + google.golang.org/protobuf/runtime/protoimpl ++# github.com/opencontainers/selinux => ./internal/third_party/selinux +-- +2.51.1 + diff --git a/SOURCES/0001-Bump-runtime-spec-to-latest-git-HEAD.patch b/SOURCES/0001-Bump-runtime-spec-to-latest-git-HEAD.patch new file mode 100644 index 0000000..8268405 --- /dev/null +++ b/SOURCES/0001-Bump-runtime-spec-to-latest-git-HEAD.patch @@ -0,0 +1,103 @@ +From c6dad73d617864f3a281ac1fdaacd5ed971fa317 Mon Sep 17 00:00:00 2001 +From: Kir Kolyshkin +Date: Thu, 27 Jun 2024 09:00:51 -0700 +Subject: [PATCH 1/2] Bump runtime-spec to latest git HEAD + +This is to include + - https://github.com/opencontainers/runtime-spec/pull/1261 + - https://github.com/opencontainers/runtime-spec/pull/1253 + +Signed-off-by: Kir Kolyshkin +(cherry picked from commit 2cac22b1e29e6be4c004f35ce582aa2b7e1c2fda) +Signed-off-by: Kir Kolyshkin +--- + go.mod | 2 +- + go.sum | 4 ++-- + 
.../opencontainers/runtime-spec/specs-go/config.go | 8 ++++++++ + .../opencontainers/runtime-spec/specs-go/version.go | 2 +- + vendor/modules.txt | 2 +- + 5 files changed, 13 insertions(+), 5 deletions(-) + +diff --git a/go.mod b/go.mod +index 348bc9c6..db2d7ef1 100644 +--- a/go.mod ++++ b/go.mod +@@ -19,7 +19,7 @@ require ( + github.com/moby/sys/user v0.3.0 + github.com/moby/sys/userns v0.1.0 + github.com/mrunalp/fileutils v0.5.1 +- github.com/opencontainers/runtime-spec v1.2.0 ++ github.com/opencontainers/runtime-spec v1.2.1-0.20240625190033-701738418b95 + github.com/opencontainers/selinux v1.11.0 + github.com/seccomp/libseccomp-golang v0.10.0 + github.com/sirupsen/logrus v1.9.3 +diff --git a/go.sum b/go.sum +index 225d5860..4c863cc9 100644 +--- a/go.sum ++++ b/go.sum +@@ -46,8 +46,8 @@ github.com/moby/sys/userns v0.1.0 h1:tVLXkFOxVu9A64/yh59slHVv9ahO9UIev4JZusOLG/g + github.com/moby/sys/userns v0.1.0/go.mod h1:IHUYgu/kao6N8YZlp9Cf444ySSvCmDlmzUcYfDHOl28= + github.com/mrunalp/fileutils v0.5.1 h1:F+S7ZlNKnrwHfSwdlgNSkKo67ReVf8o9fel6C3dkm/Q= + github.com/mrunalp/fileutils v0.5.1/go.mod h1:M1WthSahJixYnrXQl/DFQuteStB1weuxD2QJNHXfbSQ= +-github.com/opencontainers/runtime-spec v1.2.0 h1:z97+pHb3uELt/yiAWD691HNHQIF07bE7dzrbT927iTk= +-github.com/opencontainers/runtime-spec v1.2.0/go.mod h1:jwyrGlmzljRJv/Fgzds9SsS/C5hL+LL3ko9hs6T5lQ0= ++github.com/opencontainers/runtime-spec v1.2.1-0.20240625190033-701738418b95 h1:Ghl8Z3l+yPQUDSxAp7Kg7fJLRNNXjOsR6ooDcca7PjU= ++github.com/opencontainers/runtime-spec v1.2.1-0.20240625190033-701738418b95/go.mod h1:jwyrGlmzljRJv/Fgzds9SsS/C5hL+LL3ko9hs6T5lQ0= + github.com/opencontainers/selinux v1.11.0 h1:+5Zbo97w3Lbmb3PeqQtpmTkMwsW5nRI3YaLpt7tQ7oU= + github.com/opencontainers/selinux v1.11.0/go.mod h1:E5dMC3VPuVvVHDYmi78qvhJp8+M586T4DlDRYpFkyec= + github.com/pmezard/go-difflib v1.0.0 h1:4DBwDE0NGyQoBHbLQYPwSUPoCMWR5BEzIk/f1lZbAQM= +diff --git a/vendor/github.com/opencontainers/runtime-spec/specs-go/config.go 
b/vendor/github.com/opencontainers/runtime-spec/specs-go/config.go +index d1236ba7..671f0d01 100644 +--- a/vendor/github.com/opencontainers/runtime-spec/specs-go/config.go ++++ b/vendor/github.com/opencontainers/runtime-spec/specs-go/config.go +@@ -94,6 +94,8 @@ type Process struct { + SelinuxLabel string `json:"selinuxLabel,omitempty" platform:"linux"` + // IOPriority contains the I/O priority settings for the cgroup. + IOPriority *LinuxIOPriority `json:"ioPriority,omitempty" platform:"linux"` ++ // ExecCPUAffinity specifies CPU affinity for exec processes. ++ ExecCPUAffinity *CPUAffinity `json:"execCPUAffinity,omitempty" platform:"linux"` + } + + // LinuxCapabilities specifies the list of allowed capabilities that are kept for a process. +@@ -127,6 +129,12 @@ const ( + IOPRIO_CLASS_IDLE IOPriorityClass = "IOPRIO_CLASS_IDLE" + ) + ++// CPUAffinity specifies process' CPU affinity. ++type CPUAffinity struct { ++ Initial string `json:"initial,omitempty"` ++ Final string `json:"final,omitempty"` ++} ++ + // Box specifies dimensions of a rectangle. Used for specifying the size of a console. + type Box struct { + // Height is the vertical dimension of a box. +diff --git a/vendor/github.com/opencontainers/runtime-spec/specs-go/version.go b/vendor/github.com/opencontainers/runtime-spec/specs-go/version.go +index 503971e0..f6c15f6c 100644 +--- a/vendor/github.com/opencontainers/runtime-spec/specs-go/version.go ++++ b/vendor/github.com/opencontainers/runtime-spec/specs-go/version.go +@@ -11,7 +11,7 @@ const ( + VersionPatch = 0 + + // VersionDev indicates development branch. Releases will be empty string. +- VersionDev = "" ++ VersionDev = "+dev" + ) + + // Version is the specification version that the package types support. 
+diff --git a/vendor/modules.txt b/vendor/modules.txt +index 3b245e0d..df520923 100644 +--- a/vendor/modules.txt ++++ b/vendor/modules.txt +@@ -46,7 +46,7 @@ github.com/moby/sys/userns + # github.com/mrunalp/fileutils v0.5.1 + ## explicit; go 1.13 + github.com/mrunalp/fileutils +-# github.com/opencontainers/runtime-spec v1.2.0 ++# github.com/opencontainers/runtime-spec v1.2.1-0.20240625190033-701738418b95 + ## explicit + github.com/opencontainers/runtime-spec/specs-go + github.com/opencontainers/runtime-spec/specs-go/features +-- +2.47.1 + diff --git a/SOURCES/0002-1.2-rootfs-re-allow-dangling-symlinks-in-mount-targe.patch b/SOURCES/0002-1.2-rootfs-re-allow-dangling-symlinks-in-mount-targe.patch new file mode 100644 index 0000000..20db404 --- /dev/null +++ b/SOURCES/0002-1.2-rootfs-re-allow-dangling-symlinks-in-mount-targe.patch @@ -0,0 +1,49 @@ +From e949092d469c3ee3ea9bf1002649b6a692895da9 Mon Sep 17 00:00:00 2001 +From: Aleksa Sarai +Date: Wed, 5 Nov 2025 02:04:02 +1100 +Subject: [PATCH 2/2] [1.2] rootfs: re-allow dangling symlinks in mount targets + +It seems there are a fair few images where dangling symlinks are used as +path components for mount targets, which pathrs-lite does not support +(and it would be difficult to fully support this in a race-free way). + +This was actually meant to be blocked by commit 63c2908164f3 ("rootfs: +try to scope MkdirAll to stay inside the rootfs"), followed by commit +dd827f7b715a ("utils: switch to securejoin.MkdirAllHandle"). However, we +still used SecureJoin to construct mountpoint targets, which means that +dangling symlinks were "resolved" before reaching pathrs-lite. + +This patch basically re-adds this hack in order to reduce the breakages +we've seen so far. 
+ +Signed-off-by: Aleksa Sarai +Signed-off-by: Kir Kolyshkin +--- + libcontainer/rootfs_linux.go | 11 +++++++++++ + 1 file changed, 11 insertions(+) + +diff --git a/libcontainer/rootfs_linux.go b/libcontainer/rootfs_linux.go +index 377642c9..6ea7cd47 100644 +--- a/libcontainer/rootfs_linux.go ++++ b/libcontainer/rootfs_linux.go +@@ -518,6 +518,17 @@ func (m *mountEntry) createOpenMountpoint(rootfs string) (Err error) { + dstIsFile = !fi.IsDir() + } + ++ // In previous runc versions, we would tolerate nonsense paths with ++ // dangling symlinks as path components. pathrs-lite does not support ++ // this, so instead we have to emulate this behaviour by doing ++ // SecureJoin *purely to get a semi-reasonable path to use* and then we ++ // use pathrs-lite to operate on the path safely. ++ newUnsafePath, err := securejoin.SecureJoin(rootfs, unsafePath) ++ if err != nil { ++ return err ++ } ++ unsafePath = utils.StripRoot(rootfs, newUnsafePath) ++ + if dstIsFile { + dstFile, err = pathrs.CreateInRoot(rootfs, unsafePath, unix.O_CREAT|unix.O_EXCL|unix.O_NOFOLLOW, 0o644) + } else { +-- +2.51.1 + diff --git a/SOURCES/0002-1.1-runc-exec-implement-CPU-affinity.patch b/SOURCES/0002-runc-exec-implement-CPU-affinity.patch similarity index 75% rename from SOURCES/0002-1.1-runc-exec-implement-CPU-affinity.patch rename to SOURCES/0002-runc-exec-implement-CPU-affinity.patch index d04d619..90de1a6 100644 --- a/SOURCES/0002-1.1-runc-exec-implement-CPU-affinity.patch +++ b/SOURCES/0002-runc-exec-implement-CPU-affinity.patch @@ -1,7 +1,7 @@ -From 1af672a2635628ca24ce3b5ed3344d316548f1ca Mon Sep 17 00:00:00 2001 +From 73786942b7176eae1e676cf2f78af548f090e418 Mon Sep 17 00:00:00 2001 From: Kir Kolyshkin Date: Mon, 21 Oct 2024 15:50:38 -0700 -Subject: [PATCH 2/2] [1.1] runc exec: implement CPU affinity +Subject: [PATCH 2/2] runc exec: implement CPU affinity As per - https://github.com/opencontainers/runtime-spec/pull/1253 @@ -27,25 +27,29 @@ Because of the above, - exec's final CPU 
affinity, if not specified, can be different depending on the kernel, therefore we don't test it. +Signed-off-by: Kir Kolyshkin +(cherry picked from commit 57237b31de367a722c5d49088912d57c28c6fb46) Signed-off-by: Kir Kolyshkin --- - libcontainer/configs/config.go | 73 ++++++++++++++++++++ + libcontainer/configs/config.go | 72 ++++++++++++++++++++ libcontainer/container_linux.go | 4 ++ - libcontainer/init_linux.go | 1 + - libcontainer/nsenter/nsexec.c | 36 +++++++++- + libcontainer/init_linux.go | 3 +- + libcontainer/nsenter/log.c | 9 ++- + libcontainer/nsenter/log.h | 3 + + libcontainer/nsenter/nsexec.c | 29 ++++++++ libcontainer/process.go | 2 + - libcontainer/process_linux.go | 51 +++++++++++++- + libcontainer/process_linux.go | 49 +++++++++++++- libcontainer/specconv/spec_linux.go | 5 ++ tests/integration/cpu_affinity.bats | 101 ++++++++++++++++++++++++++++ utils_linux.go | 6 ++ - 9 files changed, 275 insertions(+), 4 deletions(-) + 11 files changed, 277 insertions(+), 6 deletions(-) create mode 100644 tests/integration/cpu_affinity.bats diff --git a/libcontainer/configs/config.go b/libcontainer/configs/config.go -index 6ebf5ec7..997f2724 100644 +index 22fe0f9b..daffd130 100644 --- a/libcontainer/configs/config.go +++ b/libcontainer/configs/config.go -@@ -3,11 +3,15 @@ package configs +@@ -3,8 +3,11 @@ package configs import ( "bytes" "encoding/json" @@ -57,19 +61,20 @@ index 6ebf5ec7..997f2724 100644 "time" "github.com/sirupsen/logrus" -+ "golang.org/x/sys/unix" +@@ -225,6 +228,9 @@ type Config struct { - "github.com/opencontainers/runc/libcontainer/devices" - "github.com/opencontainers/runtime-spec/specs-go" -@@ -211,6 +215,75 @@ type Config struct { - // RootlessCgroups is set when unlikely to have the full access to cgroups. - // When RootlessCgroups is set, cgroups errors are ignored. - RootlessCgroups bool `json:"rootless_cgroups,omitempty"` + // IOPriority is the container's I/O priority. 
+ IOPriority *IOPriority `json:"io_priority,omitempty"` + + // ExecCPUAffinity is CPU affinity for a non-init process to be run in the container. + ExecCPUAffinity *CPUAffinity `json:"exec_cpu_affinity,omitempty"` -+} -+ + } + + // Scheduler is based on the Linux sched_setattr(2) syscall. +@@ -294,6 +300,72 @@ var IOPrioClassMapping = map[specs.IOPriorityClass]int{ + + type IOPriority = specs.LinuxIOPriority + +type CPUAffinity struct { + Initial, Final *unix.CPUSet +} @@ -134,14 +139,16 @@ index 6ebf5ec7..997f2724 100644 + Initial: initial, + Final: final, + }, nil - } - ++} ++ type ( + HookName string + HookList []Hook diff --git a/libcontainer/container_linux.go b/libcontainer/container_linux.go -index 40b332f9..68b6a74f 100644 +index c0211617..1fc590a5 100644 --- a/libcontainer/container_linux.go +++ b/libcontainer/container_linux.go -@@ -692,6 +692,7 @@ func (c *linuxContainer) newInitConfig(process *Process) *initConfig { +@@ -692,6 +692,7 @@ func (c *Container) newInitConfig(process *Process) *initConfig { AppArmorProfile: c.config.AppArmorProfile, ProcessLabel: c.config.ProcessLabel, Rlimits: c.config.Rlimits, @@ -149,7 +156,7 @@ index 40b332f9..68b6a74f 100644 CreateConsole: process.ConsoleSocket != nil, ConsoleWidth: process.ConsoleWidth, ConsoleHeight: process.ConsoleHeight, -@@ -708,6 +709,9 @@ func (c *linuxContainer) newInitConfig(process *Process) *initConfig { +@@ -708,6 +709,9 @@ func (c *Container) newInitConfig(process *Process) *initConfig { if len(process.Rlimits) > 0 { cfg.Rlimits = process.Rlimits } @@ -160,43 +167,80 @@ index 40b332f9..68b6a74f 100644 cfg.Cgroup2Path = c.cgroupManager.Path("") } diff --git a/libcontainer/init_linux.go b/libcontainer/init_linux.go -index d9f18139..1f8562ec 100644 +index 1eb0279d..eddbfba6 100644 --- a/libcontainer/init_linux.go +++ b/libcontainer/init_linux.go -@@ -70,6 +70,7 @@ type initConfig struct { +@@ -72,6 +72,7 @@ type initConfig struct { RootlessCgroups bool `json:"rootless_cgroups,omitempty"` 
SpecState *specs.State `json:"spec_state,omitempty"` Cgroup2Path string `json:"cgroup2_path,omitempty"` + CPUAffinity *configs.CPUAffinity `json:"cpu_affinity,omitempty"` } - type initer interface { -diff --git a/libcontainer/nsenter/nsexec.c b/libcontainer/nsenter/nsexec.c -index 2d224bab..6f70aa87 100644 ---- a/libcontainer/nsenter/nsexec.c -+++ b/libcontainer/nsenter/nsexec.c -@@ -149,13 +149,18 @@ int setns(int fd, int nstype) + // Init is part of "runc init" implementation. +@@ -151,7 +152,7 @@ func startInitialization() (retErr error) { + + logrus.SetOutput(logPipe) + logrus.SetFormatter(new(logrus.JSONFormatter)) +- logrus.Debug("child process in init()") ++ logrus.Debugf("child process in init()") + + // Only init processes have FIFOFD. + var fifoFile *os.File +diff --git a/libcontainer/nsenter/log.c b/libcontainer/nsenter/log.c +index 086b5398..72774cb0 100644 +--- a/libcontainer/nsenter/log.c ++++ b/libcontainer/nsenter/log.c +@@ -31,6 +31,11 @@ void setup_logpipe(void) + loglevel = i; } - #endif +bool log_enabled_for(int level) +{ + return (logfd >= 0 && level <= loglevel); +} + - static void write_log(int level, const char *format, ...) - { - char *message = NULL, *stage = NULL, *json = NULL; + /* Defined in nsexec.c */ + extern int current_stage; + +@@ -40,8 +45,8 @@ void write_log(int level, const char *format, ...) 
va_list args; int ret; - if (logfd < 0 || level > loglevel) +- goto out; + if (!log_enabled_for(level)) - goto out; ++ return; va_start(args, format); -@@ -851,6 +856,25 @@ void try_unshare(int flags, const char *msg) - bail("failed to unshare %s", msg); + ret = vasprintf(&message, format, args); +diff --git a/libcontainer/nsenter/log.h b/libcontainer/nsenter/log.h +index 1fe95a11..3e18de68 100644 +--- a/libcontainer/nsenter/log.h ++++ b/libcontainer/nsenter/log.h +@@ -1,6 +1,7 @@ + #ifndef NSENTER_LOG_H + #define NSENTER_LOG_H + ++#include + #include + + /* +@@ -20,6 +21,8 @@ + */ + void setup_logpipe(void); + ++bool log_enabled_for(int level); ++ + void write_log(int level, const char *format, ...) __attribute__((format(printf, 2, 3))); + + extern int logfd; +diff --git a/libcontainer/nsenter/nsexec.c b/libcontainer/nsenter/nsexec.c +index 565b2ca2..aa4976d6 100644 +--- a/libcontainer/nsenter/nsexec.c ++++ b/libcontainer/nsenter/nsexec.c +@@ -558,6 +558,25 @@ static void update_timens_offsets(pid_t pid, char *map, size_t map_len) + bail("failed to update /proc/%d/timens_offsets", pid); } +void print_cpu_affinity() @@ -221,7 +265,7 @@ index 2d224bab..6f70aa87 100644 void nsexec(void) { int pipenum; -@@ -892,6 +916,16 @@ void nsexec(void) +@@ -584,6 +603,16 @@ void nsexec(void) write_log(DEBUG, "=> nsexec container setup"); @@ -239,31 +283,23 @@ index 2d224bab..6f70aa87 100644 nl_parse(pipenum, &config); diff --git a/libcontainer/process.go b/libcontainer/process.go -index 8a5d340d..99167274 100644 +index 114b3f2b..5339583f 100644 --- a/libcontainer/process.go +++ b/libcontainer/process.go -@@ -89,6 +89,8 @@ type Process struct { - // - // For cgroup v2, the only key allowed is "". - SubCgroupPaths map[string]string +@@ -102,6 +102,8 @@ type Process struct { + Scheduler *configs.Scheduler + + IOPriority *configs.IOPriority + + CPUAffinity *configs.CPUAffinity } // Wait waits for the process to exit. 
diff --git a/libcontainer/process_linux.go b/libcontainer/process_linux.go -index 0d9ceb9c..3b48ae76 100644 +index fcbb54a3..477c8a77 100644 --- a/libcontainer/process_linux.go +++ b/libcontainer/process_linux.go -@@ -9,6 +9,7 @@ import ( - "os" - "os/exec" - "path/filepath" -+ "runtime" - "strconv" - "time" - -@@ -78,12 +79,52 @@ func (p *setnsProcess) signal(sig os.Signal) error { +@@ -122,6 +122,46 @@ func (p *setnsProcess) signal(sig os.Signal) error { return unix.Kill(p.pid(), s) } @@ -308,18 +344,20 @@ index 0d9ceb9c..3b48ae76 100644 +} + func (p *setnsProcess) start() (retErr error) { - defer p.messageSockPair.parent.Close() -- // get the "before" value of oom kill count -+ // Get the "before" value of oom kill count. + defer p.comm.closeParent() + +@@ -133,8 +173,8 @@ func (p *setnsProcess) start() (retErr error) { + + // get the "before" value of oom kill count oom, _ := p.manager.OOMKillCount() - err := p.cmd.Start() -- // close the write-side of the pipes (controlled by child) +- // close the child-side of the pipes (controlled by child) + err := p.startWithCPUAffinity() + // Close the child-side of the pipes (controlled by child). 
- p.messageSockPair.child.Close() - p.logFilePair.child.Close() + p.comm.closeChild() if err != nil { -@@ -143,6 +184,10 @@ func (p *setnsProcess) start() (retErr error) { + return fmt.Errorf("error starting setns process: %w", err) +@@ -184,6 +224,10 @@ func (p *setnsProcess) start() (retErr error) { } } } @@ -330,13 +368,21 @@ index 0d9ceb9c..3b48ae76 100644 if p.intelRdtPath != "" { // if Intel RDT "resource control" filesystem path exists _, err := os.Stat(p.intelRdtPath) +@@ -193,7 +237,6 @@ func (p *setnsProcess) start() (retErr error) { + } + } + } +- + if err := utils.WriteJSON(p.comm.initSockParent, p.config); err != nil { + return fmt.Errorf("error writing config to pipe: %w", err) + } diff --git a/libcontainer/specconv/spec_linux.go b/libcontainer/specconv/spec_linux.go -index 7dbfb869..b59e0d59 100644 +index 95ada499..2d0db342 100644 --- a/libcontainer/specconv/spec_linux.go +++ b/libcontainer/specconv/spec_linux.go -@@ -493,6 +493,11 @@ func CreateLibcontainerConfig(opts *CreateOpts) (*configs.Config, error) { - Ambient: spec.Process.Capabilities.Ambient, - } +@@ -556,6 +556,11 @@ func CreateLibcontainerConfig(opts *CreateOpts) (*configs.Config, error) { + ioPriority := *spec.Process.IOPriority + config.IOPriority = &ioPriority } + config.ExecCPUAffinity, err = configs.ConvertCPUAffinity(spec.Process.ExecCPUAffinity) + if err != nil { @@ -454,10 +500,10 @@ index 00000000..f6adfa2a + [[ "$output" == *"Cpus_allowed_list: $final"* ]] # Mind the literal tab. 
+} diff --git a/utils_linux.go b/utils_linux.go -index 60d534e8..30204133 100644 +index feb6ef80..013dbcf4 100644 --- a/utils_linux.go +++ b/utils_linux.go -@@ -109,6 +109,12 @@ func newProcess(p specs.Process) (*libcontainer.Process, error) { +@@ -90,6 +90,12 @@ func newProcess(p specs.Process) (*libcontainer.Process, error) { } lp.Rlimits = append(lp.Rlimits, rl) } diff --git a/SPECS/runc.spec b/SPECS/runc.spec index e90fd43..0898523 100644 --- a/SPECS/runc.spec +++ b/SPECS/runc.spec @@ -1,8 +1,5 @@ %global with_check 0 -%global _find_debuginfo_dwz_opts %{nil} -%global _dwz_low_mem_die_limit 0 - %if 0%{?rhel} > 7 && ! 0%{?fedora} %define gobuild(o:) \ go build -buildmode pie -compiler gc -tags="rpm_crashtraceback libtrust_openssl ${BUILDTAGS:-}" -ldflags "${LDFLAGS:-} -linkmode=external -compressdwarf=false -B 0x$(head -c20 /dev/urandom|od -An -tx1|tr -d ' \\n') -extldflags '%__global_ldflags'" -a -v %{?**}; @@ -20,10 +17,10 @@ go build -buildmode pie -compiler gc -tags="rpm_crashtraceback libtrust_openssl %global import_path %{provider}.%{provider_tld}/%{project}/%{repo} %global git0 https://%{import_path} -Epoch: 1 +Epoch: 4 Name: %{repo} -Version: 1.1.12 -Release: 6%{?dist} +Version: 1.2.5 +Release: 2%{?dist} Summary: CLI for running Open Containers # https://fedoraproject.org/wiki/PackagingDrafts/Go#Go_Language_Architectures #ExclusiveArch: %%{go_arches} @@ -33,15 +30,21 @@ ExcludeArch: %{ix86} License: ASL 2.0 URL: %{git0} Source0: %{git0}/archive/v%{version}.tar.gz -Patch0: 0001-1.1-Bump-runtime-spec-to-latest-git-HEAD.patch -Patch1: 0002-1.1-runc-exec-implement-CPU-affinity.patch +Patch0: 0001-Bump-runtime-spec-to-latest-git-HEAD.patch +Patch1: 0002-runc-exec-implement-CPU-affinity.patch +Patch2: 0001-1.2.5-1.el9-CVEs-mega-patch.patch +Patch3: 0001-1.2-openat2-improve-resilience-on-busy-systems.patch +Patch4: 0002-1.2-rootfs-re-allow-dangling-symlinks-in-mount-targe.patch +Patch5: 0001-1.2-rootfs-only-set-mode-for-tmpfs-mount-if-target-alrea.patch 
Provides: oci-runtime -BuildRequires: golang >= 1.21.4 +BuildRequires: golang >= 1.22.4 BuildRequires: git BuildRequires: /usr/bin/go-md2man BuildRequires: libseccomp-devel >= 2.5 +BuildRequires: container-selinux >= 2.224.0 Requires: libseccomp >= 2.5 -Requires: criu +Recommends: criu +Requires: container-selinux >= 2.224.0 %description The runc command can be used to start containers which are packaged @@ -63,7 +66,7 @@ pushd GOPATH/src/%{import_path} export GO111MODULE=off export GOPATH=%{gopath}:$(pwd)/GOPATH export CGO_CFLAGS="%{optflags} -D_GNU_SOURCE -D_LARGEFILE_SOURCE -D_LARGEFILE64_SOURCE -D_FILE_OFFSET_BITS=64" -export BUILDTAGS="selinux seccomp no_openssl" +export BUILDTAGS="selinux seccomp runc_dmz_selinux_nocompat no_openssl" export LDFLAGS="-X main.gitCommit= -X main.version=%{version}" %gobuild -o %{name} %{import_path} @@ -87,6 +90,14 @@ make install install-man install-bash DESTDIR=$RPM_BUILD_ROOT PREFIX=%{_prefix} %{_datadir}/bash-completion/completions/%{name} %changelog +* Wed Nov 12 2025 Jindrich Novy - 4:1.2.5-2 +- fix permission regression +- Related: RHEL-122384 + +* Fri Nov 07 2025 Jindrich Novy - 4:1.2.5-1 +- fix CVE-2025-31133 CVE-2025-52565 CVE-2025-52881 +- Resolves: RHEL-122384 + * Mon Jan 20 2025 Jindrich Novy - 1:1.1.12-6 - Add CPU affinity feature from Kir Kolishkin - Resolves: RHEL-74865