From 2df42d4db6bc57ee914fa9cc4455ad3b8daff1d9 Mon Sep 17 00:00:00 2001 From: Aleksa Sarai Date: Sat, 1 Nov 2025 17:21:36 +1100 Subject: [PATCH 1/2] [1.3] openat2: improve resilience on busy systems Previously, we would see a ~3% failure rate when starting containers with mounts that contain ".." (which can trigger -EAGAIN). To counteract this, filepath-securejoin v0.5.1 includes a bump of the internal retry limit from 32 to 128, which lowers the failure rate to 0.12%. However, there is still a risk of spurious failure on regular systems. In order to try to provide more resilience (while avoiding DoS attacks), this patch also includes an additional retry loop that terminates based on a deadline rather than retry count. The deadline is 2ms, as my testing found that ~800us for a single pathrs operation was the longest latency due to -EAGAIN retries, and that was an outlier compared to the more common ~400us latencies -- so 2ms should be more than enough for any real system. The failure rates above were based on more 50k runs of runc with an attack script (from libpathrs) running a rename attack on all cores of a 16-core system, which is arguably a worst-case but heavily utilised servers could likely approach similar results. Signed-off-by: Aleksa Sarai Signed-off-by: Kir Kolyshkin --- go.mod | 2 +- go.sum | 4 +- internal/pathrs/mkdirall_pathrslite.go | 4 +- internal/pathrs/procfs_pathrslite.go | 22 ++++--- internal/pathrs/retry.go | 66 +++++++++++++++++++ internal/pathrs/root_pathrslite.go | 7 +- .../cyphar/filepath-securejoin/CHANGELOG.md | 34 +++++++++- .../cyphar/filepath-securejoin/VERSION | 2 +- .../internal/{errors.go => errors_linux.go} | 15 ++++- .../pathrs-lite/internal/fd/openat2_linux.go | 12 ++-- vendor/modules.txt | 2 +- 11 files changed, 144 insertions(+), 26 deletions(-) create mode 100644 internal/pathrs/retry.go rename vendor/github.com/cyphar/filepath-securejoin/pathrs-lite/internal/{errors.go => errors_linux.go} (70%) diff --git a/go.mod b/go.mod index f2deafc3..a551a4ec 100644 --- a/go.mod +++ b/go.mod @@ -6,7 +6,7 @@ require ( github.com/checkpoint-restore/go-criu/v6 v6.3.0 github.com/containerd/console v1.0.5 github.com/coreos/go-systemd/v22 v22.5.0 - github.com/cyphar/filepath-securejoin v0.5.0 + github.com/cyphar/filepath-securejoin v0.5.1 github.com/docker/go-units v0.5.0 github.com/godbus/dbus/v5 v5.1.0 github.com/moby/sys/capability v0.4.0 diff --git a/go.sum b/go.sum index ba395bf0..fb357b43 100644 --- a/go.sum +++ b/go.sum @@ -10,8 +10,8 @@ github.com/coreos/go-systemd/v22 v22.5.0/go.mod h1:Y58oyj3AT4RCenI/lSvhwexgC+NSV github.com/cpuguy83/go-md2man/v2 v2.0.2/go.mod h1:tgQtvFlXSQOSOSIRvRPT7W67SCa46tRHOmNcaadrF8o= github.com/cpuguy83/go-md2man/v2 v2.0.5 h1:ZtcqGrnekaHpVLArFSe4HK5DoKx1T0rq2DwVB0alcyc= github.com/cpuguy83/go-md2man/v2 v2.0.5/go.mod h1:tgQtvFlXSQOSOSIRvRPT7W67SCa46tRHOmNcaadrF8o= -github.com/cyphar/filepath-securejoin v0.5.0 h1:hIAhkRBMQ8nIeuVwcAoymp7MY4oherZdAxD+m0u9zaw= -github.com/cyphar/filepath-securejoin v0.5.0/go.mod h1:Sdj7gXlvMcPZsbhwhQ33GguGLDGQL7h7bg04C/+u9jI= +github.com/cyphar/filepath-securejoin v0.5.1 h1:eYgfMq5yryL4fbWfkLpFFy2ukSELzaJOTaUTuh+oF48= +github.com/cyphar/filepath-securejoin v0.5.1/go.mod h1:Sdj7gXlvMcPZsbhwhQ33GguGLDGQL7h7bg04C/+u9jI= github.com/davecgh/go-spew v1.1.0/go.mod h1:J7Y8YcW2NihsgmVo/mv3lAwl/skON4iLHjSsI+c5H38= github.com/davecgh/go-spew v1.1.1 h1:vj9j/u1bqnvCEfJOwUhtlOARqs3+rkHYY13jYWTU97c= github.com/davecgh/go-spew v1.1.1/go.mod h1:J7Y8YcW2NihsgmVo/mv3lAwl/skON4iLHjSsI+c5H38= diff --git a/internal/pathrs/mkdirall_pathrslite.go b/internal/pathrs/mkdirall_pathrslite.go index fb4f7842..a9a0157c 100644 --- a/internal/pathrs/mkdirall_pathrslite.go +++ b/internal/pathrs/mkdirall_pathrslite.go @@ -83,7 +83,9 @@ func MkdirAllInRootOpen(root, unsafePath string, mode os.FileMode) (*os.File, er } defer rootDir.Close() - return pathrs.MkdirAllHandle(rootDir, unsafePath, mode) + return retryEAGAIN(func() (*os.File, error) { + return pathrs.MkdirAllHandle(rootDir, unsafePath, mode) + }) } // MkdirAllInRoot is a wrapper around MkdirAllInRootOpen which closes the diff --git a/internal/pathrs/procfs_pathrslite.go b/internal/pathrs/procfs_pathrslite.go index a02b0d39..37450a0e 100644 --- a/internal/pathrs/procfs_pathrslite.go +++ b/internal/pathrs/procfs_pathrslite.go @@ -27,13 +27,15 @@ import ( ) func procOpenReopen(openFn func(subpath string) (*os.File, error), subpath string, flags int) (*os.File, error) { - handle, err := openFn(subpath) + handle, err := retryEAGAIN(func() (*os.File, error) { + return openFn(subpath) + }) if err != nil { return nil, err } defer handle.Close() - f, err := pathrs.Reopen(handle, flags) + f, err := Reopen(handle, flags) if err != nil { return nil, fmt.Errorf("reopen %s: %w", handle.Name(), err) } @@ -44,7 +46,7 @@ func procOpenReopen(openFn func(subpath string) (*os.File, error), subpath strin // [pathrs.Reopen], to let you one-shot open a procfs file with the given // flags. func ProcSelfOpen(subpath string, flags int) (*os.File, error) { - proc, err := procfs.OpenProcRoot() + proc, err := retryEAGAIN(procfs.OpenProcRoot) if err != nil { return nil, err } @@ -55,7 +57,7 @@ func ProcSelfOpen(subpath string, flags int) (*os.File, error) { // ProcPidOpen is a wrapper around [procfs.Handle.OpenPid] and [pathrs.Reopen], // to let you one-shot open a procfs file with the given flags. func ProcPidOpen(pid int, subpath string, flags int) (*os.File, error) { - proc, err := procfs.OpenProcRoot() + proc, err := retryEAGAIN(procfs.OpenProcRoot) if err != nil { return nil, err } @@ -70,13 +72,15 @@ func ProcPidOpen(pid int, subpath string, flags int) (*os.File, error) { // flags. The returned [procfs.ProcThreadSelfCloser] needs the same handling as // when using pathrs-lite. func ProcThreadSelfOpen(subpath string, flags int) (_ *os.File, _ procfs.ProcThreadSelfCloser, Err error) { - proc, err := procfs.OpenProcRoot() + proc, err := retryEAGAIN(procfs.OpenProcRoot) if err != nil { return nil, nil, err } defer proc.Close() - handle, closer, err := proc.OpenThreadSelf(subpath) + handle, closer, err := retryEAGAIN2(func() (*os.File, procfs.ProcThreadSelfCloser, error) { + return proc.OpenThreadSelf(subpath) + }) if err != nil { return nil, nil, err } @@ -89,7 +93,7 @@ func ProcThreadSelfOpen(subpath string, flags int) (_ *os.File, _ procfs.ProcThr } defer handle.Close() - f, err := pathrs.Reopen(handle, flags) + f, err := Reopen(handle, flags) if err != nil { return nil, nil, fmt.Errorf("reopen %s: %w", handle.Name(), err) } @@ -98,5 +102,7 @@ func ProcThreadSelfOpen(subpath string, flags int) (_ *os.File, _ procfs.ProcThr // Reopen is a wrapper around pathrs.Reopen. func Reopen(file *os.File, flags int) (*os.File, error) { - return pathrs.Reopen(file, flags) + return retryEAGAIN(func() (*os.File, error) { + return pathrs.Reopen(file, flags) + }) } diff --git a/internal/pathrs/retry.go b/internal/pathrs/retry.go new file mode 100644 index 00000000..a51d335c --- /dev/null +++ b/internal/pathrs/retry.go @@ -0,0 +1,66 @@ +// SPDX-License-Identifier: Apache-2.0 +/* + * Copyright (C) 2024-2025 Aleksa Sarai + * Copyright (C) 2024-2025 SUSE LLC + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package pathrs + +import ( + "errors" + "fmt" + "time" + + "golang.org/x/sys/unix" +) + +// Based on >50k tests running "runc run" on a 16-core system with very heavy +// rename(2) load, the single longest latency caused by -EAGAIN retries was +// ~800us (with the vast majority being closer to 400us). So, a 2ms limit +// should give more than enough headroom for any real system in practice. +const retryDeadline = 2 * time.Millisecond + +// retryEAGAIN is a top-level retry loop for pathrs to try to returning +// spurious errors in most normal user cases when using openat2 (libpathrs +// itself does up to 128 retries already, but this method takes a +// wallclock-deadline approach to simply retry until a timer elapses). +func retryEAGAIN[T any](fn func() (T, error)) (T, error) { + deadline := time.After(retryDeadline) + for { + v, err := fn() + if !errors.Is(err, unix.EAGAIN) { + return v, err + } + select { + case <-deadline: + return *new(T), fmt.Errorf("%v retry deadline exceeded: %w", retryDeadline, err) + default: + // retry + } + } +} + +// retryEAGAIN2 is like retryEAGAIN except it returns two values. +func retryEAGAIN2[T1, T2 any](fn func() (T1, T2, error)) (T1, T2, error) { + type ret struct { + v1 T1 + v2 T2 + } + v, err := retryEAGAIN(func() (ret, error) { + v1, v2, err := fn() + return ret{v1: v1, v2: v2}, err + }) + return v.v1, v.v2, err +} diff --git a/internal/pathrs/root_pathrslite.go b/internal/pathrs/root_pathrslite.go index 0ef81fae..899af270 100644 --- a/internal/pathrs/root_pathrslite.go +++ b/internal/pathrs/root_pathrslite.go @@ -31,12 +31,15 @@ import ( // is effectively shorthand for [securejoin.OpenInRoot] followed by // [securejoin.Reopen]. func OpenInRoot(root, subpath string, flags int) (*os.File, error) { - handle, err := pathrs.OpenInRoot(root, subpath) + handle, err := retryEAGAIN(func() (*os.File, error) { + return pathrs.OpenInRoot(root, subpath) + }) if err != nil { return nil, err } defer handle.Close() - return pathrs.Reopen(handle, flags) + + return Reopen(handle, flags) } // CreateInRoot creates a new file inside a root (as well as any missing parent diff --git a/vendor/github.com/cyphar/filepath-securejoin/CHANGELOG.md b/vendor/github.com/cyphar/filepath-securejoin/CHANGELOG.md index 6862467c..3faee0bc 100644 --- a/vendor/github.com/cyphar/filepath-securejoin/CHANGELOG.md +++ b/vendor/github.com/cyphar/filepath-securejoin/CHANGELOG.md @@ -4,7 +4,36 @@ All notable changes to this project will be documented in this file. The format is based on [Keep a Changelog](http://keepachangelog.com/) and this project adheres to [Semantic Versioning](http://semver.org/). -## [Unreleased] ## +## [Unreleased 0.5.z] ## + +## [0.5.1] - 2025-10-31 ## + +> Spooky scary skeletons send shivers down your spine! + +### Changed ### +- `openat2` can return `-EAGAIN` if it detects a possible attack in certain + scenarios (namely if there was a rename or mount while walking a path with a + `..` component). While this is necessary to avoid a denial-of-service in the + kernel, it does require retry loops in userspace. + + In previous versions, `pathrs-lite` would retry `openat2` 32 times before + returning an error, but we've received user reports that this limit can be + hit on systems with very heavy load. In some synthetic benchmarks (testing + the worst-case of an attacker doing renames in a tight loop on every core of + a 16-core machine) we managed to get a ~3% failure rate in runc. We have + improved this situation in two ways: + + * We have now increased this limit to 128, which should be good enough for + most use-cases without becoming a denial-of-service vector (the number of + syscalls called by the `O_PATH` resolver in a typical case is within the + same ballpark). The same benchmarks show a failure rate of ~0.12% which + (while not zero) is probably sufficient for most users. + + * In addition, we now return a `unix.EAGAIN` error that is bubbled up and can + be detected by callers. This means that callers with stricter requirements + to avoid spurious errors can choose to do their own infinite `EAGAIN` retry + loop (though we would strongly recommend users use time-based deadlines in + such retry loops to avoid potentially unbounded denials-of-service). ## [0.5.0] - 2025-09-26 ## @@ -354,7 +383,8 @@ This is our first release of `github.com/cyphar/filepath-securejoin`, containing a full implementation with a coverage of 93.5% (the only missing cases are the error cases, which are hard to mocktest at the moment). -[Unreleased]: https://github.com/cyphar/filepath-securejoin/compare/v0.5.0...HEAD +[Unreleased 0.5.z]: https://github.com/cyphar/filepath-securejoin/compare/v0.5.1...release-0.5 +[0.5.1]: https://github.com/cyphar/filepath-securejoin/compare/v0.5.0...v0.5.1 [0.5.0]: https://github.com/cyphar/filepath-securejoin/compare/v0.4.1...v0.5.0 [0.4.1]: https://github.com/cyphar/filepath-securejoin/compare/v0.4.0...v0.4.1 [0.4.0]: https://github.com/cyphar/filepath-securejoin/compare/v0.3.6...v0.4.0 diff --git a/vendor/github.com/cyphar/filepath-securejoin/VERSION b/vendor/github.com/cyphar/filepath-securejoin/VERSION index 8f0916f7..4b9fcbec 100644 --- a/vendor/github.com/cyphar/filepath-securejoin/VERSION +++ b/vendor/github.com/cyphar/filepath-securejoin/VERSION @@ -1 +1 @@ -0.5.0 +0.5.1 diff --git a/vendor/github.com/cyphar/filepath-securejoin/pathrs-lite/internal/errors.go b/vendor/github.com/cyphar/filepath-securejoin/pathrs-lite/internal/errors_linux.go similarity index 70% rename from vendor/github.com/cyphar/filepath-securejoin/pathrs-lite/internal/errors.go rename to vendor/github.com/cyphar/filepath-securejoin/pathrs-lite/internal/errors_linux.go index c26e440e..d0b200f4 100644 --- a/vendor/github.com/cyphar/filepath-securejoin/pathrs-lite/internal/errors.go +++ b/vendor/github.com/cyphar/filepath-securejoin/pathrs-lite/internal/errors_linux.go @@ -1,5 +1,7 @@ // SPDX-License-Identifier: MPL-2.0 +//go:build linux + // Copyright (C) 2024-2025 Aleksa Sarai // Copyright (C) 2024-2025 SUSE LLC // @@ -12,15 +14,24 @@ package internal import ( "errors" + + "golang.org/x/sys/unix" ) +type xdevErrorish struct { + description string +} + +func (err xdevErrorish) Error() string { return err.description } +func (err xdevErrorish) Is(target error) bool { return target == unix.EXDEV } + var ( // ErrPossibleAttack indicates that some attack was detected. - ErrPossibleAttack = errors.New("possible attack detected") + ErrPossibleAttack error = xdevErrorish{"possible attack detected"} // ErrPossibleBreakout indicates that during an operation we ended up in a // state that could be a breakout but we detected it. - ErrPossibleBreakout = errors.New("possible breakout detected") + ErrPossibleBreakout error = xdevErrorish{"possible breakout detected"} // ErrInvalidDirectory indicates an unlinked directory. ErrInvalidDirectory = errors.New("wandered into deleted directory") diff --git a/vendor/github.com/cyphar/filepath-securejoin/pathrs-lite/internal/fd/openat2_linux.go b/vendor/github.com/cyphar/filepath-securejoin/pathrs-lite/internal/fd/openat2_linux.go index 23053083..3e937fe3 100644 --- a/vendor/github.com/cyphar/filepath-securejoin/pathrs-lite/internal/fd/openat2_linux.go +++ b/vendor/github.com/cyphar/filepath-securejoin/pathrs-lite/internal/fd/openat2_linux.go @@ -17,8 +17,6 @@ import ( "runtime" "golang.org/x/sys/unix" - - "github.com/cyphar/filepath-securejoin/pathrs-lite/internal" ) func scopedLookupShouldRetry(how *unix.OpenHow, err error) bool { @@ -34,7 +32,10 @@ func scopedLookupShouldRetry(how *unix.OpenHow, err error) bool { (errors.Is(err, unix.EAGAIN) || errors.Is(err, unix.EXDEV)) } -const scopedLookupMaxRetries = 32 +// This is a fairly arbitrary limit we have just to avoid an attacker being +// able to make us spin in an infinite retry loop -- callers can choose to +// retry on EAGAIN if they prefer. +const scopedLookupMaxRetries = 128 // Openat2 is an [Fd]-based wrapper around unix.Openat2, but with some retry // logic in case of EAGAIN errors. @@ -43,10 +44,10 @@ func Openat2(dir Fd, path string, how *unix.OpenHow) (*os.File, error) { // Make sure we always set O_CLOEXEC. how.Flags |= unix.O_CLOEXEC var tries int - for tries < scopedLookupMaxRetries { + for { fd, err := unix.Openat2(dirFd, path, how) if err != nil { - if scopedLookupShouldRetry(how, err) { + if scopedLookupShouldRetry(how, err) && tries < scopedLookupMaxRetries { // We retry a couple of times to avoid the spurious errors, and // if we are being attacked then returning -EAGAIN is the best // we can do. @@ -58,5 +59,4 @@ func Openat2(dir Fd, path string, how *unix.OpenHow) (*os.File, error) { runtime.KeepAlive(dir) return os.NewFile(uintptr(fd), fullPath), nil } - return nil, &os.PathError{Op: "openat2", Path: fullPath, Err: internal.ErrPossibleAttack} } diff --git a/vendor/modules.txt b/vendor/modules.txt index f22001c8..18276b61 100644 --- a/vendor/modules.txt +++ b/vendor/modules.txt @@ -27,7 +27,7 @@ github.com/coreos/go-systemd/v22/dbus # github.com/cpuguy83/go-md2man/v2 v2.0.5 ## explicit; go 1.11 github.com/cpuguy83/go-md2man/v2/md2man -# github.com/cyphar/filepath-securejoin v0.5.0 +# github.com/cyphar/filepath-securejoin v0.5.1 ## explicit; go 1.18 github.com/cyphar/filepath-securejoin github.com/cyphar/filepath-securejoin/internal/consts -- 2.51.1