From ae5ca948ccd2763e716c8b6dde572890429d70da Mon Sep 17 00:00:00 2001 From: Andrew Lukoshko Date: Fri, 10 Apr 2026 00:10:54 +0200 Subject: [PATCH] proc: fix a dentry lock race between release_task and lookup --- config.yaml | 4 + ...race-between-release_task-and-lookup.patch | 172 ++++++++++++++++++ 2 files changed, 176 insertions(+) create mode 100644 files/0001-proc-fix-a-dentry-lock-race-between-release_task-and-lookup.patch diff --git a/config.yaml b/config.yaml index 350f2c6..e173998 100644 --- a/config.yaml +++ b/config.yaml @@ -24,6 +24,9 @@ actions: - type: "patch" name: "0001-Make-KVM-PMU-symbols-global-for-ppc64le-module-build.patch" number: 2008 + - type: "patch" + name: "0001-proc-fix-a-dentry-lock-race-between-release_task-and-lookup.patch" + number: 2009 - type: "source" name: "almalinuxdup1.x509" number: 100 @@ -459,6 +462,7 @@ actions: - name: "Andrew Lukoshko" email: "alukoshko@almalinux.org" line: + - "proc: fix a dentry lock race between release_task and lookup" - "hpsa: bring back deprecated PCI ids #CFHack #CFHack2024" - "mptsas: bring back deprecated PCI ids #CFHack #CFHack2024" - "megaraid_sas: bring back deprecated PCI ids #CFHack #CFHack2024" diff --git a/files/0001-proc-fix-a-dentry-lock-race-between-release_task-and-lookup.patch b/files/0001-proc-fix-a-dentry-lock-race-between-release_task-and-lookup.patch new file mode 100644 index 0000000..231334b --- /dev/null +++ b/files/0001-proc-fix-a-dentry-lock-race-between-release_task-and-lookup.patch @@ -0,0 +1,172 @@ +From 9f855caf390cf2b760c65a60649c88a7d7d60b01 Mon Sep 17 00:00:00 2001 +From: Andrew Lukoshko +Date: Fri, 9 Jan 2026 10:09:48 +0000 +Subject: [PATCH] proc: fix a dentry lock race between release_task and lookup +MIME-Version: 1.0 +Content-Type: text/plain; charset=UTF-8 +Content-Transfer-Encoding: 8bit + +JIRA: https://issues.redhat.com/browse/RHEL-124568 + +commit d919a1e79bac890421537cf02ae773007bf55e6b +Author: Zhihao Cheng +Date: Wed Jul 13 21:00:29 2022 +0800 + + proc: fix 
a dentry lock race between release_task and lookup + + Commit 7bc3e6e55acf06 ("proc: Use a list of inodes to flush from proc") + moved proc_flush_task() behind __exit_signal(). Then, process systemd can + take long period high cpu usage during releasing task in following + concurrent processes: + + systemd ps + kernel_waitid stat(/proc/tgid) + do_wait filename_lookup + wait_consider_task lookup_fast + release_task + __exit_signal + __unhash_process + detach_pid + __change_pid // remove task->pid_links + d_revalidate -> pid_revalidate // 0 + d_invalidate(/proc/tgid) + shrink_dcache_parent(/proc/tgid) + d_walk(/proc/tgid) + spin_lock_nested(/proc/tgid/fd) + // iterating opened fd + proc_flush_pid | + d_invalidate (/proc/tgid/fd) | + shrink_dcache_parent(/proc/tgid/fd) | + shrink_dentry_list(subdirs) ↓ + shrink_lock_dentry(/proc/tgid/fd) --> race on dentry lock + + Function d_invalidate() will remove dentry from hash firstly, but why does + proc_flush_pid() process dentry '/proc/tgid/fd' before dentry + '/proc/tgid'? That's because proc_pid_make_inode() adds proc inode in + reverse order by invoking hlist_add_head_rcu(). But proc should not add + any inodes under '/proc/tgid' except '/proc/tgid/task/pid', fix it by + adding inode into 'pid->inodes' only if the inode is /proc/tgid or + /proc/tgid/task/pid. + + Performance regression: + Create 200 tasks, each task open one file for 50,000 times. Kill all + tasks when opened files exceed 10,000,000 (cat /proc/sys/fs/file-nr). + + Before fix: + $ time killall -wq aa + real 4m40.946s # During this period, we can see 'ps' and 'systemd' + taking high cpu usage. + + After fix: + $ time killall -wq aa + real 1m20.732s # During this period, we can see 'systemd' taking + high cpu usage. 
+ + Link: https://lkml.kernel.org/r/20220713130029.4133533-1-chengzhihao1@huawei.com + Fixes: 7bc3e6e55acf06 ("proc: Use a list of inodes to flush from proc") + Link: https://bugzilla.kernel.org/show_bug.cgi?id=216054 + Signed-off-by: Zhihao Cheng + Signed-off-by: Zhang Yi + Suggested-by: Brian Foster + Reviewed-by: Brian Foster + Cc: Al Viro + Cc: Alexey Dobriyan + Cc: Eric Biederman + Cc: Matthew Wilcox + Cc: Baoquan He + Cc: Kalesh Singh + Cc: Yu Kuai + Signed-off-by: Andrew Morton + +Signed-off-by: Andrew Lukoshko +--- + fs/proc/base.c | 46 ++++++++++++++++++++++++++++++++++++++-------- + 1 file changed, 38 insertions(+), 8 deletions(-) + +diff --git a/fs/proc/base.c b/fs/proc/base.c +index dbb251465954..67d1afedaa47 100644 +--- a/fs/proc/base.c ++++ b/fs/proc/base.c +@@ -1887,7 +1887,7 @@ void proc_pid_evict_inode(struct proc_inode *ei) + put_pid(pid); + } + +-struct inode *proc_pid_make_inode(struct super_block * sb, ++struct inode *proc_pid_make_inode(struct super_block *sb, + struct task_struct *task, umode_t mode) + { + struct inode * inode; +@@ -1916,11 +1916,6 @@ struct inode *proc_pid_make_inode(struct super_block * sb, + + /* Let the pid remember us for quick removal */ + ei->pid = pid; +- if (S_ISDIR(mode)) { +- spin_lock(&pid->lock); +- hlist_add_head_rcu(&ei->sibling_inodes, &pid->inodes); +- spin_unlock(&pid->lock); +- } + + task_dump_owner(task, 0, &inode->i_uid, &inode->i_gid); + security_task_to_inode(task, inode); +@@ -1933,6 +1928,39 @@ struct inode *proc_pid_make_inode(struct super_block * sb, + return NULL; + } + ++/* ++ * Generating an inode and adding it into @pid->inodes, so that task will ++ * invalidate inode's dentry before being released. ++ * ++ * This helper is used for creating dir-type entries under '/proc' and ++ * '/proc/<tgid>/task'. Other entries(eg. fd, stat) under '/proc/<tgid>' ++ * can be released by invalidating '/proc/<tgid>' dentry. 
++ * In theory, dentries under '/proc/<tgid>/task' can also be released by ++ * invalidating '/proc/<tgid>' dentry, we reserve it to handle single ++ * thread exiting situation: Any one of threads should invalidate its ++ * '/proc/<tgid>/task/<pid>' dentry before released. ++ */ ++static struct inode *proc_pid_make_base_inode(struct super_block *sb, ++ struct task_struct *task, umode_t mode) ++{ ++ struct inode *inode; ++ struct proc_inode *ei; ++ struct pid *pid; ++ ++ inode = proc_pid_make_inode(sb, task, mode); ++ if (!inode) ++ return NULL; ++ ++ /* Let proc_flush_pid find this directory inode */ ++ ei = PROC_I(inode); ++ pid = ei->pid; ++ spin_lock(&pid->lock); ++ hlist_add_head_rcu(&ei->sibling_inodes, &pid->inodes); ++ spin_unlock(&pid->lock); ++ ++ return inode; ++} ++ + int pid_getattr(struct mnt_idmap *idmap, const struct path *path, + struct kstat *stat, u32 request_mask, unsigned int query_flags) + { +@@ -3401,7 +3429,8 @@ static struct dentry *proc_pid_instantiate(struct dentry * dentry, + { + struct inode *inode; + +- inode = proc_pid_make_inode(dentry->d_sb, task, S_IFDIR | S_IRUGO | S_IXUGO); ++ inode = proc_pid_make_base_inode(dentry->d_sb, task, ++ S_IFDIR | S_IRUGO | S_IXUGO); + if (!inode) + return ERR_PTR(-ENOENT); + +@@ -3705,7 +3734,8 @@ static struct dentry *proc_task_instantiate(struct dentry *dentry, + struct task_struct *task, const void *ptr) + { + struct inode *inode; +- inode = proc_pid_make_inode(dentry->d_sb, task, S_IFDIR | S_IRUGO | S_IXUGO); ++ inode = proc_pid_make_base_inode(dentry->d_sb, task, ++ S_IFDIR | S_IRUGO | S_IXUGO); + if (!inode) + return ERR_PTR(-ENOENT); + +-- +2.43.5