New upstream release 5.19.0

Upstream now has these patches:
	c1c71781 mkfs: update manpage of bigtime and inobtcount
	1c08f0ae mkfs: enable inobtcount and bigtime by default

* So do not amend the man page and do not change the default option
	values for inobtcount and bigtime.
* But continue turning off inobtcount and bigtime for kernels older
	than 5.10

Unlike upstream RHEL-9 will continue to support tiny filesystems,
but add warning about deprecation.

Backport all "Fixing" patches relevant to 5.19

Resolves: rhbz#2142910
Signed-off-by: Pavel Reichl <preichl@redhat.com>
This commit is contained in:
Pavel Reichl 2023-01-10 15:43:10 +01:00
parent c312099f9d
commit 17fdafd9b5
15 changed files with 1118 additions and 91 deletions

2
.gitignore vendored
View File

@ -57,3 +57,5 @@ xfsprogs-3.1.2.tar.gz
/xfsprogs-5.14.0.tar.xz
/xfsprogs-5.14.2.tar.sign
/xfsprogs-5.14.2.tar.xz
/xfsprogs-5.19.0.tar.xz
/xfsprogs-5.19.0.tar.sign

View File

@ -1,2 +1,2 @@
SHA512 (xfsprogs-5.14.2.tar.sign) = 4f61784b4c95b596f8923a54a9a2bcce6381d19c0a8b3728de274797b0bb8a8b73f426f68efde1125416088b4012a2f13c7196b250d914ac3bb576df8c9705d5
SHA512 (xfsprogs-5.14.2.tar.xz) = cb9794247a2eed8bd9f10ff811d38f97e4adbed97bad334b0fa8a26503ec224237eddd92fd78892584a153967417ab2009fb2fc8bab6ef12cd53fd20bdba1b4e
SHA512 (xfsprogs-5.19.0.tar.xz) = 0b069cd44eb87052ee55ebffcce736ef103adeda539e854bd86d53942f541c07d74cc174f06a2221230eaa82b21d43327833492cd0ded577d442e1935a0abe7d
SHA512 (xfsprogs-5.19.0.tar.sign) = a87766fa2c40a3e6990cb84f85bd687469b59baf075561ca1867e53490ce120bd87a4b11d9843d4820b2d8b6bdc7d56d6e090eed223bc3b663cf999a5a443597

View File

@ -1,85 +0,0 @@
--- xfsprogs-5.12.0.orig/man/man8/mkfs.xfs.8
+++ xfsprogs-5.12.0/man/man8/mkfs.xfs.8
@@ -203,7 +203,7 @@ December 1901 to January 2038, and quota
.IP
By default,
.B mkfs.xfs
-will not enable this feature.
+in RHEL9 will enable this feature.
If the option
.B \-m crc=0
is used, the large timestamp feature is not supported and is disabled.
@@ -256,7 +256,7 @@ This can be used to reduce mount times w
.IP
By default,
.B mkfs.xfs
-will not enable this option.
+in RHEL9 will enable this option.
This feature is only available for filesystems created with the (default)
.B \-m finobt=1
option set.
--- xfsprogs-5.12.0.orig/mkfs/xfs_mkfs.c
+++ xfsprogs-5.12.0/mkfs/xfs_mkfs.c
@@ -3795,6 +3797,23 @@ cfgfile_parse(
cli->cfgfile);
}
+static unsigned int get_system_kver(void)
+{
+ const char *kver = getenv("KVER");
+ struct utsname utsname;
+ int a, b, c;
+
+ if (!kver) {
+ uname(&utsname);
+ kver = utsname.release;
+ }
+
+ if (sscanf(kver, "%d.%d.%d", &a, &b, &c) != 3)
+ return LINUX_VERSION_CODE;
+
+ return KERNEL_VERSION(a,b,c);
+}
+
int
main(
int argc,
@@ -3848,17 +3867,25 @@ main(
.spinodes = true,
.rmapbt = false,
.reflink = true,
- .inobtcnt = false,
+ .inobtcnt = true,
.parent_pointers = false,
.nodalign = false,
.nortalign = false,
- .bigtime = false,
+ .bigtime = true,
},
};
struct list_head buffer_list;
+ unsigned int kver;
int error;
+ /* turn bigtime & inobtcnt back off if running under older kernels */
+ kver = get_system_kver();
+ if (kver < KERNEL_VERSION(5,10,0)) {
+ dft.sb_feat.inobtcnt = false;
+ dft.sb_feat.bigtime = false;
+ }
+
platform_uuid_generate(&cli.uuid);
progname = basename(argv[0]);
setlocale(LC_ALL, "");
--- xfsprogs-5.14.0/mkfs/xfs_mkfs.c.orig
+++ xfsprogs-5.14.0/mkfs/xfs_mkfs.c
@@ -12,6 +12,8 @@
#include "libfrog/convert.h"
#include "proto.h"
#include <ini.h>
+#include <linux/version.h>
+#include <sys/utsname.h>
#define TERABYTES(count, blog) ((uint64_t)(count) << (40 - (blog)))
#define GIGABYTES(count, blog) ((uint64_t)(count) << (30 - (blog)))

View File

@ -1,5 +1,5 @@
--- xfsprogs-5.12.0/man/man8/mkfs.xfs.8.backup 2021-12-01 20:48:09.241170607 +0100
+++ xfsprogs-5.12.0/man/man8/mkfs.xfs.8 2021-12-03 15:34:32.382616819 +0100
--- a/man/man8/mkfs.xfs.8.in 2021-12-01 20:48:09.241170607 +0100
+++ b/man/man8/mkfs.xfs.8.in 2021-12-03 15:34:32.382616819 +0100
@@ -1091,6 +1091,12 @@
.HP
.PD

View File

@ -0,0 +1,52 @@
--- a/mkfs/xfs_mkfs.c.orig 2022-08-12 20:38:21.000000000 +0200
+++ b/mkfs/xfs_mkfs.c 2023-01-25 11:06:01.863076713 +0100
@@ -13,6 +13,8 @@
#include "libfrog/crc32cselftest.h"
#include "proto.h"
#include <ini.h>
+#include <linux/version.h>
+#include <sys/utsname.h>
#define TERABYTES(count, blog) ((uint64_t)(count) << (40 - (blog)))
#define GIGABYTES(count, blog) ((uint64_t)(count) << (30 - (blog)))
@@ -3998,6 +4000,23 @@
cli->cfgfile);
}
+static unsigned int get_system_kver(void)
+{
+ const char *kver = getenv("KVER");
+ struct utsname utsname;
+ int a, b, c;
+
+ if (!kver) {
+ uname(&utsname);
+ kver = utsname.release;
+ }
+
+ if (sscanf(kver, "%d.%d.%d", &a, &b, &c) != 3)
+ return LINUX_VERSION_CODE;
+
+ return KERNEL_VERSION(a,b,c);
+}
+
int
main(
int argc,
@@ -4077,8 +4096,16 @@
};
struct list_head buffer_list;
+ unsigned int kver;
int error;
+ /* turn bigtime & inobtcnt back off if running under older kernels */
+ kver = get_system_kver();
+ if (kver < KERNEL_VERSION(5,10,0)) {
+ dft.sb_feat.inobtcnt = false;
+ dft.sb_feat.bigtime = false;
+ }
+
platform_uuid_generate(&cli.uuid);
progname = basename(argv[0]);
setlocale(LC_ALL, "");

View File

@ -0,0 +1,91 @@
From 17b691400e8ce0755bb1d7a33490fbc014067e5e Mon Sep 17 00:00:00 2001
From: Pavel Reichl <preichl@redhat.com>
Date: Fri, 27 Jan 2023 06:30:20 +0100
Subject: [PATCH] mkfs: tolerate tiny filesystems
Signed-off-by: Pavel Reichl <preichl@redhat.com>
---
man/man8/mkfs.xfs.8.in | 4 ++--
mkfs/xfs_mkfs.c | 23 ++++++++++++++---------
2 files changed, 16 insertions(+), 11 deletions(-)
diff --git a/man/man8/mkfs.xfs.8.in b/man/man8/mkfs.xfs.8.in
index 211e7b0c..03f0fda8 100644
--- a/man/man8/mkfs.xfs.8.in
+++ b/man/man8/mkfs.xfs.8.in
@@ -405,7 +405,7 @@ is required if
is given. Otherwise, it is only needed if the filesystem should occupy
less space than the size of the special file.
-The data section must be at least 300MB in size.
+The data section should be at least 300MB in size.
.TP
.BI sunit= value
This is used to specify the stripe unit for a RAID device or a
@@ -705,7 +705,7 @@ described above. The overriding minimum value for size is 512 blocks.
With some combinations of filesystem block size, inode size,
and directory block size, the minimum log size is larger than 512 blocks.
-The log must be at least 64MB in size.
+The log should be at least 64MB in size.
The log cannot be more than 2GB in size.
.TP
.BI version= value
diff --git a/mkfs/xfs_mkfs.c b/mkfs/xfs_mkfs.c
index 9dd0e79c..72c906d6 100644
--- a/mkfs/xfs_mkfs.c
+++ b/mkfs/xfs_mkfs.c
@@ -2503,6 +2503,8 @@ validate_supported(
struct xfs_mount *mp,
struct cli_params *cli)
{
+ bool deprecated = false;
+
/* Undocumented option to enable unsupported tiny filesystems. */
if (!cli->is_supported) {
printf(
@@ -2532,9 +2534,8 @@ validate_supported(
* 64MB * (8 / 7) * 4 = 293MB
*/
if (mp->m_sb.sb_dblocks < MEGABYTES(300, mp->m_sb.sb_blocklog)) {
- fprintf(stderr,
- _("Filesystem must be larger than 300MB.\n"));
- usage();
+ printf(_("Filesystem should be larger than 300MB.\n"));
+ deprecated = true;
}
/*
@@ -2543,9 +2544,8 @@ validate_supported(
*/
if (mp->m_sb.sb_logblocks <
XFS_MIN_REALISTIC_LOG_BLOCKS(mp->m_sb.sb_blocklog)) {
- fprintf(stderr,
- _("Log size must be at least 64MB.\n"));
- usage();
+ printf( _("Log size should be at least 64MB.\n"));
+ deprecated = true;
}
/*
@@ -2553,9 +2553,14 @@ validate_supported(
* have redundant superblocks.
*/
if (mp->m_sb.sb_agcount < 2) {
- fprintf(stderr,
- _("Filesystem must have at least 2 superblocks for redundancy!\n"));
- usage();
+ printf(
+ _("Filesystem should have at least 2 superblocks for redundancy!\n"));
+ deprecated = true;
+ }
+
+ if (deprecated) {
+ printf(
+_("Support for filesystems like this one is deprecated and they will not be supported in future releases.\n"));
}
}
--
2.39.1

View File

@ -0,0 +1,113 @@
From b445624f0882badf00da739c52e58a85c18ae002 Mon Sep 17 00:00:00 2001
From: "Darrick J. Wong" <djwong@kernel.org>
Date: Wed, 15 Mar 2023 15:56:35 +0100
Subject: [PATCH] xfs: estimate post-merge refcounts correctly
Source kernel commit: b25d1984aa884fc91a73a5a407b9ac976d441e9b
Upon enabling fsdax + reflink for XFS, xfs/179 began to report refcount
metadata corruptions after being run. Specifically, xfs_repair noticed
single-block refcount records that could be combined but had not been.
The root cause of this is improper MAXREFCOUNT edge case handling in
xfs_refcount_merge_extents. When we're trying to find candidates for a
refcount btree record merge, we compute the refcount attribute of the
merged record, but we fail to account for the fact that once a record
hits rc_refcount == MAXREFCOUNT, it is pinned that way forever. Hence
the computed refcount is wrong, and we fail to merge the extents.
Fix this by adjusting the merge predicates to compute the adjusted
refcount correctly.
Fixes: 3172725814f9 ("xfs: adjust refcount of an extent of blocks in refcount btree")
Signed-off-by: Darrick J. Wong <djwong@kernel.org>
Reviewed-by: Dave Chinner <dchinner@redhat.com>
Reviewed-by: Xiao Yang <yangx.jy@fujitsu.com>
Signed-off-by: Carlos Maiolino <cem@kernel.org>
Signed-off-by: Pavel Reichl <preichl@redhat.com>
---
libxfs/xfs_refcount.c | 25 +++++++++++++++++++++----
1 file changed, 21 insertions(+), 4 deletions(-)
diff --git a/libxfs/xfs_refcount.c b/libxfs/xfs_refcount.c
index f6167c5f..29258bdd 100644
--- a/libxfs/xfs_refcount.c
+++ b/libxfs/xfs_refcount.c
@@ -819,6 +819,17 @@ xfs_refc_valid(
return rc->rc_startblock != NULLAGBLOCK;
}
+static inline xfs_nlink_t
+xfs_refc_merge_refcount(
+ const struct xfs_refcount_irec *irec,
+ enum xfs_refc_adjust_op adjust)
+{
+ /* Once a record hits MAXREFCOUNT, it is pinned there forever */
+ if (irec->rc_refcount == MAXREFCOUNT)
+ return MAXREFCOUNT;
+ return irec->rc_refcount + adjust;
+}
+
static inline bool
xfs_refc_want_merge_center(
const struct xfs_refcount_irec *left,
@@ -830,6 +841,7 @@ xfs_refc_want_merge_center(
unsigned long long *ulenp)
{
unsigned long long ulen = left->rc_blockcount;
+ xfs_nlink_t new_refcount;
/*
* To merge with a center record, both shoulder records must be
@@ -845,9 +857,10 @@ xfs_refc_want_merge_center(
return false;
/* The shoulder record refcounts must match the new refcount. */
- if (left->rc_refcount != cleft->rc_refcount + adjust)
+ new_refcount = xfs_refc_merge_refcount(cleft, adjust);
+ if (left->rc_refcount != new_refcount)
return false;
- if (right->rc_refcount != cleft->rc_refcount + adjust)
+ if (right->rc_refcount != new_refcount)
return false;
/*
@@ -870,6 +883,7 @@ xfs_refc_want_merge_left(
enum xfs_refc_adjust_op adjust)
{
unsigned long long ulen = left->rc_blockcount;
+ xfs_nlink_t new_refcount;
/*
* For a left merge, the left shoulder record must be adjacent to the
@@ -880,7 +894,8 @@ xfs_refc_want_merge_left(
return false;
/* Left shoulder record refcount must match the new refcount. */
- if (left->rc_refcount != cleft->rc_refcount + adjust)
+ new_refcount = xfs_refc_merge_refcount(cleft, adjust);
+ if (left->rc_refcount != new_refcount)
return false;
/*
@@ -902,6 +917,7 @@ xfs_refc_want_merge_right(
enum xfs_refc_adjust_op adjust)
{
unsigned long long ulen = right->rc_blockcount;
+ xfs_nlink_t new_refcount;
/*
* For a right merge, the right shoulder record must be adjacent to the
@@ -912,7 +928,8 @@ xfs_refc_want_merge_right(
return false;
/* Right shoulder record refcount must match the new refcount. */
- if (right->rc_refcount != cright->rc_refcount + adjust)
+ new_refcount = xfs_refc_merge_refcount(cright, adjust);
+ if (right->rc_refcount != new_refcount)
return false;
/*
--
2.40.0

View File

@ -0,0 +1,88 @@
From a68dabd45f3591456ecf7e35f6a6077db79f6bc6 Mon Sep 17 00:00:00 2001
From: "Darrick J. Wong" <djwong@kernel.org>
Date: Wed, 15 Mar 2023 15:59:35 +0100
Subject: [PATCH] xfs: fix off-by-one error in xfs_btree_space_to_height
Source kernel commit: c0f399ff51495ac8d30367418f4f6292ecd61fbe
Lately I've been stress-testing extreme-sized rmap btrees by using the
(new) xfs_db bmap_inflate command to clone bmbt mappings billions of
times and then using xfs_repair to build new rmap and refcount btrees.
This of course is /much/ faster than actually FICLONEing a file billions
of times.
Unfortunately, xfs_repair fails in xfs_btree_bload_compute_geometry with
EOVERFLOW, which indicates that xfs_mount.m_rmap_maxlevels is not
sufficiently large for the test scenario. For a 1TB filesystem (~67
million AG blocks, 4 AGs) the btheight command reports:
$ xfs_db -c 'btheight -n 4400801200 -w min rmapbt' /dev/sda
rmapbt: worst case per 4096-byte block: 84 records (leaf) / 45 keyptrs (node)
level 0: 4400801200 records, 52390491 blocks
level 1: 52390491 records, 1164234 blocks
level 2: 1164234 records, 25872 blocks
level 3: 25872 records, 575 blocks
level 4: 575 records, 13 blocks
level 5: 13 records, 1 block
6 levels, 53581186 blocks total
The AG is sufficiently large to build this rmap btree. Unfortunately,
m_rmap_maxlevels is 5. Augmenting the loop in the space->height
function to report height, node blocks, and blocks remaining produces
this:
ht 1 node_blocks 45 blockleft 67108863
ht 2 node_blocks 2025 blockleft 67108818
ht 3 node_blocks 91125 blockleft 67106793
ht 4 node_blocks 4100625 blockleft 67015668
final height: 5
The goal of this function is to compute the maximum height btree that
can be stored in the given number of ondisk fsblocks. Starting with the
top level of the tree, each iteration through the loop adds the fanout
factor of the next level down until we run out of blocks. IOWs, maximum
height is achieved by using the smallest fanout factor that can apply
to that level.
However, the loop setup is not correct. Top level btree blocks are
allowed to contain fewer than minrecs items, so the computation is
incorrect because the first time through the loop it should be using a
fanout factor of 2. With this corrected, the above becomes:
ht 1 node_blocks 2 blockleft 67108863
ht 2 node_blocks 90 blockleft 67108861
ht 3 node_blocks 4050 blockleft 67108771
ht 4 node_blocks 182250 blockleft 67104721
ht 5 node_blocks 8201250 blockleft 66922471
final height: 6
Fixes: 9ec691205e7d ("xfs: compute the maximum height of the rmap btree when reflink enabled")
Signed-off-by: Darrick J. Wong <djwong@kernel.org>
Reviewed-by: Dave Chinner <dchinner@redhat.com>
Signed-off-by: Carlos Maiolino <cem@kernel.org>
Signed-off-by: Pavel Reichl <preichl@redhat.com>
---
libxfs/xfs_btree.c | 7 ++++++-
1 file changed, 6 insertions(+), 1 deletion(-)
diff --git a/libxfs/xfs_btree.c b/libxfs/xfs_btree.c
index 65d38637..38a3092d 100644
--- a/libxfs/xfs_btree.c
+++ b/libxfs/xfs_btree.c
@@ -4663,7 +4663,12 @@ xfs_btree_space_to_height(
const unsigned int *limits,
unsigned long long leaf_blocks)
{
- unsigned long long node_blocks = limits[1];
+ /*
+ * The root btree block can have fewer than minrecs pointers in it
+ * because the tree might not be big enough to require that amount of
+ * fanout. Hence it has a minimum size of 2 pointers, not limits[1].
+ */
+ unsigned long long node_blocks = 2;
unsigned long long blocks_left = leaf_blocks - 1;
unsigned int height = 1;
--
2.40.0

View File

@ -0,0 +1,119 @@
From b827e2318ea2bb3eabca13a965c2535a1d7289e5 Mon Sep 17 00:00:00 2001
From: Long Li <leo.lilong@huawei.com>
Date: Fri, 18 Nov 2022 12:23:57 +0100
Subject: [PATCH] xfs: fix sb write verify for lazysbcount
Source kernel commit: 7cecd500d90164419add650e26cc1de03a7a66cb
When lazysbcount is enabled, fsstress and loop mount/unmount test report
the following problems:
XFS (loop0): SB summary counter sanity check failed
XFS (loop0): Metadata corruption detected at xfs_sb_write_verify+0x13b/0x460,
xfs_sb block 0x0
XFS (loop0): Unmount and run xfs_repair
XFS (loop0): First 128 bytes of corrupted metadata buffer:
00000000: 58 46 53 42 00 00 10 00 00 00 00 00 00 28 00 00 XFSB.........(..
00000010: 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 ................
00000020: 69 fb 7c cd 5f dc 44 af 85 74 e0 cc d4 e3 34 5a i.|._.D..t....4Z
00000030: 00 00 00 00 00 20 00 06 00 00 00 00 00 00 00 80 ..... ..........
00000040: 00 00 00 00 00 00 00 81 00 00 00 00 00 00 00 82 ................
00000050: 00 00 00 01 00 0a 00 00 00 00 00 04 00 00 00 00 ................
00000060: 00 00 0a 00 b4 b5 02 00 02 00 00 08 00 00 00 00 ................
00000070: 00 00 00 00 00 00 00 00 0c 09 09 03 14 00 00 19 ................
XFS (loop0): Corruption of in-memory data (0x8) detected at _xfs_buf_ioapply
+0xe1e/0x10e0 (fs/xfs/xfs_buf.c:1580). Shutting down filesystem.
XFS (loop0): Please unmount the filesystem and rectify the problem(s)
XFS (loop0): log mount/recovery failed: error -117
XFS (loop0): log mount failed
This corruption will shutdown the file system and the file system will
no longer be mountable. The following script can reproduce the problem,
but it may take a long time.
#!/bin/bash
device=/dev/sda
testdir=/mnt/test
round=0
function fail()
{
echo "$*"
exit 1
}
mkdir -p $testdir
while [ $round -lt 10000 ]
do
echo "******* round $round ********"
mkfs.xfs -f $device
mount $device $testdir || fail "mount failed!"
fsstress -d $testdir -l 0 -n 10000 -p 4 >/dev/null &
sleep 4
killall -w fsstress
umount $testdir
xfs_repair -e $device > /dev/null
if [ $? -eq 2 ];then
echo "ERR CODE 2: Dirty log exception during repair."
exit 1
fi
round=$(($round+1))
done
With lazysbcount is enabled, There is no additional lock protection for
reading m_ifree and m_icount in xfs_log_sb(), if other cpu modifies the
m_ifree, this will make the m_ifree greater than m_icount. For example,
consider the following sequence and ifreedelta is postive:
CPU0 CPU1
xfs_log_sb xfs_trans_unreserve_and_mod_sb
---------- ------------------------------
percpu_counter_sum(&mp->m_icount)
percpu_counter_add_batch(&mp->m_icount,
idelta, XFS_ICOUNT_BATCH)
percpu_counter_add(&mp->m_ifree, ifreedelta);
percpu_counter_sum(&mp->m_ifree)
After this, incorrect inode count (sb_ifree > sb_icount) will be writen to
the log. In the subsequent writing of sb, incorrect inode count (sb_ifree >
sb_icount) will fail to pass the boundary check in xfs_validate_sb_write()
that cause the file system shutdown.
When lazysbcount is enabled, we don't need to guarantee that Lazy sb
counters are completely correct, but we do need to guarantee that sb_ifree
<= sb_icount. On the other hand, the constraint that m_ifree <= m_icount
must be satisfied any time that there /cannot/ be other threads allocating
or freeing inode chunks. If the constraint is violated under these
circumstances, sb_i{count,free} (the ondisk superblock inode counters)
maybe incorrect and need to be marked sick at unmount, the count will
be rebuilt on the next mount.
Fixes: 8756a5af1819 ("libxfs: add more bounds checking to sb sanity checks")
Signed-off-by: Long Li <leo.lilong@huawei.com>
Reviewed-by: Darrick J. Wong <djwong@kernel.org>
Signed-off-by: Darrick J. Wong <djwong@kernel.org>
Signed-off-by: Carlos Maiolino <cem@kernel.org>
Signed-off-by: Pavel Reichl <preichl@redhat.com>
---
libxfs/xfs_sb.c | 4 +++-
1 file changed, 3 insertions(+), 1 deletion(-)
diff --git a/libxfs/xfs_sb.c b/libxfs/xfs_sb.c
index fc33dc4a..d05f0e6e 100644
--- a/libxfs/xfs_sb.c
+++ b/libxfs/xfs_sb.c
@@ -970,7 +970,9 @@ xfs_log_sb(
*/
if (xfs_has_lazysbcount(mp)) {
mp->m_sb.sb_icount = percpu_counter_sum(&mp->m_icount);
- mp->m_sb.sb_ifree = percpu_counter_sum(&mp->m_ifree);
+ mp->m_sb.sb_ifree = min_t(uint64_t,
+ percpu_counter_sum(&mp->m_ifree),
+ mp->m_sb.sb_icount);
mp->m_sb.sb_fdblocks = percpu_counter_sum(&mp->m_fdblocks);
}
--
2.40.0

View File

@ -0,0 +1,73 @@
From f5ef812888a81be534466fa34df747c16bb65b7f Mon Sep 17 00:00:00 2001
From: Guo Xuenan <guoxuenan@huawei.com>
Date: Wed, 15 Mar 2023 15:57:35 +0100
Subject: [PATCH] xfs: get rid of assert from xfs_btree_islastblock
Source kernel commit: 8c25febf23963431686f04874b96321288504127
xfs_btree_check_block contains debugging knobs. With XFS_DEBUG setting up,
turn on the debugging knob can trigger the assert of xfs_btree_islastblock,
test script as follows:
while true
do
mount $disk $mountpoint
fsstress -d $testdir -l 0 -n 10000 -p 4 >/dev/null
echo 1 > /sys/fs/xfs/sda/errortag/btree_chk_sblk
sleep 10
umount $mountpoint
done
Kick off fsstress and only *then* turn on the debugging knob. If it
happens that the knob gets turned on after the cntbt lookup succeeds
but before the call to xfs_btree_islastblock, then we *can* end up in
the situation where a previously checked btree block suddenly starts
returning EFSCORRUPTED from xfs_btree_check_block. Kaboom.
Darrick give a very detailed explanation as follows:
Looking back at commit 27d9ee577dcce, I think the point of all this was
to make sure that the cursor has actually performed a lookup, and that
the btree block at whatever level we're asking about is ok.
If the caller hasn't ever done a lookup, the bc_levels array will be
empty, so cur->bc_levels[level].bp pointer will be NULL. The call to
xfs_btree_get_block will crash anyway, so the "ASSERT(block);" part is
pointless.
If the caller did a lookup but the lookup failed due to block
corruption, the corresponding cur->bc_levels[level].bp pointer will also
be NULL, and we'll still crash. The "ASSERT(xfs_btree_check_block);"
logic is also unnecessary.
If the cursor level points to an inode root, the block buffer will be
incore, so it had better always be consistent.
If the caller ignores a failed lookup after a successful one and calls
this function, the cursor state is garbage and the assert wouldn't have
tripped anyway. So get rid of the assert.
Fixes: 27d9ee577dcc ("xfs: actually check xfs_btree_check_block return in xfs_btree_islastblock")
Signed-off-by: Guo Xuenan <guoxuenan@huawei.com>
Reviewed-by: Darrick J. Wong <djwong@kernel.org>
Signed-off-by: Darrick J. Wong <djwong@kernel.org>
Signed-off-by: Carlos Maiolino <cem@kernel.org>
Signed-off-by: Pavel Reichl <preichl@redhat.com>
---
libxfs/xfs_btree.h | 1 -
1 file changed, 1 deletion(-)
diff --git a/libxfs/xfs_btree.h b/libxfs/xfs_btree.h
index eef27858..29c4b4cc 100644
--- a/libxfs/xfs_btree.h
+++ b/libxfs/xfs_btree.h
@@ -556,7 +556,6 @@ xfs_btree_islastblock(
struct xfs_buf *bp;
block = xfs_btree_get_block(cur, level, &bp);
- ASSERT(block && xfs_btree_check_block(cur, block, level, bp) == 0);
if (cur->bc_flags & XFS_BTREE_LONG_PTRS)
return block->bb_u.l.bb_rightsib == cpu_to_be64(NULLFSBLOCK);
--
2.40.0

View File

@ -0,0 +1,187 @@
From d1dca9f6b365e439878e550ed0c801bbfb6d347b Mon Sep 17 00:00:00 2001
From: "Darrick J. Wong" <djwong@kernel.org>
Date: Wed, 15 Mar 2023 15:55:35 +0100
Subject: [PATCH] xfs: hoist refcount record merge predicates
Source kernel commit: 9d720a5a658f5135861773f26e927449bef93d61
Hoist these multiline conditionals into separate static inline helpers
to improve readability and set the stage for corruption fixes that will
be introduced in the next patch.
Signed-off-by: Darrick J. Wong <djwong@kernel.org>
Reviewed-by: Dave Chinner <dchinner@redhat.com>
Reviewed-by: Xiao Yang <yangx.jy@fujitsu.com>
Signed-off-by: Carlos Maiolino <cem@kernel.org>
Signed-off-by: Pavel Reichl <preichl@redhat.com>
---
libxfs/xfs_refcount.c | 129 ++++++++++++++++++++++++++++++++++++------
1 file changed, 113 insertions(+), 16 deletions(-)
diff --git a/libxfs/xfs_refcount.c b/libxfs/xfs_refcount.c
index 64e66861..f6167c5f 100644
--- a/libxfs/xfs_refcount.c
+++ b/libxfs/xfs_refcount.c
@@ -814,11 +814,119 @@ out_error:
/* Is this extent valid? */
static inline bool
xfs_refc_valid(
- struct xfs_refcount_irec *rc)
+ const struct xfs_refcount_irec *rc)
{
return rc->rc_startblock != NULLAGBLOCK;
}
+static inline bool
+xfs_refc_want_merge_center(
+ const struct xfs_refcount_irec *left,
+ const struct xfs_refcount_irec *cleft,
+ const struct xfs_refcount_irec *cright,
+ const struct xfs_refcount_irec *right,
+ bool cleft_is_cright,
+ enum xfs_refc_adjust_op adjust,
+ unsigned long long *ulenp)
+{
+ unsigned long long ulen = left->rc_blockcount;
+
+ /*
+ * To merge with a center record, both shoulder records must be
+ * adjacent to the record we want to adjust. This is only true if
+ * find_left and find_right made all four records valid.
+ */
+ if (!xfs_refc_valid(left) || !xfs_refc_valid(right) ||
+ !xfs_refc_valid(cleft) || !xfs_refc_valid(cright))
+ return false;
+
+ /* There must only be one record for the entire range. */
+ if (!cleft_is_cright)
+ return false;
+
+ /* The shoulder record refcounts must match the new refcount. */
+ if (left->rc_refcount != cleft->rc_refcount + adjust)
+ return false;
+ if (right->rc_refcount != cleft->rc_refcount + adjust)
+ return false;
+
+ /*
+ * The new record cannot exceed the max length. ulen is a ULL as the
+ * individual record block counts can be up to (u32 - 1) in length
+ * hence we need to catch u32 addition overflows here.
+ */
+ ulen += cleft->rc_blockcount + right->rc_blockcount;
+ if (ulen >= MAXREFCEXTLEN)
+ return false;
+
+ *ulenp = ulen;
+ return true;
+}
+
+static inline bool
+xfs_refc_want_merge_left(
+ const struct xfs_refcount_irec *left,
+ const struct xfs_refcount_irec *cleft,
+ enum xfs_refc_adjust_op adjust)
+{
+ unsigned long long ulen = left->rc_blockcount;
+
+ /*
+ * For a left merge, the left shoulder record must be adjacent to the
+ * start of the range. If this is true, find_left made left and cleft
+ * contain valid contents.
+ */
+ if (!xfs_refc_valid(left) || !xfs_refc_valid(cleft))
+ return false;
+
+ /* Left shoulder record refcount must match the new refcount. */
+ if (left->rc_refcount != cleft->rc_refcount + adjust)
+ return false;
+
+ /*
+ * The new record cannot exceed the max length. ulen is a ULL as the
+ * individual record block counts can be up to (u32 - 1) in length
+ * hence we need to catch u32 addition overflows here.
+ */
+ ulen += cleft->rc_blockcount;
+ if (ulen >= MAXREFCEXTLEN)
+ return false;
+
+ return true;
+}
+
+static inline bool
+xfs_refc_want_merge_right(
+ const struct xfs_refcount_irec *cright,
+ const struct xfs_refcount_irec *right,
+ enum xfs_refc_adjust_op adjust)
+{
+ unsigned long long ulen = right->rc_blockcount;
+
+ /*
+ * For a right merge, the right shoulder record must be adjacent to the
+ * end of the range. If this is true, find_right made cright and right
+ * contain valid contents.
+ */
+ if (!xfs_refc_valid(right) || !xfs_refc_valid(cright))
+ return false;
+
+ /* Right shoulder record refcount must match the new refcount. */
+ if (right->rc_refcount != cright->rc_refcount + adjust)
+ return false;
+
+ /*
+ * The new record cannot exceed the max length. ulen is a ULL as the
+ * individual record block counts can be up to (u32 - 1) in length
+ * hence we need to catch u32 addition overflows here.
+ */
+ ulen += cright->rc_blockcount;
+ if (ulen >= MAXREFCEXTLEN)
+ return false;
+
+ return true;
+}
+
/*
* Try to merge with any extents on the boundaries of the adjustment range.
*/
@@ -860,23 +968,15 @@ xfs_refcount_merge_extents(
(cleft.rc_blockcount == cright.rc_blockcount);
/* Try to merge left, cleft, and right. cleft must == cright. */
- ulen = (unsigned long long)left.rc_blockcount + cleft.rc_blockcount +
- right.rc_blockcount;
- if (xfs_refc_valid(&left) && xfs_refc_valid(&right) &&
- xfs_refc_valid(&cleft) && xfs_refc_valid(&cright) && cequal &&
- left.rc_refcount == cleft.rc_refcount + adjust &&
- right.rc_refcount == cleft.rc_refcount + adjust &&
- ulen < MAXREFCEXTLEN) {
+ if (xfs_refc_want_merge_center(&left, &cleft, &cright, &right, cequal,
+ adjust, &ulen)) {
*shape_changed = true;
return xfs_refcount_merge_center_extents(cur, &left, &cleft,
&right, ulen, aglen);
}
/* Try to merge left and cleft. */
- ulen = (unsigned long long)left.rc_blockcount + cleft.rc_blockcount;
- if (xfs_refc_valid(&left) && xfs_refc_valid(&cleft) &&
- left.rc_refcount == cleft.rc_refcount + adjust &&
- ulen < MAXREFCEXTLEN) {
+ if (xfs_refc_want_merge_left(&left, &cleft, adjust)) {
*shape_changed = true;
error = xfs_refcount_merge_left_extent(cur, &left, &cleft,
agbno, aglen);
@@ -892,10 +992,7 @@ xfs_refcount_merge_extents(
}
/* Try to merge cright and right. */
- ulen = (unsigned long long)right.rc_blockcount + cright.rc_blockcount;
- if (xfs_refc_valid(&right) && xfs_refc_valid(&cright) &&
- right.rc_refcount == cright.rc_refcount + adjust &&
- ulen < MAXREFCEXTLEN) {
+ if (xfs_refc_want_merge_right(&cright, &right, adjust)) {
*shape_changed = true;
return xfs_refcount_merge_right_extent(cur, &right, &cright,
aglen);
--
2.40.0

View File

@ -0,0 +1,69 @@
From 798d43495df2c8a09a73b8e868a71d8f2fd81d5e Mon Sep 17 00:00:00 2001
From: Andrey Strachuk <strochuk@ispras.ru>
Date: Wed, 24 Aug 2022 10:24:01 +0200
Subject: [PATCH] xfs: removed useless condition in function xfs_attr_node_get
Source kernel commit: 0f38063d7a38015a47ca1488406bf21e0effe80e
At line 1561, variable "state" is being compared
with NULL every loop iteration.
-------------------------------------------------------------------
1561 for (i = 0; state != NULL && i < state->path.active; i++) {
1562 xfs_trans_brelse(args->trans, state->path.blk[i].bp);
1563 state->path.blk[i].bp = NULL;
1564 }
-------------------------------------------------------------------
However, it cannot be NULL.
----------------------------------------
1546 state = xfs_da_state_alloc(args);
----------------------------------------
xfs_da_state_alloc calls kmem_cache_zalloc. kmem_cache_zalloc is
called with __GFP_NOFAIL flag and, therefore, it cannot return NULL.
--------------------------------------------------------------------------
struct xfs_da_state *
xfs_da_state_alloc(
struct xfs_da_args *args)
{
struct xfs_da_state *state;
state = kmem_cache_zalloc(xfs_da_state_cache, GFP_NOFS | __GFP_NOFAIL);
state->args = args;
state->mp = args->dp->i_mount;
return state;
}
--------------------------------------------------------------------------
Found by Linux Verification Center (linuxtesting.org) with SVACE.
Signed-off-by: Andrey Strachuk <strochuk@ispras.ru>
Fixes: 4d0cdd2bb8f0 ("xfs: clean up xfs_attr_node_hasname")
Reviewed-by: Darrick J. Wong <djwong@kernel.org>
Signed-off-by: Darrick J. Wong <djwong@kernel.org>
Signed-off-by: Carlos Maiolino <cem@kernel.org>
Signed-off-by: Pavel Reichl <preichl@redhat.com>
---
libxfs/xfs_attr.c | 2 +-
1 file changed, 1 insertion(+), 1 deletion(-)
diff --git a/libxfs/xfs_attr.c b/libxfs/xfs_attr.c
index 08973934..b451fcdb 100644
--- a/libxfs/xfs_attr.c
+++ b/libxfs/xfs_attr.c
@@ -1556,7 +1556,7 @@ xfs_attr_node_get(
* If not in a transaction, we have to release all the buffers.
*/
out_release:
- for (i = 0; state != NULL && i < state->path.active; i++) {
+ for (i = 0; i < state->path.active; i++) {
xfs_trans_brelse(args->trans, state->path.blk[i].bp);
state->path.blk[i].bp = NULL;
}
--
2.40.0

View File

@ -0,0 +1,34 @@
From 7374f58bfeb38467bab6552a47a5cd6bbe3c2e2e Mon Sep 17 00:00:00 2001
From: "Darrick J. Wong" <djwong@kernel.org>
Date: Tue, 20 Dec 2022 16:53:34 -0800
Subject: [PATCH] xfs_db: fix dir3 block magic check
Fix this broken check, which (amazingly) went unnoticed until I cranked
up the warning level /and/ built the system for s390x.
Fixes: e96864ff4d4 ("xfs_db: enable blockget for v5 filesystems")
Signed-off-by: Darrick J. Wong <djwong@kernel.org>
Reviewed-by: Dave Chinner <dchinner@redhat.com>
Reviewed-by: Carlos Maiolino <cmaiolino@redhat.com>
Signed-off-by: Carlos Maiolino <cem@kernel.org>
Signed-off-by: Pavel Reichl <preichl@redhat.com>
---
db/check.c | 2 +-
1 file changed, 1 insertion(+), 1 deletion(-)
diff --git a/db/check.c b/db/check.c
index bb27ce58..964756d0 100644
--- a/db/check.c
+++ b/db/check.c
@@ -2578,7 +2578,7 @@ process_data_dir_v2(
error++;
}
if ((be32_to_cpu(data->magic) == XFS_DIR2_BLOCK_MAGIC ||
- be32_to_cpu(data->magic) == XFS_DIR2_BLOCK_MAGIC) &&
+ be32_to_cpu(data->magic) == XFS_DIR3_BLOCK_MAGIC) &&
stale != be32_to_cpu(btp->stale)) {
if (!sflag || v)
dbprintf(_("dir %lld block %d bad stale tail count %d\n"),
--
2.40.0

View File

@ -0,0 +1,266 @@
From 945c7341dedab44ae5daed83377e6366c3fb8fee Mon Sep 17 00:00:00 2001
From: "Darrick J. Wong" <djwong@kernel.org>
Date: Wed, 23 Nov 2022 09:09:33 -0800
Subject: [PATCH] xfs_repair: retain superblock buffer to avoid write hook
deadlock
Every now and then I experience the following deadlock in xfs_repair
when I'm running the offline repair fuzz tests:
#0 futex_wait (private=0, expected=2, futex_word=0x55555566df70) at ../sysdeps/nptl/futex-internal.h:146
#1 __GI___lll_lock_wait (futex=futex@entry=0x55555566df70, private=0) at ./nptl/lowlevellock.c:49
#2 lll_mutex_lock_optimized (mutex=0x55555566df70) at ./nptl/pthread_mutex_lock.c:48
#3 ___pthread_mutex_lock (mutex=mutex@entry=0x55555566df70) at ./nptl/pthread_mutex_lock.c:93
#4 cache_shake (cache=cache@entry=0x55555566de60, priority=priority@entry=2, purge=purge@entry=false) at cache.c:231
#5 cache_node_get (cache=cache@entry=0x55555566de60, key=key@entry=0x7fffe55e01b0, nodep=nodep@entry=0x7fffe55e0168) at cache.c:452
#6 __cache_lookup (key=key@entry=0x7fffe55e01b0, flags=0, bpp=bpp@entry=0x7fffe55e0228) at rdwr.c:405
#7 libxfs_getbuf_flags (btp=0x55555566de00, blkno=0, len=<optimized out>, flags=<optimized out>, bpp=0x7fffe55e0228) at rdwr.c:457
#8 libxfs_buf_read_map (btp=0x55555566de00, map=map@entry=0x7fffe55e0280, nmaps=nmaps@entry=1, flags=flags@entry=0, bpp=bpp@entry=0x7fffe55e0278, ops=0x5555556233e0 <xfs_sb_buf_ops>)
at rdwr.c:704
#9 libxfs_buf_read (ops=<optimized out>, bpp=0x7fffe55e0278, flags=0, numblks=<optimized out>, blkno=0, target=<optimized out>)
at /storage/home/djwong/cdev/work/xfsprogs/build-x86_64/libxfs/libxfs_io.h:195
#10 libxfs_getsb (mp=mp@entry=0x7fffffffd690) at rdwr.c:162
#11 force_needsrepair (mp=0x7fffffffd690) at xfs_repair.c:924
#12 repair_capture_writeback (bp=<optimized out>) at xfs_repair.c:1000
#13 libxfs_bwrite (bp=0x7fffe011e530) at rdwr.c:869
#14 cache_shake (cache=cache@entry=0x55555566de60, priority=priority@entry=2, purge=purge@entry=false) at cache.c:240
#15 cache_node_get (cache=cache@entry=0x55555566de60, key=key@entry=0x7fffe55e0470, nodep=nodep@entry=0x7fffe55e0428) at cache.c:452
#16 __cache_lookup (key=key@entry=0x7fffe55e0470, flags=1, bpp=bpp@entry=0x7fffe55e0538) at rdwr.c:405
#17 libxfs_getbuf_flags (btp=0x55555566de00, blkno=12736, len=<optimized out>, flags=<optimized out>, bpp=0x7fffe55e0538) at rdwr.c:457
#18 __libxfs_buf_get_map (btp=<optimized out>, map=map@entry=0x7fffe55e05b0, nmaps=<optimized out>, flags=flags@entry=1, bpp=bpp@entry=0x7fffe55e0538) at rdwr.c:501
#19 libxfs_buf_get_map (btp=<optimized out>, map=map@entry=0x7fffe55e05b0, nmaps=<optimized out>, flags=flags@entry=1, bpp=bpp@entry=0x7fffe55e0538) at rdwr.c:525
#20 pf_queue_io (args=args@entry=0x5555556722c0, map=map@entry=0x7fffe55e05b0, nmaps=<optimized out>, flag=flag@entry=11) at prefetch.c:124
#21 pf_read_bmbt_reclist (args=0x5555556722c0, rp=<optimized out>, numrecs=78) at prefetch.c:220
#22 pf_scan_lbtree (dbno=dbno@entry=1211, level=level@entry=1, isadir=isadir@entry=1, args=args@entry=0x5555556722c0, func=0x55555557f240 <pf_scanfunc_bmap>) at prefetch.c:298
#23 pf_read_btinode (isadir=1, dino=<optimized out>, args=0x5555556722c0) at prefetch.c:385
#24 pf_read_inode_dirs (args=args@entry=0x5555556722c0, bp=bp@entry=0x7fffdc023790) at prefetch.c:459
#25 pf_read_inode_dirs (bp=<optimized out>, args=0x5555556722c0) at prefetch.c:411
#26 pf_batch_read (args=args@entry=0x5555556722c0, which=which@entry=PF_PRIMARY, buf=buf@entry=0x7fffd001d000) at prefetch.c:609
#27 pf_io_worker (param=0x5555556722c0) at prefetch.c:673
#28 start_thread (arg=<optimized out>) at ./nptl/pthread_create.c:442
#29 clone3 () at ../sysdeps/unix/sysv/linux/x86_64/clone3.S:81
>From this stack trace, we see that xfs_repair's prefetch module is
getting some xfs_buf objects ahead of initiating a read (#19). The
buffer cache has hit its limit, so it calls cache_shake (#14) to free
some unused xfs_bufs. The buffer it finds is a dirty buffer, so it
calls libxfs_bwrite to flush it out to disk, which in turn invokes the
buffer write hook that xfs_repair set up in 3b7667cb to mark the ondisk
filesystem's superblock as NEEDSREPAIR until repair actually completes.
Unfortunately, the NEEDSREPAIR handler itself needs to grab the
superblock buffer, so it makes another call into the buffer cache (#9),
which sees that the cache is full and tries to shake it(#4). Hence we
deadlock on cm_mutex because shaking is not reentrant.
Fix this by retaining a reference to the superblock buffer when possible
so that the writeback hook doesn't have to access the buffer cache to
set NEEDSREPAIR.
Fixes: 3b7667cb ("xfs_repair: set NEEDSREPAIR the first time we write to a filesystem")
Signed-off-by: Darrick J. Wong <djwong@kernel.org>
Reviewed-by: Carlos Maiolino <cmaiolino@redhat.com>
Signed-off-by: Carlos Maiolino <cem@kernel.org>
Signed-off-by: Pavel Reichl <preichl@redhat.com>
---
libxfs/libxfs_api_defs.h | 2 ++
libxfs/libxfs_io.h | 1 +
libxfs/rdwr.c | 8 +++++
repair/phase2.c | 8 +++++
repair/protos.h | 1 +
repair/xfs_repair.c | 75 +++++++++++++++++++++++++++++++++++-----
6 files changed, 86 insertions(+), 9 deletions(-)
diff --git a/libxfs/libxfs_api_defs.h b/libxfs/libxfs_api_defs.h
index 2716a731..f8efcce7 100644
--- a/libxfs/libxfs_api_defs.h
+++ b/libxfs/libxfs_api_defs.h
@@ -53,9 +53,11 @@
#define xfs_buf_delwri_submit libxfs_buf_delwri_submit
#define xfs_buf_get libxfs_buf_get
#define xfs_buf_get_uncached libxfs_buf_get_uncached
+#define xfs_buf_lock libxfs_buf_lock
#define xfs_buf_read libxfs_buf_read
#define xfs_buf_read_uncached libxfs_buf_read_uncached
#define xfs_buf_relse libxfs_buf_relse
+#define xfs_buf_unlock libxfs_buf_unlock
#define xfs_bunmapi libxfs_bunmapi
#define xfs_bwrite libxfs_bwrite
#define xfs_calc_dquots_per_chunk libxfs_calc_dquots_per_chunk
diff --git a/libxfs/libxfs_io.h b/libxfs/libxfs_io.h
index 9c0e2704..fae86427 100644
--- a/libxfs/libxfs_io.h
+++ b/libxfs/libxfs_io.h
@@ -226,6 +226,7 @@ xfs_buf_hold(struct xfs_buf *bp)
}
void xfs_buf_lock(struct xfs_buf *bp);
+void xfs_buf_unlock(struct xfs_buf *bp);
int libxfs_buf_get_uncached(struct xfs_buftarg *targ, size_t bblen, int flags,
struct xfs_buf **bpp);
diff --git a/libxfs/rdwr.c b/libxfs/rdwr.c
index 20e0793c..d5aad3ea 100644
--- a/libxfs/rdwr.c
+++ b/libxfs/rdwr.c
@@ -384,6 +384,14 @@ xfs_buf_lock(
pthread_mutex_lock(&bp->b_lock);
}
+void
+xfs_buf_unlock(
+ struct xfs_buf *bp)
+{
+ if (use_xfs_buf_lock)
+ pthread_mutex_unlock(&bp->b_lock);
+}
+
static int
__cache_lookup(
struct xfs_bufkey *key,
diff --git a/repair/phase2.c b/repair/phase2.c
index 56a39bb4..2ada95ae 100644
--- a/repair/phase2.c
+++ b/repair/phase2.c
@@ -370,6 +370,14 @@ phase2(
} else
do_log(_("Phase 2 - using internal log\n"));
+ /*
+ * Now that we've set up the buffer cache the way we want it, try to
+ * grab our own reference to the primary sb so that the hooks will not
+ * have to call out to the buffer cache.
+ */
+ if (mp->m_buf_writeback_fn)
+ retain_primary_sb(mp);
+
/* Zero log if applicable */
do_log(_(" - zero log...\n"));
diff --git a/repair/protos.h b/repair/protos.h
index 03ebae14..83e471ff 100644
--- a/repair/protos.h
+++ b/repair/protos.h
@@ -16,6 +16,7 @@ int get_sb(xfs_sb_t *sbp,
xfs_off_t off,
int size,
xfs_agnumber_t agno);
+int retain_primary_sb(struct xfs_mount *mp);
void write_primary_sb(xfs_sb_t *sbp,
int size);
diff --git a/repair/xfs_repair.c b/repair/xfs_repair.c
index 871b428d..ff29bea9 100644
--- a/repair/xfs_repair.c
+++ b/repair/xfs_repair.c
@@ -749,6 +749,63 @@ check_fs_vs_host_sectsize(
}
}
+/*
+ * If we set up a writeback function to set NEEDSREPAIR while the filesystem is
+ * dirty, there's a chance that calling libxfs_getsb could deadlock the buffer
+ * cache while trying to get the primary sb buffer if the first non-sb write to
+ * the filesystem is the result of a cache shake. Retain a reference to the
+ * primary sb buffer to avoid all that.
+ */
+static struct xfs_buf *primary_sb_bp; /* buffer for superblock */
+
+int
+retain_primary_sb(
+ struct xfs_mount *mp)
+{
+ int error;
+
+ error = -libxfs_buf_read(mp->m_ddev_targp, XFS_SB_DADDR,
+ XFS_FSS_TO_BB(mp, 1), 0, &primary_sb_bp,
+ &xfs_sb_buf_ops);
+ if (error)
+ return error;
+
+ libxfs_buf_unlock(primary_sb_bp);
+ return 0;
+}
+
+static void
+drop_primary_sb(void)
+{
+ if (!primary_sb_bp)
+ return;
+
+ libxfs_buf_lock(primary_sb_bp);
+ libxfs_buf_relse(primary_sb_bp);
+ primary_sb_bp = NULL;
+}
+
+static int
+get_primary_sb(
+ struct xfs_mount *mp,
+ struct xfs_buf **bpp)
+{
+ int error;
+
+ *bpp = NULL;
+
+ if (!primary_sb_bp) {
+ error = retain_primary_sb(mp);
+ if (error)
+ return error;
+ }
+
+ libxfs_buf_lock(primary_sb_bp);
+ xfs_buf_hold(primary_sb_bp);
+ *bpp = primary_sb_bp;
+ return 0;
+}
+
/* Clear needsrepair after a successful repair run. */
void
clear_needsrepair(
@@ -769,15 +826,14 @@ clear_needsrepair(
do_warn(
_("Cannot clear needsrepair due to flush failure, err=%d.\n"),
error);
- return;
+ goto drop;
}
/* Clear needsrepair from the superblock. */
- bp = libxfs_getsb(mp);
- if (!bp || bp->b_error) {
+ error = get_primary_sb(mp, &bp);
+ if (error) {
do_warn(
- _("Cannot clear needsrepair from primary super, err=%d.\n"),
- bp ? bp->b_error : ENOMEM);
+ _("Cannot clear needsrepair from primary super, err=%d.\n"), error);
} else {
mp->m_sb.sb_features_incompat &=
~XFS_SB_FEAT_INCOMPAT_NEEDSREPAIR;
@@ -786,6 +842,8 @@ clear_needsrepair(
}
if (bp)
libxfs_buf_relse(bp);
+drop:
+ drop_primary_sb();
}
static void
@@ -808,11 +866,10 @@ force_needsrepair(
xfs_sb_version_needsrepair(&mp->m_sb))
return;
- bp = libxfs_getsb(mp);
- if (!bp || bp->b_error) {
+ error = get_primary_sb(mp, &bp);
+ if (error) {
do_log(
- _("couldn't get superblock to set needsrepair, err=%d\n"),
- bp ? bp->b_error : ENOMEM);
+ _("couldn't get superblock to set needsrepair, err=%d\n"), error);
} else {
/*
* It's possible that we need to set NEEDSREPAIR before we've
--
2.40.0

View File

@ -1,6 +1,6 @@
Summary: Utilities for managing the XFS filesystem
Name: xfsprogs
Version: 5.14.2
Version: 5.19.0
Release: 1%{?dist}
License: GPL+ and LGPLv2+
URL: https://xfs.wiki.kernel.org
@ -23,8 +23,17 @@ Obsoletes: xfsprogs-qa-devel <= %{version}
Conflicts: xfsdump < 3.0.1
Suggests: xfsprogs-xfs_scrub
Patch0: xfsprogs-5.12.0-default-bigtime-inobtcnt-on.patch
Patch0: xfsprogs-5.19.0-disable-old-kernel-bigtime-inobtcnt-on.patch
Patch1: xfsprogs-5.12.0-example-conf.patch
Patch2: xfsprogs-5.19.0-mkfs-tolerate-tiny-filesystems.patch
Patch3: xfsprogs-5.19.0-xfs-hoist-refcount-record-merge-predicates.patch
Patch4: xfsprogs-5.19.0-xfs_db-fix-dir3-block-magic-check.patch
Patch5: xfsprogs-5.19.0-xfs-estimate-post-merge-refcounts-correctly.patch
Patch7: xfsprogs-5.19.0-xfs-fix-off-by-one-error-in-xfs_btree_space_to_heigh.patch
Patch8: xfsprogs-5.19.0-xfs-fix-sb-write-verify-for-lazysbcount.patch
Patch9: xfsprogs-5.19.0-xfs-get-rid-of-assert-from-xfs_btree_islastblock.patch
Patch10: xfsprogs-5.19.0-xfs-removed-useless-condition-in-function-xfs_attr_n.patch
Patch11: xfsprogs-5.19.0-xfs_repair-retain-superblock-buffer-to-avoid-write-h.patch
%description
A set of commands to use the XFS filesystem, including mkfs.xfs.
@ -136,6 +145,15 @@ install -m 0644 %{SOURCE3} %{buildroot}%{mkfsdir}
%{_libdir}/*.so
%changelog
* Tue Jan 10 2023 Pavel Reichl <preichl@redhat.com> - 5.19.0-1
- New upstream release
- Tolerate tiny (<300MB) filesystems
- Rename xfsprogs-5.12.0-default-bigtime-inobtcnt-on.patch to
xfsprogs-5.19.0-disable-old-kernel-bigtime-inobtcnt-on.patch
and amend it to reflect upstream changes
- Backport all "Fixing" patches relevant to 5.19
Related: rhbz#2142910
* Fri Jan 21 2022 Pavel Reichl <preichl@redhat.com> - 5.14.2-1
- New upstream release
Related: rhbz#2041525