From 17fdafd9b58c07506a60e103acbb9c0c3fd6c17f Mon Sep 17 00:00:00 2001 From: Pavel Reichl Date: Tue, 10 Jan 2023 15:43:10 +0100 Subject: [PATCH] New upstream release 5.19.0 Upstream now has these patches: c1c71781 mkfs: update manpage of bigtime and inobtcount 1c08f0ae mkfs: enable inobtcount and bigtime by default * So do not amend the man page and do not change the default option values for inobtcount and bigtime. * But continue turning off inobtcount and bigtime for kernels older than 5.10 Unlike upstream RHEL-9 will continue to support tiny filesystems, but add warning about deprecation. Backport all "Fixing" patches relevant to 5.19 Resolves: rhbz#2142910 Signed-off-by: Pavel Reichl --- .gitignore | 2 + sources | 4 +- ...s-5.12.0-default-bigtime-inobtcnt-on.patch | 85 ------ xfsprogs-5.12.0-example-conf.patch | 4 +- ...sable-old-kernel-bigtime-inobtcnt-on.patch | 52 ++++ ....19.0-mkfs-tolerate-tiny-filesystems.patch | 91 ++++++ ...imate-post-merge-refcounts-correctly.patch | 113 ++++++++ ...ne-error-in-xfs_btree_space_to_heigh.patch | 88 ++++++ ...-fix-sb-write-verify-for-lazysbcount.patch | 119 ++++++++ ...of-assert-from-xfs_btree_islastblock.patch | 73 +++++ ...ist-refcount-record-merge-predicates.patch | 187 ++++++++++++ ...ess-condition-in-function-xfs_attr_n.patch | 69 +++++ ....0-xfs_db-fix-dir3-block-magic-check.patch | 34 +++ ...n-superblock-buffer-to-avoid-write-h.patch | 266 ++++++++++++++++++ xfsprogs.spec | 22 +- 15 files changed, 1118 insertions(+), 91 deletions(-) delete mode 100644 xfsprogs-5.12.0-default-bigtime-inobtcnt-on.patch create mode 100644 xfsprogs-5.19.0-disable-old-kernel-bigtime-inobtcnt-on.patch create mode 100644 xfsprogs-5.19.0-mkfs-tolerate-tiny-filesystems.patch create mode 100644 xfsprogs-5.19.0-xfs-estimate-post-merge-refcounts-correctly.patch create mode 100644 xfsprogs-5.19.0-xfs-fix-off-by-one-error-in-xfs_btree_space_to_heigh.patch create mode 100644 xfsprogs-5.19.0-xfs-fix-sb-write-verify-for-lazysbcount.patch create 
mode 100644 xfsprogs-5.19.0-xfs-get-rid-of-assert-from-xfs_btree_islastblock.patch create mode 100644 xfsprogs-5.19.0-xfs-hoist-refcount-record-merge-predicates.patch create mode 100644 xfsprogs-5.19.0-xfs-removed-useless-condition-in-function-xfs_attr_n.patch create mode 100644 xfsprogs-5.19.0-xfs_db-fix-dir3-block-magic-check.patch create mode 100644 xfsprogs-5.19.0-xfs_repair-retain-superblock-buffer-to-avoid-write-h.patch diff --git a/.gitignore b/.gitignore index d969006..af03e17 100644 --- a/.gitignore +++ b/.gitignore @@ -57,3 +57,5 @@ xfsprogs-3.1.2.tar.gz /xfsprogs-5.14.0.tar.xz /xfsprogs-5.14.2.tar.sign /xfsprogs-5.14.2.tar.xz +/xfsprogs-5.19.0.tar.xz +/xfsprogs-5.19.0.tar.sign diff --git a/sources b/sources index 17066d0..be68804 100644 --- a/sources +++ b/sources @@ -1,2 +1,2 @@ -SHA512 (xfsprogs-5.14.2.tar.sign) = 4f61784b4c95b596f8923a54a9a2bcce6381d19c0a8b3728de274797b0bb8a8b73f426f68efde1125416088b4012a2f13c7196b250d914ac3bb576df8c9705d5 -SHA512 (xfsprogs-5.14.2.tar.xz) = cb9794247a2eed8bd9f10ff811d38f97e4adbed97bad334b0fa8a26503ec224237eddd92fd78892584a153967417ab2009fb2fc8bab6ef12cd53fd20bdba1b4e +SHA512 (xfsprogs-5.19.0.tar.xz) = 0b069cd44eb87052ee55ebffcce736ef103adeda539e854bd86d53942f541c07d74cc174f06a2221230eaa82b21d43327833492cd0ded577d442e1935a0abe7d +SHA512 (xfsprogs-5.19.0.tar.sign) = a87766fa2c40a3e6990cb84f85bd687469b59baf075561ca1867e53490ce120bd87a4b11d9843d4820b2d8b6bdc7d56d6e090eed223bc3b663cf999a5a443597 diff --git a/xfsprogs-5.12.0-default-bigtime-inobtcnt-on.patch b/xfsprogs-5.12.0-default-bigtime-inobtcnt-on.patch deleted file mode 100644 index ed4f52b..0000000 --- a/xfsprogs-5.12.0-default-bigtime-inobtcnt-on.patch +++ /dev/null @@ -1,85 +0,0 @@ ---- xfsprogs-5.12.0.orig/man/man8/mkfs.xfs.8 -+++ xfsprogs-5.12.0/man/man8/mkfs.xfs.8 -@@ -203,7 +203,7 @@ December 1901 to January 2038, and quota - .IP - By default, - .B mkfs.xfs --will not enable this feature. -+in RHEL9 will enable this feature. 
- If the option - .B \-m crc=0 - is used, the large timestamp feature is not supported and is disabled. -@@ -256,7 +256,7 @@ This can be used to reduce mount times w - .IP - By default, - .B mkfs.xfs --will not enable this option. -+in RHEL9 will enable this option. - This feature is only available for filesystems created with the (default) - .B \-m finobt=1 - option set. ---- xfsprogs-5.12.0.orig/mkfs/xfs_mkfs.c -+++ xfsprogs-5.12.0/mkfs/xfs_mkfs.c -@@ -3795,6 +3797,23 @@ cfgfile_parse( - cli->cfgfile); - } - -+static unsigned int get_system_kver(void) -+{ -+ const char *kver = getenv("KVER"); -+ struct utsname utsname; -+ int a, b, c; -+ -+ if (!kver) { -+ uname(&utsname); -+ kver = utsname.release; -+ } -+ -+ if (sscanf(kver, "%d.%d.%d", &a, &b, &c) != 3) -+ return LINUX_VERSION_CODE; -+ -+ return KERNEL_VERSION(a,b,c); -+} -+ - int - main( - int argc, -@@ -3848,17 +3867,25 @@ main( - .spinodes = true, - .rmapbt = false, - .reflink = true, -- .inobtcnt = false, -+ .inobtcnt = true, - .parent_pointers = false, - .nodalign = false, - .nortalign = false, -- .bigtime = false, -+ .bigtime = true, - }, - }; - - struct list_head buffer_list; -+ unsigned int kver; - int error; - -+ /* turn bigtime & inobtcnt back off if running under older kernels */ -+ kver = get_system_kver(); -+ if (kver < KERNEL_VERSION(5,10,0)) { -+ dft.sb_feat.inobtcnt = false; -+ dft.sb_feat.bigtime = false; -+ } -+ - platform_uuid_generate(&cli.uuid); - progname = basename(argv[0]); - setlocale(LC_ALL, ""); ---- xfsprogs-5.14.0/mkfs/xfs_mkfs.c.orig -+++ xfsprogs-5.14.0/mkfs/xfs_mkfs.c -@@ -12,6 +12,8 @@ - #include "libfrog/convert.h" - #include "proto.h" - #include -+#include -+#include - - #define TERABYTES(count, blog) ((uint64_t)(count) << (40 - (blog))) - #define GIGABYTES(count, blog) ((uint64_t)(count) << (30 - (blog))) diff --git a/xfsprogs-5.12.0-example-conf.patch b/xfsprogs-5.12.0-example-conf.patch index ca06875..4934e4a 100644 --- a/xfsprogs-5.12.0-example-conf.patch +++ 
b/xfsprogs-5.12.0-example-conf.patch @@ -1,5 +1,5 @@ ---- xfsprogs-5.12.0/man/man8/mkfs.xfs.8.backup 2021-12-01 20:48:09.241170607 +0100 -+++ xfsprogs-5.12.0/man/man8/mkfs.xfs.8 2021-12-03 15:34:32.382616819 +0100 +--- a/man/man8/mkfs.xfs.8.in 2021-12-01 20:48:09.241170607 +0100 ++++ b/man/man8/mkfs.xfs.8.in 2021-12-03 15:34:32.382616819 +0100 @@ -1091,6 +1091,12 @@ .HP .PD diff --git a/xfsprogs-5.19.0-disable-old-kernel-bigtime-inobtcnt-on.patch b/xfsprogs-5.19.0-disable-old-kernel-bigtime-inobtcnt-on.patch new file mode 100644 index 0000000..1f5cc7f --- /dev/null +++ b/xfsprogs-5.19.0-disable-old-kernel-bigtime-inobtcnt-on.patch @@ -0,0 +1,52 @@ +--- a/mkfs/xfs_mkfs.c.orig 2022-08-12 20:38:21.000000000 +0200 ++++ b/mkfs/xfs_mkfs.c 2023-01-25 11:06:01.863076713 +0100 +@@ -13,6 +13,8 @@ + #include "libfrog/crc32cselftest.h" + #include "proto.h" + #include ++#include ++#include + + #define TERABYTES(count, blog) ((uint64_t)(count) << (40 - (blog))) + #define GIGABYTES(count, blog) ((uint64_t)(count) << (30 - (blog))) +@@ -3998,6 +4000,23 @@ + cli->cfgfile); + } + ++static unsigned int get_system_kver(void) ++{ ++ const char *kver = getenv("KVER"); ++ struct utsname utsname; ++ int a, b, c; ++ ++ if (!kver) { ++ uname(&utsname); ++ kver = utsname.release; ++ } ++ ++ if (sscanf(kver, "%d.%d.%d", &a, &b, &c) != 3) ++ return LINUX_VERSION_CODE; ++ ++ return KERNEL_VERSION(a,b,c); ++} ++ + int + main( + int argc, +@@ -4077,8 +4096,16 @@ + }; + + struct list_head buffer_list; ++ unsigned int kver; + int error; + ++ /* turn bigtime & inobtcnt back off if running under older kernels */ ++ kver = get_system_kver(); ++ if (kver < KERNEL_VERSION(5,10,0)) { ++ dft.sb_feat.inobtcnt = false; ++ dft.sb_feat.bigtime = false; ++ } ++ + platform_uuid_generate(&cli.uuid); + progname = basename(argv[0]); + setlocale(LC_ALL, ""); diff --git a/xfsprogs-5.19.0-mkfs-tolerate-tiny-filesystems.patch b/xfsprogs-5.19.0-mkfs-tolerate-tiny-filesystems.patch new file mode 100644 index 
0000000..c7103fa --- /dev/null +++ b/xfsprogs-5.19.0-mkfs-tolerate-tiny-filesystems.patch @@ -0,0 +1,91 @@ +From 17b691400e8ce0755bb1d7a33490fbc014067e5e Mon Sep 17 00:00:00 2001 +From: Pavel Reichl +Date: Fri, 27 Jan 2023 06:30:20 +0100 +Subject: [PATCH] mkfs: tolerate tiny filesystems + +Signed-off-by: Pavel Reichl +--- + man/man8/mkfs.xfs.8.in | 4 ++-- + mkfs/xfs_mkfs.c | 23 ++++++++++++++--------- + 2 files changed, 16 insertions(+), 11 deletions(-) + +diff --git a/man/man8/mkfs.xfs.8.in b/man/man8/mkfs.xfs.8.in +index 211e7b0c..03f0fda8 100644 +--- a/man/man8/mkfs.xfs.8.in ++++ b/man/man8/mkfs.xfs.8.in +@@ -405,7 +405,7 @@ is required if + is given. Otherwise, it is only needed if the filesystem should occupy + less space than the size of the special file. + +-The data section must be at least 300MB in size. ++The data section should be at least 300MB in size. + .TP + .BI sunit= value + This is used to specify the stripe unit for a RAID device or a +@@ -705,7 +705,7 @@ described above. The overriding minimum value for size is 512 blocks. + With some combinations of filesystem block size, inode size, + and directory block size, the minimum log size is larger than 512 blocks. + +-The log must be at least 64MB in size. ++The log should be at least 64MB in size. + The log cannot be more than 2GB in size. + .TP + .BI version= value +diff --git a/mkfs/xfs_mkfs.c b/mkfs/xfs_mkfs.c +index 9dd0e79c..72c906d6 100644 +--- a/mkfs/xfs_mkfs.c ++++ b/mkfs/xfs_mkfs.c +@@ -2503,6 +2503,8 @@ validate_supported( + struct xfs_mount *mp, + struct cli_params *cli) + { ++ bool deprecated = false; ++ + /* Undocumented option to enable unsupported tiny filesystems. 
*/ + if (!cli->is_supported) { + printf( +@@ -2532,9 +2534,8 @@ validate_supported( + * 64MB * (8 / 7) * 4 = 293MB + */ + if (mp->m_sb.sb_dblocks < MEGABYTES(300, mp->m_sb.sb_blocklog)) { +- fprintf(stderr, +- _("Filesystem must be larger than 300MB.\n")); +- usage(); ++ printf(_("Filesystem should be larger than 300MB.\n")); ++ deprecated = true; + } + + /* +@@ -2543,9 +2544,8 @@ validate_supported( + */ + if (mp->m_sb.sb_logblocks < + XFS_MIN_REALISTIC_LOG_BLOCKS(mp->m_sb.sb_blocklog)) { +- fprintf(stderr, +- _("Log size must be at least 64MB.\n")); +- usage(); ++ printf( _("Log size should be at least 64MB.\n")); ++ deprecated = true; + } + + /* +@@ -2553,9 +2553,14 @@ validate_supported( + * have redundant superblocks. + */ + if (mp->m_sb.sb_agcount < 2) { +- fprintf(stderr, +- _("Filesystem must have at least 2 superblocks for redundancy!\n")); +- usage(); ++ printf( ++ _("Filesystem should have at least 2 superblocks for redundancy!\n")); ++ deprecated = true; ++ } ++ ++ if (deprecated) { ++ printf( ++_("Support for filesystems like this one is deprecated and they will not be supported in future releases.\n")); + } + } + +-- +2.39.1 + diff --git a/xfsprogs-5.19.0-xfs-estimate-post-merge-refcounts-correctly.patch b/xfsprogs-5.19.0-xfs-estimate-post-merge-refcounts-correctly.patch new file mode 100644 index 0000000..9b53f1a --- /dev/null +++ b/xfsprogs-5.19.0-xfs-estimate-post-merge-refcounts-correctly.patch @@ -0,0 +1,113 @@ +From b445624f0882badf00da739c52e58a85c18ae002 Mon Sep 17 00:00:00 2001 +From: "Darrick J. Wong" +Date: Wed, 15 Mar 2023 15:56:35 +0100 +Subject: [PATCH] xfs: estimate post-merge refcounts correctly + +Source kernel commit: b25d1984aa884fc91a73a5a407b9ac976d441e9b + +Upon enabling fsdax + reflink for XFS, xfs/179 began to report refcount +metadata corruptions after being run. Specifically, xfs_repair noticed +single-block refcount records that could be combined but had not been. 
+ +The root cause of this is improper MAXREFCOUNT edge case handling in +xfs_refcount_merge_extents. When we're trying to find candidates for a +refcount btree record merge, we compute the refcount attribute of the +merged record, but we fail to account for the fact that once a record +hits rc_refcount == MAXREFCOUNT, it is pinned that way forever. Hence +the computed refcount is wrong, and we fail to merge the extents. + +Fix this by adjusting the merge predicates to compute the adjusted +refcount correctly. + +Fixes: 3172725814f9 ("xfs: adjust refcount of an extent of blocks in refcount btree") +Signed-off-by: Darrick J. Wong +Reviewed-by: Dave Chinner +Reviewed-by: Xiao Yang +Signed-off-by: Carlos Maiolino +Signed-off-by: Pavel Reichl +--- + libxfs/xfs_refcount.c | 25 +++++++++++++++++++++---- + 1 file changed, 21 insertions(+), 4 deletions(-) + +diff --git a/libxfs/xfs_refcount.c b/libxfs/xfs_refcount.c +index f6167c5f..29258bdd 100644 +--- a/libxfs/xfs_refcount.c ++++ b/libxfs/xfs_refcount.c +@@ -819,6 +819,17 @@ xfs_refc_valid( + return rc->rc_startblock != NULLAGBLOCK; + } + ++static inline xfs_nlink_t ++xfs_refc_merge_refcount( ++ const struct xfs_refcount_irec *irec, ++ enum xfs_refc_adjust_op adjust) ++{ ++ /* Once a record hits MAXREFCOUNT, it is pinned there forever */ ++ if (irec->rc_refcount == MAXREFCOUNT) ++ return MAXREFCOUNT; ++ return irec->rc_refcount + adjust; ++} ++ + static inline bool + xfs_refc_want_merge_center( + const struct xfs_refcount_irec *left, +@@ -830,6 +841,7 @@ xfs_refc_want_merge_center( + unsigned long long *ulenp) + { + unsigned long long ulen = left->rc_blockcount; ++ xfs_nlink_t new_refcount; + + /* + * To merge with a center record, both shoulder records must be +@@ -845,9 +857,10 @@ xfs_refc_want_merge_center( + return false; + + /* The shoulder record refcounts must match the new refcount. 
*/ +- if (left->rc_refcount != cleft->rc_refcount + adjust) ++ new_refcount = xfs_refc_merge_refcount(cleft, adjust); ++ if (left->rc_refcount != new_refcount) + return false; +- if (right->rc_refcount != cleft->rc_refcount + adjust) ++ if (right->rc_refcount != new_refcount) + return false; + + /* +@@ -870,6 +883,7 @@ xfs_refc_want_merge_left( + enum xfs_refc_adjust_op adjust) + { + unsigned long long ulen = left->rc_blockcount; ++ xfs_nlink_t new_refcount; + + /* + * For a left merge, the left shoulder record must be adjacent to the +@@ -880,7 +894,8 @@ xfs_refc_want_merge_left( + return false; + + /* Left shoulder record refcount must match the new refcount. */ +- if (left->rc_refcount != cleft->rc_refcount + adjust) ++ new_refcount = xfs_refc_merge_refcount(cleft, adjust); ++ if (left->rc_refcount != new_refcount) + return false; + + /* +@@ -902,6 +917,7 @@ xfs_refc_want_merge_right( + enum xfs_refc_adjust_op adjust) + { + unsigned long long ulen = right->rc_blockcount; ++ xfs_nlink_t new_refcount; + + /* + * For a right merge, the right shoulder record must be adjacent to the +@@ -912,7 +928,8 @@ xfs_refc_want_merge_right( + return false; + + /* Right shoulder record refcount must match the new refcount. */ +- if (right->rc_refcount != cright->rc_refcount + adjust) ++ new_refcount = xfs_refc_merge_refcount(cright, adjust); ++ if (right->rc_refcount != new_refcount) + return false; + + /* +-- +2.40.0 + diff --git a/xfsprogs-5.19.0-xfs-fix-off-by-one-error-in-xfs_btree_space_to_heigh.patch b/xfsprogs-5.19.0-xfs-fix-off-by-one-error-in-xfs_btree_space_to_heigh.patch new file mode 100644 index 0000000..4d25a3f --- /dev/null +++ b/xfsprogs-5.19.0-xfs-fix-off-by-one-error-in-xfs_btree_space_to_heigh.patch @@ -0,0 +1,88 @@ +From a68dabd45f3591456ecf7e35f6a6077db79f6bc6 Mon Sep 17 00:00:00 2001 +From: "Darrick J. 
Wong" +Date: Wed, 15 Mar 2023 15:59:35 +0100 +Subject: [PATCH] xfs: fix off-by-one error in xfs_btree_space_to_height + +Source kernel commit: c0f399ff51495ac8d30367418f4f6292ecd61fbe + +Lately I've been stress-testing extreme-sized rmap btrees by using the +(new) xfs_db bmap_inflate command to clone bmbt mappings billions of +times and then using xfs_repair to build new rmap and refcount btrees. +This of course is /much/ faster than actually FICLONEing a file billions +of times. + +Unfortunately, xfs_repair fails in xfs_btree_bload_compute_geometry with +EOVERFLOW, which indicates that xfs_mount.m_rmap_maxlevels is not +sufficiently large for the test scenario. For a 1TB filesystem (~67 +million AG blocks, 4 AGs) the btheight command reports: + +$ xfs_db -c 'btheight -n 4400801200 -w min rmapbt' /dev/sda +rmapbt: worst case per 4096-byte block: 84 records (leaf) / 45 keyptrs (node) +level 0: 4400801200 records, 52390491 blocks +level 1: 52390491 records, 1164234 blocks +level 2: 1164234 records, 25872 blocks +level 3: 25872 records, 575 blocks +level 4: 575 records, 13 blocks +level 5: 13 records, 1 block +6 levels, 53581186 blocks total + +The AG is sufficiently large to build this rmap btree. Unfortunately, +m_rmap_maxlevels is 5. Augmenting the loop in the space->height +function to report height, node blocks, and blocks remaining produces +this: + +ht 1 node_blocks 45 blockleft 67108863 +ht 2 node_blocks 2025 blockleft 67108818 +ht 3 node_blocks 91125 blockleft 67106793 +ht 4 node_blocks 4100625 blockleft 67015668 +final height: 5 + +The goal of this function is to compute the maximum height btree that +can be stored in the given number of ondisk fsblocks. Starting with the +top level of the tree, each iteration through the loop adds the fanout +factor of the next level down until we run out of blocks. IOWs, maximum +height is achieved by using the smallest fanout factor that can apply +to that level. + +However, the loop setup is not correct. 
Top level btree blocks are +allowed to contain fewer than minrecs items, so the computation is +incorrect because the first time through the loop it should be using a +fanout factor of 2. With this corrected, the above becomes: + +ht 1 node_blocks 2 blockleft 67108863 +ht 2 node_blocks 90 blockleft 67108861 +ht 3 node_blocks 4050 blockleft 67108771 +ht 4 node_blocks 182250 blockleft 67104721 +ht 5 node_blocks 8201250 blockleft 66922471 +final height: 6 + +Fixes: 9ec691205e7d ("xfs: compute the maximum height of the rmap btree when reflink enabled") +Signed-off-by: Darrick J. Wong +Reviewed-by: Dave Chinner +Signed-off-by: Carlos Maiolino +Signed-off-by: Pavel Reichl +--- + libxfs/xfs_btree.c | 7 ++++++- + 1 file changed, 6 insertions(+), 1 deletion(-) + +diff --git a/libxfs/xfs_btree.c b/libxfs/xfs_btree.c +index 65d38637..38a3092d 100644 +--- a/libxfs/xfs_btree.c ++++ b/libxfs/xfs_btree.c +@@ -4663,7 +4663,12 @@ xfs_btree_space_to_height( + const unsigned int *limits, + unsigned long long leaf_blocks) + { +- unsigned long long node_blocks = limits[1]; ++ /* ++ * The root btree block can have fewer than minrecs pointers in it ++ * because the tree might not be big enough to require that amount of ++ * fanout. Hence it has a minimum size of 2 pointers, not limits[1]. 
++ */ ++ unsigned long long node_blocks = 2; + unsigned long long blocks_left = leaf_blocks - 1; + unsigned int height = 1; + +-- +2.40.0 + diff --git a/xfsprogs-5.19.0-xfs-fix-sb-write-verify-for-lazysbcount.patch b/xfsprogs-5.19.0-xfs-fix-sb-write-verify-for-lazysbcount.patch new file mode 100644 index 0000000..4873b9f --- /dev/null +++ b/xfsprogs-5.19.0-xfs-fix-sb-write-verify-for-lazysbcount.patch @@ -0,0 +1,119 @@ +From b827e2318ea2bb3eabca13a965c2535a1d7289e5 Mon Sep 17 00:00:00 2001 +From: Long Li +Date: Fri, 18 Nov 2022 12:23:57 +0100 +Subject: [PATCH] xfs: fix sb write verify for lazysbcount + +Source kernel commit: 7cecd500d90164419add650e26cc1de03a7a66cb + +When lazysbcount is enabled, fsstress and loop mount/unmount test report +the following problems: + +XFS (loop0): SB summary counter sanity check failed +XFS (loop0): Metadata corruption detected at xfs_sb_write_verify+0x13b/0x460, +xfs_sb block 0x0 +XFS (loop0): Unmount and run xfs_repair +XFS (loop0): First 128 bytes of corrupted metadata buffer: +00000000: 58 46 53 42 00 00 10 00 00 00 00 00 00 28 00 00 XFSB.........(.. +00000010: 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 ................ +00000020: 69 fb 7c cd 5f dc 44 af 85 74 e0 cc d4 e3 34 5a i.|._.D..t....4Z +00000030: 00 00 00 00 00 20 00 06 00 00 00 00 00 00 00 80 ..... .......... +00000040: 00 00 00 00 00 00 00 81 00 00 00 00 00 00 00 82 ................ +00000050: 00 00 00 01 00 0a 00 00 00 00 00 04 00 00 00 00 ................ +00000060: 00 00 0a 00 b4 b5 02 00 02 00 00 08 00 00 00 00 ................ +00000070: 00 00 00 00 00 00 00 00 0c 09 09 03 14 00 00 19 ................ +XFS (loop0): Corruption of in-memory data (0x8) detected at _xfs_buf_ioapply ++0xe1e/0x10e0 (fs/xfs/xfs_buf.c:1580). Shutting down filesystem. 
+XFS (loop0): Please unmount the filesystem and rectify the problem(s) +XFS (loop0): log mount/recovery failed: error -117 +XFS (loop0): log mount failed + +This corruption will shutdown the file system and the file system will +no longer be mountable. The following script can reproduce the problem, +but it may take a long time. + +#!/bin/bash + +device=/dev/sda +testdir=/mnt/test +round=0 + +function fail() +{ +echo "$*" +exit 1 +} + +mkdir -p $testdir +while [ $round -lt 10000 ] +do +echo "******* round $round ********" +mkfs.xfs -f $device +mount $device $testdir || fail "mount failed!" +fsstress -d $testdir -l 0 -n 10000 -p 4 >/dev/null & +sleep 4 +killall -w fsstress +umount $testdir +xfs_repair -e $device > /dev/null +if [ $? -eq 2 ];then +echo "ERR CODE 2: Dirty log exception during repair." +exit 1 +fi +round=$(($round+1)) +done + +When lazysbcount is enabled, there is no additional lock protection for +reading m_ifree and m_icount in xfs_log_sb(), if other cpu modifies the +m_ifree, this will make the m_ifree greater than m_icount. For example, +consider the following sequence and ifreedelta is positive: + +CPU0 CPU1 +xfs_log_sb xfs_trans_unreserve_and_mod_sb +---------- ------------------------------ +percpu_counter_sum(&mp->m_icount) +percpu_counter_add_batch(&mp->m_icount, +idelta, XFS_ICOUNT_BATCH) +percpu_counter_add(&mp->m_ifree, ifreedelta); +percpu_counter_sum(&mp->m_ifree) + +After this, incorrect inode count (sb_ifree > sb_icount) will be written to +the log. In the subsequent writing of sb, incorrect inode count (sb_ifree > +sb_icount) will fail to pass the boundary check in xfs_validate_sb_write() +and cause the file system to shut down. + +When lazysbcount is enabled, we don't need to guarantee that Lazy sb +counters are completely correct, but we do need to guarantee that sb_ifree +<= sb_icount.
On the other hand, the constraint that m_ifree <= m_icount +must be satisfied any time that there /cannot/ be other threads allocating +or freeing inode chunks. If the constraint is violated under these +circumstances, sb_i{count,free} (the ondisk superblock inode counters) +may be incorrect and need to be marked sick at unmount; the count will +be rebuilt on the next mount. + +Fixes: 8756a5af1819 ("libxfs: add more bounds checking to sb sanity checks") +Signed-off-by: Long Li +Reviewed-by: Darrick J. Wong +Signed-off-by: Darrick J. Wong +Signed-off-by: Carlos Maiolino +Signed-off-by: Pavel Reichl +--- + libxfs/xfs_sb.c | 4 +++- + 1 file changed, 3 insertions(+), 1 deletion(-) + +diff --git a/libxfs/xfs_sb.c b/libxfs/xfs_sb.c +index fc33dc4a..d05f0e6e 100644 +--- a/libxfs/xfs_sb.c ++++ b/libxfs/xfs_sb.c +@@ -970,7 +970,9 @@ xfs_log_sb( + */ + if (xfs_has_lazysbcount(mp)) { + mp->m_sb.sb_icount = percpu_counter_sum(&mp->m_icount); +- mp->m_sb.sb_ifree = percpu_counter_sum(&mp->m_ifree); ++ mp->m_sb.sb_ifree = min_t(uint64_t, ++ percpu_counter_sum(&mp->m_ifree), ++ mp->m_sb.sb_icount); + mp->m_sb.sb_fdblocks = percpu_counter_sum(&mp->m_fdblocks); + } + +-- +2.40.0 + diff --git a/xfsprogs-5.19.0-xfs-get-rid-of-assert-from-xfs_btree_islastblock.patch b/xfsprogs-5.19.0-xfs-get-rid-of-assert-from-xfs_btree_islastblock.patch new file mode 100644 index 0000000..1c3e4c4 --- /dev/null +++ b/xfsprogs-5.19.0-xfs-get-rid-of-assert-from-xfs_btree_islastblock.patch @@ -0,0 +1,73 @@ +From f5ef812888a81be534466fa34df747c16bb65b7f Mon Sep 17 00:00:00 2001 +From: Guo Xuenan +Date: Wed, 15 Mar 2023 15:57:35 +0100 +Subject: [PATCH] xfs: get rid of assert from xfs_btree_islastblock + +Source kernel commit: 8c25febf23963431686f04874b96321288504127 + +xfs_btree_check_block contains debugging knobs.
With XFS_DEBUG enabled, +turning on the debugging knob can trigger the assert in xfs_btree_islastblock; +the test script is as follows: + +while true +do +mount $disk $mountpoint +fsstress -d $testdir -l 0 -n 10000 -p 4 >/dev/null +echo 1 > /sys/fs/xfs/sda/errortag/btree_chk_sblk +sleep 10 +umount $mountpoint +done + +Kick off fsstress and only *then* turn on the debugging knob. If it +happens that the knob gets turned on after the cntbt lookup succeeds +but before the call to xfs_btree_islastblock, then we *can* end up in +the situation where a previously checked btree block suddenly starts +returning EFSCORRUPTED from xfs_btree_check_block. Kaboom. + +Darrick gave a very detailed explanation as follows: +Looking back at commit 27d9ee577dcce, I think the point of all this was +to make sure that the cursor has actually performed a lookup, and that +the btree block at whatever level we're asking about is ok. + +If the caller hasn't ever done a lookup, the bc_levels array will be +empty, so cur->bc_levels[level].bp pointer will be NULL. The call to +xfs_btree_get_block will crash anyway, so the "ASSERT(block);" part is +pointless. + +If the caller did a lookup but the lookup failed due to block +corruption, the corresponding cur->bc_levels[level].bp pointer will also +be NULL, and we'll still crash. The "ASSERT(xfs_btree_check_block);" +logic is also unnecessary. + +If the cursor level points to an inode root, the block buffer will be +incore, so it had better always be consistent. + +If the caller ignores a failed lookup after a successful one and calls +this function, the cursor state is garbage and the assert wouldn't have +tripped anyway. So get rid of the assert. + +Fixes: 27d9ee577dcc ("xfs: actually check xfs_btree_check_block return in xfs_btree_islastblock") +Signed-off-by: Guo Xuenan +Reviewed-by: Darrick J. Wong +Signed-off-by: Darrick J.
Wong +Signed-off-by: Carlos Maiolino +Signed-off-by: Pavel Reichl +--- + libxfs/xfs_btree.h | 1 - + 1 file changed, 1 deletion(-) + +diff --git a/libxfs/xfs_btree.h b/libxfs/xfs_btree.h +index eef27858..29c4b4cc 100644 +--- a/libxfs/xfs_btree.h ++++ b/libxfs/xfs_btree.h +@@ -556,7 +556,6 @@ xfs_btree_islastblock( + struct xfs_buf *bp; + + block = xfs_btree_get_block(cur, level, &bp); +- ASSERT(block && xfs_btree_check_block(cur, block, level, bp) == 0); + + if (cur->bc_flags & XFS_BTREE_LONG_PTRS) + return block->bb_u.l.bb_rightsib == cpu_to_be64(NULLFSBLOCK); +-- +2.40.0 + diff --git a/xfsprogs-5.19.0-xfs-hoist-refcount-record-merge-predicates.patch b/xfsprogs-5.19.0-xfs-hoist-refcount-record-merge-predicates.patch new file mode 100644 index 0000000..4fefb24 --- /dev/null +++ b/xfsprogs-5.19.0-xfs-hoist-refcount-record-merge-predicates.patch @@ -0,0 +1,187 @@ +From d1dca9f6b365e439878e550ed0c801bbfb6d347b Mon Sep 17 00:00:00 2001 +From: "Darrick J. Wong" +Date: Wed, 15 Mar 2023 15:55:35 +0100 +Subject: [PATCH] xfs: hoist refcount record merge predicates + +Source kernel commit: 9d720a5a658f5135861773f26e927449bef93d61 + +Hoist these multiline conditionals into separate static inline helpers +to improve readability and set the stage for corruption fixes that will +be introduced in the next patch. + +Signed-off-by: Darrick J. Wong +Reviewed-by: Dave Chinner +Reviewed-by: Xiao Yang +Signed-off-by: Carlos Maiolino +Signed-off-by: Pavel Reichl +--- + libxfs/xfs_refcount.c | 129 ++++++++++++++++++++++++++++++++++++------ + 1 file changed, 113 insertions(+), 16 deletions(-) + +diff --git a/libxfs/xfs_refcount.c b/libxfs/xfs_refcount.c +index 64e66861..f6167c5f 100644 +--- a/libxfs/xfs_refcount.c ++++ b/libxfs/xfs_refcount.c +@@ -814,11 +814,119 @@ out_error: + /* Is this extent valid? 
*/ + static inline bool + xfs_refc_valid( +- struct xfs_refcount_irec *rc) ++ const struct xfs_refcount_irec *rc) + { + return rc->rc_startblock != NULLAGBLOCK; + } + ++static inline bool ++xfs_refc_want_merge_center( ++ const struct xfs_refcount_irec *left, ++ const struct xfs_refcount_irec *cleft, ++ const struct xfs_refcount_irec *cright, ++ const struct xfs_refcount_irec *right, ++ bool cleft_is_cright, ++ enum xfs_refc_adjust_op adjust, ++ unsigned long long *ulenp) ++{ ++ unsigned long long ulen = left->rc_blockcount; ++ ++ /* ++ * To merge with a center record, both shoulder records must be ++ * adjacent to the record we want to adjust. This is only true if ++ * find_left and find_right made all four records valid. ++ */ ++ if (!xfs_refc_valid(left) || !xfs_refc_valid(right) || ++ !xfs_refc_valid(cleft) || !xfs_refc_valid(cright)) ++ return false; ++ ++ /* There must only be one record for the entire range. */ ++ if (!cleft_is_cright) ++ return false; ++ ++ /* The shoulder record refcounts must match the new refcount. */ ++ if (left->rc_refcount != cleft->rc_refcount + adjust) ++ return false; ++ if (right->rc_refcount != cleft->rc_refcount + adjust) ++ return false; ++ ++ /* ++ * The new record cannot exceed the max length. ulen is a ULL as the ++ * individual record block counts can be up to (u32 - 1) in length ++ * hence we need to catch u32 addition overflows here. ++ */ ++ ulen += cleft->rc_blockcount + right->rc_blockcount; ++ if (ulen >= MAXREFCEXTLEN) ++ return false; ++ ++ *ulenp = ulen; ++ return true; ++} ++ ++static inline bool ++xfs_refc_want_merge_left( ++ const struct xfs_refcount_irec *left, ++ const struct xfs_refcount_irec *cleft, ++ enum xfs_refc_adjust_op adjust) ++{ ++ unsigned long long ulen = left->rc_blockcount; ++ ++ /* ++ * For a left merge, the left shoulder record must be adjacent to the ++ * start of the range. If this is true, find_left made left and cleft ++ * contain valid contents. 
++ */ ++ if (!xfs_refc_valid(left) || !xfs_refc_valid(cleft)) ++ return false; ++ ++ /* Left shoulder record refcount must match the new refcount. */ ++ if (left->rc_refcount != cleft->rc_refcount + adjust) ++ return false; ++ ++ /* ++ * The new record cannot exceed the max length. ulen is a ULL as the ++ * individual record block counts can be up to (u32 - 1) in length ++ * hence we need to catch u32 addition overflows here. ++ */ ++ ulen += cleft->rc_blockcount; ++ if (ulen >= MAXREFCEXTLEN) ++ return false; ++ ++ return true; ++} ++ ++static inline bool ++xfs_refc_want_merge_right( ++ const struct xfs_refcount_irec *cright, ++ const struct xfs_refcount_irec *right, ++ enum xfs_refc_adjust_op adjust) ++{ ++ unsigned long long ulen = right->rc_blockcount; ++ ++ /* ++ * For a right merge, the right shoulder record must be adjacent to the ++ * end of the range. If this is true, find_right made cright and right ++ * contain valid contents. ++ */ ++ if (!xfs_refc_valid(right) || !xfs_refc_valid(cright)) ++ return false; ++ ++ /* Right shoulder record refcount must match the new refcount. */ ++ if (right->rc_refcount != cright->rc_refcount + adjust) ++ return false; ++ ++ /* ++ * The new record cannot exceed the max length. ulen is a ULL as the ++ * individual record block counts can be up to (u32 - 1) in length ++ * hence we need to catch u32 addition overflows here. ++ */ ++ ulen += cright->rc_blockcount; ++ if (ulen >= MAXREFCEXTLEN) ++ return false; ++ ++ return true; ++} ++ + /* + * Try to merge with any extents on the boundaries of the adjustment range. + */ +@@ -860,23 +968,15 @@ xfs_refcount_merge_extents( + (cleft.rc_blockcount == cright.rc_blockcount); + + /* Try to merge left, cleft, and right. cleft must == cright. 
*/ +- ulen = (unsigned long long)left.rc_blockcount + cleft.rc_blockcount + +- right.rc_blockcount; +- if (xfs_refc_valid(&left) && xfs_refc_valid(&right) && +- xfs_refc_valid(&cleft) && xfs_refc_valid(&cright) && cequal && +- left.rc_refcount == cleft.rc_refcount + adjust && +- right.rc_refcount == cleft.rc_refcount + adjust && +- ulen < MAXREFCEXTLEN) { ++ if (xfs_refc_want_merge_center(&left, &cleft, &cright, &right, cequal, ++ adjust, &ulen)) { + *shape_changed = true; + return xfs_refcount_merge_center_extents(cur, &left, &cleft, + &right, ulen, aglen); + } + + /* Try to merge left and cleft. */ +- ulen = (unsigned long long)left.rc_blockcount + cleft.rc_blockcount; +- if (xfs_refc_valid(&left) && xfs_refc_valid(&cleft) && +- left.rc_refcount == cleft.rc_refcount + adjust && +- ulen < MAXREFCEXTLEN) { ++ if (xfs_refc_want_merge_left(&left, &cleft, adjust)) { + *shape_changed = true; + error = xfs_refcount_merge_left_extent(cur, &left, &cleft, + agbno, aglen); +@@ -892,10 +992,7 @@ xfs_refcount_merge_extents( + } + + /* Try to merge cright and right. 
*/ +- ulen = (unsigned long long)right.rc_blockcount + cright.rc_blockcount; +- if (xfs_refc_valid(&right) && xfs_refc_valid(&cright) && +- right.rc_refcount == cright.rc_refcount + adjust && +- ulen < MAXREFCEXTLEN) { ++ if (xfs_refc_want_merge_right(&cright, &right, adjust)) { + *shape_changed = true; + return xfs_refcount_merge_right_extent(cur, &right, &cright, + aglen); +-- +2.40.0 + diff --git a/xfsprogs-5.19.0-xfs-removed-useless-condition-in-function-xfs_attr_n.patch b/xfsprogs-5.19.0-xfs-removed-useless-condition-in-function-xfs_attr_n.patch new file mode 100644 index 0000000..edd074e --- /dev/null +++ b/xfsprogs-5.19.0-xfs-removed-useless-condition-in-function-xfs_attr_n.patch @@ -0,0 +1,69 @@ +From 798d43495df2c8a09a73b8e868a71d8f2fd81d5e Mon Sep 17 00:00:00 2001 +From: Andrey Strachuk +Date: Wed, 24 Aug 2022 10:24:01 +0200 +Subject: [PATCH] xfs: removed useless condition in function xfs_attr_node_get + +Source kernel commit: 0f38063d7a38015a47ca1488406bf21e0effe80e + +At line 1561, variable "state" is being compared +with NULL every loop iteration. + +------------------------------------------------------------------- +1561 for (i = 0; state != NULL && i < state->path.active; i++) { +1562 xfs_trans_brelse(args->trans, state->path.blk[i].bp); +1563 state->path.blk[i].bp = NULL; +1564 } +------------------------------------------------------------------- + +However, it cannot be NULL. + +---------------------------------------- +1546 state = xfs_da_state_alloc(args); +---------------------------------------- + +xfs_da_state_alloc calls kmem_cache_zalloc. kmem_cache_zalloc is +called with __GFP_NOFAIL flag and, therefore, it cannot return NULL. 
+ +-------------------------------------------------------------------------- +struct xfs_da_state * +xfs_da_state_alloc( +struct xfs_da_args *args) +{ +struct xfs_da_state *state; + +state = kmem_cache_zalloc(xfs_da_state_cache, GFP_NOFS | __GFP_NOFAIL); +state->args = args; +state->mp = args->dp->i_mount; +return state; +} +-------------------------------------------------------------------------- + +Found by Linux Verification Center (linuxtesting.org) with SVACE. + +Signed-off-by: Andrey Strachuk + +Fixes: 4d0cdd2bb8f0 ("xfs: clean up xfs_attr_node_hasname") +Reviewed-by: Darrick J. Wong +Signed-off-by: Darrick J. Wong +Signed-off-by: Carlos Maiolino +Signed-off-by: Pavel Reichl +--- + libxfs/xfs_attr.c | 2 +- + 1 file changed, 1 insertion(+), 1 deletion(-) + +diff --git a/libxfs/xfs_attr.c b/libxfs/xfs_attr.c +index 08973934..b451fcdb 100644 +--- a/libxfs/xfs_attr.c ++++ b/libxfs/xfs_attr.c +@@ -1556,7 +1556,7 @@ xfs_attr_node_get( + * If not in a transaction, we have to release all the buffers. + */ + out_release: +- for (i = 0; state != NULL && i < state->path.active; i++) { ++ for (i = 0; i < state->path.active; i++) { + xfs_trans_brelse(args->trans, state->path.blk[i].bp); + state->path.blk[i].bp = NULL; + } +-- +2.40.0 + diff --git a/xfsprogs-5.19.0-xfs_db-fix-dir3-block-magic-check.patch b/xfsprogs-5.19.0-xfs_db-fix-dir3-block-magic-check.patch new file mode 100644 index 0000000..427adec --- /dev/null +++ b/xfsprogs-5.19.0-xfs_db-fix-dir3-block-magic-check.patch @@ -0,0 +1,34 @@ +From 7374f58bfeb38467bab6552a47a5cd6bbe3c2e2e Mon Sep 17 00:00:00 2001 +From: "Darrick J. Wong" +Date: Tue, 20 Dec 2022 16:53:34 -0800 +Subject: [PATCH] xfs_db: fix dir3 block magic check + +Fix this broken check, which (amazingly) went unnoticed until I cranked +up the warning level /and/ built the system for s390x. + +Fixes: e96864ff4d4 ("xfs_db: enable blockget for v5 filesystems") +Signed-off-by: Darrick J. 
Wong +Reviewed-by: Dave Chinner +Reviewed-by: Carlos Maiolino +Signed-off-by: Carlos Maiolino +Signed-off-by: Pavel Reichl +--- + db/check.c | 2 +- + 1 file changed, 1 insertion(+), 1 deletion(-) + +diff --git a/db/check.c b/db/check.c +index bb27ce58..964756d0 100644 +--- a/db/check.c ++++ b/db/check.c +@@ -2578,7 +2578,7 @@ process_data_dir_v2( + error++; + } + if ((be32_to_cpu(data->magic) == XFS_DIR2_BLOCK_MAGIC || +- be32_to_cpu(data->magic) == XFS_DIR2_BLOCK_MAGIC) && ++ be32_to_cpu(data->magic) == XFS_DIR3_BLOCK_MAGIC) && + stale != be32_to_cpu(btp->stale)) { + if (!sflag || v) + dbprintf(_("dir %lld block %d bad stale tail count %d\n"), +-- +2.40.0 + diff --git a/xfsprogs-5.19.0-xfs_repair-retain-superblock-buffer-to-avoid-write-h.patch b/xfsprogs-5.19.0-xfs_repair-retain-superblock-buffer-to-avoid-write-h.patch new file mode 100644 index 0000000..f1bee81 --- /dev/null +++ b/xfsprogs-5.19.0-xfs_repair-retain-superblock-buffer-to-avoid-write-h.patch @@ -0,0 +1,266 @@ +From 945c7341dedab44ae5daed83377e6366c3fb8fee Mon Sep 17 00:00:00 2001 +From: "Darrick J. 
Wong" +Date: Wed, 23 Nov 2022 09:09:33 -0800 +Subject: [PATCH] xfs_repair: retain superblock buffer to avoid write hook + deadlock + +Every now and then I experience the following deadlock in xfs_repair +when I'm running the offline repair fuzz tests: + +#0 futex_wait (private=0, expected=2, futex_word=0x55555566df70) at ../sysdeps/nptl/futex-internal.h:146 +#1 __GI___lll_lock_wait (futex=futex@entry=0x55555566df70, private=0) at ./nptl/lowlevellock.c:49 +#2 lll_mutex_lock_optimized (mutex=0x55555566df70) at ./nptl/pthread_mutex_lock.c:48 +#3 ___pthread_mutex_lock (mutex=mutex@entry=0x55555566df70) at ./nptl/pthread_mutex_lock.c:93 +#4 cache_shake (cache=cache@entry=0x55555566de60, priority=priority@entry=2, purge=purge@entry=false) at cache.c:231 +#5 cache_node_get (cache=cache@entry=0x55555566de60, key=key@entry=0x7fffe55e01b0, nodep=nodep@entry=0x7fffe55e0168) at cache.c:452 +#6 __cache_lookup (key=key@entry=0x7fffe55e01b0, flags=0, bpp=bpp@entry=0x7fffe55e0228) at rdwr.c:405 +#7 libxfs_getbuf_flags (btp=0x55555566de00, blkno=0, len=, flags=, bpp=0x7fffe55e0228) at rdwr.c:457 +#8 libxfs_buf_read_map (btp=0x55555566de00, map=map@entry=0x7fffe55e0280, nmaps=nmaps@entry=1, flags=flags@entry=0, bpp=bpp@entry=0x7fffe55e0278, ops=0x5555556233e0 ) + at rdwr.c:704 +#9 libxfs_buf_read (ops=, bpp=0x7fffe55e0278, flags=0, numblks=, blkno=0, target=) + at /storage/home/djwong/cdev/work/xfsprogs/build-x86_64/libxfs/libxfs_io.h:195 +#10 libxfs_getsb (mp=mp@entry=0x7fffffffd690) at rdwr.c:162 +#11 force_needsrepair (mp=0x7fffffffd690) at xfs_repair.c:924 +#12 repair_capture_writeback (bp=) at xfs_repair.c:1000 +#13 libxfs_bwrite (bp=0x7fffe011e530) at rdwr.c:869 +#14 cache_shake (cache=cache@entry=0x55555566de60, priority=priority@entry=2, purge=purge@entry=false) at cache.c:240 +#15 cache_node_get (cache=cache@entry=0x55555566de60, key=key@entry=0x7fffe55e0470, nodep=nodep@entry=0x7fffe55e0428) at cache.c:452 +#16 __cache_lookup (key=key@entry=0x7fffe55e0470, flags=1, 
bpp=bpp@entry=0x7fffe55e0538) at rdwr.c:405 +#17 libxfs_getbuf_flags (btp=0x55555566de00, blkno=12736, len=, flags=, bpp=0x7fffe55e0538) at rdwr.c:457 +#18 __libxfs_buf_get_map (btp=, map=map@entry=0x7fffe55e05b0, nmaps=, flags=flags@entry=1, bpp=bpp@entry=0x7fffe55e0538) at rdwr.c:501 +#19 libxfs_buf_get_map (btp=, map=map@entry=0x7fffe55e05b0, nmaps=, flags=flags@entry=1, bpp=bpp@entry=0x7fffe55e0538) at rdwr.c:525 +#20 pf_queue_io (args=args@entry=0x5555556722c0, map=map@entry=0x7fffe55e05b0, nmaps=, flag=flag@entry=11) at prefetch.c:124 +#21 pf_read_bmbt_reclist (args=0x5555556722c0, rp=, numrecs=78) at prefetch.c:220 +#22 pf_scan_lbtree (dbno=dbno@entry=1211, level=level@entry=1, isadir=isadir@entry=1, args=args@entry=0x5555556722c0, func=0x55555557f240 ) at prefetch.c:298 +#23 pf_read_btinode (isadir=1, dino=, args=0x5555556722c0) at prefetch.c:385 +#24 pf_read_inode_dirs (args=args@entry=0x5555556722c0, bp=bp@entry=0x7fffdc023790) at prefetch.c:459 +#25 pf_read_inode_dirs (bp=, args=0x5555556722c0) at prefetch.c:411 +#26 pf_batch_read (args=args@entry=0x5555556722c0, which=which@entry=PF_PRIMARY, buf=buf@entry=0x7fffd001d000) at prefetch.c:609 +#27 pf_io_worker (param=0x5555556722c0) at prefetch.c:673 +#28 start_thread (arg=) at ./nptl/pthread_create.c:442 +#29 clone3 () at ../sysdeps/unix/sysv/linux/x86_64/clone3.S:81 + +>From this stack trace, we see that xfs_repair's prefetch module is +getting some xfs_buf objects ahead of initiating a read (#19). The +buffer cache has hit its limit, so it calls cache_shake (#14) to free +some unused xfs_bufs. The buffer it finds is a dirty buffer, so it +calls libxfs_bwrite to flush it out to disk, which in turn invokes the +buffer write hook that xfs_repair set up in 3b7667cb to mark the ondisk +filesystem's superblock as NEEDSREPAIR until repair actually completes. 
+ +Unfortunately, the NEEDSREPAIR handler itself needs to grab the +superblock buffer, so it makes another call into the buffer cache (#9), +which sees that the cache is full and tries to shake it(#4). Hence we +deadlock on cm_mutex because shaking is not reentrant. + +Fix this by retaining a reference to the superblock buffer when possible +so that the writeback hook doesn't have to access the buffer cache to +set NEEDSREPAIR. + +Fixes: 3b7667cb ("xfs_repair: set NEEDSREPAIR the first time we write to a filesystem") +Signed-off-by: Darrick J. Wong +Reviewed-by: Carlos Maiolino +Signed-off-by: Carlos Maiolino +Signed-off-by: Pavel Reichl +--- + libxfs/libxfs_api_defs.h | 2 ++ + libxfs/libxfs_io.h | 1 + + libxfs/rdwr.c | 8 +++++ + repair/phase2.c | 8 +++++ + repair/protos.h | 1 + + repair/xfs_repair.c | 75 +++++++++++++++++++++++++++++++++++----- + 6 files changed, 86 insertions(+), 9 deletions(-) + +diff --git a/libxfs/libxfs_api_defs.h b/libxfs/libxfs_api_defs.h +index 2716a731..f8efcce7 100644 +--- a/libxfs/libxfs_api_defs.h ++++ b/libxfs/libxfs_api_defs.h +@@ -53,9 +53,11 @@ + #define xfs_buf_delwri_submit libxfs_buf_delwri_submit + #define xfs_buf_get libxfs_buf_get + #define xfs_buf_get_uncached libxfs_buf_get_uncached ++#define xfs_buf_lock libxfs_buf_lock + #define xfs_buf_read libxfs_buf_read + #define xfs_buf_read_uncached libxfs_buf_read_uncached + #define xfs_buf_relse libxfs_buf_relse ++#define xfs_buf_unlock libxfs_buf_unlock + #define xfs_bunmapi libxfs_bunmapi + #define xfs_bwrite libxfs_bwrite + #define xfs_calc_dquots_per_chunk libxfs_calc_dquots_per_chunk +diff --git a/libxfs/libxfs_io.h b/libxfs/libxfs_io.h +index 9c0e2704..fae86427 100644 +--- a/libxfs/libxfs_io.h ++++ b/libxfs/libxfs_io.h +@@ -226,6 +226,7 @@ xfs_buf_hold(struct xfs_buf *bp) + } + + void xfs_buf_lock(struct xfs_buf *bp); ++void xfs_buf_unlock(struct xfs_buf *bp); + + int libxfs_buf_get_uncached(struct xfs_buftarg *targ, size_t bblen, int flags, + struct xfs_buf **bpp); +diff 
--git a/libxfs/rdwr.c b/libxfs/rdwr.c +index 20e0793c..d5aad3ea 100644 +--- a/libxfs/rdwr.c ++++ b/libxfs/rdwr.c +@@ -384,6 +384,14 @@ xfs_buf_lock( + pthread_mutex_lock(&bp->b_lock); + } + ++void ++xfs_buf_unlock( ++ struct xfs_buf *bp) ++{ ++ if (use_xfs_buf_lock) ++ pthread_mutex_unlock(&bp->b_lock); ++} ++ + static int + __cache_lookup( + struct xfs_bufkey *key, +diff --git a/repair/phase2.c b/repair/phase2.c +index 56a39bb4..2ada95ae 100644 +--- a/repair/phase2.c ++++ b/repair/phase2.c +@@ -370,6 +370,14 @@ phase2( + } else + do_log(_("Phase 2 - using internal log\n")); + ++ /* ++ * Now that we've set up the buffer cache the way we want it, try to ++ * grab our own reference to the primary sb so that the hooks will not ++ * have to call out to the buffer cache. ++ */ ++ if (mp->m_buf_writeback_fn) ++ retain_primary_sb(mp); ++ + /* Zero log if applicable */ + do_log(_(" - zero log...\n")); + +diff --git a/repair/protos.h b/repair/protos.h +index 03ebae14..83e471ff 100644 +--- a/repair/protos.h ++++ b/repair/protos.h +@@ -16,6 +16,7 @@ int get_sb(xfs_sb_t *sbp, + xfs_off_t off, + int size, + xfs_agnumber_t agno); ++int retain_primary_sb(struct xfs_mount *mp); + void write_primary_sb(xfs_sb_t *sbp, + int size); + +diff --git a/repair/xfs_repair.c b/repair/xfs_repair.c +index 871b428d..ff29bea9 100644 +--- a/repair/xfs_repair.c ++++ b/repair/xfs_repair.c +@@ -749,6 +749,63 @@ check_fs_vs_host_sectsize( + } + } + ++/* ++ * If we set up a writeback function to set NEEDSREPAIR while the filesystem is ++ * dirty, there's a chance that calling libxfs_getsb could deadlock the buffer ++ * cache while trying to get the primary sb buffer if the first non-sb write to ++ * the filesystem is the result of a cache shake. Retain a reference to the ++ * primary sb buffer to avoid all that. 
++ */ ++static struct xfs_buf *primary_sb_bp; /* buffer for superblock */ ++ ++int ++retain_primary_sb( ++ struct xfs_mount *mp) ++{ ++ int error; ++ ++ error = -libxfs_buf_read(mp->m_ddev_targp, XFS_SB_DADDR, ++ XFS_FSS_TO_BB(mp, 1), 0, &primary_sb_bp, ++ &xfs_sb_buf_ops); ++ if (error) ++ return error; ++ ++ libxfs_buf_unlock(primary_sb_bp); ++ return 0; ++} ++ ++static void ++drop_primary_sb(void) ++{ ++ if (!primary_sb_bp) ++ return; ++ ++ libxfs_buf_lock(primary_sb_bp); ++ libxfs_buf_relse(primary_sb_bp); ++ primary_sb_bp = NULL; ++} ++ ++static int ++get_primary_sb( ++ struct xfs_mount *mp, ++ struct xfs_buf **bpp) ++{ ++ int error; ++ ++ *bpp = NULL; ++ ++ if (!primary_sb_bp) { ++ error = retain_primary_sb(mp); ++ if (error) ++ return error; ++ } ++ ++ libxfs_buf_lock(primary_sb_bp); ++ xfs_buf_hold(primary_sb_bp); ++ *bpp = primary_sb_bp; ++ return 0; ++} ++ + /* Clear needsrepair after a successful repair run. */ + void + clear_needsrepair( +@@ -769,15 +826,14 @@ clear_needsrepair( + do_warn( + _("Cannot clear needsrepair due to flush failure, err=%d.\n"), + error); +- return; ++ goto drop; + } + + /* Clear needsrepair from the superblock. */ +- bp = libxfs_getsb(mp); +- if (!bp || bp->b_error) { ++ error = get_primary_sb(mp, &bp); ++ if (error) { + do_warn( +- _("Cannot clear needsrepair from primary super, err=%d.\n"), +- bp ? bp->b_error : ENOMEM); ++ _("Cannot clear needsrepair from primary super, err=%d.\n"), error); + } else { + mp->m_sb.sb_features_incompat &= + ~XFS_SB_FEAT_INCOMPAT_NEEDSREPAIR; +@@ -786,6 +842,8 @@ clear_needsrepair( + } + if (bp) + libxfs_buf_relse(bp); ++drop: ++ drop_primary_sb(); + } + + static void +@@ -808,11 +866,10 @@ force_needsrepair( + xfs_sb_version_needsrepair(&mp->m_sb)) + return; + +- bp = libxfs_getsb(mp); +- if (!bp || bp->b_error) { ++ error = get_primary_sb(mp, &bp); ++ if (error) { + do_log( +- _("couldn't get superblock to set needsrepair, err=%d\n"), +- bp ? 
bp->b_error : ENOMEM); ++ _("couldn't get superblock to set needsrepair, err=%d\n"), error); + } else { + /* + * It's possible that we need to set NEEDSREPAIR before we've +-- +2.40.0 + diff --git a/xfsprogs.spec b/xfsprogs.spec index 859f182..c37ff3b 100644 --- a/xfsprogs.spec +++ b/xfsprogs.spec @@ -1,6 +1,6 @@ Summary: Utilities for managing the XFS filesystem Name: xfsprogs -Version: 5.14.2 +Version: 5.19.0 Release: 1%{?dist} License: GPL+ and LGPLv2+ URL: https://xfs.wiki.kernel.org @@ -23,8 +23,17 @@ Obsoletes: xfsprogs-qa-devel <= %{version} Conflicts: xfsdump < 3.0.1 Suggests: xfsprogs-xfs_scrub -Patch0: xfsprogs-5.12.0-default-bigtime-inobtcnt-on.patch +Patch0: xfsprogs-5.19.0-disable-old-kernel-bigtime-inobtcnt-on.patch Patch1: xfsprogs-5.12.0-example-conf.patch +Patch2: xfsprogs-5.19.0-mkfs-tolerate-tiny-filesystems.patch +Patch3: xfsprogs-5.19.0-xfs-hoist-refcount-record-merge-predicates.patch +Patch4: xfsprogs-5.19.0-xfs_db-fix-dir3-block-magic-check.patch +Patch5: xfsprogs-5.19.0-xfs-estimate-post-merge-refcounts-correctly.patch +Patch7: xfsprogs-5.19.0-xfs-fix-off-by-one-error-in-xfs_btree_space_to_heigh.patch +Patch8: xfsprogs-5.19.0-xfs-fix-sb-write-verify-for-lazysbcount.patch +Patch9: xfsprogs-5.19.0-xfs-get-rid-of-assert-from-xfs_btree_islastblock.patch +Patch10: xfsprogs-5.19.0-xfs-removed-useless-condition-in-function-xfs_attr_n.patch +Patch11: xfsprogs-5.19.0-xfs_repair-retain-superblock-buffer-to-avoid-write-h.patch %description A set of commands to use the XFS filesystem, including mkfs.xfs. 
@@ -136,6 +145,15 @@ install -m 0644 %{SOURCE3} %{buildroot}%{mkfsdir} %{_libdir}/*.so %changelog +* Tue Jan 10 2023 Pavel Reichl - 5.19.0-1 +- New upstream release +- Tolerate tiny (<300MB) filesystems +- Rename xfsprogs-5.12.0-default-bigtime-inobtcnt-on.patch to + xfsprogs-5.19.0-disable-old-kernel-bigtime-inobtcnt-on.patch + and amend it to reflect upstream changes +- Backport all upstream bug-fix patches relevant to 5.19 + Related: rhbz#2142910 + * Fri Jan 21 2022 Pavel Reichl - 5.14.2-1 - New upstream release Related: rhbz#2041525