803 lines
		
	
	
		
			21 KiB
		
	
	
	
		
			C
		
	
	
	
	
	
			
		
		
	
	
			803 lines
		
	
	
		
			21 KiB
		
	
	
	
		
			C
		
	
	
	
	
	
| // SPDX-License-Identifier: GPL-2.0-or-later
 | |
| /*
 | |
|  * Copyright (C) 2017-2023 Oracle.  All Rights Reserved.
 | |
|  * Author: Darrick J. Wong <djwong@kernel.org>
 | |
|  */
 | |
| #include "xfs.h"
 | |
| #include "xfs_fs.h"
 | |
| #include "xfs_shared.h"
 | |
| #include "xfs_format.h"
 | |
| #include "xfs_trans_resv.h"
 | |
| #include "xfs_mount.h"
 | |
| #include "xfs_inode.h"
 | |
| #include "xfs_btree.h"
 | |
| #include "scrub/scrub.h"
 | |
| #include "scrub/common.h"
 | |
| #include "scrub/btree.h"
 | |
| #include "scrub/trace.h"
 | |
| 
 | |
| /* btree scrubbing */
 | |
| 
 | |
| /*
 | |
|  * Check for btree operation errors.  See the section about handling
 | |
|  * operational errors in common.c.
 | |
|  */
 | |
| static bool
 | |
| __xchk_btree_process_error(
 | |
| 	struct xfs_scrub	*sc,
 | |
| 	struct xfs_btree_cur	*cur,
 | |
| 	int			level,
 | |
| 	int			*error,
 | |
| 	__u32			errflag,
 | |
| 	void			*ret_ip)
 | |
| {
 | |
| 	if (*error == 0)
 | |
| 		return true;
 | |
| 
 | |
| 	switch (*error) {
 | |
| 	case -EDEADLOCK:
 | |
| 	case -ECHRNG:
 | |
| 		/* Used to restart an op with deadlock avoidance. */
 | |
| 		trace_xchk_deadlock_retry(sc->ip, sc->sm, *error);
 | |
| 		break;
 | |
| 	case -EFSBADCRC:
 | |
| 	case -EFSCORRUPTED:
 | |
| 		/* Note the badness but don't abort. */
 | |
| 		sc->sm->sm_flags |= errflag;
 | |
| 		*error = 0;
 | |
| 		fallthrough;
 | |
| 	default:
 | |
| 		if (cur->bc_ops->type == XFS_BTREE_TYPE_INODE)
 | |
| 			trace_xchk_ifork_btree_op_error(sc, cur, level,
 | |
| 					*error, ret_ip);
 | |
| 		else
 | |
| 			trace_xchk_btree_op_error(sc, cur, level,
 | |
| 					*error, ret_ip);
 | |
| 		break;
 | |
| 	}
 | |
| 	return false;
 | |
| }
 | |
| 
 | |
| bool
 | |
| xchk_btree_process_error(
 | |
| 	struct xfs_scrub	*sc,
 | |
| 	struct xfs_btree_cur	*cur,
 | |
| 	int			level,
 | |
| 	int			*error)
 | |
| {
 | |
| 	return __xchk_btree_process_error(sc, cur, level, error,
 | |
| 			XFS_SCRUB_OFLAG_CORRUPT, __return_address);
 | |
| }
 | |
| 
 | |
| bool
 | |
| xchk_btree_xref_process_error(
 | |
| 	struct xfs_scrub	*sc,
 | |
| 	struct xfs_btree_cur	*cur,
 | |
| 	int			level,
 | |
| 	int			*error)
 | |
| {
 | |
| 	return __xchk_btree_process_error(sc, cur, level, error,
 | |
| 			XFS_SCRUB_OFLAG_XFAIL, __return_address);
 | |
| }
 | |
| 
 | |
| /* Record btree block corruption. */
 | |
| static void
 | |
| __xchk_btree_set_corrupt(
 | |
| 	struct xfs_scrub	*sc,
 | |
| 	struct xfs_btree_cur	*cur,
 | |
| 	int			level,
 | |
| 	__u32			errflag,
 | |
| 	void			*ret_ip)
 | |
| {
 | |
| 	sc->sm->sm_flags |= errflag;
 | |
| 
 | |
| 	if (cur->bc_ops->type == XFS_BTREE_TYPE_INODE)
 | |
| 		trace_xchk_ifork_btree_error(sc, cur, level,
 | |
| 				ret_ip);
 | |
| 	else
 | |
| 		trace_xchk_btree_error(sc, cur, level,
 | |
| 				ret_ip);
 | |
| }
 | |
| 
 | |
| void
 | |
| xchk_btree_set_corrupt(
 | |
| 	struct xfs_scrub	*sc,
 | |
| 	struct xfs_btree_cur	*cur,
 | |
| 	int			level)
 | |
| {
 | |
| 	__xchk_btree_set_corrupt(sc, cur, level, XFS_SCRUB_OFLAG_CORRUPT,
 | |
| 			__return_address);
 | |
| }
 | |
| 
 | |
| void
 | |
| xchk_btree_xref_set_corrupt(
 | |
| 	struct xfs_scrub	*sc,
 | |
| 	struct xfs_btree_cur	*cur,
 | |
| 	int			level)
 | |
| {
 | |
| 	__xchk_btree_set_corrupt(sc, cur, level, XFS_SCRUB_OFLAG_XCORRUPT,
 | |
| 			__return_address);
 | |
| }
 | |
| 
 | |
| void
 | |
| xchk_btree_set_preen(
 | |
| 	struct xfs_scrub	*sc,
 | |
| 	struct xfs_btree_cur	*cur,
 | |
| 	int			level)
 | |
| {
 | |
| 	__xchk_btree_set_corrupt(sc, cur, level, XFS_SCRUB_OFLAG_PREEN,
 | |
| 			__return_address);
 | |
| }
 | |
| 
 | |
| /*
 | |
|  * Make sure this record is in order and doesn't stray outside of the parent
 | |
|  * keys.
 | |
|  */
 | |
| STATIC void
 | |
| xchk_btree_rec(
 | |
| 	struct xchk_btree	*bs)
 | |
| {
 | |
| 	struct xfs_btree_cur	*cur = bs->cur;
 | |
| 	union xfs_btree_rec	*rec;
 | |
| 	union xfs_btree_key	key;
 | |
| 	union xfs_btree_key	hkey;
 | |
| 	union xfs_btree_key	*keyp;
 | |
| 	struct xfs_btree_block	*block;
 | |
| 	struct xfs_btree_block	*keyblock;
 | |
| 	struct xfs_buf		*bp;
 | |
| 
 | |
| 	block = xfs_btree_get_block(cur, 0, &bp);
 | |
| 	rec = xfs_btree_rec_addr(cur, cur->bc_levels[0].ptr, block);
 | |
| 
 | |
| 	trace_xchk_btree_rec(bs->sc, cur, 0);
 | |
| 
 | |
| 	/* Are all records across all record blocks in order? */
 | |
| 	if (bs->lastrec_valid &&
 | |
| 	    !cur->bc_ops->recs_inorder(cur, &bs->lastrec, rec))
 | |
| 		xchk_btree_set_corrupt(bs->sc, cur, 0);
 | |
| 	memcpy(&bs->lastrec, rec, cur->bc_ops->rec_len);
 | |
| 	bs->lastrec_valid = true;
 | |
| 
 | |
| 	if (cur->bc_nlevels == 1)
 | |
| 		return;
 | |
| 
 | |
| 	/* Is low_key(rec) at least as large as the parent low key? */
 | |
| 	cur->bc_ops->init_key_from_rec(&key, rec);
 | |
| 	keyblock = xfs_btree_get_block(cur, 1, &bp);
 | |
| 	keyp = xfs_btree_key_addr(cur, cur->bc_levels[1].ptr, keyblock);
 | |
| 	if (xfs_btree_keycmp_lt(cur, &key, keyp))
 | |
| 		xchk_btree_set_corrupt(bs->sc, cur, 1);
 | |
| 
 | |
| 	if (!(cur->bc_ops->geom_flags & XFS_BTGEO_OVERLAPPING))
 | |
| 		return;
 | |
| 
 | |
| 	/* Is high_key(rec) no larger than the parent high key? */
 | |
| 	cur->bc_ops->init_high_key_from_rec(&hkey, rec);
 | |
| 	keyp = xfs_btree_high_key_addr(cur, cur->bc_levels[1].ptr, keyblock);
 | |
| 	if (xfs_btree_keycmp_lt(cur, keyp, &hkey))
 | |
| 		xchk_btree_set_corrupt(bs->sc, cur, 1);
 | |
| }
 | |
| 
 | |
| /*
 | |
|  * Make sure this key is in order and doesn't stray outside of the parent
 | |
|  * keys.
 | |
|  */
 | |
| STATIC void
 | |
| xchk_btree_key(
 | |
| 	struct xchk_btree	*bs,
 | |
| 	int			level)
 | |
| {
 | |
| 	struct xfs_btree_cur	*cur = bs->cur;
 | |
| 	union xfs_btree_key	*key;
 | |
| 	union xfs_btree_key	*keyp;
 | |
| 	struct xfs_btree_block	*block;
 | |
| 	struct xfs_btree_block	*keyblock;
 | |
| 	struct xfs_buf		*bp;
 | |
| 
 | |
| 	block = xfs_btree_get_block(cur, level, &bp);
 | |
| 	key = xfs_btree_key_addr(cur, cur->bc_levels[level].ptr, block);
 | |
| 
 | |
| 	trace_xchk_btree_key(bs->sc, cur, level);
 | |
| 
 | |
| 	/* Are all low keys across all node blocks in order? */
 | |
| 	if (bs->lastkey[level - 1].valid &&
 | |
| 	    !cur->bc_ops->keys_inorder(cur, &bs->lastkey[level - 1].key, key))
 | |
| 		xchk_btree_set_corrupt(bs->sc, cur, level);
 | |
| 	memcpy(&bs->lastkey[level - 1].key, key, cur->bc_ops->key_len);
 | |
| 	bs->lastkey[level - 1].valid = true;
 | |
| 
 | |
| 	if (level + 1 >= cur->bc_nlevels)
 | |
| 		return;
 | |
| 
 | |
| 	/* Is this block's low key at least as large as the parent low key? */
 | |
| 	keyblock = xfs_btree_get_block(cur, level + 1, &bp);
 | |
| 	keyp = xfs_btree_key_addr(cur, cur->bc_levels[level + 1].ptr, keyblock);
 | |
| 	if (xfs_btree_keycmp_lt(cur, key, keyp))
 | |
| 		xchk_btree_set_corrupt(bs->sc, cur, level);
 | |
| 
 | |
| 	if (!(cur->bc_ops->geom_flags & XFS_BTGEO_OVERLAPPING))
 | |
| 		return;
 | |
| 
 | |
| 	/* Is this block's high key no larger than the parent high key? */
 | |
| 	key = xfs_btree_high_key_addr(cur, cur->bc_levels[level].ptr, block);
 | |
| 	keyp = xfs_btree_high_key_addr(cur, cur->bc_levels[level + 1].ptr,
 | |
| 			keyblock);
 | |
| 	if (xfs_btree_keycmp_lt(cur, keyp, key))
 | |
| 		xchk_btree_set_corrupt(bs->sc, cur, level);
 | |
| }
 | |
| 
 | |
| /*
 | |
|  * Check a btree pointer.  Returns true if it's ok to use this pointer.
 | |
|  * Callers do not need to set the corrupt flag.
 | |
|  */
 | |
| static bool
 | |
| xchk_btree_ptr_ok(
 | |
| 	struct xchk_btree	*bs,
 | |
| 	int			level,
 | |
| 	union xfs_btree_ptr	*ptr)
 | |
| {
 | |
| 	/* A btree rooted in an inode has no block pointer to the root. */
 | |
| 	if (bs->cur->bc_ops->type == XFS_BTREE_TYPE_INODE &&
 | |
| 	    level == bs->cur->bc_nlevels)
 | |
| 		return true;
 | |
| 
 | |
| 	/* Otherwise, check the pointers. */
 | |
| 	if (__xfs_btree_check_ptr(bs->cur, ptr, 0, level)) {
 | |
| 		xchk_btree_set_corrupt(bs->sc, bs->cur, level);
 | |
| 		return false;
 | |
| 	}
 | |
| 
 | |
| 	return true;
 | |
| }
 | |
| 
 | |
| /* Check that a btree block's sibling matches what we expect it. */
 | |
| STATIC int
 | |
| xchk_btree_block_check_sibling(
 | |
| 	struct xchk_btree	*bs,
 | |
| 	int			level,
 | |
| 	int			direction,
 | |
| 	union xfs_btree_ptr	*sibling)
 | |
| {
 | |
| 	struct xfs_btree_cur	*cur = bs->cur;
 | |
| 	struct xfs_btree_block	*pblock;
 | |
| 	struct xfs_buf		*pbp;
 | |
| 	struct xfs_btree_cur	*ncur = NULL;
 | |
| 	union xfs_btree_ptr	*pp;
 | |
| 	int			success;
 | |
| 	int			error;
 | |
| 
 | |
| 	error = xfs_btree_dup_cursor(cur, &ncur);
 | |
| 	if (!xchk_btree_process_error(bs->sc, cur, level + 1, &error) ||
 | |
| 	    !ncur)
 | |
| 		return error;
 | |
| 
 | |
| 	/*
 | |
| 	 * If the pointer is null, we shouldn't be able to move the upper
 | |
| 	 * level pointer anywhere.
 | |
| 	 */
 | |
| 	if (xfs_btree_ptr_is_null(cur, sibling)) {
 | |
| 		if (direction > 0)
 | |
| 			error = xfs_btree_increment(ncur, level + 1, &success);
 | |
| 		else
 | |
| 			error = xfs_btree_decrement(ncur, level + 1, &success);
 | |
| 		if (error == 0 && success)
 | |
| 			xchk_btree_set_corrupt(bs->sc, cur, level);
 | |
| 		error = 0;
 | |
| 		goto out;
 | |
| 	}
 | |
| 
 | |
| 	/* Increment upper level pointer. */
 | |
| 	if (direction > 0)
 | |
| 		error = xfs_btree_increment(ncur, level + 1, &success);
 | |
| 	else
 | |
| 		error = xfs_btree_decrement(ncur, level + 1, &success);
 | |
| 	if (!xchk_btree_process_error(bs->sc, cur, level + 1, &error))
 | |
| 		goto out;
 | |
| 	if (!success) {
 | |
| 		xchk_btree_set_corrupt(bs->sc, cur, level + 1);
 | |
| 		goto out;
 | |
| 	}
 | |
| 
 | |
| 	/* Compare upper level pointer to sibling pointer. */
 | |
| 	pblock = xfs_btree_get_block(ncur, level + 1, &pbp);
 | |
| 	pp = xfs_btree_ptr_addr(ncur, ncur->bc_levels[level + 1].ptr, pblock);
 | |
| 	if (!xchk_btree_ptr_ok(bs, level + 1, pp))
 | |
| 		goto out;
 | |
| 	if (pbp)
 | |
| 		xchk_buffer_recheck(bs->sc, pbp);
 | |
| 
 | |
| 	if (xfs_btree_diff_two_ptrs(cur, pp, sibling))
 | |
| 		xchk_btree_set_corrupt(bs->sc, cur, level);
 | |
| out:
 | |
| 	xfs_btree_del_cursor(ncur, XFS_BTREE_ERROR);
 | |
| 	return error;
 | |
| }
 | |
| 
 | |
| /* Check the siblings of a btree block. */
 | |
| STATIC int
 | |
| xchk_btree_block_check_siblings(
 | |
| 	struct xchk_btree	*bs,
 | |
| 	struct xfs_btree_block	*block)
 | |
| {
 | |
| 	struct xfs_btree_cur	*cur = bs->cur;
 | |
| 	union xfs_btree_ptr	leftsib;
 | |
| 	union xfs_btree_ptr	rightsib;
 | |
| 	int			level;
 | |
| 	int			error = 0;
 | |
| 
 | |
| 	xfs_btree_get_sibling(cur, block, &leftsib, XFS_BB_LEFTSIB);
 | |
| 	xfs_btree_get_sibling(cur, block, &rightsib, XFS_BB_RIGHTSIB);
 | |
| 	level = xfs_btree_get_level(block);
 | |
| 
 | |
| 	/* Root block should never have siblings. */
 | |
| 	if (level == cur->bc_nlevels - 1) {
 | |
| 		if (!xfs_btree_ptr_is_null(cur, &leftsib) ||
 | |
| 		    !xfs_btree_ptr_is_null(cur, &rightsib))
 | |
| 			xchk_btree_set_corrupt(bs->sc, cur, level);
 | |
| 		goto out;
 | |
| 	}
 | |
| 
 | |
| 	/*
 | |
| 	 * Does the left & right sibling pointers match the adjacent
 | |
| 	 * parent level pointers?
 | |
| 	 * (These function absorbs error codes for us.)
 | |
| 	 */
 | |
| 	error = xchk_btree_block_check_sibling(bs, level, -1, &leftsib);
 | |
| 	if (error)
 | |
| 		return error;
 | |
| 	error = xchk_btree_block_check_sibling(bs, level, 1, &rightsib);
 | |
| 	if (error)
 | |
| 		return error;
 | |
| out:
 | |
| 	return error;
 | |
| }
 | |
| 
 | |
| struct check_owner {
 | |
| 	struct list_head	list;
 | |
| 	xfs_daddr_t		daddr;
 | |
| 	int			level;
 | |
| };
 | |
| 
 | |
| /*
 | |
|  * Make sure this btree block isn't in the free list and that there's
 | |
|  * an rmap record for it.
 | |
|  */
 | |
| STATIC int
 | |
| xchk_btree_check_block_owner(
 | |
| 	struct xchk_btree	*bs,
 | |
| 	int			level,
 | |
| 	xfs_daddr_t		daddr)
 | |
| {
 | |
| 	xfs_agnumber_t		agno;
 | |
| 	xfs_agblock_t		agbno;
 | |
| 	bool			init_sa;
 | |
| 	int			error = 0;
 | |
| 
 | |
| 	if (!bs->cur)
 | |
| 		return 0;
 | |
| 
 | |
| 	agno = xfs_daddr_to_agno(bs->cur->bc_mp, daddr);
 | |
| 	agbno = xfs_daddr_to_agbno(bs->cur->bc_mp, daddr);
 | |
| 
 | |
| 	/*
 | |
| 	 * If the btree being examined is not itself a per-AG btree, initialize
 | |
| 	 * sc->sa so that we can check for the presence of an ownership record
 | |
| 	 * in the rmap btree for the AG containing the block.
 | |
| 	 */
 | |
| 	init_sa = bs->cur->bc_ops->type != XFS_BTREE_TYPE_AG;
 | |
| 	if (init_sa) {
 | |
| 		error = xchk_ag_init_existing(bs->sc, agno, &bs->sc->sa);
 | |
| 		if (!xchk_btree_xref_process_error(bs->sc, bs->cur,
 | |
| 				level, &error))
 | |
| 			goto out_free;
 | |
| 	}
 | |
| 
 | |
| 	xchk_xref_is_used_space(bs->sc, agbno, 1);
 | |
| 	/*
 | |
| 	 * The bnobt scrubber aliases bs->cur to bs->sc->sa.bno_cur, so we
 | |
| 	 * have to nullify it (to shut down further block owner checks) if
 | |
| 	 * self-xref encounters problems.
 | |
| 	 */
 | |
| 	if (!bs->sc->sa.bno_cur && xfs_btree_is_bno(bs->cur->bc_ops))
 | |
| 		bs->cur = NULL;
 | |
| 
 | |
| 	xchk_xref_is_only_owned_by(bs->sc, agbno, 1, bs->oinfo);
 | |
| 	if (!bs->sc->sa.rmap_cur && xfs_btree_is_rmap(bs->cur->bc_ops))
 | |
| 		bs->cur = NULL;
 | |
| 
 | |
| out_free:
 | |
| 	if (init_sa)
 | |
| 		xchk_ag_free(bs->sc, &bs->sc->sa);
 | |
| 
 | |
| 	return error;
 | |
| }
 | |
| 
 | |
| /* Check the owner of a btree block. */
 | |
| STATIC int
 | |
| xchk_btree_check_owner(
 | |
| 	struct xchk_btree	*bs,
 | |
| 	int			level,
 | |
| 	struct xfs_buf		*bp)
 | |
| {
 | |
| 	struct xfs_btree_cur	*cur = bs->cur;
 | |
| 
 | |
| 	/*
 | |
| 	 * In theory, xfs_btree_get_block should only give us a null buffer
 | |
| 	 * pointer for the root of a root-in-inode btree type, but we need
 | |
| 	 * to check defensively here in case the cursor state is also screwed
 | |
| 	 * up.
 | |
| 	 */
 | |
| 	if (bp == NULL) {
 | |
| 		if (cur->bc_ops->type != XFS_BTREE_TYPE_INODE)
 | |
| 			xchk_btree_set_corrupt(bs->sc, bs->cur, level);
 | |
| 		return 0;
 | |
| 	}
 | |
| 
 | |
| 	/*
 | |
| 	 * We want to cross-reference each btree block with the bnobt
 | |
| 	 * and the rmapbt.  We cannot cross-reference the bnobt or
 | |
| 	 * rmapbt while scanning the bnobt or rmapbt, respectively,
 | |
| 	 * because we cannot alter the cursor and we'd prefer not to
 | |
| 	 * duplicate cursors.  Therefore, save the buffer daddr for
 | |
| 	 * later scanning.
 | |
| 	 */
 | |
| 	if (xfs_btree_is_bno(cur->bc_ops) || xfs_btree_is_rmap(cur->bc_ops)) {
 | |
| 		struct check_owner	*co;
 | |
| 
 | |
| 		co = kmalloc(sizeof(struct check_owner), XCHK_GFP_FLAGS);
 | |
| 		if (!co)
 | |
| 			return -ENOMEM;
 | |
| 
 | |
| 		INIT_LIST_HEAD(&co->list);
 | |
| 		co->level = level;
 | |
| 		co->daddr = xfs_buf_daddr(bp);
 | |
| 		list_add_tail(&co->list, &bs->to_check);
 | |
| 		return 0;
 | |
| 	}
 | |
| 
 | |
| 	return xchk_btree_check_block_owner(bs, level, xfs_buf_daddr(bp));
 | |
| }
 | |
| 
 | |
| /* Decide if we want to check minrecs of a btree block in the inode root. */
 | |
| static inline bool
 | |
| xchk_btree_check_iroot_minrecs(
 | |
| 	struct xchk_btree	*bs)
 | |
| {
 | |
| 	/*
 | |
| 	 * xfs_bmap_add_attrfork_btree had an implementation bug wherein it
 | |
| 	 * would miscalculate the space required for the data fork bmbt root
 | |
| 	 * when adding an attr fork, and promote the iroot contents to an
 | |
| 	 * external block unnecessarily.  This went unnoticed for many years
 | |
| 	 * until scrub found filesystems in this state.  Inode rooted btrees are
 | |
| 	 * not supposed to have immediate child blocks that are small enough
 | |
| 	 * that the contents could fit in the inode root, but we can't fail
 | |
| 	 * existing filesystems, so instead we disable the check for data fork
 | |
| 	 * bmap btrees when there's an attr fork.
 | |
| 	 */
 | |
| 	if (xfs_btree_is_bmap(bs->cur->bc_ops) &&
 | |
| 	    bs->cur->bc_ino.whichfork == XFS_DATA_FORK &&
 | |
| 	    xfs_inode_has_attr_fork(bs->sc->ip))
 | |
| 		return false;
 | |
| 
 | |
| 	return true;
 | |
| }
 | |
| 
 | |
| /*
 | |
|  * Check that this btree block has at least minrecs records or is one of the
 | |
|  * special blocks that don't require that.
 | |
|  */
 | |
| STATIC void
 | |
| xchk_btree_check_minrecs(
 | |
| 	struct xchk_btree	*bs,
 | |
| 	int			level,
 | |
| 	struct xfs_btree_block	*block)
 | |
| {
 | |
| 	struct xfs_btree_cur	*cur = bs->cur;
 | |
| 	unsigned int		root_level = cur->bc_nlevels - 1;
 | |
| 	unsigned int		numrecs = be16_to_cpu(block->bb_numrecs);
 | |
| 
 | |
| 	/* More records than minrecs means the block is ok. */
 | |
| 	if (numrecs >= cur->bc_ops->get_minrecs(cur, level))
 | |
| 		return;
 | |
| 
 | |
| 	/*
 | |
| 	 * For btrees rooted in the inode, it's possible that the root block
 | |
| 	 * contents spilled into a regular ondisk block because there wasn't
 | |
| 	 * enough space in the inode root.  The number of records in that
 | |
| 	 * child block might be less than the standard minrecs, but that's ok
 | |
| 	 * provided that there's only one direct child of the root.
 | |
| 	 */
 | |
| 	if (cur->bc_ops->type == XFS_BTREE_TYPE_INODE &&
 | |
| 	    level == cur->bc_nlevels - 2) {
 | |
| 		struct xfs_btree_block	*root_block;
 | |
| 		struct xfs_buf		*root_bp;
 | |
| 		int			root_maxrecs;
 | |
| 
 | |
| 		root_block = xfs_btree_get_block(cur, root_level, &root_bp);
 | |
| 		root_maxrecs = cur->bc_ops->get_dmaxrecs(cur, root_level);
 | |
| 		if (xchk_btree_check_iroot_minrecs(bs) &&
 | |
| 		    (be16_to_cpu(root_block->bb_numrecs) != 1 ||
 | |
| 		     numrecs <= root_maxrecs))
 | |
| 			xchk_btree_set_corrupt(bs->sc, cur, level);
 | |
| 		return;
 | |
| 	}
 | |
| 
 | |
| 	/*
 | |
| 	 * Otherwise, only the root level is allowed to have fewer than minrecs
 | |
| 	 * records or keyptrs.
 | |
| 	 */
 | |
| 	if (level < root_level)
 | |
| 		xchk_btree_set_corrupt(bs->sc, cur, level);
 | |
| }
 | |
| 
 | |
| /*
 | |
|  * If this btree block has a parent, make sure that the parent's keys capture
 | |
|  * the keyspace contained in this block.
 | |
|  */
 | |
| STATIC void
 | |
| xchk_btree_block_check_keys(
 | |
| 	struct xchk_btree	*bs,
 | |
| 	int			level,
 | |
| 	struct xfs_btree_block	*block)
 | |
| {
 | |
| 	union xfs_btree_key	block_key;
 | |
| 	union xfs_btree_key	*block_high_key;
 | |
| 	union xfs_btree_key	*parent_low_key, *parent_high_key;
 | |
| 	struct xfs_btree_cur	*cur = bs->cur;
 | |
| 	struct xfs_btree_block	*parent_block;
 | |
| 	struct xfs_buf		*bp;
 | |
| 
 | |
| 	if (level == cur->bc_nlevels - 1)
 | |
| 		return;
 | |
| 
 | |
| 	xfs_btree_get_keys(cur, block, &block_key);
 | |
| 
 | |
| 	/* Make sure the low key of this block matches the parent. */
 | |
| 	parent_block = xfs_btree_get_block(cur, level + 1, &bp);
 | |
| 	parent_low_key = xfs_btree_key_addr(cur, cur->bc_levels[level + 1].ptr,
 | |
| 			parent_block);
 | |
| 	if (xfs_btree_keycmp_ne(cur, &block_key, parent_low_key)) {
 | |
| 		xchk_btree_set_corrupt(bs->sc, bs->cur, level);
 | |
| 		return;
 | |
| 	}
 | |
| 
 | |
| 	if (!(cur->bc_ops->geom_flags & XFS_BTGEO_OVERLAPPING))
 | |
| 		return;
 | |
| 
 | |
| 	/* Make sure the high key of this block matches the parent. */
 | |
| 	parent_high_key = xfs_btree_high_key_addr(cur,
 | |
| 			cur->bc_levels[level + 1].ptr, parent_block);
 | |
| 	block_high_key = xfs_btree_high_key_from_key(cur, &block_key);
 | |
| 	if (xfs_btree_keycmp_ne(cur, block_high_key, parent_high_key))
 | |
| 		xchk_btree_set_corrupt(bs->sc, bs->cur, level);
 | |
| }
 | |
| 
 | |
| /*
 | |
|  * Grab and scrub a btree block given a btree pointer.  Returns block
 | |
|  * and buffer pointers (if applicable) if they're ok to use.
 | |
|  */
 | |
| STATIC int
 | |
| xchk_btree_get_block(
 | |
| 	struct xchk_btree	*bs,
 | |
| 	int			level,
 | |
| 	union xfs_btree_ptr	*pp,
 | |
| 	struct xfs_btree_block	**pblock,
 | |
| 	struct xfs_buf		**pbp)
 | |
| {
 | |
| 	int			error;
 | |
| 
 | |
| 	*pblock = NULL;
 | |
| 	*pbp = NULL;
 | |
| 
 | |
| 	error = xfs_btree_lookup_get_block(bs->cur, level, pp, pblock);
 | |
| 	if (!xchk_btree_process_error(bs->sc, bs->cur, level, &error) ||
 | |
| 	    !*pblock)
 | |
| 		return error;
 | |
| 
 | |
| 	xfs_btree_get_block(bs->cur, level, pbp);
 | |
| 	if (__xfs_btree_check_block(bs->cur, *pblock, level, *pbp)) {
 | |
| 		xchk_btree_set_corrupt(bs->sc, bs->cur, level);
 | |
| 		return 0;
 | |
| 	}
 | |
| 	if (*pbp)
 | |
| 		xchk_buffer_recheck(bs->sc, *pbp);
 | |
| 
 | |
| 	xchk_btree_check_minrecs(bs, level, *pblock);
 | |
| 
 | |
| 	/*
 | |
| 	 * Check the block's owner; this function absorbs error codes
 | |
| 	 * for us.
 | |
| 	 */
 | |
| 	error = xchk_btree_check_owner(bs, level, *pbp);
 | |
| 	if (error)
 | |
| 		return error;
 | |
| 
 | |
| 	/*
 | |
| 	 * Check the block's siblings; this function absorbs error codes
 | |
| 	 * for us.
 | |
| 	 */
 | |
| 	error = xchk_btree_block_check_siblings(bs, *pblock);
 | |
| 	if (error)
 | |
| 		return error;
 | |
| 
 | |
| 	xchk_btree_block_check_keys(bs, level, *pblock);
 | |
| 	return 0;
 | |
| }
 | |
| 
 | |
| /*
 | |
|  * Check that the low and high keys of this block match the keys stored
 | |
|  * in the parent block.
 | |
|  */
 | |
| STATIC void
 | |
| xchk_btree_block_keys(
 | |
| 	struct xchk_btree	*bs,
 | |
| 	int			level,
 | |
| 	struct xfs_btree_block	*block)
 | |
| {
 | |
| 	union xfs_btree_key	block_keys;
 | |
| 	struct xfs_btree_cur	*cur = bs->cur;
 | |
| 	union xfs_btree_key	*high_bk;
 | |
| 	union xfs_btree_key	*parent_keys;
 | |
| 	union xfs_btree_key	*high_pk;
 | |
| 	struct xfs_btree_block	*parent_block;
 | |
| 	struct xfs_buf		*bp;
 | |
| 
 | |
| 	if (level >= cur->bc_nlevels - 1)
 | |
| 		return;
 | |
| 
 | |
| 	/* Calculate the keys for this block. */
 | |
| 	xfs_btree_get_keys(cur, block, &block_keys);
 | |
| 
 | |
| 	/* Obtain the parent's copy of the keys for this block. */
 | |
| 	parent_block = xfs_btree_get_block(cur, level + 1, &bp);
 | |
| 	parent_keys = xfs_btree_key_addr(cur, cur->bc_levels[level + 1].ptr,
 | |
| 			parent_block);
 | |
| 
 | |
| 	if (xfs_btree_keycmp_ne(cur, &block_keys, parent_keys))
 | |
| 		xchk_btree_set_corrupt(bs->sc, cur, 1);
 | |
| 
 | |
| 	if (!(cur->bc_ops->geom_flags & XFS_BTGEO_OVERLAPPING))
 | |
| 		return;
 | |
| 
 | |
| 	/* Get high keys */
 | |
| 	high_bk = xfs_btree_high_key_from_key(cur, &block_keys);
 | |
| 	high_pk = xfs_btree_high_key_addr(cur, cur->bc_levels[level + 1].ptr,
 | |
| 			parent_block);
 | |
| 
 | |
| 	if (xfs_btree_keycmp_ne(cur, high_bk, high_pk))
 | |
| 		xchk_btree_set_corrupt(bs->sc, cur, 1);
 | |
| }
 | |
| 
 | |
| /*
 | |
|  * Visit all nodes and leaves of a btree.  Check that all pointers and
 | |
|  * records are in order, that the keys reflect the records, and use a callback
 | |
|  * so that the caller can verify individual records.
 | |
|  */
 | |
| int
 | |
| xchk_btree(
 | |
| 	struct xfs_scrub		*sc,
 | |
| 	struct xfs_btree_cur		*cur,
 | |
| 	xchk_btree_rec_fn		scrub_fn,
 | |
| 	const struct xfs_owner_info	*oinfo,
 | |
| 	void				*private)
 | |
| {
 | |
| 	union xfs_btree_ptr		ptr;
 | |
| 	struct xchk_btree		*bs;
 | |
| 	union xfs_btree_ptr		*pp;
 | |
| 	union xfs_btree_rec		*recp;
 | |
| 	struct xfs_btree_block		*block;
 | |
| 	struct xfs_buf			*bp;
 | |
| 	struct check_owner		*co;
 | |
| 	struct check_owner		*n;
 | |
| 	size_t				cur_sz;
 | |
| 	int				level;
 | |
| 	int				error = 0;
 | |
| 
 | |
| 	/*
 | |
| 	 * Allocate the btree scrub context from the heap, because this
 | |
| 	 * structure can get rather large.  Don't let a caller feed us a
 | |
| 	 * totally absurd size.
 | |
| 	 */
 | |
| 	cur_sz = xchk_btree_sizeof(cur->bc_nlevels);
 | |
| 	if (cur_sz > PAGE_SIZE) {
 | |
| 		xchk_btree_set_corrupt(sc, cur, 0);
 | |
| 		return 0;
 | |
| 	}
 | |
| 	bs = kzalloc(cur_sz, XCHK_GFP_FLAGS);
 | |
| 	if (!bs)
 | |
| 		return -ENOMEM;
 | |
| 	bs->cur = cur;
 | |
| 	bs->scrub_rec = scrub_fn;
 | |
| 	bs->oinfo = oinfo;
 | |
| 	bs->private = private;
 | |
| 	bs->sc = sc;
 | |
| 
 | |
| 	/* Initialize scrub state */
 | |
| 	INIT_LIST_HEAD(&bs->to_check);
 | |
| 
 | |
| 	/*
 | |
| 	 * Load the root of the btree.  The helper function absorbs
 | |
| 	 * error codes for us.
 | |
| 	 */
 | |
| 	level = cur->bc_nlevels - 1;
 | |
| 	xfs_btree_init_ptr_from_cur(cur, &ptr);
 | |
| 	if (!xchk_btree_ptr_ok(bs, cur->bc_nlevels, &ptr))
 | |
| 		goto out;
 | |
| 	error = xchk_btree_get_block(bs, level, &ptr, &block, &bp);
 | |
| 	if (error || !block)
 | |
| 		goto out;
 | |
| 
 | |
| 	cur->bc_levels[level].ptr = 1;
 | |
| 
 | |
| 	while (level < cur->bc_nlevels) {
 | |
| 		block = xfs_btree_get_block(cur, level, &bp);
 | |
| 
 | |
| 		if (level == 0) {
 | |
| 			/* End of leaf, pop back towards the root. */
 | |
| 			if (cur->bc_levels[level].ptr >
 | |
| 			    be16_to_cpu(block->bb_numrecs)) {
 | |
| 				xchk_btree_block_keys(bs, level, block);
 | |
| 				if (level < cur->bc_nlevels - 1)
 | |
| 					cur->bc_levels[level + 1].ptr++;
 | |
| 				level++;
 | |
| 				continue;
 | |
| 			}
 | |
| 
 | |
| 			/* Records in order for scrub? */
 | |
| 			xchk_btree_rec(bs);
 | |
| 
 | |
| 			/* Call out to the record checker. */
 | |
| 			recp = xfs_btree_rec_addr(cur, cur->bc_levels[0].ptr,
 | |
| 					block);
 | |
| 			error = bs->scrub_rec(bs, recp);
 | |
| 			if (error)
 | |
| 				break;
 | |
| 			if (xchk_should_terminate(sc, &error) ||
 | |
| 			    (sc->sm->sm_flags & XFS_SCRUB_OFLAG_CORRUPT))
 | |
| 				break;
 | |
| 
 | |
| 			cur->bc_levels[level].ptr++;
 | |
| 			continue;
 | |
| 		}
 | |
| 
 | |
| 		/* End of node, pop back towards the root. */
 | |
| 		if (cur->bc_levels[level].ptr >
 | |
| 					be16_to_cpu(block->bb_numrecs)) {
 | |
| 			xchk_btree_block_keys(bs, level, block);
 | |
| 			if (level < cur->bc_nlevels - 1)
 | |
| 				cur->bc_levels[level + 1].ptr++;
 | |
| 			level++;
 | |
| 			continue;
 | |
| 		}
 | |
| 
 | |
| 		/* Keys in order for scrub? */
 | |
| 		xchk_btree_key(bs, level);
 | |
| 
 | |
| 		/* Drill another level deeper. */
 | |
| 		pp = xfs_btree_ptr_addr(cur, cur->bc_levels[level].ptr, block);
 | |
| 		if (!xchk_btree_ptr_ok(bs, level, pp)) {
 | |
| 			cur->bc_levels[level].ptr++;
 | |
| 			continue;
 | |
| 		}
 | |
| 		level--;
 | |
| 		error = xchk_btree_get_block(bs, level, pp, &block, &bp);
 | |
| 		if (error || !block)
 | |
| 			goto out;
 | |
| 
 | |
| 		cur->bc_levels[level].ptr = 1;
 | |
| 	}
 | |
| 
 | |
| out:
 | |
| 	/* Process deferred owner checks on btree blocks. */
 | |
| 	list_for_each_entry_safe(co, n, &bs->to_check, list) {
 | |
| 		if (!error && bs->cur)
 | |
| 			error = xchk_btree_check_block_owner(bs, co->level,
 | |
| 					co->daddr);
 | |
| 		list_del(&co->list);
 | |
| 		kfree(co);
 | |
| 	}
 | |
| 	kfree(bs);
 | |
| 
 | |
| 	return error;
 | |
| }
 |