1902 lines
		
	
	
		
			48 KiB
		
	
	
	
		
			C
		
	
	
	
	
	
			
		
		
	
	
			1902 lines
		
	
	
		
			48 KiB
		
	
	
	
		
			C
		
	
	
	
	
	
// SPDX-License-Identifier: GPL-2.0-or-later
 | 
						|
/*
 | 
						|
 * Copyright (C) 2018-2023 Oracle.  All Rights Reserved.
 | 
						|
 * Author: Darrick J. Wong <djwong@kernel.org>
 | 
						|
 */
 | 
						|
#include "xfs.h"
 | 
						|
#include "xfs_fs.h"
 | 
						|
#include "xfs_shared.h"
 | 
						|
#include "xfs_format.h"
 | 
						|
#include "xfs_trans_resv.h"
 | 
						|
#include "xfs_mount.h"
 | 
						|
#include "xfs_defer.h"
 | 
						|
#include "xfs_btree.h"
 | 
						|
#include "xfs_bit.h"
 | 
						|
#include "xfs_log_format.h"
 | 
						|
#include "xfs_trans.h"
 | 
						|
#include "xfs_sb.h"
 | 
						|
#include "xfs_inode.h"
 | 
						|
#include "xfs_icache.h"
 | 
						|
#include "xfs_inode_buf.h"
 | 
						|
#include "xfs_inode_fork.h"
 | 
						|
#include "xfs_ialloc.h"
 | 
						|
#include "xfs_da_format.h"
 | 
						|
#include "xfs_reflink.h"
 | 
						|
#include "xfs_alloc.h"
 | 
						|
#include "xfs_rmap.h"
 | 
						|
#include "xfs_rmap_btree.h"
 | 
						|
#include "xfs_bmap.h"
 | 
						|
#include "xfs_bmap_btree.h"
 | 
						|
#include "xfs_bmap_util.h"
 | 
						|
#include "xfs_dir2.h"
 | 
						|
#include "xfs_dir2_priv.h"
 | 
						|
#include "xfs_quota_defs.h"
 | 
						|
#include "xfs_quota.h"
 | 
						|
#include "xfs_ag.h"
 | 
						|
#include "xfs_rtbitmap.h"
 | 
						|
#include "xfs_attr_leaf.h"
 | 
						|
#include "xfs_log_priv.h"
 | 
						|
#include "xfs_health.h"
 | 
						|
#include "xfs_symlink_remote.h"
 | 
						|
#include "scrub/xfs_scrub.h"
 | 
						|
#include "scrub/scrub.h"
 | 
						|
#include "scrub/common.h"
 | 
						|
#include "scrub/btree.h"
 | 
						|
#include "scrub/trace.h"
 | 
						|
#include "scrub/repair.h"
 | 
						|
#include "scrub/iscan.h"
 | 
						|
#include "scrub/readdir.h"
 | 
						|
#include "scrub/tempfile.h"
 | 
						|
 | 
						|
/*
 | 
						|
 * Inode Record Repair
 | 
						|
 * ===================
 | 
						|
 *
 | 
						|
 * Roughly speaking, inode problems can be classified based on whether or not
 | 
						|
 * they trip the dinode verifiers.  If those trip, then we won't be able to
 | 
						|
 * xfs_iget ourselves the inode.
 | 
						|
 *
 | 
						|
 * Therefore, the xrep_dinode_* functions fix anything that will cause the
 * inode buffer verifier or the dinode verifier to fail.  The xrep_inode_*
 * functions fix things on live incore inodes.  The inode repair functions
 * make decisions with security and usability implications when reviving a
 * file:
 | 
						|
 *
 | 
						|
 * - Files with zero di_mode or a garbage di_mode are converted to regular file
 | 
						|
 *   that only root can read.  This file may not actually contain user data,
 | 
						|
 *   if the file was not previously a regular file.  Setuid and setgid bits
 | 
						|
 *   are cleared.
 | 
						|
 *
 | 
						|
 * - Zero-size directories can be truncated to look empty.  It is necessary to
 | 
						|
 *   run the bmapbtd and directory repair functions to fully rebuild the
 | 
						|
 *   directory.
 | 
						|
 *
 | 
						|
 * - Zero-size symbolic link targets can be truncated to '?'.  It is necessary
 | 
						|
 *   to run the bmapbtd and symlink repair functions to salvage the symlink.
 | 
						|
 *
 | 
						|
 * - Invalid extent size hints will be removed.
 | 
						|
 *
 | 
						|
 * - Quotacheck will be scheduled if we repaired an inode that was so badly
 | 
						|
 *   damaged that the ondisk inode had to be rebuilt.
 | 
						|
 *
 | 
						|
 * - Invalid user, group, or project IDs (aka -1U) will be reset to zero.
 | 
						|
 *   Setuid and setgid bits are cleared.
 | 
						|
 *
 | 
						|
 * - Data and attr forks are reset to extents format with zero extents if the
 | 
						|
 *   fork data is inconsistent.  It is necessary to run the bmapbtd or bmapbta
 | 
						|
 *   repair functions to recover the space mapping.
 | 
						|
 *
 | 
						|
 * - ACLs will not be recovered if the attr fork is zapped or the extended
 | 
						|
 *   attribute structure itself requires salvaging.
 | 
						|
 *
 | 
						|
 * - If the attr fork is zapped, the user and group ids are reset to root and
 | 
						|
 *   the setuid and setgid bits are removed.
 | 
						|
 */
 | 
						|
 | 
						|
/*
 | 
						|
 * All the information we need to repair the ondisk inode if we can't iget the
 | 
						|
 * incore inode.  We don't allocate this buffer unless we're going to perform
 | 
						|
 * a repair to the ondisk inode cluster buffer.
 | 
						|
 */
 | 
						|
struct xrep_inode {
 | 
						|
	/* Inode mapping that we saved from the initial lookup attempt. */
 | 
						|
	struct xfs_imap		imap;
 | 
						|
 | 
						|
	struct xfs_scrub	*sc;
 | 
						|
 | 
						|
	/* Blocks in use on the data device by data extents or bmbt blocks. */
 | 
						|
	xfs_rfsblock_t		data_blocks;
 | 
						|
 | 
						|
	/* Blocks in use on the rt device. */
 | 
						|
	xfs_rfsblock_t		rt_blocks;
 | 
						|
 | 
						|
	/* Blocks in use by the attr fork. */
 | 
						|
	xfs_rfsblock_t		attr_blocks;
 | 
						|
 | 
						|
	/* Number of data device extents for the data fork. */
 | 
						|
	xfs_extnum_t		data_extents;
 | 
						|
 | 
						|
	/*
 | 
						|
	 * Number of realtime device extents for the data fork.  If
 | 
						|
	 * data_extents and rt_extents indicate that the data fork has extents
 | 
						|
	 * on both devices, we'll just back away slowly.
 | 
						|
	 */
 | 
						|
	xfs_extnum_t		rt_extents;
 | 
						|
 | 
						|
	/* Number of (data device) extents for the attr fork. */
 | 
						|
	xfs_aextnum_t		attr_extents;
 | 
						|
 | 
						|
	/* Sick state to set after zapping parts of the inode. */
 | 
						|
	unsigned int		ino_sick_mask;
 | 
						|
 | 
						|
	/* Must we remove all access from this file? */
 | 
						|
	bool			zap_acls;
 | 
						|
 | 
						|
	/* Inode scanner to see if we can find the ftype from dirents */
 | 
						|
	struct xchk_iscan	ftype_iscan;
 | 
						|
	uint8_t			alleged_ftype;
 | 
						|
};
 | 
						|
 | 
						|
/*
 | 
						|
 * Setup function for inode repair.  @imap contains the ondisk inode mapping
 | 
						|
 * information so that we can correct the ondisk inode cluster buffer if
 | 
						|
 * necessary to make iget work.
 | 
						|
 */
 | 
						|
int
 | 
						|
xrep_setup_inode(
 | 
						|
	struct xfs_scrub	*sc,
 | 
						|
	const struct xfs_imap	*imap)
 | 
						|
{
 | 
						|
	struct xrep_inode	*ri;
 | 
						|
 | 
						|
	sc->buf = kzalloc(sizeof(struct xrep_inode), XCHK_GFP_FLAGS);
 | 
						|
	if (!sc->buf)
 | 
						|
		return -ENOMEM;
 | 
						|
 | 
						|
	ri = sc->buf;
 | 
						|
	memcpy(&ri->imap, imap, sizeof(struct xfs_imap));
 | 
						|
	ri->sc = sc;
 | 
						|
	return 0;
 | 
						|
}
 | 
						|
 | 
						|
/*
 | 
						|
 * Make sure this ondisk inode can pass the inode buffer verifier.  This is
 | 
						|
 * not the same as the dinode verifier.
 | 
						|
 */
 | 
						|
STATIC void
 | 
						|
xrep_dinode_buf_core(
 | 
						|
	struct xfs_scrub	*sc,
 | 
						|
	struct xfs_buf		*bp,
 | 
						|
	unsigned int		ioffset)
 | 
						|
{
 | 
						|
	struct xfs_dinode	*dip = xfs_buf_offset(bp, ioffset);
 | 
						|
	struct xfs_trans	*tp = sc->tp;
 | 
						|
	struct xfs_mount	*mp = sc->mp;
 | 
						|
	xfs_agino_t		agino;
 | 
						|
	bool			crc_ok = false;
 | 
						|
	bool			magic_ok = false;
 | 
						|
	bool			unlinked_ok = false;
 | 
						|
 | 
						|
	agino = be32_to_cpu(dip->di_next_unlinked);
 | 
						|
 | 
						|
	if (xfs_verify_agino_or_null(bp->b_pag, agino))
 | 
						|
		unlinked_ok = true;
 | 
						|
 | 
						|
	if (dip->di_magic == cpu_to_be16(XFS_DINODE_MAGIC) &&
 | 
						|
	    xfs_dinode_good_version(mp, dip->di_version))
 | 
						|
		magic_ok = true;
 | 
						|
 | 
						|
	if (xfs_verify_cksum((char *)dip, mp->m_sb.sb_inodesize,
 | 
						|
			XFS_DINODE_CRC_OFF))
 | 
						|
		crc_ok = true;
 | 
						|
 | 
						|
	if (magic_ok && unlinked_ok && crc_ok)
 | 
						|
		return;
 | 
						|
 | 
						|
	if (!magic_ok) {
 | 
						|
		dip->di_magic = cpu_to_be16(XFS_DINODE_MAGIC);
 | 
						|
		dip->di_version = 3;
 | 
						|
	}
 | 
						|
	if (!unlinked_ok)
 | 
						|
		dip->di_next_unlinked = cpu_to_be32(NULLAGINO);
 | 
						|
	xfs_dinode_calc_crc(mp, dip);
 | 
						|
	xfs_trans_buf_set_type(tp, bp, XFS_BLFT_DINO_BUF);
 | 
						|
	xfs_trans_log_buf(tp, bp, ioffset,
 | 
						|
				  ioffset + sizeof(struct xfs_dinode) - 1);
 | 
						|
}
 | 
						|
 | 
						|
/* Make sure this inode cluster buffer can pass the inode buffer verifier. */
 | 
						|
STATIC void
 | 
						|
xrep_dinode_buf(
 | 
						|
	struct xfs_scrub	*sc,
 | 
						|
	struct xfs_buf		*bp)
 | 
						|
{
 | 
						|
	struct xfs_mount	*mp = sc->mp;
 | 
						|
	int			i;
 | 
						|
	int			ni;
 | 
						|
 | 
						|
	ni = XFS_BB_TO_FSB(mp, bp->b_length) * mp->m_sb.sb_inopblock;
 | 
						|
	for (i = 0; i < ni; i++)
 | 
						|
		xrep_dinode_buf_core(sc, bp, i << mp->m_sb.sb_inodelog);
 | 
						|
}
 | 
						|
 | 
						|
/* Reinitialize things that never change in an inode. */
 | 
						|
STATIC void
 | 
						|
xrep_dinode_header(
 | 
						|
	struct xfs_scrub	*sc,
 | 
						|
	struct xfs_dinode	*dip)
 | 
						|
{
 | 
						|
	trace_xrep_dinode_header(sc, dip);
 | 
						|
 | 
						|
	dip->di_magic = cpu_to_be16(XFS_DINODE_MAGIC);
 | 
						|
	if (!xfs_dinode_good_version(sc->mp, dip->di_version))
 | 
						|
		dip->di_version = 3;
 | 
						|
	dip->di_ino = cpu_to_be64(sc->sm->sm_ino);
 | 
						|
	uuid_copy(&dip->di_uuid, &sc->mp->m_sb.sb_meta_uuid);
 | 
						|
	dip->di_gen = cpu_to_be32(sc->sm->sm_gen);
 | 
						|
}
 | 
						|
 | 
						|
/*
 | 
						|
 * If this directory entry points to the scrub target inode, then the directory
 | 
						|
 * we're scanning is the parent of the scrub target inode.
 | 
						|
 */
 | 
						|
STATIC int
 | 
						|
xrep_dinode_findmode_dirent(
 | 
						|
	struct xfs_scrub		*sc,
 | 
						|
	struct xfs_inode		*dp,
 | 
						|
	xfs_dir2_dataptr_t		dapos,
 | 
						|
	const struct xfs_name		*name,
 | 
						|
	xfs_ino_t			ino,
 | 
						|
	void				*priv)
 | 
						|
{
 | 
						|
	struct xrep_inode		*ri = priv;
 | 
						|
	int				error = 0;
 | 
						|
 | 
						|
	if (xchk_should_terminate(ri->sc, &error))
 | 
						|
		return error;
 | 
						|
 | 
						|
	if (ino != sc->sm->sm_ino)
 | 
						|
		return 0;
 | 
						|
 | 
						|
	/* Ignore garbage directory entry names. */
 | 
						|
	if (name->len == 0 || !xfs_dir2_namecheck(name->name, name->len))
 | 
						|
		return -EFSCORRUPTED;
 | 
						|
 | 
						|
	/* Don't pick up dot or dotdot entries; we only want child dirents. */
 | 
						|
	if (xfs_dir2_samename(name, &xfs_name_dotdot) ||
 | 
						|
	    xfs_dir2_samename(name, &xfs_name_dot))
 | 
						|
		return 0;
 | 
						|
 | 
						|
	/*
 | 
						|
	 * Uhoh, more than one parent for this inode and they don't agree on
 | 
						|
	 * the file type?
 | 
						|
	 */
 | 
						|
	if (ri->alleged_ftype != XFS_DIR3_FT_UNKNOWN &&
 | 
						|
	    ri->alleged_ftype != name->type) {
 | 
						|
		trace_xrep_dinode_findmode_dirent_inval(ri->sc, dp, name->type,
 | 
						|
				ri->alleged_ftype);
 | 
						|
		return -EFSCORRUPTED;
 | 
						|
	}
 | 
						|
 | 
						|
	/* We found a potential parent; remember the ftype. */
 | 
						|
	trace_xrep_dinode_findmode_dirent(ri->sc, dp, name->type);
 | 
						|
	ri->alleged_ftype = name->type;
 | 
						|
	return 0;
 | 
						|
}
 | 
						|
 | 
						|
/* Try to lock a directory, or wait a jiffy. */
 | 
						|
static inline int
 | 
						|
xrep_dinode_ilock_nowait(
 | 
						|
	struct xfs_inode	*dp,
 | 
						|
	unsigned int		lock_mode)
 | 
						|
{
 | 
						|
	if (xfs_ilock_nowait(dp, lock_mode))
 | 
						|
		return true;
 | 
						|
 | 
						|
	schedule_timeout_killable(1);
 | 
						|
	return false;
 | 
						|
}
 | 
						|
 | 
						|
/*
 | 
						|
 * Try to lock a directory to look for ftype hints.  Since we already hold the
 | 
						|
 * AGI buffer, we cannot block waiting for the ILOCK because rename can take
 | 
						|
 * the ILOCK and then try to lock AGIs.
 | 
						|
 */
 | 
						|
STATIC int
 | 
						|
xrep_dinode_trylock_directory(
 | 
						|
	struct xrep_inode	*ri,
 | 
						|
	struct xfs_inode	*dp,
 | 
						|
	unsigned int		*lock_modep)
 | 
						|
{
 | 
						|
	unsigned long		deadline = jiffies + msecs_to_jiffies(30000);
 | 
						|
	unsigned int		lock_mode;
 | 
						|
	int			error = 0;
 | 
						|
 | 
						|
	do {
 | 
						|
		if (xchk_should_terminate(ri->sc, &error))
 | 
						|
			return error;
 | 
						|
 | 
						|
		if (xfs_need_iread_extents(&dp->i_df))
 | 
						|
			lock_mode = XFS_ILOCK_EXCL;
 | 
						|
		else
 | 
						|
			lock_mode = XFS_ILOCK_SHARED;
 | 
						|
 | 
						|
		if (xrep_dinode_ilock_nowait(dp, lock_mode)) {
 | 
						|
			*lock_modep = lock_mode;
 | 
						|
			return 0;
 | 
						|
		}
 | 
						|
	} while (!time_is_before_jiffies(deadline));
 | 
						|
	return -EBUSY;
 | 
						|
}
 | 
						|
 | 
						|
/*
 | 
						|
 * If this is a directory, walk the dirents looking for any that point to the
 | 
						|
 * scrub target inode.
 | 
						|
 */
 | 
						|
STATIC int
 | 
						|
xrep_dinode_findmode_walk_directory(
 | 
						|
	struct xrep_inode	*ri,
 | 
						|
	struct xfs_inode	*dp)
 | 
						|
{
 | 
						|
	struct xfs_scrub	*sc = ri->sc;
 | 
						|
	unsigned int		lock_mode;
 | 
						|
	int			error = 0;
 | 
						|
 | 
						|
	/* Ignore temporary repair directories. */
 | 
						|
	if (xrep_is_tempfile(dp))
 | 
						|
		return 0;
 | 
						|
 | 
						|
	/*
 | 
						|
	 * Scan the directory to see if there it contains an entry pointing to
 | 
						|
	 * the directory that we are repairing.
 | 
						|
	 */
 | 
						|
	error = xrep_dinode_trylock_directory(ri, dp, &lock_mode);
 | 
						|
	if (error)
 | 
						|
		return error;
 | 
						|
 | 
						|
	/*
 | 
						|
	 * If this directory is known to be sick, we cannot scan it reliably
 | 
						|
	 * and must abort.
 | 
						|
	 */
 | 
						|
	if (xfs_inode_has_sickness(dp, XFS_SICK_INO_CORE |
 | 
						|
				       XFS_SICK_INO_BMBTD |
 | 
						|
				       XFS_SICK_INO_DIR)) {
 | 
						|
		error = -EFSCORRUPTED;
 | 
						|
		goto out_unlock;
 | 
						|
	}
 | 
						|
 | 
						|
	/*
 | 
						|
	 * We cannot complete our parent pointer scan if a directory looks as
 | 
						|
	 * though it has been zapped by the inode record repair code.
 | 
						|
	 */
 | 
						|
	if (xchk_dir_looks_zapped(dp)) {
 | 
						|
		error = -EBUSY;
 | 
						|
		goto out_unlock;
 | 
						|
	}
 | 
						|
 | 
						|
	error = xchk_dir_walk(sc, dp, xrep_dinode_findmode_dirent, ri);
 | 
						|
	if (error)
 | 
						|
		goto out_unlock;
 | 
						|
 | 
						|
out_unlock:
 | 
						|
	xfs_iunlock(dp, lock_mode);
 | 
						|
	return error;
 | 
						|
}
 | 
						|
 | 
						|
/*
 | 
						|
 * Try to find the mode of the inode being repaired by looking for directories
 | 
						|
 * that point down to this file.
 | 
						|
 */
 | 
						|
STATIC int
 | 
						|
xrep_dinode_find_mode(
 | 
						|
	struct xrep_inode	*ri,
 | 
						|
	uint16_t		*mode)
 | 
						|
{
 | 
						|
	struct xfs_scrub	*sc = ri->sc;
 | 
						|
	struct xfs_inode	*dp;
 | 
						|
	int			error;
 | 
						|
 | 
						|
	/* No ftype means we have no other metadata to consult. */
 | 
						|
	if (!xfs_has_ftype(sc->mp)) {
 | 
						|
		*mode = S_IFREG;
 | 
						|
		return 0;
 | 
						|
	}
 | 
						|
 | 
						|
	/*
 | 
						|
	 * Scan all directories for parents that might point down to this
 | 
						|
	 * inode.  Skip the inode being repaired during the scan since it
 | 
						|
	 * cannot be its own parent.  Note that we still hold the AGI locked
 | 
						|
	 * so there's a real possibility that _iscan_iter can return EBUSY.
 | 
						|
	 */
 | 
						|
	xchk_iscan_start(sc, 5000, 100, &ri->ftype_iscan);
 | 
						|
	xchk_iscan_set_agi_trylock(&ri->ftype_iscan);
 | 
						|
	ri->ftype_iscan.skip_ino = sc->sm->sm_ino;
 | 
						|
	ri->alleged_ftype = XFS_DIR3_FT_UNKNOWN;
 | 
						|
	while ((error = xchk_iscan_iter(&ri->ftype_iscan, &dp)) == 1) {
 | 
						|
		if (S_ISDIR(VFS_I(dp)->i_mode))
 | 
						|
			error = xrep_dinode_findmode_walk_directory(ri, dp);
 | 
						|
		xchk_iscan_mark_visited(&ri->ftype_iscan, dp);
 | 
						|
		xchk_irele(sc, dp);
 | 
						|
		if (error < 0)
 | 
						|
			break;
 | 
						|
		if (xchk_should_terminate(sc, &error))
 | 
						|
			break;
 | 
						|
	}
 | 
						|
	xchk_iscan_iter_finish(&ri->ftype_iscan);
 | 
						|
	xchk_iscan_teardown(&ri->ftype_iscan);
 | 
						|
 | 
						|
	if (error == -EBUSY) {
 | 
						|
		if (ri->alleged_ftype != XFS_DIR3_FT_UNKNOWN) {
 | 
						|
			/*
 | 
						|
			 * If we got an EBUSY after finding at least one
 | 
						|
			 * dirent, that means the scan found an inode on the
 | 
						|
			 * inactivation list and could not open it.  Accept the
 | 
						|
			 * alleged ftype and install a new mode below.
 | 
						|
			 */
 | 
						|
			error = 0;
 | 
						|
		} else if (!(sc->flags & XCHK_TRY_HARDER)) {
 | 
						|
			/*
 | 
						|
			 * Otherwise, retry the operation one time to see if
 | 
						|
			 * the reason for the delay is an inode from the same
 | 
						|
			 * cluster buffer waiting on the inactivation list.
 | 
						|
			 */
 | 
						|
			error = -EDEADLOCK;
 | 
						|
		}
 | 
						|
	}
 | 
						|
	if (error)
 | 
						|
		return error;
 | 
						|
 | 
						|
	/*
 | 
						|
	 * Convert the discovered ftype into the file mode.  If all else fails,
 | 
						|
	 * return S_IFREG.
 | 
						|
	 */
 | 
						|
	switch (ri->alleged_ftype) {
 | 
						|
	case XFS_DIR3_FT_DIR:
 | 
						|
		*mode = S_IFDIR;
 | 
						|
		break;
 | 
						|
	case XFS_DIR3_FT_WHT:
 | 
						|
	case XFS_DIR3_FT_CHRDEV:
 | 
						|
		*mode = S_IFCHR;
 | 
						|
		break;
 | 
						|
	case XFS_DIR3_FT_BLKDEV:
 | 
						|
		*mode = S_IFBLK;
 | 
						|
		break;
 | 
						|
	case XFS_DIR3_FT_FIFO:
 | 
						|
		*mode = S_IFIFO;
 | 
						|
		break;
 | 
						|
	case XFS_DIR3_FT_SOCK:
 | 
						|
		*mode = S_IFSOCK;
 | 
						|
		break;
 | 
						|
	case XFS_DIR3_FT_SYMLINK:
 | 
						|
		*mode = S_IFLNK;
 | 
						|
		break;
 | 
						|
	default:
 | 
						|
		*mode = S_IFREG;
 | 
						|
		break;
 | 
						|
	}
 | 
						|
	return 0;
 | 
						|
}
 | 
						|
 | 
						|
/* Turn di_mode into /something/ recognizable.  Returns true if we succeed. */
 | 
						|
STATIC int
 | 
						|
xrep_dinode_mode(
 | 
						|
	struct xrep_inode	*ri,
 | 
						|
	struct xfs_dinode	*dip)
 | 
						|
{
 | 
						|
	struct xfs_scrub	*sc = ri->sc;
 | 
						|
	uint16_t		mode = be16_to_cpu(dip->di_mode);
 | 
						|
	int			error;
 | 
						|
 | 
						|
	trace_xrep_dinode_mode(sc, dip);
 | 
						|
 | 
						|
	if (mode == 0 || xfs_mode_to_ftype(mode) != XFS_DIR3_FT_UNKNOWN)
 | 
						|
		return 0;
 | 
						|
 | 
						|
	/* Try to fix the mode.  If we cannot, then leave everything alone. */
 | 
						|
	error = xrep_dinode_find_mode(ri, &mode);
 | 
						|
	switch (error) {
 | 
						|
	case -EINTR:
 | 
						|
	case -EBUSY:
 | 
						|
	case -EDEADLOCK:
 | 
						|
		/* temporary failure or fatal signal */
 | 
						|
		return error;
 | 
						|
	case 0:
 | 
						|
		/* found mode */
 | 
						|
		break;
 | 
						|
	default:
 | 
						|
		/* some other error, assume S_IFREG */
 | 
						|
		mode = S_IFREG;
 | 
						|
		break;
 | 
						|
	}
 | 
						|
 | 
						|
	/* bad mode, so we set it to a file that only root can read */
 | 
						|
	dip->di_mode = cpu_to_be16(mode);
 | 
						|
	dip->di_uid = 0;
 | 
						|
	dip->di_gid = 0;
 | 
						|
	ri->zap_acls = true;
 | 
						|
	return 0;
 | 
						|
}
 | 
						|
 | 
						|
/* Fix unused link count fields having nonzero values. */
 | 
						|
STATIC void
 | 
						|
xrep_dinode_nlinks(
 | 
						|
	struct xfs_dinode	*dip)
 | 
						|
{
 | 
						|
	if (dip->di_version > 1)
 | 
						|
		dip->di_onlink = 0;
 | 
						|
	else
 | 
						|
		dip->di_nlink = 0;
 | 
						|
}
 | 
						|
 | 
						|
/* Fix any conflicting flags that the verifiers complain about. */
 | 
						|
STATIC void
 | 
						|
xrep_dinode_flags(
 | 
						|
	struct xfs_scrub	*sc,
 | 
						|
	struct xfs_dinode	*dip,
 | 
						|
	bool			isrt)
 | 
						|
{
 | 
						|
	struct xfs_mount	*mp = sc->mp;
 | 
						|
	uint64_t		flags2 = be64_to_cpu(dip->di_flags2);
 | 
						|
	uint16_t		flags = be16_to_cpu(dip->di_flags);
 | 
						|
	uint16_t		mode = be16_to_cpu(dip->di_mode);
 | 
						|
 | 
						|
	trace_xrep_dinode_flags(sc, dip);
 | 
						|
 | 
						|
	if (isrt)
 | 
						|
		flags |= XFS_DIFLAG_REALTIME;
 | 
						|
	else
 | 
						|
		flags &= ~XFS_DIFLAG_REALTIME;
 | 
						|
 | 
						|
	/*
 | 
						|
	 * For regular files on a reflink filesystem, set the REFLINK flag to
 | 
						|
	 * protect shared extents.  A later stage will actually check those
 | 
						|
	 * extents and clear the flag if possible.
 | 
						|
	 */
 | 
						|
	if (xfs_has_reflink(mp) && S_ISREG(mode))
 | 
						|
		flags2 |= XFS_DIFLAG2_REFLINK;
 | 
						|
	else
 | 
						|
		flags2 &= ~(XFS_DIFLAG2_REFLINK | XFS_DIFLAG2_COWEXTSIZE);
 | 
						|
	if (flags & XFS_DIFLAG_REALTIME)
 | 
						|
		flags2 &= ~XFS_DIFLAG2_REFLINK;
 | 
						|
	if (!xfs_has_bigtime(mp))
 | 
						|
		flags2 &= ~XFS_DIFLAG2_BIGTIME;
 | 
						|
	if (!xfs_has_large_extent_counts(mp))
 | 
						|
		flags2 &= ~XFS_DIFLAG2_NREXT64;
 | 
						|
	if (flags2 & XFS_DIFLAG2_NREXT64)
 | 
						|
		dip->di_nrext64_pad = 0;
 | 
						|
	else if (dip->di_version >= 3)
 | 
						|
		dip->di_v3_pad = 0;
 | 
						|
	dip->di_flags = cpu_to_be16(flags);
 | 
						|
	dip->di_flags2 = cpu_to_be64(flags2);
 | 
						|
}
 | 
						|
 | 
						|
/*
 | 
						|
 * Blow out symlink; now it points nowhere.  We don't have to worry about
 | 
						|
 * incore state because this inode is failing the verifiers.
 | 
						|
 */
 | 
						|
STATIC void
 | 
						|
xrep_dinode_zap_symlink(
 | 
						|
	struct xrep_inode	*ri,
 | 
						|
	struct xfs_dinode	*dip)
 | 
						|
{
 | 
						|
	struct xfs_scrub	*sc = ri->sc;
 | 
						|
	char			*p;
 | 
						|
 | 
						|
	trace_xrep_dinode_zap_symlink(sc, dip);
 | 
						|
 | 
						|
	dip->di_format = XFS_DINODE_FMT_LOCAL;
 | 
						|
	dip->di_size = cpu_to_be64(1);
 | 
						|
	p = XFS_DFORK_PTR(dip, XFS_DATA_FORK);
 | 
						|
	*p = '?';
 | 
						|
	ri->ino_sick_mask |= XFS_SICK_INO_SYMLINK_ZAPPED;
 | 
						|
}
 | 
						|
 | 
						|
/*
 | 
						|
 * Blow out dir, make the parent point to the root.  In the future repair will
 | 
						|
 * reconstruct this directory for us.  Note that there's no in-core directory
 | 
						|
 * inode because the sf verifier tripped, so we don't have to worry about the
 | 
						|
 * dentry cache.
 | 
						|
 */
 | 
						|
STATIC void
 | 
						|
xrep_dinode_zap_dir(
 | 
						|
	struct xrep_inode	*ri,
 | 
						|
	struct xfs_dinode	*dip)
 | 
						|
{
 | 
						|
	struct xfs_scrub	*sc = ri->sc;
 | 
						|
	struct xfs_mount	*mp = sc->mp;
 | 
						|
	struct xfs_dir2_sf_hdr	*sfp;
 | 
						|
	int			i8count;
 | 
						|
 | 
						|
	trace_xrep_dinode_zap_dir(sc, dip);
 | 
						|
 | 
						|
	dip->di_format = XFS_DINODE_FMT_LOCAL;
 | 
						|
	i8count = mp->m_sb.sb_rootino > XFS_DIR2_MAX_SHORT_INUM;
 | 
						|
	sfp = XFS_DFORK_PTR(dip, XFS_DATA_FORK);
 | 
						|
	sfp->count = 0;
 | 
						|
	sfp->i8count = i8count;
 | 
						|
	xfs_dir2_sf_put_parent_ino(sfp, mp->m_sb.sb_rootino);
 | 
						|
	dip->di_size = cpu_to_be64(xfs_dir2_sf_hdr_size(i8count));
 | 
						|
	ri->ino_sick_mask |= XFS_SICK_INO_DIR_ZAPPED;
 | 
						|
}
 | 
						|
 | 
						|
/* Make sure we don't have a garbage file size. */
 | 
						|
STATIC void
 | 
						|
xrep_dinode_size(
 | 
						|
	struct xrep_inode	*ri,
 | 
						|
	struct xfs_dinode	*dip)
 | 
						|
{
 | 
						|
	struct xfs_scrub	*sc = ri->sc;
 | 
						|
	uint64_t		size = be64_to_cpu(dip->di_size);
 | 
						|
	uint16_t		mode = be16_to_cpu(dip->di_mode);
 | 
						|
 | 
						|
	trace_xrep_dinode_size(sc, dip);
 | 
						|
 | 
						|
	switch (mode & S_IFMT) {
 | 
						|
	case S_IFIFO:
 | 
						|
	case S_IFCHR:
 | 
						|
	case S_IFBLK:
 | 
						|
	case S_IFSOCK:
 | 
						|
		/* di_size can't be nonzero for special files */
 | 
						|
		dip->di_size = 0;
 | 
						|
		break;
 | 
						|
	case S_IFREG:
 | 
						|
		/* Regular files can't be larger than 2^63-1 bytes. */
 | 
						|
		dip->di_size = cpu_to_be64(size & ~(1ULL << 63));
 | 
						|
		break;
 | 
						|
	case S_IFLNK:
 | 
						|
		/*
 | 
						|
		 * Truncate ridiculously oversized symlinks.  If the size is
 | 
						|
		 * zero, reset it to point to the current directory.  Both of
 | 
						|
		 * these conditions trigger dinode verifier errors, so there
 | 
						|
		 * is no in-core state to reset.
 | 
						|
		 */
 | 
						|
		if (size > XFS_SYMLINK_MAXLEN)
 | 
						|
			dip->di_size = cpu_to_be64(XFS_SYMLINK_MAXLEN);
 | 
						|
		else if (size == 0)
 | 
						|
			xrep_dinode_zap_symlink(ri, dip);
 | 
						|
		break;
 | 
						|
	case S_IFDIR:
 | 
						|
		/*
 | 
						|
		 * Directories can't have a size larger than 32G.  If the size
 | 
						|
		 * is zero, reset it to an empty directory.  Both of these
 | 
						|
		 * conditions trigger dinode verifier errors, so there is no
 | 
						|
		 * in-core state to reset.
 | 
						|
		 */
 | 
						|
		if (size > XFS_DIR2_SPACE_SIZE)
 | 
						|
			dip->di_size = cpu_to_be64(XFS_DIR2_SPACE_SIZE);
 | 
						|
		else if (size == 0)
 | 
						|
			xrep_dinode_zap_dir(ri, dip);
 | 
						|
		break;
 | 
						|
	}
 | 
						|
}
 | 
						|
 | 
						|
/* Fix extent size hints. */
 | 
						|
STATIC void
 | 
						|
xrep_dinode_extsize_hints(
 | 
						|
	struct xfs_scrub	*sc,
 | 
						|
	struct xfs_dinode	*dip)
 | 
						|
{
 | 
						|
	struct xfs_mount	*mp = sc->mp;
 | 
						|
	uint64_t		flags2 = be64_to_cpu(dip->di_flags2);
 | 
						|
	uint16_t		flags = be16_to_cpu(dip->di_flags);
 | 
						|
	uint16_t		mode = be16_to_cpu(dip->di_mode);
 | 
						|
 | 
						|
	xfs_failaddr_t		fa;
 | 
						|
 | 
						|
	trace_xrep_dinode_extsize_hints(sc, dip);
 | 
						|
 | 
						|
	fa = xfs_inode_validate_extsize(mp, be32_to_cpu(dip->di_extsize),
 | 
						|
			mode, flags);
 | 
						|
	if (fa) {
 | 
						|
		dip->di_extsize = 0;
 | 
						|
		dip->di_flags &= ~cpu_to_be16(XFS_DIFLAG_EXTSIZE |
 | 
						|
					      XFS_DIFLAG_EXTSZINHERIT);
 | 
						|
	}
 | 
						|
 | 
						|
	if (dip->di_version < 3)
 | 
						|
		return;
 | 
						|
 | 
						|
	fa = xfs_inode_validate_cowextsize(mp, be32_to_cpu(dip->di_cowextsize),
 | 
						|
			mode, flags, flags2);
 | 
						|
	if (fa) {
 | 
						|
		dip->di_cowextsize = 0;
 | 
						|
		dip->di_flags2 &= ~cpu_to_be64(XFS_DIFLAG2_COWEXTSIZE);
 | 
						|
	}
 | 
						|
}
 | 
						|
 | 
						|
/* Count extents and blocks for an inode given an rmap. */
 | 
						|
STATIC int
 | 
						|
xrep_dinode_walk_rmap(
 | 
						|
	struct xfs_btree_cur		*cur,
 | 
						|
	const struct xfs_rmap_irec	*rec,
 | 
						|
	void				*priv)
 | 
						|
{
 | 
						|
	struct xrep_inode		*ri = priv;
 | 
						|
	int				error = 0;
 | 
						|
 | 
						|
	if (xchk_should_terminate(ri->sc, &error))
 | 
						|
		return error;
 | 
						|
 | 
						|
	/* We only care about this inode. */
 | 
						|
	if (rec->rm_owner != ri->sc->sm->sm_ino)
 | 
						|
		return 0;
 | 
						|
 | 
						|
	if (rec->rm_flags & XFS_RMAP_ATTR_FORK) {
 | 
						|
		ri->attr_blocks += rec->rm_blockcount;
 | 
						|
		if (!(rec->rm_flags & XFS_RMAP_BMBT_BLOCK))
 | 
						|
			ri->attr_extents++;
 | 
						|
 | 
						|
		return 0;
 | 
						|
	}
 | 
						|
 | 
						|
	ri->data_blocks += rec->rm_blockcount;
 | 
						|
	if (!(rec->rm_flags & XFS_RMAP_BMBT_BLOCK))
 | 
						|
		ri->data_extents++;
 | 
						|
 | 
						|
	return 0;
 | 
						|
}
 | 
						|
 | 
						|
/* Count extents and blocks for an inode from all AG rmap data. */
 | 
						|
STATIC int
 | 
						|
xrep_dinode_count_ag_rmaps(
 | 
						|
	struct xrep_inode	*ri,
 | 
						|
	struct xfs_perag	*pag)
 | 
						|
{
 | 
						|
	struct xfs_btree_cur	*cur;
 | 
						|
	struct xfs_buf		*agf;
 | 
						|
	int			error;
 | 
						|
 | 
						|
	error = xfs_alloc_read_agf(pag, ri->sc->tp, 0, &agf);
 | 
						|
	if (error)
 | 
						|
		return error;
 | 
						|
 | 
						|
	cur = xfs_rmapbt_init_cursor(ri->sc->mp, ri->sc->tp, agf, pag);
 | 
						|
	error = xfs_rmap_query_all(cur, xrep_dinode_walk_rmap, ri);
 | 
						|
	xfs_btree_del_cursor(cur, error);
 | 
						|
	xfs_trans_brelse(ri->sc->tp, agf);
 | 
						|
	return error;
 | 
						|
}
 | 
						|
 | 
						|
/* Count extents and blocks for a given inode from all rmap data. */
 | 
						|
STATIC int
 | 
						|
xrep_dinode_count_rmaps(
 | 
						|
	struct xrep_inode	*ri)
 | 
						|
{
 | 
						|
	struct xfs_perag	*pag;
 | 
						|
	xfs_agnumber_t		agno;
 | 
						|
	int			error;
 | 
						|
 | 
						|
	if (!xfs_has_rmapbt(ri->sc->mp) || xfs_has_realtime(ri->sc->mp))
 | 
						|
		return -EOPNOTSUPP;
 | 
						|
 | 
						|
	for_each_perag(ri->sc->mp, agno, pag) {
 | 
						|
		error = xrep_dinode_count_ag_rmaps(ri, pag);
 | 
						|
		if (error) {
 | 
						|
			xfs_perag_rele(pag);
 | 
						|
			return error;
 | 
						|
		}
 | 
						|
	}
 | 
						|
 | 
						|
	/* Can't have extents on both the rt and the data device. */
 | 
						|
	if (ri->data_extents && ri->rt_extents)
 | 
						|
		return -EFSCORRUPTED;
 | 
						|
 | 
						|
	trace_xrep_dinode_count_rmaps(ri->sc,
 | 
						|
			ri->data_blocks, ri->rt_blocks, ri->attr_blocks,
 | 
						|
			ri->data_extents, ri->rt_extents, ri->attr_extents);
 | 
						|
	return 0;
 | 
						|
}
 | 
						|
 | 
						|
/* Return true if this extents-format ifork looks like garbage. */
 | 
						|
STATIC bool
 | 
						|
xrep_dinode_bad_extents_fork(
 | 
						|
	struct xfs_scrub	*sc,
 | 
						|
	struct xfs_dinode	*dip,
 | 
						|
	unsigned int		dfork_size,
 | 
						|
	int			whichfork)
 | 
						|
{
 | 
						|
	struct xfs_bmbt_irec	new;
 | 
						|
	struct xfs_bmbt_rec	*dp;
 | 
						|
	xfs_extnum_t		nex;
 | 
						|
	bool			isrt;
 | 
						|
	unsigned int		i;
 | 
						|
 | 
						|
	nex = xfs_dfork_nextents(dip, whichfork);
 | 
						|
	if (nex > dfork_size / sizeof(struct xfs_bmbt_rec))
 | 
						|
		return true;
 | 
						|
 | 
						|
	dp = XFS_DFORK_PTR(dip, whichfork);
 | 
						|
 | 
						|
	isrt = dip->di_flags & cpu_to_be16(XFS_DIFLAG_REALTIME);
 | 
						|
	for (i = 0; i < nex; i++, dp++) {
 | 
						|
		xfs_failaddr_t	fa;
 | 
						|
 | 
						|
		xfs_bmbt_disk_get_all(dp, &new);
 | 
						|
		fa = xfs_bmap_validate_extent_raw(sc->mp, isrt, whichfork,
 | 
						|
				&new);
 | 
						|
		if (fa)
 | 
						|
			return true;
 | 
						|
	}
 | 
						|
 | 
						|
	return false;
 | 
						|
}
 | 
						|
 | 
						|
/* Return true if this btree-format ifork looks like garbage. */
 | 
						|
STATIC bool
 | 
						|
xrep_dinode_bad_bmbt_fork(
 | 
						|
	struct xfs_scrub	*sc,
 | 
						|
	struct xfs_dinode	*dip,
 | 
						|
	unsigned int		dfork_size,
 | 
						|
	int			whichfork)
 | 
						|
{
 | 
						|
	struct xfs_bmdr_block	*dfp;
 | 
						|
	xfs_extnum_t		nex;
 | 
						|
	unsigned int		i;
 | 
						|
	unsigned int		dmxr;
 | 
						|
	unsigned int		nrecs;
 | 
						|
	unsigned int		level;
 | 
						|
 | 
						|
	nex = xfs_dfork_nextents(dip, whichfork);
 | 
						|
	if (nex <= dfork_size / sizeof(struct xfs_bmbt_rec))
 | 
						|
		return true;
 | 
						|
 | 
						|
	if (dfork_size < sizeof(struct xfs_bmdr_block))
 | 
						|
		return true;
 | 
						|
 | 
						|
	dfp = XFS_DFORK_PTR(dip, whichfork);
 | 
						|
	nrecs = be16_to_cpu(dfp->bb_numrecs);
 | 
						|
	level = be16_to_cpu(dfp->bb_level);
 | 
						|
 | 
						|
	if (nrecs == 0 || xfs_bmdr_space_calc(nrecs) > dfork_size)
 | 
						|
		return true;
 | 
						|
	if (level == 0 || level >= XFS_BM_MAXLEVELS(sc->mp, whichfork))
 | 
						|
		return true;
 | 
						|
 | 
						|
	dmxr = xfs_bmdr_maxrecs(dfork_size, 0);
 | 
						|
	for (i = 1; i <= nrecs; i++) {
 | 
						|
		struct xfs_bmbt_key	*fkp;
 | 
						|
		xfs_bmbt_ptr_t		*fpp;
 | 
						|
		xfs_fileoff_t		fileoff;
 | 
						|
		xfs_fsblock_t		fsbno;
 | 
						|
 | 
						|
		fkp = xfs_bmdr_key_addr(dfp, i);
 | 
						|
		fileoff = be64_to_cpu(fkp->br_startoff);
 | 
						|
		if (!xfs_verify_fileoff(sc->mp, fileoff))
 | 
						|
			return true;
 | 
						|
 | 
						|
		fpp = xfs_bmdr_ptr_addr(dfp, i, dmxr);
 | 
						|
		fsbno = be64_to_cpu(*fpp);
 | 
						|
		if (!xfs_verify_fsbno(sc->mp, fsbno))
 | 
						|
			return true;
 | 
						|
	}
 | 
						|
 | 
						|
	return false;
 | 
						|
}
 | 
						|
 | 
						|
/*
 | 
						|
 * Check the data fork for things that will fail the ifork verifiers or the
 | 
						|
 * ifork formatters.
 | 
						|
 */
 | 
						|
STATIC bool
 | 
						|
xrep_dinode_check_dfork(
 | 
						|
	struct xfs_scrub	*sc,
 | 
						|
	struct xfs_dinode	*dip,
 | 
						|
	uint16_t		mode)
 | 
						|
{
 | 
						|
	void			*dfork_ptr;
 | 
						|
	int64_t			data_size;
 | 
						|
	unsigned int		fmt;
 | 
						|
	unsigned int		dfork_size;
 | 
						|
 | 
						|
	/*
 | 
						|
	 * Verifier functions take signed int64_t, so check for bogus negative
 | 
						|
	 * values first.
 | 
						|
	 */
 | 
						|
	data_size = be64_to_cpu(dip->di_size);
 | 
						|
	if (data_size < 0)
 | 
						|
		return true;
 | 
						|
 | 
						|
	fmt = XFS_DFORK_FORMAT(dip, XFS_DATA_FORK);
 | 
						|
	switch (mode & S_IFMT) {
 | 
						|
	case S_IFIFO:
 | 
						|
	case S_IFCHR:
 | 
						|
	case S_IFBLK:
 | 
						|
	case S_IFSOCK:
 | 
						|
		if (fmt != XFS_DINODE_FMT_DEV)
 | 
						|
			return true;
 | 
						|
		break;
 | 
						|
	case S_IFREG:
 | 
						|
		if (fmt == XFS_DINODE_FMT_LOCAL)
 | 
						|
			return true;
 | 
						|
		fallthrough;
 | 
						|
	case S_IFLNK:
 | 
						|
	case S_IFDIR:
 | 
						|
		switch (fmt) {
 | 
						|
		case XFS_DINODE_FMT_LOCAL:
 | 
						|
		case XFS_DINODE_FMT_EXTENTS:
 | 
						|
		case XFS_DINODE_FMT_BTREE:
 | 
						|
			break;
 | 
						|
		default:
 | 
						|
			return true;
 | 
						|
		}
 | 
						|
		break;
 | 
						|
	default:
 | 
						|
		return true;
 | 
						|
	}
 | 
						|
 | 
						|
	dfork_size = XFS_DFORK_SIZE(dip, sc->mp, XFS_DATA_FORK);
 | 
						|
	dfork_ptr = XFS_DFORK_PTR(dip, XFS_DATA_FORK);
 | 
						|
 | 
						|
	switch (fmt) {
 | 
						|
	case XFS_DINODE_FMT_DEV:
 | 
						|
		break;
 | 
						|
	case XFS_DINODE_FMT_LOCAL:
 | 
						|
		/* dir/symlink structure cannot be larger than the fork */
 | 
						|
		if (data_size > dfork_size)
 | 
						|
			return true;
 | 
						|
		/* directory structure must pass verification. */
 | 
						|
		if (S_ISDIR(mode) &&
 | 
						|
		    xfs_dir2_sf_verify(sc->mp, dfork_ptr, data_size) != NULL)
 | 
						|
			return true;
 | 
						|
		/* symlink structure must pass verification. */
 | 
						|
		if (S_ISLNK(mode) &&
 | 
						|
		    xfs_symlink_shortform_verify(dfork_ptr, data_size) != NULL)
 | 
						|
			return true;
 | 
						|
		break;
 | 
						|
	case XFS_DINODE_FMT_EXTENTS:
 | 
						|
		if (xrep_dinode_bad_extents_fork(sc, dip, dfork_size,
 | 
						|
				XFS_DATA_FORK))
 | 
						|
			return true;
 | 
						|
		break;
 | 
						|
	case XFS_DINODE_FMT_BTREE:
 | 
						|
		if (xrep_dinode_bad_bmbt_fork(sc, dip, dfork_size,
 | 
						|
				XFS_DATA_FORK))
 | 
						|
			return true;
 | 
						|
		break;
 | 
						|
	default:
 | 
						|
		return true;
 | 
						|
	}
 | 
						|
 | 
						|
	return false;
 | 
						|
}
 | 
						|
 | 
						|
static void
 | 
						|
xrep_dinode_set_data_nextents(
 | 
						|
	struct xfs_dinode	*dip,
 | 
						|
	xfs_extnum_t		nextents)
 | 
						|
{
 | 
						|
	if (xfs_dinode_has_large_extent_counts(dip))
 | 
						|
		dip->di_big_nextents = cpu_to_be64(nextents);
 | 
						|
	else
 | 
						|
		dip->di_nextents = cpu_to_be32(nextents);
 | 
						|
}
 | 
						|
 | 
						|
static void
 | 
						|
xrep_dinode_set_attr_nextents(
 | 
						|
	struct xfs_dinode	*dip,
 | 
						|
	xfs_extnum_t		nextents)
 | 
						|
{
 | 
						|
	if (xfs_dinode_has_large_extent_counts(dip))
 | 
						|
		dip->di_big_anextents = cpu_to_be32(nextents);
 | 
						|
	else
 | 
						|
		dip->di_anextents = cpu_to_be16(nextents);
 | 
						|
}
 | 
						|
 | 
						|
/* Reset the data fork to something sane. */
 | 
						|
STATIC void
 | 
						|
xrep_dinode_zap_dfork(
 | 
						|
	struct xrep_inode	*ri,
 | 
						|
	struct xfs_dinode	*dip,
 | 
						|
	uint16_t		mode)
 | 
						|
{
 | 
						|
	struct xfs_scrub	*sc = ri->sc;
 | 
						|
 | 
						|
	trace_xrep_dinode_zap_dfork(sc, dip);
 | 
						|
 | 
						|
	ri->ino_sick_mask |= XFS_SICK_INO_BMBTD_ZAPPED;
 | 
						|
 | 
						|
	xrep_dinode_set_data_nextents(dip, 0);
 | 
						|
	ri->data_blocks = 0;
 | 
						|
	ri->rt_blocks = 0;
 | 
						|
 | 
						|
	/* Special files always get reset to DEV */
 | 
						|
	switch (mode & S_IFMT) {
 | 
						|
	case S_IFIFO:
 | 
						|
	case S_IFCHR:
 | 
						|
	case S_IFBLK:
 | 
						|
	case S_IFSOCK:
 | 
						|
		dip->di_format = XFS_DINODE_FMT_DEV;
 | 
						|
		dip->di_size = 0;
 | 
						|
		return;
 | 
						|
	}
 | 
						|
 | 
						|
	/*
 | 
						|
	 * If we have data extents, reset to an empty map and hope the user
 | 
						|
	 * will run the bmapbtd checker next.
 | 
						|
	 */
 | 
						|
	if (ri->data_extents || ri->rt_extents || S_ISREG(mode)) {
 | 
						|
		dip->di_format = XFS_DINODE_FMT_EXTENTS;
 | 
						|
		return;
 | 
						|
	}
 | 
						|
 | 
						|
	/* Otherwise, reset the local format to the minimum. */
 | 
						|
	switch (mode & S_IFMT) {
 | 
						|
	case S_IFLNK:
 | 
						|
		xrep_dinode_zap_symlink(ri, dip);
 | 
						|
		break;
 | 
						|
	case S_IFDIR:
 | 
						|
		xrep_dinode_zap_dir(ri, dip);
 | 
						|
		break;
 | 
						|
	}
 | 
						|
}
 | 
						|
 | 
						|
/*
 | 
						|
 * Check the attr fork for things that will fail the ifork verifiers or the
 | 
						|
 * ifork formatters.
 | 
						|
 */
 | 
						|
STATIC bool
 | 
						|
xrep_dinode_check_afork(
 | 
						|
	struct xfs_scrub		*sc,
 | 
						|
	struct xfs_dinode		*dip)
 | 
						|
{
 | 
						|
	struct xfs_attr_sf_hdr		*afork_ptr;
 | 
						|
	size_t				attr_size;
 | 
						|
	unsigned int			afork_size;
 | 
						|
 | 
						|
	if (XFS_DFORK_BOFF(dip) == 0)
 | 
						|
		return dip->di_aformat != XFS_DINODE_FMT_EXTENTS ||
 | 
						|
		       xfs_dfork_attr_extents(dip) != 0;
 | 
						|
 | 
						|
	afork_size = XFS_DFORK_SIZE(dip, sc->mp, XFS_ATTR_FORK);
 | 
						|
	afork_ptr = XFS_DFORK_PTR(dip, XFS_ATTR_FORK);
 | 
						|
 | 
						|
	switch (XFS_DFORK_FORMAT(dip, XFS_ATTR_FORK)) {
 | 
						|
	case XFS_DINODE_FMT_LOCAL:
 | 
						|
		/* Fork has to be large enough to extract the xattr size. */
 | 
						|
		if (afork_size < sizeof(struct xfs_attr_sf_hdr))
 | 
						|
			return true;
 | 
						|
 | 
						|
		/* xattr structure cannot be larger than the fork */
 | 
						|
		attr_size = be16_to_cpu(afork_ptr->totsize);
 | 
						|
		if (attr_size > afork_size)
 | 
						|
			return true;
 | 
						|
 | 
						|
		/* xattr structure must pass verification. */
 | 
						|
		return xfs_attr_shortform_verify(afork_ptr, attr_size) != NULL;
 | 
						|
	case XFS_DINODE_FMT_EXTENTS:
 | 
						|
		if (xrep_dinode_bad_extents_fork(sc, dip, afork_size,
 | 
						|
					XFS_ATTR_FORK))
 | 
						|
			return true;
 | 
						|
		break;
 | 
						|
	case XFS_DINODE_FMT_BTREE:
 | 
						|
		if (xrep_dinode_bad_bmbt_fork(sc, dip, afork_size,
 | 
						|
					XFS_ATTR_FORK))
 | 
						|
			return true;
 | 
						|
		break;
 | 
						|
	default:
 | 
						|
		return true;
 | 
						|
	}
 | 
						|
 | 
						|
	return false;
 | 
						|
}
 | 
						|
 | 
						|
/*
 | 
						|
 * Reset the attr fork to empty.  Since the attr fork could have contained
 | 
						|
 * ACLs, make the file readable only by root.
 | 
						|
 */
 | 
						|
STATIC void
 | 
						|
xrep_dinode_zap_afork(
 | 
						|
	struct xrep_inode	*ri,
 | 
						|
	struct xfs_dinode	*dip,
 | 
						|
	uint16_t		mode)
 | 
						|
{
 | 
						|
	struct xfs_scrub	*sc = ri->sc;
 | 
						|
 | 
						|
	trace_xrep_dinode_zap_afork(sc, dip);
 | 
						|
 | 
						|
	ri->ino_sick_mask |= XFS_SICK_INO_BMBTA_ZAPPED;
 | 
						|
 | 
						|
	dip->di_aformat = XFS_DINODE_FMT_EXTENTS;
 | 
						|
	xrep_dinode_set_attr_nextents(dip, 0);
 | 
						|
	ri->attr_blocks = 0;
 | 
						|
 | 
						|
	/*
 | 
						|
	 * If the data fork is in btree format, removing the attr fork entirely
 | 
						|
	 * might cause verifier failures if the next level down in the bmbt
 | 
						|
	 * could now fit in the data fork area.
 | 
						|
	 */
 | 
						|
	if (dip->di_format != XFS_DINODE_FMT_BTREE)
 | 
						|
		dip->di_forkoff = 0;
 | 
						|
	dip->di_mode = cpu_to_be16(mode & ~0777);
 | 
						|
	dip->di_uid = 0;
 | 
						|
	dip->di_gid = 0;
 | 
						|
}
 | 
						|
 | 
						|
/* Make sure the fork offset is a sensible value. */
 | 
						|
STATIC void
 | 
						|
xrep_dinode_ensure_forkoff(
 | 
						|
	struct xrep_inode	*ri,
 | 
						|
	struct xfs_dinode	*dip,
 | 
						|
	uint16_t		mode)
 | 
						|
{
 | 
						|
	struct xfs_bmdr_block	*bmdr;
 | 
						|
	struct xfs_scrub	*sc = ri->sc;
 | 
						|
	xfs_extnum_t		attr_extents, data_extents;
 | 
						|
	size_t			bmdr_minsz = xfs_bmdr_space_calc(1);
 | 
						|
	unsigned int		lit_sz = XFS_LITINO(sc->mp);
 | 
						|
	unsigned int		afork_min, dfork_min;
 | 
						|
 | 
						|
	trace_xrep_dinode_ensure_forkoff(sc, dip);
 | 
						|
 | 
						|
	/*
 | 
						|
	 * Before calling this function, xrep_dinode_core ensured that both
 | 
						|
	 * forks actually fit inside their respective literal areas.  If this
 | 
						|
	 * was not the case, the fork was reset to FMT_EXTENTS with zero
 | 
						|
	 * records.  If the rmapbt scan found attr or data fork blocks, this
 | 
						|
	 * will be noted in the dinode_stats, and we must leave enough room
 | 
						|
	 * for the bmap repair code to reconstruct the mapping structure.
 | 
						|
	 *
 | 
						|
	 * First, compute the minimum space required for the attr fork.
 | 
						|
	 */
 | 
						|
	switch (dip->di_aformat) {
 | 
						|
	case XFS_DINODE_FMT_LOCAL:
 | 
						|
		/*
 | 
						|
		 * If we still have a shortform xattr structure at all, that
 | 
						|
		 * means the attr fork area was exactly large enough to fit
 | 
						|
		 * the sf structure.
 | 
						|
		 */
 | 
						|
		afork_min = XFS_DFORK_SIZE(dip, sc->mp, XFS_ATTR_FORK);
 | 
						|
		break;
 | 
						|
	case XFS_DINODE_FMT_EXTENTS:
 | 
						|
		attr_extents = xfs_dfork_attr_extents(dip);
 | 
						|
		if (attr_extents) {
 | 
						|
			/*
 | 
						|
			 * We must maintain sufficient space to hold the entire
 | 
						|
			 * extent map array in the data fork.  Note that we
 | 
						|
			 * previously zapped the fork if it had no chance of
 | 
						|
			 * fitting in the inode.
 | 
						|
			 */
 | 
						|
			afork_min = sizeof(struct xfs_bmbt_rec) * attr_extents;
 | 
						|
		} else if (ri->attr_extents > 0) {
 | 
						|
			/*
 | 
						|
			 * The attr fork thinks it has zero extents, but we
 | 
						|
			 * found some xattr extents.  We need to leave enough
 | 
						|
			 * empty space here so that the incore attr fork will
 | 
						|
			 * get created (and hence trigger the attr fork bmap
 | 
						|
			 * repairer).
 | 
						|
			 */
 | 
						|
			afork_min = bmdr_minsz;
 | 
						|
		} else {
 | 
						|
			/* No extents on disk or found in rmapbt. */
 | 
						|
			afork_min = 0;
 | 
						|
		}
 | 
						|
		break;
 | 
						|
	case XFS_DINODE_FMT_BTREE:
 | 
						|
		/* Must have space for btree header and key/pointers. */
 | 
						|
		bmdr = XFS_DFORK_PTR(dip, XFS_ATTR_FORK);
 | 
						|
		afork_min = xfs_bmap_broot_space(sc->mp, bmdr);
 | 
						|
		break;
 | 
						|
	default:
 | 
						|
		/* We should never see any other formats. */
 | 
						|
		afork_min = 0;
 | 
						|
		break;
 | 
						|
	}
 | 
						|
 | 
						|
	/* Compute the minimum space required for the data fork. */
 | 
						|
	switch (dip->di_format) {
 | 
						|
	case XFS_DINODE_FMT_DEV:
 | 
						|
		dfork_min = sizeof(__be32);
 | 
						|
		break;
 | 
						|
	case XFS_DINODE_FMT_UUID:
 | 
						|
		dfork_min = sizeof(uuid_t);
 | 
						|
		break;
 | 
						|
	case XFS_DINODE_FMT_LOCAL:
 | 
						|
		/*
 | 
						|
		 * If we still have a shortform data fork at all, that means
 | 
						|
		 * the data fork area was large enough to fit whatever was in
 | 
						|
		 * there.
 | 
						|
		 */
 | 
						|
		dfork_min = be64_to_cpu(dip->di_size);
 | 
						|
		break;
 | 
						|
	case XFS_DINODE_FMT_EXTENTS:
 | 
						|
		data_extents = xfs_dfork_data_extents(dip);
 | 
						|
		if (data_extents) {
 | 
						|
			/*
 | 
						|
			 * We must maintain sufficient space to hold the entire
 | 
						|
			 * extent map array in the data fork.  Note that we
 | 
						|
			 * previously zapped the fork if it had no chance of
 | 
						|
			 * fitting in the inode.
 | 
						|
			 */
 | 
						|
			dfork_min = sizeof(struct xfs_bmbt_rec) * data_extents;
 | 
						|
		} else if (ri->data_extents > 0 || ri->rt_extents > 0) {
 | 
						|
			/*
 | 
						|
			 * The data fork thinks it has zero extents, but we
 | 
						|
			 * found some data extents.  We need to leave enough
 | 
						|
			 * empty space here so that the data fork bmap repair
 | 
						|
			 * will recover the mappings.
 | 
						|
			 */
 | 
						|
			dfork_min = bmdr_minsz;
 | 
						|
		} else {
 | 
						|
			/* No extents on disk or found in rmapbt. */
 | 
						|
			dfork_min = 0;
 | 
						|
		}
 | 
						|
		break;
 | 
						|
	case XFS_DINODE_FMT_BTREE:
 | 
						|
		/* Must have space for btree header and key/pointers. */
 | 
						|
		bmdr = XFS_DFORK_PTR(dip, XFS_DATA_FORK);
 | 
						|
		dfork_min = xfs_bmap_broot_space(sc->mp, bmdr);
 | 
						|
		break;
 | 
						|
	default:
 | 
						|
		dfork_min = 0;
 | 
						|
		break;
 | 
						|
	}
 | 
						|
 | 
						|
	/*
 | 
						|
	 * Round all values up to the nearest 8 bytes, because that is the
 | 
						|
	 * precision of di_forkoff.
 | 
						|
	 */
 | 
						|
	afork_min = roundup(afork_min, 8);
 | 
						|
	dfork_min = roundup(dfork_min, 8);
 | 
						|
	bmdr_minsz = roundup(bmdr_minsz, 8);
 | 
						|
 | 
						|
	ASSERT(dfork_min <= lit_sz);
 | 
						|
	ASSERT(afork_min <= lit_sz);
 | 
						|
 | 
						|
	/*
 | 
						|
	 * If the data fork was zapped and we don't have enough space for the
 | 
						|
	 * recovery fork, move the attr fork up.
 | 
						|
	 */
 | 
						|
	if (dip->di_format == XFS_DINODE_FMT_EXTENTS &&
 | 
						|
	    xfs_dfork_data_extents(dip) == 0 &&
 | 
						|
	    (ri->data_extents > 0 || ri->rt_extents > 0) &&
 | 
						|
	    bmdr_minsz > XFS_DFORK_DSIZE(dip, sc->mp)) {
 | 
						|
		if (bmdr_minsz + afork_min > lit_sz) {
 | 
						|
			/*
 | 
						|
			 * The attr for and the stub fork we need to recover
 | 
						|
			 * the data fork won't both fit.  Zap the attr fork.
 | 
						|
			 */
 | 
						|
			xrep_dinode_zap_afork(ri, dip, mode);
 | 
						|
			afork_min = bmdr_minsz;
 | 
						|
		} else {
 | 
						|
			void	*before, *after;
 | 
						|
 | 
						|
			/* Otherwise, just slide the attr fork up. */
 | 
						|
			before = XFS_DFORK_APTR(dip);
 | 
						|
			dip->di_forkoff = bmdr_minsz >> 3;
 | 
						|
			after = XFS_DFORK_APTR(dip);
 | 
						|
			memmove(after, before, XFS_DFORK_ASIZE(dip, sc->mp));
 | 
						|
		}
 | 
						|
	}
 | 
						|
 | 
						|
	/*
 | 
						|
	 * If the attr fork was zapped and we don't have enough space for the
 | 
						|
	 * recovery fork, move the attr fork down.
 | 
						|
	 */
 | 
						|
	if (dip->di_aformat == XFS_DINODE_FMT_EXTENTS &&
 | 
						|
	    xfs_dfork_attr_extents(dip) == 0 &&
 | 
						|
	    ri->attr_extents > 0 &&
 | 
						|
	    bmdr_minsz > XFS_DFORK_ASIZE(dip, sc->mp)) {
 | 
						|
		if (dip->di_format == XFS_DINODE_FMT_BTREE) {
 | 
						|
			/*
 | 
						|
			 * If the data fork is in btree format then we can't
 | 
						|
			 * adjust forkoff because that runs the risk of
 | 
						|
			 * violating the extents/btree format transition rules.
 | 
						|
			 */
 | 
						|
		} else if (bmdr_minsz + dfork_min > lit_sz) {
 | 
						|
			/*
 | 
						|
			 * If we can't move the attr fork, too bad, we lose the
 | 
						|
			 * attr fork and leak its blocks.
 | 
						|
			 */
 | 
						|
			xrep_dinode_zap_afork(ri, dip, mode);
 | 
						|
		} else {
 | 
						|
			/*
 | 
						|
			 * Otherwise, just slide the attr fork down.  The attr
 | 
						|
			 * fork is empty, so we don't have any old contents to
 | 
						|
			 * move here.
 | 
						|
			 */
 | 
						|
			dip->di_forkoff = (lit_sz - bmdr_minsz) >> 3;
 | 
						|
		}
 | 
						|
	}
 | 
						|
}
 | 
						|
 | 
						|
/*
 | 
						|
 * Zap the data/attr forks if we spot anything that isn't going to pass the
 | 
						|
 * ifork verifiers or the ifork formatters, because we need to get the inode
 | 
						|
 * into good enough shape that the higher level repair functions can run.
 | 
						|
 */
 | 
						|
STATIC void
 | 
						|
xrep_dinode_zap_forks(
 | 
						|
	struct xrep_inode	*ri,
 | 
						|
	struct xfs_dinode	*dip)
 | 
						|
{
 | 
						|
	struct xfs_scrub	*sc = ri->sc;
 | 
						|
	xfs_extnum_t		data_extents;
 | 
						|
	xfs_extnum_t		attr_extents;
 | 
						|
	xfs_filblks_t		nblocks;
 | 
						|
	uint16_t		mode;
 | 
						|
	bool			zap_datafork = false;
 | 
						|
	bool			zap_attrfork = ri->zap_acls;
 | 
						|
 | 
						|
	trace_xrep_dinode_zap_forks(sc, dip);
 | 
						|
 | 
						|
	mode = be16_to_cpu(dip->di_mode);
 | 
						|
 | 
						|
	data_extents = xfs_dfork_data_extents(dip);
 | 
						|
	attr_extents = xfs_dfork_attr_extents(dip);
 | 
						|
	nblocks = be64_to_cpu(dip->di_nblocks);
 | 
						|
 | 
						|
	/* Inode counters don't make sense? */
 | 
						|
	if (data_extents > nblocks)
 | 
						|
		zap_datafork = true;
 | 
						|
	if (attr_extents > nblocks)
 | 
						|
		zap_attrfork = true;
 | 
						|
	if (data_extents + attr_extents > nblocks)
 | 
						|
		zap_datafork = zap_attrfork = true;
 | 
						|
 | 
						|
	if (!zap_datafork)
 | 
						|
		zap_datafork = xrep_dinode_check_dfork(sc, dip, mode);
 | 
						|
	if (!zap_attrfork)
 | 
						|
		zap_attrfork = xrep_dinode_check_afork(sc, dip);
 | 
						|
 | 
						|
	/* Zap whatever's bad. */
 | 
						|
	if (zap_attrfork)
 | 
						|
		xrep_dinode_zap_afork(ri, dip, mode);
 | 
						|
	if (zap_datafork)
 | 
						|
		xrep_dinode_zap_dfork(ri, dip, mode);
 | 
						|
	xrep_dinode_ensure_forkoff(ri, dip, mode);
 | 
						|
 | 
						|
	/*
 | 
						|
	 * Zero di_nblocks if we don't have any extents at all to satisfy the
 | 
						|
	 * buffer verifier.
 | 
						|
	 */
 | 
						|
	data_extents = xfs_dfork_data_extents(dip);
 | 
						|
	attr_extents = xfs_dfork_attr_extents(dip);
 | 
						|
	if (data_extents + attr_extents == 0)
 | 
						|
		dip->di_nblocks = 0;
 | 
						|
}
 | 
						|
 | 
						|
/* Inode didn't pass dinode verifiers, so fix the raw buffer and retry iget. */
 | 
						|
STATIC int
 | 
						|
xrep_dinode_core(
 | 
						|
	struct xrep_inode	*ri)
 | 
						|
{
 | 
						|
	struct xfs_scrub	*sc = ri->sc;
 | 
						|
	struct xfs_buf		*bp;
 | 
						|
	struct xfs_dinode	*dip;
 | 
						|
	xfs_ino_t		ino = sc->sm->sm_ino;
 | 
						|
	int			error;
 | 
						|
	int			iget_error;
 | 
						|
 | 
						|
	/* Figure out what this inode had mapped in both forks. */
 | 
						|
	error = xrep_dinode_count_rmaps(ri);
 | 
						|
	if (error)
 | 
						|
		return error;
 | 
						|
 | 
						|
	/* Read the inode cluster buffer. */
 | 
						|
	error = xfs_trans_read_buf(sc->mp, sc->tp, sc->mp->m_ddev_targp,
 | 
						|
			ri->imap.im_blkno, ri->imap.im_len, XBF_UNMAPPED, &bp,
 | 
						|
			NULL);
 | 
						|
	if (error)
 | 
						|
		return error;
 | 
						|
 | 
						|
	/* Make sure we can pass the inode buffer verifier. */
 | 
						|
	xrep_dinode_buf(sc, bp);
 | 
						|
	bp->b_ops = &xfs_inode_buf_ops;
 | 
						|
 | 
						|
	/* Fix everything the verifier will complain about. */
 | 
						|
	dip = xfs_buf_offset(bp, ri->imap.im_boffset);
 | 
						|
	xrep_dinode_header(sc, dip);
 | 
						|
	iget_error = xrep_dinode_mode(ri, dip);
 | 
						|
	if (iget_error)
 | 
						|
		goto write;
 | 
						|
	xrep_dinode_nlinks(dip);
 | 
						|
	xrep_dinode_flags(sc, dip, ri->rt_extents > 0);
 | 
						|
	xrep_dinode_size(ri, dip);
 | 
						|
	xrep_dinode_extsize_hints(sc, dip);
 | 
						|
	xrep_dinode_zap_forks(ri, dip);
 | 
						|
 | 
						|
write:
 | 
						|
	/* Write out the inode. */
 | 
						|
	trace_xrep_dinode_fixed(sc, dip);
 | 
						|
	xfs_dinode_calc_crc(sc->mp, dip);
 | 
						|
	xfs_trans_buf_set_type(sc->tp, bp, XFS_BLFT_DINO_BUF);
 | 
						|
	xfs_trans_log_buf(sc->tp, bp, ri->imap.im_boffset,
 | 
						|
			ri->imap.im_boffset + sc->mp->m_sb.sb_inodesize - 1);
 | 
						|
 | 
						|
	/*
 | 
						|
	 * In theory, we've fixed the ondisk inode record enough that we should
 | 
						|
	 * be able to load the inode into the cache.  Try to iget that inode
 | 
						|
	 * now while we hold the AGI and the inode cluster buffer and take the
 | 
						|
	 * IOLOCK so that we can continue with repairs without anyone else
 | 
						|
	 * accessing the inode.  If iget fails, we still need to commit the
 | 
						|
	 * changes.
 | 
						|
	 */
 | 
						|
	if (!iget_error)
 | 
						|
		iget_error = xchk_iget(sc, ino, &sc->ip);
 | 
						|
	if (!iget_error)
 | 
						|
		xchk_ilock(sc, XFS_IOLOCK_EXCL);
 | 
						|
 | 
						|
	/*
 | 
						|
	 * Commit the inode cluster buffer updates and drop the AGI buffer that
 | 
						|
	 * we've been holding since scrub setup.  From here on out, repairs
 | 
						|
	 * deal only with the cached inode.
 | 
						|
	 */
 | 
						|
	error = xrep_trans_commit(sc);
 | 
						|
	if (error)
 | 
						|
		return error;
 | 
						|
 | 
						|
	if (iget_error)
 | 
						|
		return iget_error;
 | 
						|
 | 
						|
	error = xchk_trans_alloc(sc, 0);
 | 
						|
	if (error)
 | 
						|
		return error;
 | 
						|
 | 
						|
	error = xrep_ino_dqattach(sc);
 | 
						|
	if (error)
 | 
						|
		return error;
 | 
						|
 | 
						|
	xchk_ilock(sc, XFS_ILOCK_EXCL);
 | 
						|
	if (ri->ino_sick_mask)
 | 
						|
		xfs_inode_mark_sick(sc->ip, ri->ino_sick_mask);
 | 
						|
	return 0;
 | 
						|
}
 | 
						|
 | 
						|
/* Fix everything xfs_dinode_verify cares about. */
 | 
						|
STATIC int
 | 
						|
xrep_dinode_problems(
 | 
						|
	struct xrep_inode	*ri)
 | 
						|
{
 | 
						|
	struct xfs_scrub	*sc = ri->sc;
 | 
						|
	int			error;
 | 
						|
 | 
						|
	error = xrep_dinode_core(ri);
 | 
						|
	if (error)
 | 
						|
		return error;
 | 
						|
 | 
						|
	/* We had to fix a totally busted inode, schedule quotacheck. */
 | 
						|
	if (XFS_IS_UQUOTA_ON(sc->mp))
 | 
						|
		xrep_force_quotacheck(sc, XFS_DQTYPE_USER);
 | 
						|
	if (XFS_IS_GQUOTA_ON(sc->mp))
 | 
						|
		xrep_force_quotacheck(sc, XFS_DQTYPE_GROUP);
 | 
						|
	if (XFS_IS_PQUOTA_ON(sc->mp))
 | 
						|
		xrep_force_quotacheck(sc, XFS_DQTYPE_PROJ);
 | 
						|
 | 
						|
	return 0;
 | 
						|
}
 | 
						|
 | 
						|
/*
 | 
						|
 * Fix problems that the verifiers don't care about.  In general these are
 | 
						|
 * errors that don't cause problems elsewhere in the kernel that we can easily
 | 
						|
 * detect, so we don't check them all that rigorously.
 | 
						|
 */
 | 
						|
 | 
						|
/* Make sure block and extent counts are ok. */
 | 
						|
STATIC int
 | 
						|
xrep_inode_blockcounts(
 | 
						|
	struct xfs_scrub	*sc)
 | 
						|
{
 | 
						|
	struct xfs_ifork	*ifp;
 | 
						|
	xfs_filblks_t		count;
 | 
						|
	xfs_filblks_t		acount;
 | 
						|
	xfs_extnum_t		nextents;
 | 
						|
	int			error;
 | 
						|
 | 
						|
	trace_xrep_inode_blockcounts(sc);
 | 
						|
 | 
						|
	/* Set data fork counters from the data fork mappings. */
 | 
						|
	error = xfs_bmap_count_blocks(sc->tp, sc->ip, XFS_DATA_FORK,
 | 
						|
			&nextents, &count);
 | 
						|
	if (error)
 | 
						|
		return error;
 | 
						|
	if (xfs_is_reflink_inode(sc->ip)) {
 | 
						|
		/*
 | 
						|
		 * data fork blockcount can exceed physical storage if a user
 | 
						|
		 * reflinks the same block over and over again.
 | 
						|
		 */
 | 
						|
		;
 | 
						|
	} else if (XFS_IS_REALTIME_INODE(sc->ip)) {
 | 
						|
		if (count >= sc->mp->m_sb.sb_rblocks)
 | 
						|
			return -EFSCORRUPTED;
 | 
						|
	} else {
 | 
						|
		if (count >= sc->mp->m_sb.sb_dblocks)
 | 
						|
			return -EFSCORRUPTED;
 | 
						|
	}
 | 
						|
	error = xrep_ino_ensure_extent_count(sc, XFS_DATA_FORK, nextents);
 | 
						|
	if (error)
 | 
						|
		return error;
 | 
						|
	sc->ip->i_df.if_nextents = nextents;
 | 
						|
 | 
						|
	/* Set attr fork counters from the attr fork mappings. */
 | 
						|
	ifp = xfs_ifork_ptr(sc->ip, XFS_ATTR_FORK);
 | 
						|
	if (ifp) {
 | 
						|
		error = xfs_bmap_count_blocks(sc->tp, sc->ip, XFS_ATTR_FORK,
 | 
						|
				&nextents, &acount);
 | 
						|
		if (error)
 | 
						|
			return error;
 | 
						|
		if (count >= sc->mp->m_sb.sb_dblocks)
 | 
						|
			return -EFSCORRUPTED;
 | 
						|
		error = xrep_ino_ensure_extent_count(sc, XFS_ATTR_FORK,
 | 
						|
				nextents);
 | 
						|
		if (error)
 | 
						|
			return error;
 | 
						|
		ifp->if_nextents = nextents;
 | 
						|
	} else {
 | 
						|
		acount = 0;
 | 
						|
	}
 | 
						|
 | 
						|
	sc->ip->i_nblocks = count + acount;
 | 
						|
	return 0;
 | 
						|
}
 | 
						|
 | 
						|
/* Check for invalid uid/gid/prid. */
 | 
						|
STATIC void
 | 
						|
xrep_inode_ids(
 | 
						|
	struct xfs_scrub	*sc)
 | 
						|
{
 | 
						|
	bool			dirty = false;
 | 
						|
 | 
						|
	trace_xrep_inode_ids(sc);
 | 
						|
 | 
						|
	if (!uid_valid(VFS_I(sc->ip)->i_uid)) {
 | 
						|
		i_uid_write(VFS_I(sc->ip), 0);
 | 
						|
		dirty = true;
 | 
						|
		if (XFS_IS_UQUOTA_ON(sc->mp))
 | 
						|
			xrep_force_quotacheck(sc, XFS_DQTYPE_USER);
 | 
						|
	}
 | 
						|
 | 
						|
	if (!gid_valid(VFS_I(sc->ip)->i_gid)) {
 | 
						|
		i_gid_write(VFS_I(sc->ip), 0);
 | 
						|
		dirty = true;
 | 
						|
		if (XFS_IS_GQUOTA_ON(sc->mp))
 | 
						|
			xrep_force_quotacheck(sc, XFS_DQTYPE_GROUP);
 | 
						|
	}
 | 
						|
 | 
						|
	if (sc->ip->i_projid == -1U) {
 | 
						|
		sc->ip->i_projid = 0;
 | 
						|
		dirty = true;
 | 
						|
		if (XFS_IS_PQUOTA_ON(sc->mp))
 | 
						|
			xrep_force_quotacheck(sc, XFS_DQTYPE_PROJ);
 | 
						|
	}
 | 
						|
 | 
						|
	/* strip setuid/setgid if we touched any of the ids */
 | 
						|
	if (dirty)
 | 
						|
		VFS_I(sc->ip)->i_mode &= ~(S_ISUID | S_ISGID);
 | 
						|
}
 | 
						|
 | 
						|
static inline void
 | 
						|
xrep_clamp_timestamp(
 | 
						|
	struct xfs_inode	*ip,
 | 
						|
	struct timespec64	*ts)
 | 
						|
{
 | 
						|
	ts->tv_nsec = clamp_t(long, ts->tv_nsec, 0, NSEC_PER_SEC);
 | 
						|
	*ts = timestamp_truncate(*ts, VFS_I(ip));
 | 
						|
}
 | 
						|
 | 
						|
/* Nanosecond counters can't have more than 1 billion. */
 | 
						|
STATIC void
 | 
						|
xrep_inode_timestamps(
 | 
						|
	struct xfs_inode	*ip)
 | 
						|
{
 | 
						|
	struct timespec64	tstamp;
 | 
						|
	struct inode		*inode = VFS_I(ip);
 | 
						|
 | 
						|
	tstamp = inode_get_atime(inode);
 | 
						|
	xrep_clamp_timestamp(ip, &tstamp);
 | 
						|
	inode_set_atime_to_ts(inode, tstamp);
 | 
						|
 | 
						|
	tstamp = inode_get_mtime(inode);
 | 
						|
	xrep_clamp_timestamp(ip, &tstamp);
 | 
						|
	inode_set_mtime_to_ts(inode, tstamp);
 | 
						|
 | 
						|
	tstamp = inode_get_ctime(inode);
 | 
						|
	xrep_clamp_timestamp(ip, &tstamp);
 | 
						|
	inode_set_ctime_to_ts(inode, tstamp);
 | 
						|
 | 
						|
	xrep_clamp_timestamp(ip, &ip->i_crtime);
 | 
						|
}
 | 
						|
 | 
						|
/* Fix inode flags that don't make sense together. */
 | 
						|
STATIC void
 | 
						|
xrep_inode_flags(
 | 
						|
	struct xfs_scrub	*sc)
 | 
						|
{
 | 
						|
	uint16_t		mode;
 | 
						|
 | 
						|
	trace_xrep_inode_flags(sc);
 | 
						|
 | 
						|
	mode = VFS_I(sc->ip)->i_mode;
 | 
						|
 | 
						|
	/* Clear junk flags */
 | 
						|
	if (sc->ip->i_diflags & ~XFS_DIFLAG_ANY)
 | 
						|
		sc->ip->i_diflags &= ~XFS_DIFLAG_ANY;
 | 
						|
 | 
						|
	/* NEWRTBM only applies to realtime bitmaps */
 | 
						|
	if (sc->ip->i_ino == sc->mp->m_sb.sb_rbmino)
 | 
						|
		sc->ip->i_diflags |= XFS_DIFLAG_NEWRTBM;
 | 
						|
	else
 | 
						|
		sc->ip->i_diflags &= ~XFS_DIFLAG_NEWRTBM;
 | 
						|
 | 
						|
	/* These only make sense for directories. */
 | 
						|
	if (!S_ISDIR(mode))
 | 
						|
		sc->ip->i_diflags &= ~(XFS_DIFLAG_RTINHERIT |
 | 
						|
					  XFS_DIFLAG_EXTSZINHERIT |
 | 
						|
					  XFS_DIFLAG_PROJINHERIT |
 | 
						|
					  XFS_DIFLAG_NOSYMLINKS);
 | 
						|
 | 
						|
	/* These only make sense for files. */
 | 
						|
	if (!S_ISREG(mode))
 | 
						|
		sc->ip->i_diflags &= ~(XFS_DIFLAG_REALTIME |
 | 
						|
					  XFS_DIFLAG_EXTSIZE);
 | 
						|
 | 
						|
	/* These only make sense for non-rt files. */
 | 
						|
	if (sc->ip->i_diflags & XFS_DIFLAG_REALTIME)
 | 
						|
		sc->ip->i_diflags &= ~XFS_DIFLAG_FILESTREAM;
 | 
						|
 | 
						|
	/* Immutable and append only?  Drop the append. */
 | 
						|
	if ((sc->ip->i_diflags & XFS_DIFLAG_IMMUTABLE) &&
 | 
						|
	    (sc->ip->i_diflags & XFS_DIFLAG_APPEND))
 | 
						|
		sc->ip->i_diflags &= ~XFS_DIFLAG_APPEND;
 | 
						|
 | 
						|
	/* Clear junk flags. */
 | 
						|
	if (sc->ip->i_diflags2 & ~XFS_DIFLAG2_ANY)
 | 
						|
		sc->ip->i_diflags2 &= ~XFS_DIFLAG2_ANY;
 | 
						|
 | 
						|
	/* No reflink flag unless we support it and it's a file. */
 | 
						|
	if (!xfs_has_reflink(sc->mp) || !S_ISREG(mode))
 | 
						|
		sc->ip->i_diflags2 &= ~XFS_DIFLAG2_REFLINK;
 | 
						|
 | 
						|
	/* DAX only applies to files and dirs. */
 | 
						|
	if (!(S_ISREG(mode) || S_ISDIR(mode)))
 | 
						|
		sc->ip->i_diflags2 &= ~XFS_DIFLAG2_DAX;
 | 
						|
 | 
						|
	/* No reflink files on the realtime device. */
 | 
						|
	if (sc->ip->i_diflags & XFS_DIFLAG_REALTIME)
 | 
						|
		sc->ip->i_diflags2 &= ~XFS_DIFLAG2_REFLINK;
 | 
						|
}
 | 
						|
 | 
						|
/*
 | 
						|
 * Fix size problems with block/node format directories.  If we fail to find
 | 
						|
 * the extent list, just bail out and let the bmapbtd repair functions clean
 | 
						|
 * up that mess.
 | 
						|
 */
 | 
						|
STATIC void
 | 
						|
xrep_inode_blockdir_size(
 | 
						|
	struct xfs_scrub	*sc)
 | 
						|
{
 | 
						|
	struct xfs_iext_cursor	icur;
 | 
						|
	struct xfs_bmbt_irec	got;
 | 
						|
	struct xfs_ifork	*ifp;
 | 
						|
	xfs_fileoff_t		off;
 | 
						|
	int			error;
 | 
						|
 | 
						|
	trace_xrep_inode_blockdir_size(sc);
 | 
						|
 | 
						|
	error = xfs_iread_extents(sc->tp, sc->ip, XFS_DATA_FORK);
 | 
						|
	if (error)
 | 
						|
		return;
 | 
						|
 | 
						|
	/* Find the last block before 32G; this is the dir size. */
 | 
						|
	ifp = xfs_ifork_ptr(sc->ip, XFS_DATA_FORK);
 | 
						|
	off = XFS_B_TO_FSB(sc->mp, XFS_DIR2_SPACE_SIZE);
 | 
						|
	if (!xfs_iext_lookup_extent_before(sc->ip, ifp, &off, &icur, &got)) {
 | 
						|
		/* zero-extents directory? */
 | 
						|
		return;
 | 
						|
	}
 | 
						|
 | 
						|
	off = got.br_startoff + got.br_blockcount;
 | 
						|
	sc->ip->i_disk_size = min_t(loff_t, XFS_DIR2_SPACE_SIZE,
 | 
						|
			XFS_FSB_TO_B(sc->mp, off));
 | 
						|
}
 | 
						|
 | 
						|
/* Fix size problems with short format directories. */
 | 
						|
STATIC void
 | 
						|
xrep_inode_sfdir_size(
 | 
						|
	struct xfs_scrub	*sc)
 | 
						|
{
 | 
						|
	struct xfs_ifork	*ifp;
 | 
						|
 | 
						|
	trace_xrep_inode_sfdir_size(sc);
 | 
						|
 | 
						|
	ifp = xfs_ifork_ptr(sc->ip, XFS_DATA_FORK);
 | 
						|
	sc->ip->i_disk_size = ifp->if_bytes;
 | 
						|
}
 | 
						|
 | 
						|
/*
 | 
						|
 * Fix any irregularities in a directory inode's size now that we can iterate
 | 
						|
 * extent maps and access other regular inode data.
 | 
						|
 */
 | 
						|
STATIC void
 | 
						|
xrep_inode_dir_size(
 | 
						|
	struct xfs_scrub	*sc)
 | 
						|
{
 | 
						|
	trace_xrep_inode_dir_size(sc);
 | 
						|
 | 
						|
	switch (sc->ip->i_df.if_format) {
 | 
						|
	case XFS_DINODE_FMT_EXTENTS:
 | 
						|
	case XFS_DINODE_FMT_BTREE:
 | 
						|
		xrep_inode_blockdir_size(sc);
 | 
						|
		break;
 | 
						|
	case XFS_DINODE_FMT_LOCAL:
 | 
						|
		xrep_inode_sfdir_size(sc);
 | 
						|
		break;
 | 
						|
	}
 | 
						|
}
 | 
						|
 | 
						|
/* Fix extent size hint problems. */
 | 
						|
STATIC void
 | 
						|
xrep_inode_extsize(
 | 
						|
	struct xfs_scrub	*sc)
 | 
						|
{
 | 
						|
	/* Fix misaligned extent size hints on a directory. */
 | 
						|
	if ((sc->ip->i_diflags & XFS_DIFLAG_RTINHERIT) &&
 | 
						|
	    (sc->ip->i_diflags & XFS_DIFLAG_EXTSZINHERIT) &&
 | 
						|
	    xfs_extlen_to_rtxmod(sc->mp, sc->ip->i_extsize) > 0) {
 | 
						|
		sc->ip->i_extsize = 0;
 | 
						|
		sc->ip->i_diflags &= ~XFS_DIFLAG_EXTSZINHERIT;
 | 
						|
	}
 | 
						|
}
 | 
						|
 | 
						|
/* Ensure this file has an attr fork if it needs to hold a parent pointer. */
 | 
						|
STATIC int
 | 
						|
xrep_inode_pptr(
 | 
						|
	struct xfs_scrub	*sc)
 | 
						|
{
 | 
						|
	struct xfs_mount	*mp = sc->mp;
 | 
						|
	struct xfs_inode	*ip = sc->ip;
 | 
						|
	struct inode		*inode = VFS_I(ip);
 | 
						|
 | 
						|
	if (!xfs_has_parent(mp))
 | 
						|
		return 0;
 | 
						|
 | 
						|
	/*
 | 
						|
	 * Unlinked inodes that cannot be added to the directory tree will not
 | 
						|
	 * have a parent pointer.
 | 
						|
	 */
 | 
						|
	if (inode->i_nlink == 0 && !(inode->i_state & I_LINKABLE))
 | 
						|
		return 0;
 | 
						|
 | 
						|
	/* The root directory doesn't have a parent pointer. */
 | 
						|
	if (ip == mp->m_rootip)
 | 
						|
		return 0;
 | 
						|
 | 
						|
	/*
 | 
						|
	 * Metadata inodes are rooted in the superblock and do not have any
 | 
						|
	 * parents.
 | 
						|
	 */
 | 
						|
	if (xfs_is_metadata_inode(ip))
 | 
						|
		return 0;
 | 
						|
 | 
						|
	/* Inode already has an attr fork; no further work possible here. */
 | 
						|
	if (xfs_inode_has_attr_fork(ip))
 | 
						|
		return 0;
 | 
						|
 | 
						|
	return xfs_bmap_add_attrfork(sc->tp, ip,
 | 
						|
			sizeof(struct xfs_attr_sf_hdr), true);
 | 
						|
}
 | 
						|
 | 
						|
/* Fix any irregularities in an inode that the verifiers don't catch. */
 | 
						|
STATIC int
 | 
						|
xrep_inode_problems(
 | 
						|
	struct xfs_scrub	*sc)
 | 
						|
{
 | 
						|
	int			error;
 | 
						|
 | 
						|
	error = xrep_inode_blockcounts(sc);
 | 
						|
	if (error)
 | 
						|
		return error;
 | 
						|
	error = xrep_inode_pptr(sc);
 | 
						|
	if (error)
 | 
						|
		return error;
 | 
						|
	xrep_inode_timestamps(sc->ip);
 | 
						|
	xrep_inode_flags(sc);
 | 
						|
	xrep_inode_ids(sc);
 | 
						|
	/*
 | 
						|
	 * We can now do a better job fixing the size of a directory now that
 | 
						|
	 * we can scan the data fork extents than we could in xrep_dinode_size.
 | 
						|
	 */
 | 
						|
	if (S_ISDIR(VFS_I(sc->ip)->i_mode))
 | 
						|
		xrep_inode_dir_size(sc);
 | 
						|
	xrep_inode_extsize(sc);
 | 
						|
 | 
						|
	trace_xrep_inode_fixed(sc);
 | 
						|
	xfs_trans_log_inode(sc->tp, sc->ip, XFS_ILOG_CORE);
 | 
						|
	return xrep_roll_trans(sc);
 | 
						|
}
 | 
						|
 | 
						|
/*
 | 
						|
 * Make sure this inode's unlinked list pointers are consistent with its
 | 
						|
 * link count.
 | 
						|
 */
 | 
						|
STATIC int
 | 
						|
xrep_inode_unlinked(
 | 
						|
	struct xfs_scrub	*sc)
 | 
						|
{
 | 
						|
	unsigned int		nlink = VFS_I(sc->ip)->i_nlink;
 | 
						|
	int			error;
 | 
						|
 | 
						|
	/*
 | 
						|
	 * If this inode is linked from the directory tree and on the unlinked
 | 
						|
	 * list, remove it from the unlinked list.
 | 
						|
	 */
 | 
						|
	if (nlink > 0 && xfs_inode_on_unlinked_list(sc->ip)) {
 | 
						|
		struct xfs_perag	*pag;
 | 
						|
		int			error;
 | 
						|
 | 
						|
		pag = xfs_perag_get(sc->mp,
 | 
						|
				XFS_INO_TO_AGNO(sc->mp, sc->ip->i_ino));
 | 
						|
		error = xfs_iunlink_remove(sc->tp, pag, sc->ip);
 | 
						|
		xfs_perag_put(pag);
 | 
						|
		if (error)
 | 
						|
			return error;
 | 
						|
	}
 | 
						|
 | 
						|
	/*
 | 
						|
	 * If this inode is not linked from the directory tree yet not on the
 | 
						|
	 * unlinked list, put it on the unlinked list.
 | 
						|
	 */
 | 
						|
	if (nlink == 0 && !xfs_inode_on_unlinked_list(sc->ip)) {
 | 
						|
		error = xfs_iunlink(sc->tp, sc->ip);
 | 
						|
		if (error)
 | 
						|
			return error;
 | 
						|
	}
 | 
						|
 | 
						|
	return 0;
 | 
						|
}
 | 
						|
 | 
						|
/* Repair an inode's fields. */
 | 
						|
int
 | 
						|
xrep_inode(
 | 
						|
	struct xfs_scrub	*sc)
 | 
						|
{
 | 
						|
	int			error = 0;
 | 
						|
 | 
						|
	/*
 | 
						|
	 * No inode?  That means we failed the _iget verifiers.  Repair all
 | 
						|
	 * the things that the inode verifiers care about, then retry _iget.
 | 
						|
	 */
 | 
						|
	if (!sc->ip) {
 | 
						|
		struct xrep_inode	*ri = sc->buf;
 | 
						|
 | 
						|
		ASSERT(ri != NULL);
 | 
						|
 | 
						|
		error = xrep_dinode_problems(ri);
 | 
						|
		if (error == -EBUSY) {
 | 
						|
			/*
 | 
						|
			 * Directory scan to recover inode mode encountered a
 | 
						|
			 * busy inode, so we did not continue repairing things.
 | 
						|
			 */
 | 
						|
			return 0;
 | 
						|
		}
 | 
						|
		if (error)
 | 
						|
			return error;
 | 
						|
 | 
						|
		/* By this point we had better have a working incore inode. */
 | 
						|
		if (!sc->ip)
 | 
						|
			return -EFSCORRUPTED;
 | 
						|
	}
 | 
						|
 | 
						|
	xfs_trans_ijoin(sc->tp, sc->ip, 0);
 | 
						|
 | 
						|
	/* If we found corruption of any kind, try to fix it. */
 | 
						|
	if ((sc->sm->sm_flags & XFS_SCRUB_OFLAG_CORRUPT) ||
 | 
						|
	    (sc->sm->sm_flags & XFS_SCRUB_OFLAG_XCORRUPT)) {
 | 
						|
		error = xrep_inode_problems(sc);
 | 
						|
		if (error)
 | 
						|
			return error;
 | 
						|
	}
 | 
						|
 | 
						|
	/* See if we can clear the reflink flag. */
 | 
						|
	if (xfs_is_reflink_inode(sc->ip)) {
 | 
						|
		error = xfs_reflink_clear_inode_flag(sc->ip, &sc->tp);
 | 
						|
		if (error)
 | 
						|
			return error;
 | 
						|
	}
 | 
						|
 | 
						|
	/* Reconnect incore unlinked list */
 | 
						|
	error = xrep_inode_unlinked(sc);
 | 
						|
	if (error)
 | 
						|
		return error;
 | 
						|
 | 
						|
	return xrep_defer_finish(sc);
 | 
						|
}
 |