// SPDX-License-Identifier: GPL-2.0-or-later
/*
 * Copyright (C) 2018-2023 Oracle.  All Rights Reserved.
 * Author: Darrick J. Wong <djwong@kernel.org>
 */
#include "xfs.h"
#include "xfs_fs.h"
#include "xfs_shared.h"
#include "xfs_format.h"
#include "xfs_log_format.h"
#include "xfs_trans_resv.h"
#include "xfs_mount.h"
#include "scrub/scrub.h"
#include "scrub/xfile.h"
#include "scrub/xfarray.h"
#include "scrub/trace.h"
#include <linux/shmem_fs.h>

/*
 * Swappable Temporary Memory
 * ==========================
 *
 * Online checking sometimes needs to be able to stage a large amount of data
 * in memory.  This information might not fit in the available memory and it
 * doesn't all need to be accessible at all times.  In other words, we want an
 * indexed data buffer to store data that can be paged out.
 *
 * When CONFIG_TMPFS=y, shmemfs is enough of a filesystem to meet those
 * requirements.  Therefore, the xfile mechanism uses an unlinked shmem file to
 * store our staging data.  This file is not installed in the file descriptor
 * table so that user programs cannot access the data, which means that the
 * xfile must be freed with xfile_destroy.
 *
 * xfiles assume that the caller will handle all required concurrency
 * management; standard vfs locks (freezer and inode) are not taken.  Reads
 * and writes are satisfied directly from the page cache.
 */
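
/*
 * Illustrative lifecycle sketch (hypothetical caller; error handling
 * abbreviated).  Staging data flows through xfile_create, xfile_store,
 * xfile_load, and finally xfile_destroy, with all serialization left to
 * the caller:
 *
 *	struct xfile	*xf;
 *	char		rec[16] = "staged record";
 *	char		copy[16];
 *	int		error;
 *
 *	error = xfile_create("example staging data", 0, &xf);
 *	if (error)
 *		return error;
 *
 *	error = xfile_store(xf, rec, sizeof(rec), 0);
 *	if (!error)
 *		error = xfile_load(xf, copy, sizeof(copy), 0);
 *
 *	xfile_destroy(xf);
 *	return error;
 */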

/*
 * xfiles must not be exposed to userspace and require upper layers to
 * coordinate access to the one handle returned by the constructor, so
 * establish a separate lock class for xfiles to avoid confusing lockdep.
 */
static struct lock_class_key xfile_i_mutex_key;

/*
 * Create an xfile of the given size.  The description will be used in the
 * trace output.
 */
int
xfile_create(
	const char		*description,
	loff_t			isize,
	struct xfile		**xfilep)
{
	struct inode		*inode;
	struct xfile		*xf;
	int			error;

	xf = kmalloc(sizeof(struct xfile), XCHK_GFP_FLAGS);
	if (!xf)
		return -ENOMEM;

	xf->file = shmem_kernel_file_setup(description, isize, VM_NORESERVE);
	if (IS_ERR(xf->file)) {
		error = PTR_ERR(xf->file);
		goto out_xfile;
	}

	inode = file_inode(xf->file);
	lockdep_set_class(&inode->i_rwsem, &xfile_i_mutex_key);

	/*
	 * We don't want to bother with kmapping data during repair, so don't
	 * allow highmem pages to back this mapping.
	 */
	mapping_set_gfp_mask(inode->i_mapping, GFP_KERNEL);

	trace_xfile_create(xf);

	*xfilep = xf;
	return 0;
out_xfile:
	kfree(xf);
	return error;
}

/* Close the file and release all resources. */
void
xfile_destroy(
	struct xfile		*xf)
{
	struct inode		*inode = file_inode(xf->file);

	trace_xfile_destroy(xf);

	lockdep_set_class(&inode->i_rwsem, &inode->i_sb->s_type->i_mutex_key);
	fput(xf->file);
	kfree(xf);
}

/*
 * Load an object.  Since we're treating this file as "memory", any error or
 * short IO is treated as a failure to allocate memory.
 */
int
xfile_load(
	struct xfile		*xf,
	void			*buf,
	size_t			count,
	loff_t			pos)
{
	struct inode		*inode = file_inode(xf->file);
	unsigned int		pflags;

	if (count > MAX_RW_COUNT)
		return -ENOMEM;
	if (inode->i_sb->s_maxbytes - pos < count)
		return -ENOMEM;

	trace_xfile_load(xf, pos, count);

	pflags = memalloc_nofs_save();
	while (count > 0) {
		struct folio	*folio;
		unsigned int	len;
		unsigned int	offset;

		if (shmem_get_folio(inode, pos >> PAGE_SHIFT, 0, &folio,
				SGP_READ) < 0)
			break;
		if (!folio) {
			/*
			 * No data stored at this offset, just zero the output
			 * buffer until the next page boundary.
			 */
			len = min_t(ssize_t, count,
					PAGE_SIZE - offset_in_page(pos));
			memset(buf, 0, len);
		} else {
			if (filemap_check_wb_err(inode->i_mapping, 0)) {
				folio_unlock(folio);
				folio_put(folio);
				break;
			}

			offset = offset_in_folio(folio, pos);
			len = min_t(ssize_t, count, folio_size(folio) - offset);
			memcpy(buf, folio_address(folio) + offset, len);

			folio_unlock(folio);
			folio_put(folio);
		}
		count -= len;
		pos += len;
		buf += len;
	}
	memalloc_nofs_restore(pflags);

	if (count)
		return -ENOMEM;
	return 0;
}

/*
 * Store an object.  Since we're treating this file as "memory", any error or
 * short IO is treated as a failure to allocate memory.
 */
int
xfile_store(
	struct xfile		*xf,
	const void		*buf,
	size_t			count,
	loff_t			pos)
{
	struct inode		*inode = file_inode(xf->file);
	unsigned int		pflags;

	if (count > MAX_RW_COUNT)
		return -ENOMEM;
	if (inode->i_sb->s_maxbytes - pos < count)
		return -ENOMEM;

	trace_xfile_store(xf, pos, count);

	/*
	 * Increase the file size first so that shmem_get_folio(..., SGP_CACHE)
	 * actually allocates a folio instead of erroring out.
	 */
	if (pos + count > i_size_read(inode))
		i_size_write(inode, pos + count);

	pflags = memalloc_nofs_save();
	while (count > 0) {
		struct folio	*folio;
		unsigned int	len;
		unsigned int	offset;

		if (shmem_get_folio(inode, pos >> PAGE_SHIFT, 0, &folio,
				SGP_CACHE) < 0)
			break;
		if (filemap_check_wb_err(inode->i_mapping, 0)) {
			folio_unlock(folio);
			folio_put(folio);
			break;
		}

		offset = offset_in_folio(folio, pos);
		len = min_t(ssize_t, count, folio_size(folio) - offset);
		memcpy(folio_address(folio) + offset, buf, len);

		folio_mark_dirty(folio);
		folio_unlock(folio);
		folio_put(folio);

		count -= len;
		pos += len;
		buf += len;
	}
	memalloc_nofs_restore(pflags);

	if (count)
		return -ENOMEM;
	return 0;
}

/* Find the next written area in the xfile data for a given offset. */
loff_t
xfile_seek_data(
	struct xfile		*xf,
	loff_t			pos)
{
	loff_t			ret;

	ret = vfs_llseek(xf->file, pos, SEEK_DATA);
	trace_xfile_seek_data(xf, pos, ret);
	return ret;
}
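
/*
 * Sketch of walking only the written regions of a sparse xfile with
 * xfile_seek_data (hypothetical caller).  A negative return means there is
 * no data at or beyond @pos; advancing to the next page boundary is one
 * simple way to avoid revisiting the same region:
 *
 *	char	buf[64];
 *	loff_t	pos = 0;
 *	int	error = 0;
 *
 *	while ((pos = xfile_seek_data(xf, pos)) >= 0) {
 *		error = xfile_load(xf, buf, sizeof(buf), pos);
 *		if (error)
 *			break;
 *		pos = round_up(pos + 1, PAGE_SIZE);
 *	}
 */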

/*
 * Grab the (locked) folio for a memory object.  The object cannot span a folio
 * boundary.  Returns the locked folio if successful, NULL if there was no
 * folio or it didn't cover the range requested, or an ERR_PTR on failure.
 */
struct folio *
xfile_get_folio(
	struct xfile		*xf,
	loff_t			pos,
	size_t			len,
	unsigned int		flags)
{
	struct inode		*inode = file_inode(xf->file);
	struct folio		*folio = NULL;
	unsigned int		pflags;
	int			error;

	if (inode->i_sb->s_maxbytes - pos < len)
		return ERR_PTR(-ENOMEM);

	trace_xfile_get_folio(xf, pos, len);

	/*
	 * Increase the file size first so that shmem_get_folio(..., SGP_CACHE)
	 * actually allocates a folio instead of erroring out.
	 */
	if ((flags & XFILE_ALLOC) && pos + len > i_size_read(inode))
		i_size_write(inode, pos + len);

	pflags = memalloc_nofs_save();
	error = shmem_get_folio(inode, pos >> PAGE_SHIFT, 0, &folio,
			(flags & XFILE_ALLOC) ? SGP_CACHE : SGP_READ);
	memalloc_nofs_restore(pflags);
	if (error)
		return ERR_PTR(error);

	if (!folio)
		return NULL;

	if (len > folio_size(folio) - offset_in_folio(folio, pos)) {
		folio_unlock(folio);
		folio_put(folio);
		return NULL;
	}

	if (filemap_check_wb_err(inode->i_mapping, 0)) {
		folio_unlock(folio);
		folio_put(folio);
		return ERR_PTR(-EIO);
	}

	/*
	 * Mark the folio dirty so that it won't be reclaimed once we drop the
	 * (potentially last) reference in xfile_put_folio.
	 */
	if (flags & XFILE_ALLOC)
		folio_mark_dirty(folio);
	return folio;
}

/*
 * Release the (locked) folio for a memory object.
 */
void
xfile_put_folio(
	struct xfile		*xf,
	struct folio		*folio)
{
	trace_xfile_put_folio(xf, folio_pos(folio), folio_size(folio));

	folio_unlock(folio);
	folio_put(folio);
}
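
/*
 * Sketch of direct folio access (hypothetical caller, with @pos and @len
 * supplied by the caller and the object fitting in one folio): operate on
 * an object in place instead of copying it through xfile_load/xfile_store.
 * The folio comes back locked, so every successful xfile_get_folio must be
 * paired with xfile_put_folio:
 *
 *	struct folio	*folio;
 *	void		*ptr;
 *
 *	folio = xfile_get_folio(xf, pos, len, XFILE_ALLOC);
 *	if (IS_ERR(folio))
 *		return PTR_ERR(folio);
 *	if (!folio)
 *		return -ENOMEM;
 *
 *	ptr = folio_address(folio) + offset_in_folio(folio, pos);
 *	memset(ptr, 0, len);
 *	xfile_put_folio(xf, folio);
 */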

/* Discard the page cache that's backing a range of the xfile. */
void
xfile_discard(
	struct xfile		*xf,
	loff_t			pos,
	u64			count)
{
	trace_xfile_discard(xf, pos, count);

	shmem_truncate_range(file_inode(xf->file), pos, pos + count - 1);
}