Import of kernel-5.14.0-570.26.1.el9_6
commit bbc13f3f35
parent f8f6ce0741
@@ -12,7 +12,7 @@ RHEL_MINOR = 6
#
# Use this spot to avoid future merge conflicts.
# Do not trim this comment.
RHEL_RELEASE = 570.25.1
RHEL_RELEASE = 570.26.1

#
# ZSTREAM
@@ -97,6 +97,7 @@ config ARM64
	select ARCH_SUPPORTS_NUMA_BALANCING
	select ARCH_SUPPORTS_PAGE_TABLE_CHECK
	select ARCH_SUPPORTS_PER_VMA_LOCK
	select ARCH_SUPPORTS_HUGE_PFNMAP if TRANSPARENT_HUGEPAGE
	select ARCH_SUPPORTS_RT
	select ARCH_WANT_BATCHED_UNMAP_TLB_FLUSH
	select ARCH_WANT_COMPAT_IPC_PARSE_VERSION if COMPAT
@@ -354,6 +354,7 @@ static inline void __sync_cache_and_tags(pte_t pte, unsigned int nr_pages)
/*
 * Select all bits except the pfn
 */
#define pte_pgprot pte_pgprot
static inline pgprot_t pte_pgprot(pte_t pte)
{
	unsigned long pfn = pte_pfn(pte);
@@ -527,6 +528,14 @@ static inline pmd_t pmd_mkdevmap(pmd_t pmd)
	return pte_pmd(set_pte_bit(pmd_pte(pmd), __pgprot(PTE_DEVMAP)));
}

#ifdef CONFIG_ARCH_SUPPORTS_PMD_PFNMAP
#define pmd_special(pte) (!!((pmd_val(pte) & PTE_SPECIAL)))
static inline pmd_t pmd_mkspecial(pmd_t pmd)
{
	return set_pmd_bit(pmd, __pgprot(PTE_SPECIAL));
}
#endif

#define __pmd_to_phys(pmd) __pte_to_phys(pmd_pte(pmd))
#define __phys_to_pmd_val(phys) __phys_to_pte_val(phys)
#define pmd_pfn(pmd) ((__pmd_to_phys(pmd) & PMD_MASK) >> PAGE_SHIFT)
@@ -544,6 +553,27 @@ static inline pmd_t pmd_mkdevmap(pmd_t pmd)
#define pud_pfn(pud) ((__pud_to_phys(pud) & PUD_MASK) >> PAGE_SHIFT)
#define pfn_pud(pfn,prot) __pud(__phys_to_pud_val((phys_addr_t)(pfn) << PAGE_SHIFT) | pgprot_val(prot))

#ifdef CONFIG_ARCH_SUPPORTS_PUD_PFNMAP
#define pud_special(pte) pte_special(pud_pte(pud))
#define pud_mkspecial(pte) pte_pud(pte_mkspecial(pud_pte(pud)))
#endif

#define pmd_pgprot pmd_pgprot
static inline pgprot_t pmd_pgprot(pmd_t pmd)
{
	unsigned long pfn = pmd_pfn(pmd);

	return __pgprot(pmd_val(pfn_pmd(pfn, __pgprot(0))) ^ pmd_val(pmd));
}

#define pud_pgprot pud_pgprot
static inline pgprot_t pud_pgprot(pud_t pud)
{
	unsigned long pfn = pud_pfn(pud);

	return __pgprot(pud_val(pfn_pud(pfn, __pgprot(0))) ^ pud_val(pud));
}

static inline void __set_pte_at(struct mm_struct *mm,
				unsigned long __always_unused addr,
				pte_t *ptep, pte_t pte, unsigned int nr)
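The pmd_pgprot()/pud_pgprot() helpers added above recover the protection bits by rebuilding an entry from its own pfn with an empty pgprot and XOR-ing that against the original value. A minimal userspace sketch of the same identity, using a toy entry layout (the field widths and names are illustrative only, not the arm64 format):

#include <assert.h>
#include <stdint.h>

/* Toy 64-bit "pte": pfn in bits 12..63, protection bits in 0..11. */
#define TOY_PAGE_SHIFT 12

static uint64_t toy_pfn_pte(uint64_t pfn, uint64_t prot)
{
	return (pfn << TOY_PAGE_SHIFT) | prot;
}

/* Same trick as pmd_pgprot()/pud_pgprot() above: an entry built from
 * the same pfn with empty prot, XOR-ed back, isolates the prot bits. */
static uint64_t toy_pte_pgprot(uint64_t pte)
{
	uint64_t pfn = pte >> TOY_PAGE_SHIFT;

	return toy_pfn_pte(pfn, 0) ^ pte;
}

int main(void)
{
	uint64_t pte = toy_pfn_pte(0x1234, 0x1e3);

	assert(toy_pte_pgprot(pte) == 0x1e3);
	return 0;
}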
@@ -396,33 +396,35 @@ static inline void arch_tlbbatch_flush(struct arch_tlbflush_unmap_batch *batch)
#define __flush_tlb_range_op(op, start, pages, stride, \
asid, tlb_level, tlbi_user, lpa2) \
do { \
typeof(start) __flush_start = start; \
typeof(pages) __flush_pages = pages; \
int num = 0; \
int scale = 3; \
int shift = lpa2 ? 16 : PAGE_SHIFT; \
unsigned long addr; \
\
while (pages > 0) { \
while (__flush_pages > 0) { \
if (!system_supports_tlb_range() || \
pages == 1 || \
(lpa2 && start != ALIGN(start, SZ_64K))) { \
addr = __TLBI_VADDR(start, asid); \
__flush_pages == 1 || \
(lpa2 && __flush_start != ALIGN(__flush_start, SZ_64K))) { \
addr = __TLBI_VADDR(__flush_start, asid); \
__tlbi_level(op, addr, tlb_level); \
if (tlbi_user) \
__tlbi_user_level(op, addr, tlb_level); \
start += stride; \
pages -= stride >> PAGE_SHIFT; \
__flush_start += stride; \
__flush_pages -= stride >> PAGE_SHIFT; \
continue; \
} \
\
num = __TLBI_RANGE_NUM(pages, scale); \
num = __TLBI_RANGE_NUM(__flush_pages, scale); \
if (num >= 0) { \
addr = __TLBI_VADDR_RANGE(start >> shift, asid, \
addr = __TLBI_VADDR_RANGE(__flush_start >> shift, asid, \
scale, num, tlb_level); \
__tlbi(r##op, addr); \
if (tlbi_user) \
__tlbi_user(r##op, addr); \
start += __TLBI_RANGE_PAGES(num, scale) << PAGE_SHIFT; \
pages -= __TLBI_RANGE_PAGES(num, scale); \
__flush_start += __TLBI_RANGE_PAGES(num, scale) << PAGE_SHIFT; \
__flush_pages -= __TLBI_RANGE_PAGES(num, scale);\
} \
scale--; \
} \
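The hunk above moves the loop state of __flush_tlb_range_op() into typeof()-declared locals (__flush_start, __flush_pages), so the macro no longer writes back to the start and pages expressions its callers pass in. A small standalone sketch of that pattern, with a toy macro and names that are not kernel code:

#include <stdio.h>

/* Toy macro that consumes its argument in place: the caller's
 * variable is clobbered as a side effect. */
#define CONSUME_BAD(pages)		\
do {					\
	while ((pages) > 0)		\
		(pages)--;		\
} while (0)

/* Caching the argument in a typeof() local keeps the updates private
 * to the macro body, mirroring __flush_pages/__flush_start above. */
#define CONSUME_GOOD(pages)			\
do {						\
	typeof(pages) __pages = (pages);	\
	while (__pages > 0)			\
		__pages--;			\
} while (0)

int main(void)
{
	int a = 4, b = 4;

	CONSUME_BAD(a);		/* a is now 0 */
	CONSUME_GOOD(b);	/* b is still 4 */
	printf("a=%d b=%d\n", a, b);
	return 0;
}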
@ -44,6 +44,7 @@ static inline unsigned long pte_pfn(pte_t pte)
|
||||
/*
|
||||
* Select all bits except the pfn
|
||||
*/
|
||||
#define pte_pgprot pte_pgprot
|
||||
static inline pgprot_t pte_pgprot(pte_t pte)
|
||||
{
|
||||
unsigned long pte_flags;
|
||||
|
@ -941,6 +941,7 @@ static inline int pte_unused(pte_t pte)
|
||||
* young/old accounting is not supported, i.e _PAGE_PROTECT and _PAGE_INVALID
|
||||
* must not be set.
|
||||
*/
|
||||
#define pte_pgprot pte_pgprot
|
||||
static inline pgprot_t pte_pgprot(pte_t pte)
|
||||
{
|
||||
unsigned long pte_flags = pte_val(pte) & _PAGE_CHG_MASK;
|
||||
|
@ -118,12 +118,11 @@ static inline int __memcpy_toio_inuser(void __iomem *dst,
|
||||
SYSCALL_DEFINE3(s390_pci_mmio_write, unsigned long, mmio_addr,
|
||||
const void __user *, user_buffer, size_t, length)
|
||||
{
|
||||
struct follow_pfnmap_args args = { };
|
||||
u8 local_buf[64];
|
||||
void __iomem *io_addr;
|
||||
void *buf;
|
||||
struct vm_area_struct *vma;
|
||||
pte_t *ptep;
|
||||
spinlock_t *ptl;
|
||||
long ret;
|
||||
|
||||
if (!zpci_is_enabled())
|
||||
@ -169,11 +168,13 @@ SYSCALL_DEFINE3(s390_pci_mmio_write, unsigned long, mmio_addr,
|
||||
if (!(vma->vm_flags & VM_WRITE))
|
||||
goto out_unlock_mmap;
|
||||
|
||||
ret = follow_pte(vma->vm_mm, mmio_addr, &ptep, &ptl);
|
||||
args.address = mmio_addr;
|
||||
args.vma = vma;
|
||||
ret = follow_pfnmap_start(&args);
|
||||
if (ret)
|
||||
goto out_unlock_mmap;
|
||||
|
||||
io_addr = (void __iomem *)((pte_pfn(*ptep) << PAGE_SHIFT) |
|
||||
io_addr = (void __iomem *)((args.pfn << PAGE_SHIFT) |
|
||||
(mmio_addr & ~PAGE_MASK));
|
||||
|
||||
if ((unsigned long) io_addr < ZPCI_IOMAP_ADDR_BASE)
|
||||
@ -181,7 +182,7 @@ SYSCALL_DEFINE3(s390_pci_mmio_write, unsigned long, mmio_addr,
|
||||
|
||||
ret = zpci_memcpy_toio(io_addr, buf, length);
|
||||
out_unlock_pt:
|
||||
pte_unmap_unlock(ptep, ptl);
|
||||
follow_pfnmap_end(&args);
|
||||
out_unlock_mmap:
|
||||
mmap_read_unlock(current->mm);
|
||||
out_free:
|
||||
@ -260,12 +261,11 @@ static inline int __memcpy_fromio_inuser(void __user *dst,
|
||||
SYSCALL_DEFINE3(s390_pci_mmio_read, unsigned long, mmio_addr,
|
||||
void __user *, user_buffer, size_t, length)
|
||||
{
|
||||
struct follow_pfnmap_args args = { };
|
||||
u8 local_buf[64];
|
||||
void __iomem *io_addr;
|
||||
void *buf;
|
||||
struct vm_area_struct *vma;
|
||||
pte_t *ptep;
|
||||
spinlock_t *ptl;
|
||||
long ret;
|
||||
|
||||
if (!zpci_is_enabled())
|
||||
@ -308,11 +308,13 @@ SYSCALL_DEFINE3(s390_pci_mmio_read, unsigned long, mmio_addr,
|
||||
if (!(vma->vm_flags & VM_WRITE))
|
||||
goto out_unlock_mmap;
|
||||
|
||||
ret = follow_pte(vma->vm_mm, mmio_addr, &ptep, &ptl);
|
||||
args.vma = vma;
|
||||
args.address = mmio_addr;
|
||||
ret = follow_pfnmap_start(&args);
|
||||
if (ret)
|
||||
goto out_unlock_mmap;
|
||||
|
||||
io_addr = (void __iomem *)((pte_pfn(*ptep) << PAGE_SHIFT) |
|
||||
io_addr = (void __iomem *)((args.pfn << PAGE_SHIFT) |
|
||||
(mmio_addr & ~PAGE_MASK));
|
||||
|
||||
if ((unsigned long) io_addr < ZPCI_IOMAP_ADDR_BASE) {
|
||||
@ -322,7 +324,7 @@ SYSCALL_DEFINE3(s390_pci_mmio_read, unsigned long, mmio_addr,
|
||||
ret = zpci_memcpy_fromio(buf, io_addr, length);
|
||||
|
||||
out_unlock_pt:
|
||||
pte_unmap_unlock(ptep, ptl);
|
||||
follow_pfnmap_end(&args);
|
||||
out_unlock_mmap:
|
||||
mmap_read_unlock(current->mm);
|
||||
|
||||
|
@ -782,6 +782,7 @@ static inline pmd_t pmd_mkwrite_novma(pmd_t pmd)
|
||||
return __pmd(pte_val(pte));
|
||||
}
|
||||
|
||||
#define pmd_pgprot pmd_pgprot
|
||||
static inline pgprot_t pmd_pgprot(pmd_t entry)
|
||||
{
|
||||
unsigned long val = pmd_val(entry);
|
||||
|
@@ -28,6 +28,7 @@ config X86_64
	select ARCH_HAS_GIGANTIC_PAGE
	select ARCH_SUPPORTS_INT128 if CC_HAS_INT128
	select ARCH_SUPPORTS_PER_VMA_LOCK
	select ARCH_SUPPORTS_HUGE_PFNMAP if TRANSPARENT_HUGEPAGE
	select ARCH_SUPPORTS_RT
	select HAVE_ARCH_SOFT_DIRTY
	select MODULES_USE_ELF_RELA
@ -121,6 +121,34 @@ extern pmdval_t early_pmd_flags;
|
||||
#define arch_end_context_switch(prev) do {} while(0)
|
||||
#endif /* CONFIG_PARAVIRT_XXL */
|
||||
|
||||
static inline pmd_t pmd_set_flags(pmd_t pmd, pmdval_t set)
|
||||
{
|
||||
pmdval_t v = native_pmd_val(pmd);
|
||||
|
||||
return native_make_pmd(v | set);
|
||||
}
|
||||
|
||||
static inline pmd_t pmd_clear_flags(pmd_t pmd, pmdval_t clear)
|
||||
{
|
||||
pmdval_t v = native_pmd_val(pmd);
|
||||
|
||||
return native_make_pmd(v & ~clear);
|
||||
}
|
||||
|
||||
static inline pud_t pud_set_flags(pud_t pud, pudval_t set)
|
||||
{
|
||||
pudval_t v = native_pud_val(pud);
|
||||
|
||||
return native_make_pud(v | set);
|
||||
}
|
||||
|
||||
static inline pud_t pud_clear_flags(pud_t pud, pudval_t clear)
|
||||
{
|
||||
pudval_t v = native_pud_val(pud);
|
||||
|
||||
return native_make_pud(v & ~clear);
|
||||
}
|
||||
|
||||
/*
|
||||
* The following only work if pte_present() is true.
|
||||
* Undefined behaviour if not..
|
||||
@ -310,6 +338,30 @@ static inline int pud_devmap(pud_t pud)
|
||||
}
|
||||
#endif
|
||||
|
||||
#ifdef CONFIG_ARCH_SUPPORTS_PMD_PFNMAP
|
||||
static inline bool pmd_special(pmd_t pmd)
|
||||
{
|
||||
return pmd_flags(pmd) & _PAGE_SPECIAL;
|
||||
}
|
||||
|
||||
static inline pmd_t pmd_mkspecial(pmd_t pmd)
|
||||
{
|
||||
return pmd_set_flags(pmd, _PAGE_SPECIAL);
|
||||
}
|
||||
#endif /* CONFIG_ARCH_SUPPORTS_PMD_PFNMAP */
|
||||
|
||||
#ifdef CONFIG_ARCH_SUPPORTS_PUD_PFNMAP
|
||||
static inline bool pud_special(pud_t pud)
|
||||
{
|
||||
return pud_flags(pud) & _PAGE_SPECIAL;
|
||||
}
|
||||
|
||||
static inline pud_t pud_mkspecial(pud_t pud)
|
||||
{
|
||||
return pud_set_flags(pud, _PAGE_SPECIAL);
|
||||
}
|
||||
#endif /* CONFIG_ARCH_SUPPORTS_PUD_PFNMAP */
|
||||
|
||||
static inline int pgd_devmap(pgd_t pgd)
|
||||
{
|
||||
return 0;
|
||||
@ -480,20 +532,6 @@ static inline pte_t pte_mkdevmap(pte_t pte)
|
||||
return pte_set_flags(pte, _PAGE_SPECIAL|_PAGE_DEVMAP);
|
||||
}
|
||||
|
||||
static inline pmd_t pmd_set_flags(pmd_t pmd, pmdval_t set)
|
||||
{
|
||||
pmdval_t v = native_pmd_val(pmd);
|
||||
|
||||
return native_make_pmd(v | set);
|
||||
}
|
||||
|
||||
static inline pmd_t pmd_clear_flags(pmd_t pmd, pmdval_t clear)
|
||||
{
|
||||
pmdval_t v = native_pmd_val(pmd);
|
||||
|
||||
return native_make_pmd(v & ~clear);
|
||||
}
|
||||
|
||||
/* See comments above mksaveddirty_shift() */
|
||||
static inline pmd_t pmd_mksaveddirty(pmd_t pmd)
|
||||
{
|
||||
@ -588,20 +626,6 @@ static inline pmd_t pmd_mkwrite_novma(pmd_t pmd)
|
||||
pmd_t pmd_mkwrite(pmd_t pmd, struct vm_area_struct *vma);
|
||||
#define pmd_mkwrite pmd_mkwrite
|
||||
|
||||
static inline pud_t pud_set_flags(pud_t pud, pudval_t set)
|
||||
{
|
||||
pudval_t v = native_pud_val(pud);
|
||||
|
||||
return native_make_pud(v | set);
|
||||
}
|
||||
|
||||
static inline pud_t pud_clear_flags(pud_t pud, pudval_t clear)
|
||||
{
|
||||
pudval_t v = native_pud_val(pud);
|
||||
|
||||
return native_make_pud(v & ~clear);
|
||||
}
|
||||
|
||||
/* See comments above mksaveddirty_shift() */
|
||||
static inline pud_t pud_mksaveddirty(pud_t pud)
|
||||
{
|
||||
|
@ -834,7 +834,7 @@ static enum ucode_state load_microcode_amd(u8 family, const u8 *data, size_t siz
|
||||
return ret;
|
||||
}
|
||||
|
||||
for_each_node(nid) {
|
||||
for_each_node_with_cpus(nid) {
|
||||
cpu = cpumask_first(cpumask_of_node(nid));
|
||||
c = &cpu_data(cpu);
|
||||
|
||||
|
@ -39,6 +39,7 @@
|
||||
#include <linux/pfn_t.h>
|
||||
#include <linux/slab.h>
|
||||
#include <linux/mm.h>
|
||||
#include <linux/highmem.h>
|
||||
#include <linux/fs.h>
|
||||
#include <linux/rbtree.h>
|
||||
|
||||
@ -947,6 +948,26 @@ static void free_pfn_range(u64 paddr, unsigned long size)
|
||||
memtype_free(paddr, paddr + size);
|
||||
}
|
||||
|
||||
static int follow_phys(struct vm_area_struct *vma, unsigned long *prot,
|
||||
resource_size_t *phys)
|
||||
{
|
||||
struct follow_pfnmap_args args = { .vma = vma, .address = vma->vm_start };
|
||||
|
||||
if (follow_pfnmap_start(&args))
|
||||
return -EINVAL;
|
||||
|
||||
/* Never return PFNs of anon folios in COW mappings. */
|
||||
if (!args.special) {
|
||||
follow_pfnmap_end(&args);
|
||||
return -EINVAL;
|
||||
}
|
||||
|
||||
*prot = pgprot_val(args.pgprot);
|
||||
*phys = (resource_size_t)args.pfn << PAGE_SHIFT;
|
||||
follow_pfnmap_end(&args);
|
||||
return 0;
|
||||
}
|
||||
|
||||
static int get_pat_info(struct vm_area_struct *vma, resource_size_t *paddr,
|
||||
pgprot_t *pgprot)
|
||||
{
|
||||
@ -964,7 +985,7 @@ static int get_pat_info(struct vm_area_struct *vma, resource_size_t *paddr,
|
||||
* detect the PFN. If we need the cachemode as well, we're out of luck
|
||||
* for now and have to fail fork().
|
||||
*/
|
||||
if (!follow_phys(vma, vma->vm_start, 0, &prot, paddr)) {
|
||||
if (!follow_phys(vma, &prot, paddr)) {
|
||||
if (pgprot)
|
||||
*pgprot = __pgprot(prot);
|
||||
return 0;
|
||||
|
@ -1076,6 +1076,8 @@ CONFIG_TRANSPARENT_HUGEPAGE=y
|
||||
CONFIG_TRANSPARENT_HUGEPAGE_ALWAYS=y
|
||||
# CONFIG_TRANSPARENT_HUGEPAGE_MADVISE is not set
|
||||
# CONFIG_READ_ONLY_THP_FOR_FS is not set
|
||||
CONFIG_ARCH_SUPPORTS_HUGE_PFNMAP=y
|
||||
CONFIG_ARCH_SUPPORTS_PMD_PFNMAP=y
|
||||
CONFIG_NEED_PER_CPU_EMBED_FIRST_CHUNK=y
|
||||
CONFIG_NEED_PER_CPU_PAGE_FIRST_CHUNK=y
|
||||
CONFIG_USE_PERCPU_NUMA_NODE_ID=y
|
||||
|
@ -1072,6 +1072,8 @@ CONFIG_TRANSPARENT_HUGEPAGE=y
|
||||
CONFIG_TRANSPARENT_HUGEPAGE_ALWAYS=y
|
||||
# CONFIG_TRANSPARENT_HUGEPAGE_MADVISE is not set
|
||||
# CONFIG_READ_ONLY_THP_FOR_FS is not set
|
||||
CONFIG_ARCH_SUPPORTS_HUGE_PFNMAP=y
|
||||
CONFIG_ARCH_SUPPORTS_PMD_PFNMAP=y
|
||||
CONFIG_NEED_PER_CPU_EMBED_FIRST_CHUNK=y
|
||||
CONFIG_NEED_PER_CPU_PAGE_FIRST_CHUNK=y
|
||||
CONFIG_USE_PERCPU_NUMA_NODE_ID=y
|
||||
|
@ -1079,6 +1079,8 @@ CONFIG_TRANSPARENT_HUGEPAGE_ALWAYS=y
|
||||
# CONFIG_TRANSPARENT_HUGEPAGE_MADVISE is not set
|
||||
CONFIG_THP_SWAP=y
|
||||
# CONFIG_READ_ONLY_THP_FOR_FS is not set
|
||||
CONFIG_ARCH_SUPPORTS_HUGE_PFNMAP=y
|
||||
CONFIG_ARCH_SUPPORTS_PMD_PFNMAP=y
|
||||
CONFIG_NEED_PER_CPU_EMBED_FIRST_CHUNK=y
|
||||
CONFIG_NEED_PER_CPU_PAGE_FIRST_CHUNK=y
|
||||
CONFIG_USE_PERCPU_NUMA_NODE_ID=y
|
||||
|
@ -1075,6 +1075,8 @@ CONFIG_TRANSPARENT_HUGEPAGE_ALWAYS=y
|
||||
# CONFIG_TRANSPARENT_HUGEPAGE_MADVISE is not set
|
||||
CONFIG_THP_SWAP=y
|
||||
# CONFIG_READ_ONLY_THP_FOR_FS is not set
|
||||
CONFIG_ARCH_SUPPORTS_HUGE_PFNMAP=y
|
||||
CONFIG_ARCH_SUPPORTS_PMD_PFNMAP=y
|
||||
CONFIG_NEED_PER_CPU_EMBED_FIRST_CHUNK=y
|
||||
CONFIG_NEED_PER_CPU_PAGE_FIRST_CHUNK=y
|
||||
CONFIG_USE_PERCPU_NUMA_NODE_ID=y
|
||||
|
@ -1138,6 +1138,9 @@ CONFIG_TRANSPARENT_HUGEPAGE_ALWAYS=y
|
||||
# CONFIG_TRANSPARENT_HUGEPAGE_MADVISE is not set
|
||||
CONFIG_THP_SWAP=y
|
||||
# CONFIG_READ_ONLY_THP_FOR_FS is not set
|
||||
CONFIG_ARCH_SUPPORTS_HUGE_PFNMAP=y
|
||||
CONFIG_ARCH_SUPPORTS_PMD_PFNMAP=y
|
||||
CONFIG_ARCH_SUPPORTS_PUD_PFNMAP=y
|
||||
CONFIG_NEED_PER_CPU_EMBED_FIRST_CHUNK=y
|
||||
CONFIG_NEED_PER_CPU_PAGE_FIRST_CHUNK=y
|
||||
CONFIG_USE_PERCPU_NUMA_NODE_ID=y
|
||||
|
@ -1133,6 +1133,9 @@ CONFIG_TRANSPARENT_HUGEPAGE_ALWAYS=y
|
||||
# CONFIG_TRANSPARENT_HUGEPAGE_MADVISE is not set
|
||||
CONFIG_THP_SWAP=y
|
||||
# CONFIG_READ_ONLY_THP_FOR_FS is not set
|
||||
CONFIG_ARCH_SUPPORTS_HUGE_PFNMAP=y
|
||||
CONFIG_ARCH_SUPPORTS_PMD_PFNMAP=y
|
||||
CONFIG_ARCH_SUPPORTS_PUD_PFNMAP=y
|
||||
CONFIG_NEED_PER_CPU_EMBED_FIRST_CHUNK=y
|
||||
CONFIG_NEED_PER_CPU_PAGE_FIRST_CHUNK=y
|
||||
CONFIG_USE_PERCPU_NUMA_NODE_ID=y
|
||||
|
@ -600,6 +600,9 @@ static bool turbo_is_disabled(void)
|
||||
{
|
||||
u64 misc_en;
|
||||
|
||||
if (!cpu_feature_enabled(X86_FEATURE_IDA))
|
||||
return true;
|
||||
|
||||
rdmsrl(MSR_IA32_MISC_ENABLE, misc_en);
|
||||
|
||||
return !!(misc_en & MSR_IA32_MISC_ENABLE_TURBO_DISABLE);
|
||||
|
@ -20,6 +20,7 @@
|
||||
#include <linux/mutex.h>
|
||||
#include <linux/notifier.h>
|
||||
#include <linux/pci.h>
|
||||
#include <linux/pfn_t.h>
|
||||
#include <linux/pm_runtime.h>
|
||||
#include <linux/slab.h>
|
||||
#include <linux/types.h>
|
||||
@ -57,11 +58,6 @@ struct vfio_pci_vf_token {
|
||||
int users;
|
||||
};
|
||||
|
||||
struct vfio_pci_mmap_vma {
|
||||
struct vm_area_struct *vma;
|
||||
struct list_head vma_next;
|
||||
};
|
||||
|
||||
static inline bool vfio_vga_disabled(void)
|
||||
{
|
||||
#ifdef CONFIG_VFIO_PCI_VGA
|
||||
@ -1610,100 +1606,20 @@ ssize_t vfio_pci_core_write(struct vfio_device *core_vdev, const char __user *bu
|
||||
}
|
||||
EXPORT_SYMBOL_GPL(vfio_pci_core_write);
|
||||
|
||||
/* Return 1 on zap and vma_lock acquired, 0 on contention (only with @try) */
|
||||
static int vfio_pci_zap_and_vma_lock(struct vfio_pci_core_device *vdev, bool try)
|
||||
static void vfio_pci_zap_bars(struct vfio_pci_core_device *vdev)
|
||||
{
|
||||
struct vfio_pci_mmap_vma *mmap_vma, *tmp;
|
||||
struct vfio_device *core_vdev = &vdev->vdev;
|
||||
loff_t start = VFIO_PCI_INDEX_TO_OFFSET(VFIO_PCI_BAR0_REGION_INDEX);
|
||||
loff_t end = VFIO_PCI_INDEX_TO_OFFSET(VFIO_PCI_ROM_REGION_INDEX);
|
||||
loff_t len = end - start;
|
||||
|
||||
/*
|
||||
* Lock ordering:
|
||||
* vma_lock is nested under mmap_lock for vm_ops callback paths.
|
||||
* The memory_lock semaphore is used by both code paths calling
|
||||
* into this function to zap vmas and the vm_ops.fault callback
|
||||
* to protect the memory enable state of the device.
|
||||
*
|
||||
* When zapping vmas we need to maintain the mmap_lock => vma_lock
|
||||
* ordering, which requires using vma_lock to walk vma_list to
|
||||
* acquire an mm, then dropping vma_lock to get the mmap_lock and
|
||||
* reacquiring vma_lock. This logic is derived from similar
|
||||
* requirements in uverbs_user_mmap_disassociate().
|
||||
*
|
||||
* mmap_lock must always be the top-level lock when it is taken.
|
||||
* Therefore we can only hold the memory_lock write lock when
|
||||
* vma_list is empty, as we'd need to take mmap_lock to clear
|
||||
* entries. vma_list can only be guaranteed empty when holding
|
||||
* vma_lock, thus memory_lock is nested under vma_lock.
|
||||
*
|
||||
* This enables the vm_ops.fault callback to acquire vma_lock,
|
||||
* followed by memory_lock read lock, while already holding
|
||||
* mmap_lock without risk of deadlock.
|
||||
*/
|
||||
while (1) {
|
||||
struct mm_struct *mm = NULL;
|
||||
|
||||
if (try) {
|
||||
if (!mutex_trylock(&vdev->vma_lock))
|
||||
return 0;
|
||||
} else {
|
||||
mutex_lock(&vdev->vma_lock);
|
||||
}
|
||||
while (!list_empty(&vdev->vma_list)) {
|
||||
mmap_vma = list_first_entry(&vdev->vma_list,
|
||||
struct vfio_pci_mmap_vma,
|
||||
vma_next);
|
||||
mm = mmap_vma->vma->vm_mm;
|
||||
if (mmget_not_zero(mm))
|
||||
break;
|
||||
|
||||
list_del(&mmap_vma->vma_next);
|
||||
kfree(mmap_vma);
|
||||
mm = NULL;
|
||||
}
|
||||
if (!mm)
|
||||
return 1;
|
||||
mutex_unlock(&vdev->vma_lock);
|
||||
|
||||
if (try) {
|
||||
if (!mmap_read_trylock(mm)) {
|
||||
mmput(mm);
|
||||
return 0;
|
||||
}
|
||||
} else {
|
||||
mmap_read_lock(mm);
|
||||
}
|
||||
if (try) {
|
||||
if (!mutex_trylock(&vdev->vma_lock)) {
|
||||
mmap_read_unlock(mm);
|
||||
mmput(mm);
|
||||
return 0;
|
||||
}
|
||||
} else {
|
||||
mutex_lock(&vdev->vma_lock);
|
||||
}
|
||||
list_for_each_entry_safe(mmap_vma, tmp,
|
||||
&vdev->vma_list, vma_next) {
|
||||
struct vm_area_struct *vma = mmap_vma->vma;
|
||||
|
||||
if (vma->vm_mm != mm)
|
||||
continue;
|
||||
|
||||
list_del(&mmap_vma->vma_next);
|
||||
kfree(mmap_vma);
|
||||
|
||||
zap_vma_ptes(vma, vma->vm_start,
|
||||
vma->vm_end - vma->vm_start);
|
||||
}
|
||||
mutex_unlock(&vdev->vma_lock);
|
||||
mmap_read_unlock(mm);
|
||||
mmput(mm);
|
||||
}
|
||||
unmap_mapping_range(core_vdev->inode->i_mapping, start, len, true);
|
||||
}
|
||||
|
||||
void vfio_pci_zap_and_down_write_memory_lock(struct vfio_pci_core_device *vdev)
|
||||
{
|
||||
vfio_pci_zap_and_vma_lock(vdev, false);
|
||||
down_write(&vdev->memory_lock);
|
||||
mutex_unlock(&vdev->vma_lock);
|
||||
vfio_pci_zap_bars(vdev);
|
||||
}
|
||||
|
||||
u16 vfio_pci_memory_lock_and_enable(struct vfio_pci_core_device *vdev)
|
||||
@ -1725,100 +1641,83 @@ void vfio_pci_memory_unlock_and_restore(struct vfio_pci_core_device *vdev, u16 c
|
||||
up_write(&vdev->memory_lock);
|
||||
}
|
||||
|
||||
/* Caller holds vma_lock */
|
||||
static int __vfio_pci_add_vma(struct vfio_pci_core_device *vdev,
|
||||
struct vm_area_struct *vma)
|
||||
{
|
||||
struct vfio_pci_mmap_vma *mmap_vma;
|
||||
|
||||
mmap_vma = kmalloc(sizeof(*mmap_vma), GFP_KERNEL_ACCOUNT);
|
||||
if (!mmap_vma)
|
||||
return -ENOMEM;
|
||||
|
||||
mmap_vma->vma = vma;
|
||||
list_add(&mmap_vma->vma_next, &vdev->vma_list);
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
/*
|
||||
* Zap mmaps on open so that we can fault them in on access and therefore
|
||||
* our vma_list only tracks mappings accessed since last zap.
|
||||
*/
|
||||
static void vfio_pci_mmap_open(struct vm_area_struct *vma)
|
||||
{
|
||||
zap_vma_ptes(vma, vma->vm_start, vma->vm_end - vma->vm_start);
|
||||
}
|
||||
|
||||
static void vfio_pci_mmap_close(struct vm_area_struct *vma)
|
||||
static unsigned long vma_to_pfn(struct vm_area_struct *vma)
|
||||
{
|
||||
struct vfio_pci_core_device *vdev = vma->vm_private_data;
|
||||
struct vfio_pci_mmap_vma *mmap_vma;
|
||||
int index = vma->vm_pgoff >> (VFIO_PCI_OFFSET_SHIFT - PAGE_SHIFT);
|
||||
u64 pgoff;
|
||||
|
||||
mutex_lock(&vdev->vma_lock);
|
||||
list_for_each_entry(mmap_vma, &vdev->vma_list, vma_next) {
|
||||
if (mmap_vma->vma == vma) {
|
||||
list_del(&mmap_vma->vma_next);
|
||||
kfree(mmap_vma);
|
||||
break;
|
||||
}
|
||||
}
|
||||
mutex_unlock(&vdev->vma_lock);
|
||||
pgoff = vma->vm_pgoff &
|
||||
((1U << (VFIO_PCI_OFFSET_SHIFT - PAGE_SHIFT)) - 1);
|
||||
|
||||
return (pci_resource_start(vdev->pdev, index) >> PAGE_SHIFT) + pgoff;
|
||||
}
|
||||
|
||||
static vm_fault_t vfio_pci_mmap_fault(struct vm_fault *vmf)
|
||||
static vm_fault_t vfio_pci_mmap_huge_fault(struct vm_fault *vmf,
|
||||
unsigned int order)
|
||||
{
|
||||
struct vm_area_struct *vma = vmf->vma;
|
||||
struct vfio_pci_core_device *vdev = vma->vm_private_data;
|
||||
struct vfio_pci_mmap_vma *mmap_vma;
|
||||
vm_fault_t ret = VM_FAULT_NOPAGE;
|
||||
unsigned long addr = vmf->address & ~((PAGE_SIZE << order) - 1);
|
||||
unsigned long pgoff = (addr - vma->vm_start) >> PAGE_SHIFT;
|
||||
unsigned long pfn = vma_to_pfn(vma) + pgoff;
|
||||
vm_fault_t ret = VM_FAULT_SIGBUS;
|
||||
|
||||
if (order && (addr < vma->vm_start ||
|
||||
addr + (PAGE_SIZE << order) > vma->vm_end ||
|
||||
pfn & ((1 << order) - 1))) {
|
||||
ret = VM_FAULT_FALLBACK;
|
||||
goto out;
|
||||
}
|
||||
|
||||
mutex_lock(&vdev->vma_lock);
|
||||
down_read(&vdev->memory_lock);
|
||||
|
||||
/*
|
||||
* Memory region cannot be accessed if the low power feature is engaged
|
||||
* or memory access is disabled.
|
||||
*/
|
||||
if (vdev->pm_runtime_engaged || !__vfio_pci_memory_enabled(vdev)) {
|
||||
ret = VM_FAULT_SIGBUS;
|
||||
goto up_out;
|
||||
if (vdev->pm_runtime_engaged || !__vfio_pci_memory_enabled(vdev))
|
||||
goto out_unlock;
|
||||
|
||||
switch (order) {
|
||||
case 0:
|
||||
ret = vmf_insert_pfn(vma, vmf->address, pfn);
|
||||
break;
|
||||
#ifdef CONFIG_ARCH_SUPPORTS_PMD_PFNMAP
|
||||
case PMD_ORDER:
|
||||
ret = vmf_insert_pfn_pmd(vmf,
|
||||
__pfn_to_pfn_t(pfn, PFN_DEV), false);
|
||||
break;
|
||||
#endif
|
||||
#ifdef CONFIG_ARCH_SUPPORTS_PUD_PFNMAP
|
||||
case PUD_ORDER:
|
||||
ret = vmf_insert_pfn_pud(vmf,
|
||||
__pfn_to_pfn_t(pfn, PFN_DEV), false);
|
||||
break;
|
||||
#endif
|
||||
default:
|
||||
ret = VM_FAULT_FALLBACK;
|
||||
}
|
||||
|
||||
/*
|
||||
* We populate the whole vma on fault, so we need to test whether
|
||||
* the vma has already been mapped, such as for concurrent faults
|
||||
* to the same vma. io_remap_pfn_range() will trigger a BUG_ON if
|
||||
* we ask it to fill the same range again.
|
||||
*/
|
||||
list_for_each_entry(mmap_vma, &vdev->vma_list, vma_next) {
|
||||
if (mmap_vma->vma == vma)
|
||||
goto up_out;
|
||||
}
|
||||
|
||||
if (io_remap_pfn_range(vma, vma->vm_start, vma->vm_pgoff,
|
||||
vma->vm_end - vma->vm_start,
|
||||
vma->vm_page_prot)) {
|
||||
ret = VM_FAULT_SIGBUS;
|
||||
zap_vma_ptes(vma, vma->vm_start, vma->vm_end - vma->vm_start);
|
||||
goto up_out;
|
||||
}
|
||||
|
||||
if (__vfio_pci_add_vma(vdev, vma)) {
|
||||
ret = VM_FAULT_OOM;
|
||||
zap_vma_ptes(vma, vma->vm_start, vma->vm_end - vma->vm_start);
|
||||
}
|
||||
|
||||
up_out:
|
||||
out_unlock:
|
||||
up_read(&vdev->memory_lock);
|
||||
mutex_unlock(&vdev->vma_lock);
|
||||
out:
|
||||
dev_dbg_ratelimited(&vdev->pdev->dev,
|
||||
"%s(,order = %d) BAR %ld page offset 0x%lx: 0x%x\n",
|
||||
__func__, order,
|
||||
vma->vm_pgoff >>
|
||||
(VFIO_PCI_OFFSET_SHIFT - PAGE_SHIFT),
|
||||
pgoff, (unsigned int)ret);
|
||||
|
||||
return ret;
|
||||
}
|
||||
|
||||
static vm_fault_t vfio_pci_mmap_page_fault(struct vm_fault *vmf)
|
||||
{
|
||||
return vfio_pci_mmap_huge_fault(vmf, 0);
|
||||
}
|
||||
|
||||
static const struct vm_operations_struct vfio_pci_mmap_ops = {
|
||||
.open = vfio_pci_mmap_open,
|
||||
.close = vfio_pci_mmap_close,
|
||||
.fault = vfio_pci_mmap_fault,
|
||||
.fault = vfio_pci_mmap_page_fault,
|
||||
#ifdef CONFIG_ARCH_SUPPORTS_HUGE_PFNMAP
|
||||
.huge_fault = vfio_pci_mmap_huge_fault,
|
||||
#endif
|
||||
};
|
||||
|
||||
int vfio_pci_core_mmap(struct vfio_device *core_vdev, struct vm_area_struct *vma)
|
||||
@ -1880,11 +1779,12 @@ int vfio_pci_core_mmap(struct vfio_device *core_vdev, struct vm_area_struct *vma
|
||||
|
||||
vma->vm_private_data = vdev;
|
||||
vma->vm_page_prot = pgprot_noncached(vma->vm_page_prot);
|
||||
vma->vm_pgoff = (pci_resource_start(pdev, index) >> PAGE_SHIFT) + pgoff;
|
||||
vma->vm_page_prot = pgprot_decrypted(vma->vm_page_prot);
|
||||
|
||||
/*
|
||||
* See remap_pfn_range(), called from vfio_pci_fault() but we can't
|
||||
* change vm_flags within the fault handler. Set them now.
|
||||
* Set vm_flags now, they should not be changed in the fault handler.
|
||||
* We want the same flags and page protection (decrypted above) as
|
||||
* io_remap_pfn_range() would set.
|
||||
*
|
||||
* VM_ALLOW_ANY_UNCACHED: The VMA flag is implemented for ARM64,
|
||||
* allowing KVM stage 2 device mapping attributes to use Normal-NC
|
||||
@ -2202,8 +2102,6 @@ int vfio_pci_core_init_dev(struct vfio_device *core_vdev)
|
||||
mutex_init(&vdev->ioeventfds_lock);
|
||||
INIT_LIST_HEAD(&vdev->dummy_resources_list);
|
||||
INIT_LIST_HEAD(&vdev->ioeventfds_list);
|
||||
mutex_init(&vdev->vma_lock);
|
||||
INIT_LIST_HEAD(&vdev->vma_list);
|
||||
INIT_LIST_HEAD(&vdev->sriov_pfs_item);
|
||||
init_rwsem(&vdev->memory_lock);
|
||||
xa_init(&vdev->ctx);
|
||||
@ -2219,7 +2117,6 @@ void vfio_pci_core_release_dev(struct vfio_device *core_vdev)
|
||||
|
||||
mutex_destroy(&vdev->igate);
|
||||
mutex_destroy(&vdev->ioeventfds_lock);
|
||||
mutex_destroy(&vdev->vma_lock);
|
||||
kfree(vdev->region);
|
||||
kfree(vdev->pm_save);
|
||||
}
|
||||
@ -2497,26 +2394,15 @@ unwind:
|
||||
return ret;
|
||||
}
|
||||
|
||||
/*
|
||||
* We need to get memory_lock for each device, but devices can share mmap_lock,
|
||||
* therefore we need to zap and hold the vma_lock for each device, and only then
|
||||
* get each memory_lock.
|
||||
*/
|
||||
static int vfio_pci_dev_set_hot_reset(struct vfio_device_set *dev_set,
|
||||
struct vfio_pci_group_info *groups,
|
||||
struct iommufd_ctx *iommufd_ctx)
|
||||
{
|
||||
struct vfio_pci_core_device *cur_mem;
|
||||
struct vfio_pci_core_device *cur_vma;
|
||||
struct vfio_pci_core_device *cur;
|
||||
struct vfio_pci_core_device *vdev;
|
||||
struct pci_dev *pdev;
|
||||
bool is_mem = true;
|
||||
int ret;
|
||||
|
||||
mutex_lock(&dev_set->lock);
|
||||
cur_mem = list_first_entry(&dev_set->device_list,
|
||||
struct vfio_pci_core_device,
|
||||
vdev.dev_set_list);
|
||||
|
||||
pdev = vfio_pci_dev_set_resettable(dev_set);
|
||||
if (!pdev) {
|
||||
@ -2533,7 +2419,7 @@ static int vfio_pci_dev_set_hot_reset(struct vfio_device_set *dev_set,
|
||||
if (ret)
|
||||
goto err_unlock;
|
||||
|
||||
list_for_each_entry(cur_vma, &dev_set->device_list, vdev.dev_set_list) {
|
||||
list_for_each_entry(vdev, &dev_set->device_list, vdev.dev_set_list) {
|
||||
bool owned;
|
||||
|
||||
/*
|
||||
@ -2557,38 +2443,38 @@ static int vfio_pci_dev_set_hot_reset(struct vfio_device_set *dev_set,
|
||||
* Otherwise, reset is not allowed.
|
||||
*/
|
||||
if (iommufd_ctx) {
|
||||
int devid = vfio_iommufd_get_dev_id(&cur_vma->vdev,
|
||||
int devid = vfio_iommufd_get_dev_id(&vdev->vdev,
|
||||
iommufd_ctx);
|
||||
|
||||
owned = (devid > 0 || devid == -ENOENT);
|
||||
} else {
|
||||
owned = vfio_dev_in_groups(&cur_vma->vdev, groups);
|
||||
owned = vfio_dev_in_groups(&vdev->vdev, groups);
|
||||
}
|
||||
|
||||
if (!owned) {
|
||||
ret = -EINVAL;
|
||||
goto err_undo;
|
||||
break;
|
||||
}
|
||||
|
||||
/*
|
||||
* Locking multiple devices is prone to deadlock, runaway and
|
||||
* unwind if we hit contention.
|
||||
* Take the memory write lock for each device and zap BAR
|
||||
* mappings to prevent the user accessing the device while in
|
||||
* reset. Locking multiple devices is prone to deadlock,
|
||||
* runaway and unwind if we hit contention.
|
||||
*/
|
||||
if (!vfio_pci_zap_and_vma_lock(cur_vma, true)) {
|
||||
if (!down_write_trylock(&vdev->memory_lock)) {
|
||||
ret = -EBUSY;
|
||||
goto err_undo;
|
||||
break;
|
||||
}
|
||||
}
|
||||
cur_vma = NULL;
|
||||
|
||||
list_for_each_entry(cur_mem, &dev_set->device_list, vdev.dev_set_list) {
|
||||
if (!down_write_trylock(&cur_mem->memory_lock)) {
|
||||
ret = -EBUSY;
|
||||
goto err_undo;
|
||||
}
|
||||
mutex_unlock(&cur_mem->vma_lock);
|
||||
vfio_pci_zap_bars(vdev);
|
||||
}
|
||||
|
||||
if (!list_entry_is_head(vdev,
|
||||
&dev_set->device_list, vdev.dev_set_list)) {
|
||||
vdev = list_prev_entry(vdev, vdev.dev_set_list);
|
||||
goto err_undo;
|
||||
}
|
||||
cur_mem = NULL;
|
||||
|
||||
/*
|
||||
* The pci_reset_bus() will reset all the devices in the bus.
|
||||
@ -2599,25 +2485,22 @@ static int vfio_pci_dev_set_hot_reset(struct vfio_device_set *dev_set,
|
||||
* cause the PCI config space reset without restoring the original
|
||||
* state (saved locally in 'vdev->pm_save').
|
||||
*/
|
||||
list_for_each_entry(cur, &dev_set->device_list, vdev.dev_set_list)
|
||||
vfio_pci_set_power_state(cur, PCI_D0);
|
||||
list_for_each_entry(vdev, &dev_set->device_list, vdev.dev_set_list)
|
||||
vfio_pci_set_power_state(vdev, PCI_D0);
|
||||
|
||||
ret = pci_reset_bus(pdev);
|
||||
|
||||
err_undo:
|
||||
list_for_each_entry(cur, &dev_set->device_list, vdev.dev_set_list) {
|
||||
if (cur == cur_mem)
|
||||
is_mem = false;
|
||||
if (cur == cur_vma)
|
||||
break;
|
||||
if (is_mem)
|
||||
up_write(&cur->memory_lock);
|
||||
else
|
||||
mutex_unlock(&cur->vma_lock);
|
||||
}
|
||||
vdev = list_last_entry(&dev_set->device_list,
|
||||
struct vfio_pci_core_device, vdev.dev_set_list);
|
||||
|
||||
err_undo:
|
||||
list_for_each_entry_from_reverse(vdev, &dev_set->device_list,
|
||||
vdev.dev_set_list)
|
||||
up_write(&vdev->memory_lock);
|
||||
|
||||
list_for_each_entry(vdev, &dev_set->device_list, vdev.dev_set_list)
|
||||
pm_runtime_put(&vdev->pdev->dev);
|
||||
|
||||
list_for_each_entry(cur, &dev_set->device_list, vdev.dev_set_list)
|
||||
pm_runtime_put(&cur->pdev->dev);
|
||||
err_unlock:
|
||||
mutex_unlock(&dev_set->lock);
|
||||
return ret;
|
||||
|
@ -105,9 +105,9 @@ struct vfio_dma {
|
||||
struct vfio_batch {
|
||||
struct page **pages; /* for pin_user_pages_remote */
|
||||
struct page *fallback_page; /* if pages alloc fails */
|
||||
int capacity; /* length of pages array */
|
||||
int size; /* of batch currently */
|
||||
int offset; /* of next entry in pages */
|
||||
unsigned int capacity; /* length of pages array */
|
||||
unsigned int size; /* of batch currently */
|
||||
unsigned int offset; /* of next entry in pages */
|
||||
};
|
||||
|
||||
struct vfio_iommu_group {
|
||||
@ -474,12 +474,12 @@ static int put_pfn(unsigned long pfn, int prot)
|
||||
|
||||
#define VFIO_BATCH_MAX_CAPACITY (PAGE_SIZE / sizeof(struct page *))
|
||||
|
||||
static void vfio_batch_init(struct vfio_batch *batch)
|
||||
static void __vfio_batch_init(struct vfio_batch *batch, bool single)
|
||||
{
|
||||
batch->size = 0;
|
||||
batch->offset = 0;
|
||||
|
||||
if (unlikely(disable_hugepages))
|
||||
if (single || unlikely(disable_hugepages))
|
||||
goto fallback;
|
||||
|
||||
batch->pages = (struct page **) __get_free_page(GFP_KERNEL);
|
||||
@ -494,6 +494,16 @@ fallback:
|
||||
batch->capacity = 1;
|
||||
}
|
||||
|
||||
static void vfio_batch_init(struct vfio_batch *batch)
|
||||
{
|
||||
__vfio_batch_init(batch, false);
|
||||
}
|
||||
|
||||
static void vfio_batch_init_single(struct vfio_batch *batch)
|
||||
{
|
||||
__vfio_batch_init(batch, true);
|
||||
}
|
||||
|
||||
static void vfio_batch_unpin(struct vfio_batch *batch, struct vfio_dma *dma)
|
||||
{
|
||||
while (batch->size) {
|
||||
@ -513,14 +523,12 @@ static void vfio_batch_fini(struct vfio_batch *batch)
|
||||
|
||||
static int follow_fault_pfn(struct vm_area_struct *vma, struct mm_struct *mm,
|
||||
unsigned long vaddr, unsigned long *pfn,
|
||||
bool write_fault)
|
||||
unsigned long *addr_mask, bool write_fault)
|
||||
{
|
||||
pte_t *ptep;
|
||||
pte_t pte;
|
||||
spinlock_t *ptl;
|
||||
struct follow_pfnmap_args args = { .vma = vma, .address = vaddr };
|
||||
int ret;
|
||||
|
||||
ret = follow_pte(vma->vm_mm, vaddr, &ptep, &ptl);
|
||||
ret = follow_pfnmap_start(&args);
|
||||
if (ret) {
|
||||
bool unlocked = false;
|
||||
|
||||
@ -534,43 +542,51 @@ static int follow_fault_pfn(struct vm_area_struct *vma, struct mm_struct *mm,
|
||||
if (ret)
|
||||
return ret;
|
||||
|
||||
ret = follow_pte(vma->vm_mm, vaddr, &ptep, &ptl);
|
||||
ret = follow_pfnmap_start(&args);
|
||||
if (ret)
|
||||
return ret;
|
||||
}
|
||||
|
||||
pte = ptep_get(ptep);
|
||||
|
||||
if (write_fault && !pte_write(pte))
|
||||
if (write_fault && !args.writable) {
|
||||
ret = -EFAULT;
|
||||
else
|
||||
*pfn = pte_pfn(pte);
|
||||
} else {
|
||||
*pfn = args.pfn;
|
||||
*addr_mask = args.addr_mask;
|
||||
}
|
||||
|
||||
pte_unmap_unlock(ptep, ptl);
|
||||
follow_pfnmap_end(&args);
|
||||
return ret;
|
||||
}
|
||||
|
||||
/*
|
||||
* Returns the positive number of pfns successfully obtained or a negative
|
||||
* error code.
|
||||
* error code. The initial pfn is stored in the pfn arg. For page-backed
|
||||
* pfns, the provided batch is also updated to indicate the filled pages and
|
||||
* initial offset. For VM_PFNMAP pfns, only the returned number of pfns and
|
||||
* returned initial pfn are provided; subsequent pfns are contiguous.
|
||||
*/
|
||||
static int vaddr_get_pfns(struct mm_struct *mm, unsigned long vaddr,
|
||||
long npages, int prot, unsigned long *pfn,
|
||||
struct page **pages)
|
||||
static long vaddr_get_pfns(struct mm_struct *mm, unsigned long vaddr,
|
||||
unsigned long npages, int prot, unsigned long *pfn,
|
||||
struct vfio_batch *batch)
|
||||
{
|
||||
unsigned long pin_pages = min_t(unsigned long, npages, batch->capacity);
|
||||
struct vm_area_struct *vma;
|
||||
unsigned int flags = 0;
|
||||
int ret;
|
||||
long ret;
|
||||
|
||||
if (prot & IOMMU_WRITE)
|
||||
flags |= FOLL_WRITE;
|
||||
|
||||
mmap_read_lock(mm);
|
||||
ret = pin_user_pages_remote(mm, vaddr, npages, flags | FOLL_LONGTERM,
|
||||
pages, NULL);
|
||||
ret = pin_user_pages_remote(mm, vaddr, pin_pages, flags | FOLL_LONGTERM,
|
||||
batch->pages, NULL);
|
||||
if (ret > 0) {
|
||||
*pfn = page_to_pfn(pages[0]);
|
||||
*pfn = page_to_pfn(batch->pages[0]);
|
||||
batch->size = ret;
|
||||
batch->offset = 0;
|
||||
goto done;
|
||||
} else if (!ret) {
|
||||
ret = -EFAULT;
|
||||
}
|
||||
|
||||
vaddr = untagged_addr_remote(mm, vaddr);
|
||||
@ -579,15 +595,22 @@ retry:
|
||||
vma = vma_lookup(mm, vaddr);
|
||||
|
||||
if (vma && vma->vm_flags & VM_PFNMAP) {
|
||||
ret = follow_fault_pfn(vma, mm, vaddr, pfn, prot & IOMMU_WRITE);
|
||||
unsigned long addr_mask;
|
||||
|
||||
ret = follow_fault_pfn(vma, mm, vaddr, pfn, &addr_mask,
|
||||
prot & IOMMU_WRITE);
|
||||
if (ret == -EAGAIN)
|
||||
goto retry;
|
||||
|
||||
if (!ret) {
|
||||
if (is_invalid_reserved_pfn(*pfn))
|
||||
ret = 1;
|
||||
else
|
||||
if (is_invalid_reserved_pfn(*pfn)) {
|
||||
unsigned long epfn;
|
||||
|
||||
epfn = (*pfn | (~addr_mask >> PAGE_SHIFT)) + 1;
|
||||
ret = min_t(long, npages, epfn - *pfn);
|
||||
} else {
|
||||
ret = -EFAULT;
|
||||
}
|
||||
}
|
||||
}
|
||||
done:
|
||||
@ -601,7 +624,7 @@ done:
|
||||
* first page and all consecutive pages with the same locking.
|
||||
*/
|
||||
static long vfio_pin_pages_remote(struct vfio_dma *dma, unsigned long vaddr,
|
||||
long npage, unsigned long *pfn_base,
|
||||
unsigned long npage, unsigned long *pfn_base,
|
||||
unsigned long limit, struct vfio_batch *batch)
|
||||
{
|
||||
unsigned long pfn;
|
||||
@ -623,32 +646,42 @@ static long vfio_pin_pages_remote(struct vfio_dma *dma, unsigned long vaddr,
|
||||
*pfn_base = 0;
|
||||
}
|
||||
|
||||
if (unlikely(disable_hugepages))
|
||||
npage = 1;
|
||||
|
||||
while (npage) {
|
||||
if (!batch->size) {
|
||||
/* Empty batch, so refill it. */
|
||||
long req_pages = min_t(long, npage, batch->capacity);
|
||||
|
||||
ret = vaddr_get_pfns(mm, vaddr, req_pages, dma->prot,
|
||||
&pfn, batch->pages);
|
||||
ret = vaddr_get_pfns(mm, vaddr, npage, dma->prot,
|
||||
&pfn, batch);
|
||||
if (ret < 0)
|
||||
goto unpin_out;
|
||||
|
||||
batch->size = ret;
|
||||
batch->offset = 0;
|
||||
|
||||
if (!*pfn_base) {
|
||||
*pfn_base = pfn;
|
||||
rsvd = is_invalid_reserved_pfn(*pfn_base);
|
||||
}
|
||||
|
||||
/* Handle pfnmap */
|
||||
if (!batch->size) {
|
||||
if (pfn != *pfn_base + pinned || !rsvd)
|
||||
goto out;
|
||||
|
||||
pinned += ret;
|
||||
npage -= ret;
|
||||
vaddr += (PAGE_SIZE * ret);
|
||||
iova += (PAGE_SIZE * ret);
|
||||
continue;
|
||||
}
|
||||
}
|
||||
|
||||
/*
|
||||
* pfn is preset for the first iteration of this inner loop and
|
||||
* updated at the end to handle a VM_PFNMAP pfn. In that case,
|
||||
* batch->pages isn't valid (there's no struct page), so allow
|
||||
* batch->pages to be touched only when there's more than one
|
||||
* pfn to check, which guarantees the pfns are from a
|
||||
* !VM_PFNMAP vma.
|
||||
* pfn is preset for the first iteration of this inner loop
|
||||
* due to the fact that vaddr_get_pfns() needs to provide the
|
||||
* initial pfn for pfnmaps. Therefore to reduce redundancy,
|
||||
* the next pfn is fetched at the end of the loop.
|
||||
* A PageReserved() page could still qualify as page backed
|
||||
* and rsvd here, and therefore continues to use the batch.
|
||||
*/
|
||||
while (true) {
|
||||
if (pfn != *pfn_base + pinned ||
|
||||
@ -683,21 +716,12 @@ static long vfio_pin_pages_remote(struct vfio_dma *dma, unsigned long vaddr,
|
||||
|
||||
pfn = page_to_pfn(batch->pages[batch->offset]);
|
||||
}
|
||||
|
||||
if (unlikely(disable_hugepages))
|
||||
break;
|
||||
}
|
||||
|
||||
out:
|
||||
ret = vfio_lock_acct(dma, lock_acct, false);
|
||||
|
||||
unpin_out:
|
||||
if (batch->size == 1 && !batch->offset) {
|
||||
/* May be a VM_PFNMAP pfn, which the batch can't remember. */
|
||||
put_pfn(pfn, dma->prot);
|
||||
batch->size = 0;
|
||||
}
|
||||
|
||||
if (ret < 0) {
|
||||
if (pinned && !rsvd) {
|
||||
for (pfn = *pfn_base ; pinned ; pfn++, pinned--)
|
||||
@ -712,7 +736,7 @@ unpin_out:
|
||||
}
|
||||
|
||||
static long vfio_unpin_pages_remote(struct vfio_dma *dma, dma_addr_t iova,
|
||||
unsigned long pfn, long npage,
|
||||
unsigned long pfn, unsigned long npage,
|
||||
bool do_accounting)
|
||||
{
|
||||
long unlocked = 0, locked = 0;
|
||||
@ -735,7 +759,7 @@ static long vfio_unpin_pages_remote(struct vfio_dma *dma, dma_addr_t iova,
|
||||
static int vfio_pin_page_external(struct vfio_dma *dma, unsigned long vaddr,
|
||||
unsigned long *pfn_base, bool do_accounting)
|
||||
{
|
||||
struct page *pages[1];
|
||||
struct vfio_batch batch;
|
||||
struct mm_struct *mm;
|
||||
int ret;
|
||||
|
||||
@ -743,7 +767,9 @@ static int vfio_pin_page_external(struct vfio_dma *dma, unsigned long vaddr,
|
||||
if (!mmget_not_zero(mm))
|
||||
return -ENODEV;
|
||||
|
||||
ret = vaddr_get_pfns(mm, vaddr, 1, dma->prot, pfn_base, pages);
|
||||
vfio_batch_init_single(&batch);
|
||||
|
||||
ret = vaddr_get_pfns(mm, vaddr, 1, dma->prot, pfn_base, &batch);
|
||||
if (ret != 1)
|
||||
goto out;
|
||||
|
||||
@ -762,6 +788,7 @@ static int vfio_pin_page_external(struct vfio_dma *dma, unsigned long vaddr,
|
||||
}
|
||||
|
||||
out:
|
||||
vfio_batch_fini(&batch);
|
||||
mmput(mm);
|
||||
return ret;
|
||||
}
|
||||
|
@ -158,6 +158,8 @@ extern int cifs_get_writable_path(struct cifs_tcon *tcon, const char *name,
|
||||
extern struct cifsFileInfo *find_readable_file(struct cifsInodeInfo *, bool);
|
||||
extern int cifs_get_readable_path(struct cifs_tcon *tcon, const char *name,
|
||||
struct cifsFileInfo **ret_file);
|
||||
extern int cifs_get_hardlink_path(struct cifs_tcon *tcon, struct inode *inode,
|
||||
struct file *file);
|
||||
extern unsigned int smbCalcSize(void *buf);
|
||||
extern int decode_negTokenInit(unsigned char *security_blob, int length,
|
||||
struct TCP_Server_Info *server);
|
||||
|
@ -685,15 +685,23 @@ int cifs_open(struct inode *inode, struct file *file)
|
||||
rc = cifs_get_readable_path(tcon, full_path, &cfile);
|
||||
}
|
||||
if (rc == 0) {
|
||||
if (file->f_flags == cfile->f_flags) {
|
||||
unsigned int oflags = file->f_flags & ~(O_CREAT|O_EXCL|O_TRUNC);
|
||||
unsigned int cflags = cfile->f_flags & ~(O_CREAT|O_EXCL|O_TRUNC);
|
||||
|
||||
if (cifs_convert_flags(oflags, 0) == cifs_convert_flags(cflags, 0) &&
|
||||
(oflags & (O_SYNC|O_DIRECT)) == (cflags & (O_SYNC|O_DIRECT))) {
|
||||
file->private_data = cfile;
|
||||
spin_lock(&CIFS_I(inode)->deferred_lock);
|
||||
cifs_del_deferred_close(cfile);
|
||||
spin_unlock(&CIFS_I(inode)->deferred_lock);
|
||||
goto use_cache;
|
||||
} else {
|
||||
_cifsFileInfo_put(cfile, true, false);
|
||||
}
|
||||
_cifsFileInfo_put(cfile, true, false);
|
||||
} else {
|
||||
/* hard link on the defeered close file */
|
||||
rc = cifs_get_hardlink_path(tcon, inode, file);
|
||||
if (rc)
|
||||
cifs_close_deferred_file(CIFS_I(inode));
|
||||
}
|
||||
|
||||
if (server->oplocks)
|
||||
@ -1754,6 +1762,29 @@ cifs_move_llist(struct list_head *source, struct list_head *dest)
|
||||
list_move(li, dest);
|
||||
}
|
||||
|
||||
int
|
||||
cifs_get_hardlink_path(struct cifs_tcon *tcon, struct inode *inode,
|
||||
struct file *file)
|
||||
{
|
||||
struct cifsFileInfo *open_file = NULL;
|
||||
struct cifsInodeInfo *cinode = CIFS_I(inode);
|
||||
int rc = 0;
|
||||
|
||||
spin_lock(&tcon->open_file_lock);
|
||||
spin_lock(&cinode->open_file_lock);
|
||||
|
||||
list_for_each_entry(open_file, &cinode->openFileList, flist) {
|
||||
if (file->f_flags == open_file->f_flags) {
|
||||
rc = -EINVAL;
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
||||
spin_unlock(&cinode->open_file_lock);
|
||||
spin_unlock(&tcon->open_file_lock);
|
||||
return rc;
|
||||
}
|
||||
|
||||
void
|
||||
cifs_free_llist(struct list_head *llist)
|
||||
{
|
||||
|
include/config/ARCH_SUPPORTS_HUGE_PFNMAP (new empty file)
include/config/ARCH_SUPPORTS_PMD_PFNMAP (new empty file)
@ -1369,6 +1369,7 @@ CONFIG_CRYPTO_AES_ARM64_CE=y
|
||||
CONFIG_CORESIGHT_LINK_AND_SINK_TMC=m
|
||||
CONFIG_NFT_FIB_NETDEV=m
|
||||
CONFIG_SERIAL_EARLYCON=y
|
||||
CONFIG_ARCH_SUPPORTS_PMD_PFNMAP=y
|
||||
CONFIG_CLS_U32_MARK=y
|
||||
CONFIG_SND_ICE1712=m
|
||||
CONFIG_GENERIC_IRQ_INJECTION=y
|
||||
@ -1680,6 +1681,7 @@ CONFIG_ARCH_HAS_FAST_MULTIPLIER=y
|
||||
CONFIG_ATH9K_DEBUGFS=y
|
||||
CONFIG_NET_VENDOR_REALTEK=y
|
||||
CONFIG_ARCH_HAVE_NMI_SAFE_CMPXCHG=y
|
||||
CONFIG_ARCH_SUPPORTS_HUGE_PFNMAP=y
|
||||
CONFIG_NETFILTER_XT_TARGET_HL=m
|
||||
CONFIG_MEGARAID_SAS=m
|
||||
CONFIG_MT792x_LIB=m
|
||||
|
@ -1371,6 +1371,7 @@
|
||||
#define CONFIG_CORESIGHT_LINK_AND_SINK_TMC_MODULE 1
|
||||
#define CONFIG_NFT_FIB_NETDEV_MODULE 1
|
||||
#define CONFIG_SERIAL_EARLYCON 1
|
||||
#define CONFIG_ARCH_SUPPORTS_PMD_PFNMAP 1
|
||||
#define CONFIG_CLS_U32_MARK 1
|
||||
#define CONFIG_SND_ICE1712_MODULE 1
|
||||
#define CONFIG_GENERIC_IRQ_INJECTION 1
|
||||
@ -1682,6 +1683,7 @@
|
||||
#define CONFIG_ATH9K_DEBUGFS 1
|
||||
#define CONFIG_NET_VENDOR_REALTEK 1
|
||||
#define CONFIG_ARCH_HAVE_NMI_SAFE_CMPXCHG 1
|
||||
#define CONFIG_ARCH_SUPPORTS_HUGE_PFNMAP 1
|
||||
#define CONFIG_NETFILTER_XT_TARGET_HL_MODULE 1
|
||||
#define CONFIG_MEGARAID_SAS_MODULE 1
|
||||
#define CONFIG_MT792x_LIB_MODULE 1
|
||||
|
@ -256,11 +256,6 @@ static inline bool is_huge_zero_pmd(pmd_t pmd)
|
||||
return pmd_present(pmd) && READ_ONCE(huge_zero_pfn) == pmd_pfn(pmd);
|
||||
}
|
||||
|
||||
static inline bool is_huge_zero_pud(pud_t pud)
|
||||
{
|
||||
return false;
|
||||
}
|
||||
|
||||
struct page *mm_get_huge_zero_page(struct mm_struct *mm);
|
||||
void mm_put_huge_zero_page(struct mm_struct *mm);
|
||||
|
||||
@ -379,11 +374,6 @@ static inline bool is_huge_zero_pmd(pmd_t pmd)
|
||||
return false;
|
||||
}
|
||||
|
||||
static inline bool is_huge_zero_pud(pud_t pud)
|
||||
{
|
||||
return false;
|
||||
}
|
||||
|
||||
static inline void mm_put_huge_zero_page(struct mm_struct *mm)
|
||||
{
|
||||
return;
|
||||
|
@@ -2427,15 +2427,42 @@ void free_pgd_range(struct mmu_gather *tlb, unsigned long addr,
		unsigned long end, unsigned long floor, unsigned long ceiling);
int
copy_page_range(struct vm_area_struct *dst_vma, struct vm_area_struct *src_vma);
int follow_pte(struct mm_struct *mm, unsigned long address,
	       pte_t **ptepp, spinlock_t **ptlp);
int follow_pfn(struct vm_area_struct *vma, unsigned long address,
	unsigned long *pfn);
int follow_phys(struct vm_area_struct *vma, unsigned long address,
		unsigned int flags, unsigned long *prot, resource_size_t *phys);
int generic_access_phys(struct vm_area_struct *vma, unsigned long addr,
			void *buf, int len, int write);

struct follow_pfnmap_args {
	/**
	 * Inputs:
	 * @vma: Pointer to @vm_area_struct struct
	 * @address: the virtual address to walk
	 */
	struct vm_area_struct *vma;
	unsigned long address;
	/**
	 * Internals:
	 *
	 * The caller shouldn't touch any of these.
	 */
	spinlock_t *lock;
	pte_t *ptep;
	/**
	 * Outputs:
	 *
	 * @pfn: the PFN of the address
	 * @addr_mask: address mask covering pfn
	 * @pgprot: the pgprot_t of the mapping
	 * @writable: whether the mapping is writable
	 * @special: whether the mapping is a special mapping (real PFN maps)
	 */
	unsigned long pfn;
	unsigned long addr_mask;
	pgprot_t pgprot;
	bool writable;
	bool special;
};
int follow_pfnmap_start(struct follow_pfnmap_args *args);
void follow_pfnmap_end(struct follow_pfnmap_args *args);

extern void truncate_pagecache(struct inode *inode, loff_t new);
extern void truncate_setsize(struct inode *inode, loff_t newsize);
void pagecache_isize_extended(struct inode *inode, loff_t from, loff_t to);
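For context, a minimal sketch of how a caller might use the follow_pfnmap_start()/follow_pfnmap_end() pair declared above, modeled on the conversions elsewhere in this commit (the s390 MMIO syscalls, x86 PAT and vfio); lookup_mmio_pfn() is a hypothetical helper, not part of this patch:

#include <linux/mm.h>

/* Hypothetical helper: resolve the pfn backing a pfnmap'ed address.
 * The caller must hold the mmap lock (or i_mmap_rwsem) for @vma and
 * may only rely on the args fields between start() and end(). */
static int lookup_mmio_pfn(struct vm_area_struct *vma,
			   unsigned long address, unsigned long *pfn)
{
	struct follow_pfnmap_args args = { .vma = vma, .address = address };
	int ret;

	ret = follow_pfnmap_start(&args);
	if (ret)
		return ret;

	*pfn = args.pfn;	/* valid only until follow_pfnmap_end() */

	follow_pfnmap_end(&args);
	return 0;
}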
@ -2730,6 +2757,30 @@ static inline pte_t pte_mkspecial(pte_t pte)
|
||||
}
|
||||
#endif
|
||||
|
||||
#ifndef CONFIG_ARCH_SUPPORTS_PMD_PFNMAP
|
||||
static inline bool pmd_special(pmd_t pmd)
|
||||
{
|
||||
return false;
|
||||
}
|
||||
|
||||
static inline pmd_t pmd_mkspecial(pmd_t pmd)
|
||||
{
|
||||
return pmd;
|
||||
}
|
||||
#endif /* CONFIG_ARCH_SUPPORTS_PMD_PFNMAP */
|
||||
|
||||
#ifndef CONFIG_ARCH_SUPPORTS_PUD_PFNMAP
|
||||
static inline bool pud_special(pud_t pud)
|
||||
{
|
||||
return false;
|
||||
}
|
||||
|
||||
static inline pud_t pud_mkspecial(pud_t pud)
|
||||
{
|
||||
return pud;
|
||||
}
|
||||
#endif /* CONFIG_ARCH_SUPPORTS_PUD_PFNMAP */
|
||||
|
||||
#ifndef CONFIG_ARCH_HAS_PTE_DEVMAP
|
||||
static inline int pte_devmap(pte_t pte)
|
||||
{
|
||||
|
@@ -1685,6 +1685,18 @@ typedef unsigned int pgtbl_mod_mask;
#define MAX_PTRS_PER_P4D PTRS_PER_P4D
#endif

#ifndef pte_pgprot
#define pte_pgprot(x) ((pgprot_t) {0})
#endif

#ifndef pmd_pgprot
#define pmd_pgprot(x) ((pgprot_t) {0})
#endif

#ifndef pud_pgprot
#define pud_pgprot(x) ((pgprot_t) {0})
#endif

/* description of effects of mapping type and prot in current implementation.
 * this is due to the limited x86 page protection hardware. The expected
 * behavior is in parens:
@ -93,8 +93,6 @@ struct vfio_pci_core_device {
|
||||
struct list_head sriov_pfs_item;
|
||||
struct vfio_pci_core_device *sriov_pf_core_dev;
|
||||
struct notifier_block nb;
|
||||
struct mutex vma_lock;
|
||||
struct list_head vma_list;
|
||||
struct rw_semaphore memory_lock;
|
||||
};
|
||||
|
||||
|
mm/Kconfig | 13
@@ -897,6 +897,19 @@ config READ_ONLY_THP_FOR_FS

endif # TRANSPARENT_HUGEPAGE

# TODO: Allow to be enabled without THP
config ARCH_SUPPORTS_HUGE_PFNMAP
	def_bool n
	depends on TRANSPARENT_HUGEPAGE

config ARCH_SUPPORTS_PMD_PFNMAP
	def_bool y
	depends on ARCH_SUPPORTS_HUGE_PFNMAP && HAVE_ARCH_TRANSPARENT_HUGEPAGE

config ARCH_SUPPORTS_PUD_PFNMAP
	def_bool y
	depends on ARCH_SUPPORTS_HUGE_PFNMAP && HAVE_ARCH_TRANSPARENT_HUGEPAGE_PUD

#
# UP and nommu archs use km based percpu allocator
#
mm/gup.c | 6
@@ -2903,6 +2903,9 @@ static int gup_huge_pmd(pmd_t orig, pmd_t *pmdp, unsigned long addr,
	if (!pmd_access_permitted(orig, flags & FOLL_WRITE))
		return 0;

	if (pmd_special(orig))
		return 0;

	if (pmd_devmap(orig)) {
		if (unlikely(flags & FOLL_LONGTERM))
			return 0;
@@ -2947,6 +2950,9 @@ static int gup_huge_pud(pud_t orig, pud_t *pudp, unsigned long addr,
	if (!pud_access_permitted(orig, flags & FOLL_WRITE))
		return 0;

	if (pud_special(orig))
		return 0;

	if (pud_devmap(orig)) {
		if (unlikely(flags & FOLL_LONGTERM))
			return 0;
@ -860,6 +860,8 @@ static void insert_pfn_pmd(struct vm_area_struct *vma, unsigned long addr,
|
||||
entry = pmd_mkhuge(pfn_t_pmd(pfn, prot));
|
||||
if (pfn_t_devmap(pfn))
|
||||
entry = pmd_mkdevmap(entry);
|
||||
else
|
||||
entry = pmd_mkspecial(entry);
|
||||
if (write) {
|
||||
entry = pmd_mkyoung(pmd_mkdirty(entry));
|
||||
entry = maybe_pmd_mkwrite(entry, vma);
|
||||
@ -943,10 +945,8 @@ static void insert_pfn_pud(struct vm_area_struct *vma, unsigned long addr,
|
||||
ptl = pud_lock(mm, pud);
|
||||
if (!pud_none(*pud)) {
|
||||
if (write) {
|
||||
if (pud_pfn(*pud) != pfn_t_to_pfn(pfn)) {
|
||||
WARN_ON_ONCE(!is_huge_zero_pud(*pud));
|
||||
if (WARN_ON_ONCE(pud_pfn(*pud) != pfn_t_to_pfn(pfn)))
|
||||
goto out_unlock;
|
||||
}
|
||||
entry = pud_mkyoung(*pud);
|
||||
entry = maybe_pud_mkwrite(pud_mkdirty(entry), vma);
|
||||
if (pudp_set_access_flags(vma, addr, pud, entry, 1))
|
||||
@ -958,6 +958,8 @@ static void insert_pfn_pud(struct vm_area_struct *vma, unsigned long addr,
|
||||
entry = pud_mkhuge(pfn_t_pud(pfn, prot));
|
||||
if (pfn_t_devmap(pfn))
|
||||
entry = pud_mkdevmap(entry);
|
||||
else
|
||||
entry = pud_mkspecial(entry);
|
||||
if (write) {
|
||||
entry = pud_mkyoung(pud_mkdirty(entry));
|
||||
entry = maybe_pud_mkwrite(entry, vma);
|
||||
@ -1070,6 +1072,24 @@ int copy_huge_pmd(struct mm_struct *dst_mm, struct mm_struct *src_mm,
|
||||
pgtable_t pgtable = NULL;
|
||||
int ret = -ENOMEM;
|
||||
|
||||
pmd = pmdp_get_lockless(src_pmd);
|
||||
if (unlikely(pmd_present(pmd) && pmd_special(pmd))) {
|
||||
dst_ptl = pmd_lock(dst_mm, dst_pmd);
|
||||
src_ptl = pmd_lockptr(src_mm, src_pmd);
|
||||
spin_lock_nested(src_ptl, SINGLE_DEPTH_NESTING);
|
||||
/*
|
||||
* No need to recheck the pmd, it can't change with write
|
||||
* mmap lock held here.
|
||||
*
|
||||
* Meanwhile, making sure it's not a CoW VMA with writable
|
||||
* mapping, otherwise it means either the anon page wrongly
|
||||
* applied special bit, or we made the PRIVATE mapping be
|
||||
* able to wrongly write to the backend MMIO.
|
||||
*/
|
||||
VM_WARN_ON_ONCE(is_cow_mapping(src_vma->vm_flags) && pmd_write(pmd));
|
||||
goto set_pmd;
|
||||
}
|
||||
|
||||
/* Skip if can be re-fill on fault */
|
||||
if (!vma_is_anonymous(dst_vma))
|
||||
return 0;
|
||||
@ -1150,7 +1170,9 @@ out_zero_page:
|
||||
pmdp_set_wrprotect(src_mm, addr, src_pmd);
|
||||
if (!userfaultfd_wp(dst_vma))
|
||||
pmd = pmd_clear_uffd_wp(pmd);
|
||||
pmd = pmd_mkold(pmd_wrprotect(pmd));
|
||||
pmd = pmd_wrprotect(pmd);
|
||||
set_pmd:
|
||||
pmd = pmd_mkold(pmd);
|
||||
set_pmd_at(dst_mm, addr, dst_pmd, pmd);
|
||||
|
||||
ret = 0;
|
||||
@ -1235,21 +1257,15 @@ int copy_huge_pud(struct mm_struct *dst_mm, struct mm_struct *src_mm,
|
||||
if (unlikely(!pud_trans_huge(pud) && !pud_devmap(pud)))
|
||||
goto out_unlock;
|
||||
|
||||
/*
|
||||
* When page table lock is held, the huge zero pud should not be
|
||||
* under splitting since we don't split the page itself, only pud to
|
||||
* a page table.
|
||||
*/
|
||||
if (is_huge_zero_pud(pud)) {
|
||||
/* No huge zero pud yet */
|
||||
}
|
||||
|
||||
/*
|
||||
* TODO: once we support anonymous pages, use page_try_dup_anon_rmap()
|
||||
* and split if duplicating fails.
|
||||
*/
|
||||
pudp_set_wrprotect(src_mm, addr, src_pud);
|
||||
pud = pud_mkold(pud_wrprotect(pud));
|
||||
if (is_cow_mapping(vma->vm_flags) && pud_write(pud)) {
|
||||
pudp_set_wrprotect(src_mm, addr, src_pud);
|
||||
pud = pud_wrprotect(pud);
|
||||
}
|
||||
pud = pud_mkold(pud);
|
||||
set_pud_at(dst_mm, addr, dst_pud, pud);
|
||||
|
||||
ret = 0;
|
||||
|
mm/memory.c | 253
@@ -659,11 +659,10 @@ struct page *vm_normal_page_pmd(struct vm_area_struct *vma, unsigned long addr,
{
	unsigned long pfn = pmd_pfn(pmd);

	/*
	 * There is no pmd_special() but there may be special pmds, e.g.
	 * in a direct-access (dax) mapping, so let's just replicate the
	 * !CONFIG_ARCH_HAS_PTE_SPECIAL case from vm_normal_page() here.
	 */
	/* Currently it's only used for huge pfnmaps */
	if (unlikely(pmd_special(pmd)))
		return NULL;

	if (unlikely(vma->vm_flags & (VM_PFNMAP|VM_MIXEDMAP))) {
		if (vma->vm_flags & VM_MIXEDMAP) {
			if (!pfn_valid(pfn))
@ -5607,130 +5606,159 @@ int __pmd_alloc(struct mm_struct *mm, pud_t *pud, unsigned long address)
|
||||
}
|
||||
#endif /* __PAGETABLE_PMD_FOLDED */
|
||||
|
||||
static inline void pfnmap_args_setup(struct follow_pfnmap_args *args,
|
||||
spinlock_t *lock, pte_t *ptep,
|
||||
pgprot_t pgprot, unsigned long pfn_base,
|
||||
unsigned long addr_mask, bool writable,
|
||||
bool special)
|
||||
{
|
||||
args->lock = lock;
|
||||
args->ptep = ptep;
|
||||
args->pfn = pfn_base + ((args->address & ~addr_mask) >> PAGE_SHIFT);
|
||||
args->addr_mask = addr_mask;
|
||||
args->pgprot = pgprot;
|
||||
args->writable = writable;
|
||||
args->special = special;
|
||||
}
|
||||
|
||||
static inline void pfnmap_lockdep_assert(struct vm_area_struct *vma)
|
||||
{
|
||||
#ifdef CONFIG_LOCKDEP
|
||||
struct file *file = vma->vm_file;
|
||||
struct address_space *mapping = file ? file->f_mapping : NULL;
|
||||
|
||||
if (mapping)
|
||||
lockdep_assert(lockdep_is_held(&vma->vm_file->f_mapping->i_mmap_rwsem) ||
|
||||
lockdep_is_held(&vma->vm_mm->mmap_lock));
|
||||
else
|
||||
lockdep_assert(lockdep_is_held(&vma->vm_mm->mmap_lock));
|
||||
#endif
|
||||
}
|
||||
|
||||
/**
* follow_pte - look up PTE at a user virtual address
* @mm: the mm_struct of the target address space
* @address: user virtual address
* @ptepp: location to store found PTE
* @ptlp: location to store the lock for the PTE
* follow_pfnmap_start() - Look up a pfn mapping at a user virtual address
* @args: Pointer to struct @follow_pfnmap_args
*
* On a successful return, the pointer to the PTE is stored in @ptepp;
* the corresponding lock is taken and its location is stored in @ptlp.
* The contents of the PTE are only stable until @ptlp is released;
* any further use, if any, must be protected against invalidation
* with MMU notifiers.
* The caller needs to setup args->vma and args->address to point to the
* virtual address as the target of such lookup. On a successful return,
* the results will be put into other output fields.
*
* After the caller finished using the fields, the caller must invoke
* another follow_pfnmap_end() to proper releases the locks and resources
* of such look up request.
*
* During the start() and end() calls, the results in @args will be valid
* as proper locks will be held. After the end() is called, all the fields
* in @follow_pfnmap_args will be invalid to be further accessed. Further
* use of such information after end() may require proper synchronizations
* by the caller with page table updates, otherwise it can create a
* security bug.
*
* If the PTE maps a refcounted page, callers are responsible to protect
* against invalidation with MMU notifiers; otherwise access to the PFN at
* a later point in time can trigger use-after-free.
*
* Only IO mappings and raw PFN mappings are allowed. The mmap semaphore
* should be taken for read.
* should be taken for read, and the mmap semaphore cannot be released
* before the end() is invoked.
*
* KVM uses this function. While it is arguably less bad than ``follow_pfn``,
* it is not a good general-purpose API.
* This function must not be used to modify PTE content.
*
* Return: zero on success, -ve otherwise.
* Return: zero on success, negative otherwise.
*/
int follow_pte(struct mm_struct *mm, unsigned long address,
pte_t **ptepp, spinlock_t **ptlp)
int follow_pfnmap_start(struct follow_pfnmap_args *args)
{
pgd_t *pgd;
p4d_t *p4d;
pud_t *pud;
pmd_t *pmd;
pte_t *ptep;
struct vm_area_struct *vma = args->vma;
unsigned long address = args->address;
struct mm_struct *mm = vma->vm_mm;
spinlock_t *lock;
pgd_t *pgdp;
p4d_t *p4dp, p4d;
pud_t *pudp, pud;
pmd_t *pmdp, pmd;
pte_t *ptep, pte;

pgd = pgd_offset(mm, address);
if (pgd_none(*pgd) || unlikely(pgd_bad(*pgd)))
pfnmap_lockdep_assert(vma);

if (unlikely(address < vma->vm_start || address >= vma->vm_end))
goto out;

p4d = p4d_offset(pgd, address);
if (p4d_none(*p4d) || unlikely(p4d_bad(*p4d)))
if (!(vma->vm_flags & (VM_IO | VM_PFNMAP)))
goto out;
retry:
pgdp = pgd_offset(mm, address);
if (pgd_none(*pgdp) || unlikely(pgd_bad(*pgdp)))
goto out;

pud = pud_offset(p4d, address);
if (pud_none(*pud) || unlikely(pud_bad(*pud)))
p4dp = p4d_offset(pgdp, address);
p4d = READ_ONCE(*p4dp);
if (p4d_none(p4d) || unlikely(p4d_bad(p4d)))
goto out;

pmd = pmd_offset(pud, address);
VM_BUG_ON(pmd_trans_huge(*pmd));
pudp = pud_offset(p4dp, address);
pud = READ_ONCE(*pudp);
if (pud_none(pud))
goto out;
if (pud_leaf(pud)) {
lock = pud_lock(mm, pudp);
if (!unlikely(pud_leaf(pud))) {
spin_unlock(lock);
goto retry;
}
pfnmap_args_setup(args, lock, NULL, pud_pgprot(pud),
pud_pfn(pud), PUD_MASK, pud_write(pud),
pud_special(pud));
return 0;
}

ptep = pte_offset_map_lock(mm, pmd, address, ptlp);
pmdp = pmd_offset(pudp, address);
pmd = pmdp_get_lockless(pmdp);
if (pmd_leaf(pmd)) {
lock = pmd_lock(mm, pmdp);
if (!unlikely(pmd_leaf(pmd))) {
spin_unlock(lock);
goto retry;
}
pfnmap_args_setup(args, lock, NULL, pmd_pgprot(pmd),
pmd_pfn(pmd), PMD_MASK, pmd_write(pmd),
pmd_special(pmd));
return 0;
}

ptep = pte_offset_map_lock(mm, pmdp, address, &lock);
if (!ptep)
goto out;
if (!pte_present(ptep_get(ptep)))
pte = ptep_get(ptep);
if (!pte_present(pte))
goto unlock;
*ptepp = ptep;
pfnmap_args_setup(args, lock, ptep, pte_pgprot(pte),
pte_pfn(pte), PAGE_MASK, pte_write(pte),
pte_special(pte));
return 0;
unlock:
pte_unmap_unlock(ptep, *ptlp);
pte_unmap_unlock(ptep, lock);
out:
return -EINVAL;
}
EXPORT_SYMBOL_GPL(follow_pte);
EXPORT_SYMBOL_GPL(follow_pfnmap_start);

/**
* follow_pfn - look up PFN at a user virtual address
* @vma: memory mapping
* @address: user virtual address
* @pfn: location to store found PFN
* follow_pfnmap_end(): End a follow_pfnmap_start() process
* @args: Pointer to struct @follow_pfnmap_args
*
* Only IO mappings and raw PFN mappings are allowed.
*
* This function does not allow the caller to read the permissions
* of the PTE. Do not use it.
*
* Return: zero and the pfn at @pfn on success, -ve otherwise.
* Must be used in pair of follow_pfnmap_start(). See the start() function
* above for more information.
*/
int follow_pfn(struct vm_area_struct *vma, unsigned long address,
unsigned long *pfn)
void follow_pfnmap_end(struct follow_pfnmap_args *args)
{
int ret = -EINVAL;
spinlock_t *ptl;
pte_t *ptep;

if (!(vma->vm_flags & (VM_IO | VM_PFNMAP)))
return ret;

ret = follow_pte(vma->vm_mm, address, &ptep, &ptl);
if (ret)
return ret;
*pfn = pte_pfn(ptep_get(ptep));
pte_unmap_unlock(ptep, ptl);
return 0;
if (args->lock)
spin_unlock(args->lock);
if (args->ptep)
pte_unmap(args->ptep);
}
EXPORT_SYMBOL(follow_pfn);
EXPORT_SYMBOL_GPL(follow_pfnmap_end);

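The kernel-doc above amounts to a start/use/end bracket around a page table walk. A minimal usage sketch may make it concrete; my_lookup_phys() is a hypothetical name, and the sketch assumes the caller already holds mmap_lock for read on an IO/PFNMAP vma, much like generic_access_phys() below:

    /* Hypothetical caller of the new API, not part of this patch. */
    static int my_lookup_phys(struct vm_area_struct *vma, unsigned long addr,
                              resource_size_t *phys, bool *writable)
    {
            struct follow_pfnmap_args args = { .vma = vma, .address = addr };
            int ret;

            ret = follow_pfnmap_start(&args);   /* takes the page table lock */
            if (ret)
                    return ret;                 /* unmapped, or not IO/PFNMAP */
            *phys = (resource_size_t)args.pfn << PAGE_SHIFT;
            if (writable)
                    *writable = args.writable;
            follow_pfnmap_end(&args);           /* drops the lock; args is stale now */
            return 0;
    }

Note that everything copied out of args must be copied before follow_pfnmap_end(); the fields are only stable while the lock is held.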
#ifdef CONFIG_HAVE_IOREMAP_PROT
int follow_phys(struct vm_area_struct *vma,
unsigned long address, unsigned int flags,
unsigned long *prot, resource_size_t *phys)
{
int ret = -EINVAL;
pte_t *ptep, pte;
spinlock_t *ptl;

if (!(vma->vm_flags & (VM_IO | VM_PFNMAP)))
goto out;

if (follow_pte(vma->vm_mm, address, &ptep, &ptl))
goto out;
pte = ptep_get(ptep);

/* Never return PFNs of anon folios in COW mappings. */
if (vm_normal_folio(vma, address, pte))
goto unlock;

if ((flags & FOLL_WRITE) && !pte_write(pte))
goto unlock;

*prot = pgprot_val(pte_pgprot(pte));
*phys = (resource_size_t)pte_pfn(pte) << PAGE_SHIFT;

ret = 0;
unlock:
pte_unmap_unlock(ptep, ptl);
out:
return ret;
}

/**
* generic_access_phys - generic implementation for iomem mmap access
* @vma: the vma to access
@ -5749,37 +5777,34 @@ int generic_access_phys(struct vm_area_struct *vma, unsigned long addr,
resource_size_t phys_addr;
unsigned long prot = 0;
void __iomem *maddr;
pte_t *ptep, pte;
spinlock_t *ptl;
int offset = offset_in_page(addr);
int ret = -EINVAL;

if (!(vma->vm_flags & (VM_IO | VM_PFNMAP)))
return -EINVAL;
bool writable;
struct follow_pfnmap_args args = { .vma = vma, .address = addr };

retry:
if (follow_pte(vma->vm_mm, addr, &ptep, &ptl))
if (follow_pfnmap_start(&args))
return -EINVAL;
pte = ptep_get(ptep);
pte_unmap_unlock(ptep, ptl);
prot = pgprot_val(args.pgprot);
phys_addr = (resource_size_t)args.pfn << PAGE_SHIFT;
writable = args.writable;
follow_pfnmap_end(&args);

prot = pgprot_val(pte_pgprot(pte));
phys_addr = (resource_size_t)pte_pfn(pte) << PAGE_SHIFT;

if ((write & FOLL_WRITE) && !pte_write(pte))
if ((write & FOLL_WRITE) && !writable)
return -EINVAL;

maddr = ioremap_prot(phys_addr, PAGE_ALIGN(len + offset), prot);
if (!maddr)
return -ENOMEM;

if (follow_pte(vma->vm_mm, addr, &ptep, &ptl))
if (follow_pfnmap_start(&args))
goto out_unmap;

if (!pte_same(pte, ptep_get(ptep))) {
pte_unmap_unlock(ptep, ptl);
if ((prot != pgprot_val(args.pgprot)) ||
(phys_addr != (args.pfn << PAGE_SHIFT)) ||
(writable != args.writable)) {
follow_pfnmap_end(&args);
iounmap(maddr);

goto retry;
}

@ -5788,7 +5813,7 @@ retry:
else
memcpy_fromio(buf, maddr + offset, len);
ret = len;
pte_unmap_unlock(ptep, ptl);
follow_pfnmap_end(&args);
out_unmap:
iounmap(maddr);

21 mm/nommu.c
@ -110,27 +110,6 @@ unsigned int kobjsize(const void *objp)
return page_size(page);
}

/**
* follow_pfn - look up PFN at a user virtual address
* @vma: memory mapping
* @address: user virtual address
* @pfn: location to store found PFN
*
* Only IO mappings and raw PFN mappings are allowed.
*
* Returns zero and the pfn at @pfn on success, -ve otherwise.
*/
int follow_pfn(struct vm_area_struct *vma, unsigned long address,
unsigned long *pfn)
{
if (!(vma->vm_flags & (VM_IO | VM_PFNMAP)))
return -EINVAL;

*pfn = address >> PAGE_SHIFT;
return 0;
}
EXPORT_SYMBOL(follow_pfn);

LIST_HEAD(vmap_area_list);

void vfree(const void *addr)
@ -1,3 +1,45 @@
* Sat Jul 05 2025 CKI KWF Bot <cki-ci-bot+kwf-gitlab-com@redhat.com> [5.14.0-570.26.1.el9_6]
- x86/microcode/AMD: Fix out-of-bounds on systems with CPU-less NUMA nodes (CKI Backport Bot) [RHEL-98996] {CVE-2025-21991}
- cpufreq: intel_pstate: Unchecked MSR aceess in legacy mode (David Arcari) [RHEL-90212]
- smb: client: fix perf regression with deferred closes (Paulo Alcantara) [RHEL-97482]
- smb3 client: fix open hardlink on deferred close file error (Paulo Alcantara) [RHEL-97482]
- Fix mmu notifiers for range-based invalidates (Jay Shin) [RHEL-93743]
- vfio/pci: Align huge faults to order (Alex Williamson) [RHEL-88275]
- vfio/type1: Use mapping page mask for pfnmaps (Alex Williamson) [RHEL-88275]
- mm: Provide address mask in struct follow_pfnmap_args (Alex Williamson) [RHEL-88275]
- vfio/type1: Use consistent types for page counts (Alex Williamson) [RHEL-88275]
- vfio/type1: Use vfio_batch for vaddr_get_pfns() (Alex Williamson) [RHEL-88275]
- vfio/type1: Convert all vaddr_get_pfns() callers to use vfio_batch (Alex Williamson) [RHEL-88275]
- vfio/type1: Catch zero from pin_user_pages_remote() (Alex Williamson) [RHEL-88275]
- vfio/pci: Fallback huge faults for unaligned pfn (Donald Dutile) [RHEL-85623]
- vfio/pci: implement huge_fault support (Donald Dutile) [RHEL-85623]
- vfio/pci: Remove unused struct 'vfio_pci_mmap_vma' (Donald Dutile) [RHEL-85623]
- vfio/pci: Insert full vma on mmap'd MMIO fault (Donald Dutile) [RHEL-85623]
- vfio/pci: Use unmap_mapping_range() (Donald Dutile) [RHEL-85623]
- mm/arm64: support large pfn mappings (Donald Dutile) [RHEL-85623]
- mm/x86: support large pfn mappings (Donald Dutile) [RHEL-85623]
- mm: remove follow_pte() (Donald Dutile) [RHEL-85623]
- mm: follow_pte() improvements (Donald Dutile) [RHEL-85623]
- mm/access_process_vm: use the new follow_pfnmap API (Donald Dutile) [RHEL-85623]
- vfio: use the new follow_pfnmap API (Donald Dutile) [RHEL-85623]
- mm/x86/pat: use the new follow_pfnmap API (Donald Dutile) [RHEL-85623]
- s390/pci_mmio: use follow_pfnmap API (Donald Dutile) [RHEL-85623]
- KVM: use follow_pfnmap API (Donald Dutile) [RHEL-85623]
- mm: pass VMA instead of MM to follow_pte() (Donald Dutile) [RHEL-85623]
- mm: move follow_phys to arch/x86/mm/pat/memtype.c (Donald Dutile) [RHEL-85623]
- mm: fix follow_pfnmap API lockdep assert (Donald Dutile) [RHEL-85623]
- mm: new follow_pfnmap API (Donald Dutile) [RHEL-85623]
- mm: remove follow_pfn (Donald Dutile) [RHEL-85623]
- mm: always define pxx_pgprot() (Donald Dutile) [RHEL-85623]
- mm/huge_memory: check pmd_special() only after pmd_present() (Donald Dutile) [RHEL-85623]
- mm/fork: accept huge pfnmap entries (Donald Dutile) [RHEL-85623]
- mm/pagewalk: check pfnmap for folio_walk_start() (Donald Dutile) [RHEL-85623]
- mm/gup: detect huge pfnmap entries in gup-fast (Donald Dutile) [RHEL-85623]
- mm: mark special bits for huge pfn mappings when inject (Donald Dutile) [RHEL-85623]
- mm: drop is_huge_zero_pud() (Donald Dutile) [RHEL-85623]
- mm: introduce ARCH_SUPPORTS_HUGE_PFNMAP and special bits to pmd/pud (Donald Dutile) [RHEL-85623]
Resolves: RHEL-85623, RHEL-88275, RHEL-90212, RHEL-93743, RHEL-97482, RHEL-98996

* Sat Jun 28 2025 CKI KWF Bot <cki-ci-bot+kwf-gitlab-com@redhat.com> [5.14.0-570.25.1.el9_6]
- udf: Fix a slab-out-of-bounds write bug in udf_find_entry() (CKI Backport Bot) [RHEL-99124] {CVE-2022-49846}
- vmxnet3: Fix malformed packet sizing in vmxnet3_process_xdp (CKI Backport Bot) [RHEL-97110] {CVE-2025-37799}
Binary file not shown.
virt/kvm/kvm_main.c
@ -2878,13 +2878,11 @@ static int hva_to_pfn_remapped(struct vm_area_struct *vma,
unsigned long addr, bool write_fault,
bool *writable, kvm_pfn_t *p_pfn)
{
struct follow_pfnmap_args args = { .vma = vma, .address = addr };
kvm_pfn_t pfn;
pte_t *ptep;
pte_t pte;
spinlock_t *ptl;
int r;

r = follow_pte(vma->vm_mm, addr, &ptep, &ptl);
r = follow_pfnmap_start(&args);
if (r) {
/*
* get_user_pages fails for VM_IO and VM_PFNMAP vmas and does
@ -2899,21 +2897,19 @@ static int hva_to_pfn_remapped(struct vm_area_struct *vma,
if (r)
return r;

r = follow_pte(vma->vm_mm, addr, &ptep, &ptl);
r = follow_pfnmap_start(&args);
if (r)
return r;
}

pte = ptep_get(ptep);

if (write_fault && !pte_write(pte)) {
if (write_fault && !args.writable) {
pfn = KVM_PFN_ERR_RO_FAULT;
goto out;
}

if (writable)
*writable = pte_write(pte);
pfn = pte_pfn(pte);
*writable = args.writable;
pfn = args.pfn;

/*
* Get a reference here because callers of *hva_to_pfn* and
@ -2934,9 +2930,8 @@ static int hva_to_pfn_remapped(struct vm_area_struct *vma,
*/
if (!kvm_try_get_pfn(pfn))
r = -EFAULT;

out:
pte_unmap_unlock(ptep, ptl);
follow_pfnmap_end(&args);
*p_pfn = pfn;

return r;
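Condensed, the converted lookup path follows a try, fault in, retry shape. The sketch below mirrors only the hunks above; the function name and fault_in_hva() are hypothetical stand-ins, and the reference counting and -EAGAIN handling of the real code are elided:

    /* Condensed sketch of the KVM conversion above, illustration only. */
    static kvm_pfn_t sketch_hva_to_pfn_remapped(struct vm_area_struct *vma,
                                                unsigned long addr, bool write_fault)
    {
            struct follow_pfnmap_args args = { .vma = vma, .address = addr };
            kvm_pfn_t pfn;

            if (follow_pfnmap_start(&args)) {
                    /* Not populated yet: fault the mapping in, then retry. */
                    if (fault_in_hva(vma, addr, write_fault))   /* hypothetical helper */
                            return KVM_PFN_ERR_FAULT;
                    if (follow_pfnmap_start(&args))
                            return KVM_PFN_ERR_FAULT;
            }
            if (write_fault && !args.writable)
                    pfn = KVM_PFN_ERR_RO_FAULT;     /* mapping is read-only */
            else
                    pfn = args.pfn;
            follow_pfnmap_end(&args);
            return pfn;
    }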