Import of kernel-4.18.0-553.81.1.el8_10
parent 89b59548e7
commit b0aa308873
@@ -12,7 +12,7 @@ RHEL_MINOR = 10
#
# Use this spot to avoid future merge conflicts.
# Do not trim this comment.
RHEL_RELEASE = 553.80.1
RHEL_RELEASE = 553.81.1
#
# ZSTREAM

@@ -188,7 +188,7 @@ static inline pte_t pte_wrprotect(pte_t pte)
* clear), set the PTE_DIRTY bit.
*/
if (pte_hw_dirty(pte))
pte = pte_mkdirty(pte);
pte = set_pte_bit(pte, __pgprot(PTE_DIRTY));
pte = clear_pte_bit(pte, __pgprot(PTE_WRITE));
pte = set_pte_bit(pte, __pgprot(PTE_RDONLY));

@@ -675,8 +675,15 @@ static inline pte_t pte_modify(pte_t pte, pgprot_t newprot)
PTE_PROT_NONE | PTE_VALID | PTE_WRITE | PTE_GP;
/* preserve the hardware dirty information */
if (pte_hw_dirty(pte))
pte = pte_mkdirty(pte);
pte = set_pte_bit(pte, __pgprot(PTE_DIRTY));
pte_val(pte) = (pte_val(pte) & ~mask) | (pgprot_val(newprot) & mask);
/*
* If we end up clearing hw dirtiness for a sw-dirty PTE, set hardware
* dirtiness again.
*/
if (pte_sw_dirty(pte))
pte = pte_mkdirty(pte);
return pte;
}

@@ -353,9 +353,6 @@ void __init setup_arch(char **cmdline_p)
smp_init_cpus();
smp_build_mpidr_hash();
/* Init percpu seeds for random tags after cpus are set up. */
kasan_init_sw_tags();
#ifdef CONFIG_ARM64_SW_TTBR0_PAN
/*
* Make sure init_thread_info.ttbr0 always generates translation

@@ -474,6 +474,8 @@ void __init smp_prepare_boot_cpu(void)
init_gic_priority_masking();
kasan_init_hw_tags();
/* Init percpu seeds for random tags after cpus are set up. */
kasan_init_sw_tags();
}
static u64 __init of_get_cpu_mpidr(struct device_node *dn)

@@ -1434,7 +1434,8 @@ int arch_add_memory(int nid, u64 start, u64 size,
__remove_pgd_mapping(swapper_pg_dir,
__phys_to_virt(start), size);
else {
max_pfn = PFN_UP(start + size);
/* Address of hotplugged memory can be smaller */
max_pfn = max(max_pfn, PFN_UP(start + size));
max_low_pfn = max_pfn;
}

@@ -68,13 +68,9 @@ static inline void mark_initmem_nx(void) { }
#define is_ioremap_addr is_ioremap_addr
static inline bool is_ioremap_addr(const void *x)
{
#ifdef CONFIG_MMU
unsigned long addr = (unsigned long)x;
return addr >= IOREMAP_BASE && addr < IOREMAP_END;
#else
return false;
#endif
}
#endif /* CONFIG_PPC64 */
@@ -198,7 +198,7 @@ static bool altmap_cross_boundary(struct vmem_altmap *altmap, unsigned long star
unsigned long nr_pfn = page_size / sizeof(struct page);
unsigned long start_pfn = page_to_pfn((struct page *)start);
if ((start_pfn + nr_pfn) > altmap->end_pfn)
if ((start_pfn + nr_pfn - 1) > altmap->end_pfn)
return true;
if (start_pfn < altmap->base_pfn)

@@ -305,8 +305,7 @@ void __ref vmemmap_free(unsigned long start, unsigned long end,
start = ALIGN_DOWN(start, page_size);
if (altmap) {
alt_start = altmap->base_pfn;
alt_end = altmap->base_pfn + altmap->reserve +
altmap->free + altmap->alloc + altmap->align;
alt_end = altmap->base_pfn + altmap->reserve + altmap->free;
}
pr_debug("vmemmap_free %lx...%lx\n", start, end);

@@ -99,7 +99,6 @@ EXPORT_SYMBOL(__vmalloc_end);
unsigned long __kernel_io_start;
EXPORT_SYMBOL(__kernel_io_start);
unsigned long __kernel_io_end;
EXPORT_SYMBOL(__kernel_io_end);
struct page *vmemmap;
EXPORT_SYMBOL(vmemmap);
unsigned long __pte_frag_nr;

@@ -521,8 +521,10 @@ static ssize_t vcpudispatch_stats_write(struct file *file, const char __user *p,
if (cmd) {
rc = init_cpu_associativity();
if (rc)
if (rc) {
destroy_cpu_associativity();
goto out;
}
for_each_possible_cpu(cpu) {
disp = per_cpu_ptr(&vcpu_disp_data, cpu);

@@ -71,8 +71,13 @@ static inline void swap_ex_entry_fixup(struct exception_table_entry *a,
{
a->fixup = b->fixup + delta;
b->fixup = tmp.fixup - delta;
a->handler = b->handler + delta;
b->handler = tmp.handler - delta;
a->handler = b->handler;
if (a->handler)
a->handler += delta;
b->handler = tmp.handler;
if (b->handler)
b->handler -= delta;
}
#define swap_ex_entry_fixup swap_ex_entry_fixup
#endif

@@ -2652,7 +2652,7 @@ static int __s390_enable_skey_hugetlb(pte_t *pte, unsigned long addr,
return 0;
start = pmd_val(*pmd) & HPAGE_MASK;
end = start + HPAGE_SIZE - 1;
end = start + HPAGE_SIZE;
__storage_key_init_range(start, end);
set_bit(PG_arch_1, &page->flags);
cond_resched();

@@ -146,7 +146,7 @@ static void clear_huge_pte_skeys(struct mm_struct *mm, unsigned long rste)
}
if (!test_and_set_bit(PG_arch_1, &page->flags))
__storage_key_init_range(paddr, paddr + size - 1);
__storage_key_init_range(paddr, paddr + size);
}
void set_huge_pte_at(struct mm_struct *mm, unsigned long addr,

@@ -571,7 +571,7 @@ struct flush_tlb_info {
flush_tlb_mm_range((vma)->vm_mm, start, end, \
((vma)->vm_flags & VM_HUGETLB) \
? huge_page_shift(hstate_vma(vma)) \
: PAGE_SHIFT, false)
: PAGE_SHIFT, true)
extern void flush_tlb_all(void);
extern void flush_tlb_mm_range(struct mm_struct *mm, unsigned long start,
@@ -182,11 +182,11 @@ static void __meminit init_trampoline_pud(void)
set_p4d(p4d_tramp,
__p4d(_KERNPG_TABLE | __pa(pud_page_tramp)));
set_pgd(&trampoline_pgd_entry,
__pgd(_KERNPG_TABLE | __pa(p4d_page_tramp)));
trampoline_pgd_entry =
__pgd(_KERNPG_TABLE | __pa(p4d_page_tramp));
} else {
set_pgd(&trampoline_pgd_entry,
__pgd(_KERNPG_TABLE | __pa(pud_page_tramp)));
trampoline_pgd_entry =
__pgd(_KERNPG_TABLE | __pa(pud_page_tramp));
}
}

@@ -184,7 +184,7 @@ static int pageattr_test(void)
break;
case 1:
err = change_page_attr_set(addrs, len[1], PAGE_CPA_TEST, 1);
err = change_page_attr_set(addrs, len[i], PAGE_CPA_TEST, 1);
break;
case 2:

@@ -20,6 +20,7 @@
#include <linux/pm_runtime.h>
#include <linux/swap.h>
#include <linux/slab.h>
#include <linux/hugetlb.h>
static struct bus_type node_subsys = {
.name = "node",
@@ -584,64 +585,9 @@ static const struct attribute_group *node_dev_groups[] = {
NULL
};
#ifdef CONFIG_HUGETLBFS
/*
* hugetlbfs per node attributes registration interface:
* When/if hugetlb[fs] subsystem initializes [sometime after this module],
* it will register its per node attributes for all online nodes with
* memory. It will also call register_hugetlbfs_with_node(), below, to
* register its attribute registration functions with this node driver.
* Once these hooks have been initialized, the node driver will call into
* the hugetlb module to [un]register attributes for hot-plugged nodes.
*/
static node_registration_func_t __hugetlb_register_node;
static node_registration_func_t __hugetlb_unregister_node;
static inline bool hugetlb_register_node(struct node *node)
{
if (__hugetlb_register_node &&
node_state(node->dev.id, N_MEMORY)) {
__hugetlb_register_node(node);
return true;
}
return false;
}
static inline void hugetlb_unregister_node(struct node *node)
{
if (__hugetlb_unregister_node)
__hugetlb_unregister_node(node);
}
void register_hugetlbfs_with_node(node_registration_func_t doregister,
node_registration_func_t unregister)
{
__hugetlb_register_node = doregister;
__hugetlb_unregister_node = unregister;
}
#else
static inline void hugetlb_register_node(struct node *node) {}
static inline void hugetlb_unregister_node(struct node *node) {}
#endif
static void node_device_release(struct device *dev)
{
struct node *node = to_node(dev);
#if defined(CONFIG_MEMORY_HOTPLUG) && defined(CONFIG_HUGETLBFS)
/*
* We schedule the work only when a memory section is
* onlined/offlined on this node. When we come here,
* all the memory on this node has been offlined,
* so we won't enqueue new work to this work.
*
* The work is using node->node_work, so we should
* flush work before freeing the memory.
*/
flush_work(&node->node_work);
#endif
kfree(node);
kfree(to_node(dev));
}
/*

@@ -660,13 +606,13 @@ static int register_node(struct node *node, int num)
node->dev.groups = node_dev_groups;
error = device_register(&node->dev);
if (error)
if (error) {
put_device(&node->dev);
else {
} else {
hugetlb_register_node(node);
compaction_register_node(node);
}
return error;
}

@@ -679,8 +625,8 @@ static int register_node(struct node *node, int num)
*/
void unregister_node(struct node *node)
{
hugetlb_unregister_node(node);
compaction_unregister_node(node);
hugetlb_unregister_node(node); /* no-op, if memoryless node */
node_remove_accesses(node);
node_remove_caches(node);
device_unregister(&node->dev);
@@ -904,83 +850,21 @@ void register_memory_blocks_under_node(int nid, unsigned long start_pfn,
(void *)&nid, func);
return;
}
#ifdef CONFIG_HUGETLBFS
/*
* Handle per node hstate attribute [un]registration on transistions
* to/from memoryless state.
*/
static void node_hugetlb_work(struct work_struct *work)
{
struct node *node = container_of(work, struct node, node_work);
/*
* We only get here when a node transitions to/from memoryless state.
* We can detect which transition occurred by examining whether the
* node has memory now. hugetlb_register_node() already check this
* so we try to register the attributes. If that fails, then the
* node has transitioned to memoryless, try to unregister the
* attributes.
*/
if (!hugetlb_register_node(node))
hugetlb_unregister_node(node);
}
static void init_node_hugetlb_work(int nid)
{
INIT_WORK(&node_devices[nid]->node_work, node_hugetlb_work);
}
static int node_memory_callback(struct notifier_block *self,
unsigned long action, void *arg)
{
struct memory_notify *mnb = arg;
int nid = mnb->status_change_nid;
switch (action) {
case MEM_ONLINE:
case MEM_OFFLINE:
/*
* offload per node hstate [un]registration to a work thread
* when transitioning to/from memoryless state.
*/
if (nid != NUMA_NO_NODE)
schedule_work(&node_devices[nid]->node_work);
break;
case MEM_GOING_ONLINE:
case MEM_GOING_OFFLINE:
case MEM_CANCEL_ONLINE:
case MEM_CANCEL_OFFLINE:
default:
break;
}
return NOTIFY_OK;
}
#endif /* CONFIG_HUGETLBFS */
#endif /* CONFIG_MEMORY_HOTPLUG */
#if !defined(CONFIG_MEMORY_HOTPLUG) || !defined(CONFIG_HUGETLBFS)
static inline int node_memory_callback(struct notifier_block *self,
unsigned long action, void *arg)
{
return NOTIFY_OK;
}
static void init_node_hugetlb_work(int nid) { }
#endif
int __register_one_node(int nid)
{
int error;
int cpu;
struct node *node;
node_devices[nid] = kzalloc(sizeof(struct node), GFP_KERNEL);
if (!node_devices[nid])
node = kzalloc(sizeof(struct node), GFP_KERNEL);
if (!node)
return -ENOMEM;
INIT_LIST_HEAD(&node->access_list);
node_devices[nid] = node;
error = register_node(node_devices[nid], nid);
/* link cpu under this node */

@@ -989,9 +873,6 @@ int __register_one_node(int nid)
register_cpu_under_node(cpu, nid);
}
INIT_LIST_HEAD(&node_devices[nid]->access_list);
/* initialize work queue for memory hot plug */
init_node_hugetlb_work(nid);
node_init_caches(nid);
return error;

@@ -1062,13 +943,8 @@ static const struct attribute_group *cpu_root_attr_groups[] = {
NULL,
};
#define NODE_CALLBACK_PRI 2 /* lower than SLAB */
void __init node_dev_init(void)
{
static struct notifier_block node_memory_callback_nb = {
.notifier_call = node_memory_callback,
.priority = NODE_CALLBACK_PRI,
};
int ret, i;
BUILD_BUG_ON(ARRAY_SIZE(node_state_attr) != NR_NODE_STATES);

@@ -1078,8 +954,6 @@ void __init node_dev_init(void)
if (ret)
panic("%s() failed to register subsystem: %d\n", __func__, ret);
register_hotmemory_notifier(&node_memory_callback_nb);
/*
* Create all node devices, which will properly link the node
* to applicable memory block devices and already created cpu devices.
@@ -372,6 +372,10 @@ int mlx5_cmd_fast_teardown_hca(struct mlx5_core_dev *dev)
do {
if (mlx5_get_nic_state(dev) == MLX5_NIC_IFC_DISABLED)
break;
if (pci_channel_offline(dev->pdev)) {
mlx5_core_err(dev, "PCI channel offline, stop waiting for NIC IFC\n");
return -EACCES;
}
cond_resched();
} while (!time_after(jiffies, end));

@@ -261,6 +261,10 @@ recover_from_sw_reset:
do {
if (mlx5_get_nic_state(dev) == MLX5_NIC_IFC_DISABLED)
break;
if (pci_channel_offline(dev->pdev)) {
mlx5_core_err(dev, "PCI channel offline, stop waiting for NIC IFC\n");
goto unlock;
}
msleep(20);
} while (!time_after(jiffies, end));

@@ -330,6 +334,10 @@ int mlx5_health_wait_pci_up(struct mlx5_core_dev *dev)
mlx5_core_warn(dev, "device is being removed, stop waiting for PCI\n");
return -ENODEV;
}
if (pci_channel_offline(dev->pdev)) {
mlx5_core_err(dev, "PCI channel offline, stop waiting for PCI\n");
return -EACCES;
}
msleep(100);
}
return 0;

@@ -74,6 +74,10 @@ int mlx5_vsc_gw_lock(struct mlx5_core_dev *dev)
ret = -EBUSY;
goto pci_unlock;
}
if (pci_channel_offline(dev->pdev)) {
ret = -EACCES;
goto pci_unlock;
}
/* Check if semaphore is already locked */
ret = vsc_read(dev, VSC_SEMAPHORE_OFFSET, &lock_val);

@@ -1241,7 +1241,7 @@ lpfc_nvmet_defer_rcv(struct nvmet_fc_target_port *tgtport,
struct lpfc_nvmet_tgtport *tgtp;
struct lpfc_async_xchg_ctx *ctxp =
container_of(rsp, struct lpfc_async_xchg_ctx, hdlrctx.fcp_req);
struct rqb_dmabuf *nvmebuf = ctxp->rqb_buffer;
struct rqb_dmabuf *nvmebuf;
struct lpfc_hba *phba = ctxp->phba;
unsigned long iflag;

@@ -1249,13 +1249,18 @@ lpfc_nvmet_defer_rcv(struct nvmet_fc_target_port *tgtport,
lpfc_nvmeio_data(phba, "NVMET DEFERRCV: xri x%x sz %d CPU %02x\n",
ctxp->oxid, ctxp->size, raw_smp_processor_id());
spin_lock_irqsave(&ctxp->ctxlock, iflag);
nvmebuf = ctxp->rqb_buffer;
if (!nvmebuf) {
spin_unlock_irqrestore(&ctxp->ctxlock, iflag);
lpfc_printf_log(phba, KERN_INFO, LOG_NVME_IOERR,
"6425 Defer rcv: no buffer oxid x%x: "
"flg %x ste %x\n",
ctxp->oxid, ctxp->flag, ctxp->state);
return;
}
ctxp->rqb_buffer = NULL;
spin_unlock_irqrestore(&ctxp->ctxlock, iflag);
tgtp = phba->targetport->private;
if (tgtp)

@@ -1263,9 +1268,6 @@ lpfc_nvmet_defer_rcv(struct nvmet_fc_target_port *tgtport,
/* Free the nvmebuf since a new buffer already replaced it */
nvmebuf->hrq->rqbp->rqb_free_buffer(phba, nvmebuf);
spin_lock_irqsave(&ctxp->ctxlock, iflag);
ctxp->rqb_buffer = NULL;
spin_unlock_irqrestore(&ctxp->ctxlock, iflag);
}
static void
@@ -5650,7 +5650,7 @@ static int receive_rcom_lock_args(struct dlm_ls *ls, struct dlm_lkb *lkb,
if (rl->rl_status == DLM_LKSTS_CONVERT && middle_conversion(lkb)) {
/* We may need to adjust grmode depending on other granted locks. */
log_limit(ls, "%s %x middle convert gr %d rq %d remote %d %x",
log_rinfo(ls, "%s %x middle convert gr %d rq %d remote %d %x",
__func__, lkb->lkb_id, lkb->lkb_grmode,
lkb->lkb_rqmode, lkb->lkb_nodeid, lkb->lkb_remid);
rsb_set_flag(r, RSB_RECOVER_CONVERT);

@@ -846,7 +846,7 @@ static void recover_conversion(struct dlm_rsb *r)
*/
if (((lkb->lkb_grmode == DLM_LOCK_PR) && (other_grmode == DLM_LOCK_CW)) ||
((lkb->lkb_grmode == DLM_LOCK_CW) && (other_grmode == DLM_LOCK_PR))) {
log_limit(ls, "%s %x gr %d rq %d, remote %d %x, other_lkid %u, other gr %d, set gr=NL",
log_rinfo(ls, "%s %x gr %d rq %d, remote %d %x, other_lkid %u, other gr %d, set gr=NL",
__func__, lkb->lkb_id, lkb->lkb_grmode,
lkb->lkb_rqmode, lkb->lkb_nodeid,
lkb->lkb_remid, other_lkid, other_grmode);

@@ -51,6 +51,10 @@ static int efivarfs_d_compare(const struct dentry *dentry,
{
int guid = len - EFI_VARIABLE_GUID_LEN;
/* Parallel lookups may produce a temporary invalid filename */
if (guid <= 0)
return 1;
if (name->len != len)
return 1;

@@ -608,6 +608,24 @@ out_free:
kfree(isw);
}
static bool isw_prepare_wbs_switch(struct inode_switch_wbs_context *isw,
struct list_head *list, int *nr)
{
struct inode *inode;
list_for_each_entry(inode, list, i_io_list) {
if (!inode_prepare_wbs_switch(inode, isw->new_wb))
continue;
isw->inodes[*nr] = inode;
(*nr)++;
if (*nr >= WB_MAX_INODES_PER_ISW - 1)
return true;
}
return false;
}
/**
* cleanup_offline_cgwb - detach associated inodes
* @wb: target wb

@@ -620,7 +638,6 @@ bool cleanup_offline_cgwb(struct bdi_writeback *wb)
{
struct cgroup_subsys_state *memcg_css;
struct inode_switch_wbs_context *isw;
struct inode *inode;
int nr;
bool restart = false;

@@ -642,17 +659,17 @@ bool cleanup_offline_cgwb(struct bdi_writeback *wb)
nr = 0;
spin_lock(&wb->list_lock);
list_for_each_entry(inode, &wb->b_attached, i_io_list) {
if (!inode_prepare_wbs_switch(inode, isw->new_wb))
continue;
isw->inodes[nr++] = inode;
if (nr >= WB_MAX_INODES_PER_ISW - 1) {
restart = true;
break;
}
}
/*
* In addition to the inodes that have completed writeback, also switch
* cgwbs for those inodes only with dirty timestamps. Otherwise, those
* inodes won't be written back for a long time when lazytime is
* enabled, and thus pinning the dying cgwbs. It won't break the
* bandwidth restrictions, as writeback of inode metadata is not
* accounted for.
*/
restart = isw_prepare_wbs_switch(isw, &wb->b_attached, &nr);
if (!restart)
restart = isw_prepare_wbs_switch(isw, &wb->b_dirty_time, &nr);
spin_unlock(&wb->list_lock);
/* no attached inodes? bail out */
@@ -528,16 +528,10 @@ static void smaps_pte_entry(pte_t *pte, unsigned long addr,
}
} else if (unlikely(IS_ENABLED(CONFIG_SHMEM) && mss->check_shmem_swap
&& pte_none(*pte))) {
page = find_get_entry(vma->vm_file->f_mapping,
page = xa_load(&vma->vm_file->f_mapping->i_pages,
linear_page_index(vma, addr));
if (!page)
return;
if (xa_is_value(page))
mss->swap += PAGE_SIZE;
else
put_page(page);
return;
}

@@ -596,6 +596,8 @@ bool gfp_pfmemalloc_allowed(gfp_t gfp_mask);
extern void pm_restrict_gfp_mask(void);
extern void pm_restore_gfp_mask(void);
extern gfp_t vma_thp_gfp_mask(struct vm_area_struct *vma);
#ifdef CONFIG_PM_SLEEP
extern bool pm_suspended_storage(void);
#else

@@ -15,6 +15,7 @@
struct ctl_table;
struct user_struct;
struct mmu_gather;
struct node;
#ifndef is_hugepd
typedef struct { unsigned long pd; } hugepd_t;

@@ -584,6 +585,7 @@ struct huge_bootmem_page {
struct hstate *hstate;
};
void wait_for_freed_hugetlb_pages(void);
struct page *alloc_huge_page(struct vm_area_struct *vma,
unsigned long addr, int avoid_reserve);
struct page *alloc_huge_page_nodemask(struct hstate *h, int preferred_nid,

@@ -843,6 +845,11 @@ static inline void huge_ptep_modify_prot_commit(struct vm_area_struct *vma,
}
#endif
#ifdef CONFIG_NUMA
void hugetlb_register_node(struct node *node);
void hugetlb_unregister_node(struct node *node);
#endif
#else /* CONFIG_HUGETLB_PAGE */
struct hstate {};

@@ -851,6 +858,10 @@ static inline struct hugepage_subpool *hugetlb_page_subpool(struct page *hpage)
return NULL;
}
static inline void wait_for_freed_hugetlb_pages(void)
{
}
static inline struct page *alloc_huge_page(struct vm_area_struct *vma,
unsigned long addr,
int avoid_reserve)

@@ -1000,6 +1011,14 @@ static inline void set_huge_swap_pte_at(struct mm_struct *mm, unsigned long addr
pte_t *ptep, pte_t pte, unsigned long sz)
{
}
static inline void hugetlb_register_node(struct node *node)
{
}
static inline void hugetlb_unregister_node(struct node *node)
{
}
#endif /* CONFIG_HUGETLB_PAGE */
static inline spinlock_t *huge_pte_lock(struct hstate *h,

@@ -943,7 +943,7 @@ unsigned long mem_cgroup_get_zone_lru_size(struct lruvec *lruvec,
return READ_ONCE(mz->lru_zone_size[zone_idx][lru]);
}
void mem_cgroup_handle_over_high(void);
void mem_cgroup_handle_over_high(gfp_t gfp_mask);
unsigned long mem_cgroup_get_max(struct mem_cgroup *memcg);

@@ -1403,7 +1403,7 @@ static inline void unlock_page_memcg(struct page *page)
{
}
static inline void mem_cgroup_handle_over_high(void)
static inline void mem_cgroup_handle_over_high(gfp_t gfp_mask)
{
}

@@ -1615,10 +1615,13 @@ void mem_cgroup_track_foreign_dirty_slowpath(struct page *page,
static inline void mem_cgroup_track_foreign_dirty(struct page *page,
struct bdi_writeback *wb)
{
struct mem_cgroup *memcg;
if (mem_cgroup_disabled())
return;
if (unlikely(&page_memcg(page)->css != wb->memcg_css))
memcg = page_memcg(page);
if (unlikely(memcg && &memcg->css != wb->memcg_css))
mem_cgroup_track_foreign_dirty_slowpath(page, wb);
}

@@ -16,7 +16,7 @@ static inline int memregion_alloc(gfp_t gfp)
{
return -ENOMEM;
}
void memregion_free(int id)
static inline void memregion_free(int id)
{
}
#endif
@@ -2,15 +2,15 @@
/*
* include/linux/node.h - generic node definition
*
* This is mainly for topological representation. We define the
* basic 'struct node' here, which can be embedded in per-arch
* This is mainly for topological representation. We define the
* basic 'struct node' here, which can be embedded in per-arch
* definitions of processors.
*
* Basic handling of the devices is done in drivers/base/node.c
* and system devices are handled in drivers/base/sys.c.
* and system devices are handled in drivers/base/sys.c.
*
* Nodes are exported via driverfs in the class/node/devices/
* directory.
* directory.
*/
#ifndef _LINUX_NODE_H_
#define _LINUX_NODE_H_

@@ -18,7 +18,6 @@
#include <linux/device.h>
#include <linux/cpumask.h>
#include <linux/list.h>
#include <linux/workqueue.h>
/**
* struct node_hmem_attrs - heterogeneous memory performance attributes

@@ -84,10 +83,6 @@ static inline void node_set_perf_attrs(unsigned int nid,
struct node {
struct device dev;
struct list_head access_list;
#if defined(CONFIG_MEMORY_HOTPLUG) && defined(CONFIG_HUGETLBFS)
struct work_struct node_work;
#endif
#ifdef CONFIG_HMEM_REPORTING
struct list_head cache_attrs;
struct device *cache_dev;

@@ -96,7 +91,6 @@ struct node {
struct memory_block;
extern struct node *node_devices[];
typedef void (*node_registration_func_t)(struct node *);
#if defined(CONFIG_MEMORY_HOTPLUG) && defined(CONFIG_NUMA)
void register_memory_blocks_under_node(int nid, unsigned long start_pfn,

@@ -144,11 +138,6 @@ extern void unregister_memory_block_under_nodes(struct memory_block *mem_blk);
extern int register_memory_node_under_compute_node(unsigned int mem_nid,
unsigned int cpu_nid,
unsigned access);
#ifdef CONFIG_HUGETLBFS
extern void register_hugetlbfs_with_node(node_registration_func_t doregister,
node_registration_func_t unregister);
#endif
#else
static inline void node_dev_init(void)
{

@@ -176,11 +165,6 @@ static inline int unregister_cpu_under_node(unsigned int cpu, unsigned int nid)
static inline void unregister_memory_block_under_nodes(struct memory_block *mem_blk)
{
}
static inline void register_hugetlbfs_with_node(node_registration_func_t reg,
node_registration_func_t unreg)
{
}
#endif
#define to_node(device) container_of(device, struct node, dev)
@@ -243,6 +243,7 @@ pgoff_t page_cache_prev_miss(struct address_space *mapping,
#define FGP_NOFS 0x00000010
#define FGP_NOWAIT 0x00000020
#define FGP_FOR_MMAP 0x00000040
#define FGP_HEAD 0x00000080
struct page *pagecache_get_page(struct address_space *mapping, pgoff_t offset,
int fgp_flags, gfp_t cache_gfp_mask);

@@ -272,20 +273,39 @@ static inline struct page *find_get_page_flags(struct address_space *mapping,
/**
* find_lock_page - locate, pin and lock a pagecache page
* @mapping: the address_space to search
* @offset: the page index
* @index: the page index
*
* Looks up the page cache slot at @mapping & @offset. If there is a
* Looks up the page cache entry at @mapping & @index. If there is a
* page cache page, it is returned locked and with an increased
* refcount.
*
* Otherwise, %NULL is returned.
*
* find_lock_page() may sleep.
* Context: May sleep.
* Return: A struct page or %NULL if there is no page in the cache for this
* index.
*/
static inline struct page *find_lock_page(struct address_space *mapping,
pgoff_t offset)
pgoff_t index)
{
return pagecache_get_page(mapping, offset, FGP_LOCK, 0);
return pagecache_get_page(mapping, index, FGP_LOCK, 0);
}
/**
* find_lock_head - Locate, pin and lock a pagecache page.
* @mapping: The address_space to search.
* @index: The page index.
*
* Looks up the page cache entry at @mapping & @index. If there is a
* page cache page, its head page is returned locked and with an increased
* refcount.
*
* Context: May sleep.
* Return: A struct page which is !PageTail, or %NULL if there is no page
* in the cache for this index.
*/
static inline struct page *find_lock_head(struct address_space *mapping,
pgoff_t index)
{
return pagecache_get_page(mapping, index, FGP_LOCK | FGP_HEAD, 0);
}
/**

@@ -336,18 +356,28 @@ static inline struct page *grab_cache_page_nowait(struct address_space *mapping,
mapping_gfp_mask(mapping));
}
static inline struct page *find_subpage(struct page *page, pgoff_t offset)
/* Does this page contain this index? */
static inline bool thp_contains(struct page *head, pgoff_t index)
{
if (PageHuge(page))
return page;
VM_BUG_ON_PAGE(PageTail(page), page);
return page + (offset & (compound_nr(page) - 1));
/* HugeTLBfs indexes the page cache in units of hpage_size */
if (PageHuge(head))
return head->index == index;
return page_index(head) == (index & ~(thp_nr_pages(head) - 1UL));
}
/*
* Given the page we found in the page cache, return the page corresponding
* to this index in the file
*/
static inline struct page *find_subpage(struct page *head, pgoff_t index)
{
/* HugeTLBfs wants the head page regardless */
if (PageHuge(head))
return head;
return head + (index & (thp_nr_pages(head) - 1));
}
struct page *find_get_entry(struct address_space *mapping, pgoff_t offset);
struct page *find_lock_entry(struct address_space *mapping, pgoff_t offset);
unsigned find_get_entries(struct address_space *mapping, pgoff_t start,
unsigned int nr_entries, struct page **entries,
pgoff_t *indices);

@@ -79,7 +79,12 @@ extern void shmem_truncate_range(struct inode *inode, loff_t start, loff_t end);
extern int shmem_unuse(unsigned int type, bool frontswap,
unsigned long *fs_pages_to_unuse);
extern bool shmem_huge_enabled(struct vm_area_struct *vma);
extern bool shmem_is_huge(struct vm_area_struct *vma,
struct inode *inode, pgoff_t index);
static inline bool shmem_huge_enabled(struct vm_area_struct *vma)
{
return shmem_is_huge(vma, file_inode(vma->vm_file), vma->vm_pgoff);
}
extern unsigned long shmem_swap_usage(struct vm_area_struct *vma);
extern unsigned long shmem_partial_swap_usage(struct address_space *mapping,
pgoff_t start, pgoff_t end);

@@ -87,9 +92,8 @@ extern unsigned long shmem_partial_swap_usage(struct address_space *mapping,
/* Flag allocation requirements to shmem_getpage */
enum sgp_type {
SGP_READ, /* don't exceed i_size, don't allocate page */
SGP_NOALLOC, /* similar, but fail on hole or use fallocated page */
SGP_CACHE, /* don't exceed i_size, may allocate page */
SGP_NOHUGE, /* like SGP_CACHE, but no huge pages */
SGP_HUGE, /* like SGP_CACHE, huge pages preferred */
SGP_WRITE, /* may exceed i_size, may allocate !Uptodate page */
SGP_FALLOC, /* like SGP_WRITE, but make existing page Uptodate */
};
@@ -423,6 +423,7 @@ extern void free_pages_and_swap_cache(struct page **, int);
extern struct page *lookup_swap_cache(swp_entry_t entry,
struct vm_area_struct *vma,
unsigned long addr);
struct page *find_get_incore_page(struct address_space *mapping, pgoff_t index);
extern struct page *read_swap_cache_async(swp_entry_t, gfp_t,
struct vm_area_struct *vma, unsigned long addr,
bool do_poll);

@@ -584,6 +585,12 @@ static inline struct page *lookup_swap_cache(swp_entry_t swp,
return NULL;
}
static inline
struct page *find_get_incore_page(struct address_space *mapping, pgoff_t index)
{
return find_get_page(mapping, index);
}
static inline int add_to_swap(struct page *page)
{
return 0;

@@ -187,7 +187,7 @@ static inline void tracehook_notify_resume(struct pt_regs *regs)
if (unlikely(current->task_works))
task_work_run();
mem_cgroup_handle_over_high();
mem_cgroup_handle_over_high(GFP_KERNEL);
blkcg_maybe_throttle_current();
}

@@ -68,7 +68,7 @@ DECLARE_EVENT_CLASS(writeback_page_template,
strscpy_pad(__entry->name,
bdi_dev_name(mapping ? inode_to_bdi(mapping->host) :
NULL), 32);
__entry->ino = mapping ? mapping->host->i_ino : 0;
__entry->ino = (mapping && mapping->host) ? mapping->host->i_ino : 0;
__entry->index = page->index;
),

@@ -1170,7 +1170,6 @@ static void set_iter_tags(struct radix_tree_iter *iter,
void __rcu **radix_tree_iter_resume(void __rcu **slot,
struct radix_tree_iter *iter)
{
slot++;
iter->index = __radix_tree_iter_add(iter, 1);
iter->next_index = iter->index;
iter->tags = 0;

@@ -39,7 +39,7 @@ static void test_next_pointer(struct kunit *test)
ptr_addr = (unsigned long *)(p + s->offset);
tmp = *ptr_addr;
p[s->offset] = 0x12;
p[s->offset] = ~p[s->offset];
/*
* Expecting three errors.
mm/cma.c

@@ -490,7 +490,7 @@ struct page *cma_alloc(struct cma *cma, size_t count, unsigned int align,
*/
if (page) {
for (i = 0; i < count; i++)
page_kasan_tag_reset(page + i);
page_kasan_tag_reset(nth_page(page, i));
}
if (ret && !no_warn) {

@@ -1050,12 +1050,12 @@ isolate_success:
/*
* Avoid isolating too much unless this block is being
* rescanned (e.g. dirty/writeback pages, parallel allocation)
* fully scanned (e.g. dirty/writeback pages, parallel allocation)
* or a lock is contended. For contention, isolate quickly to
* potentially remove one source of contention.
*/
if (cc->nr_migratepages >= COMPACT_CLUSTER_MAX &&
!cc->rescan && !cc->contended) {
!cc->finish_pageblock && !cc->contended) {
++low_pfn;
break;
}

@@ -1117,14 +1117,14 @@ isolate_abort:
}
/*
* Updated the cached scanner pfn once the pageblock has been scanned
* Update the cached scanner pfn once the pageblock has been scanned.
* Pages will either be migrated in which case there is no point
* scanning in the near future or migration failed in which case the
* failure reason may persist. The block is marked for skipping if
* there were no pages isolated in the block or if the block is
* rescanned twice in a row.
*/
if (low_pfn == end_pfn && (!nr_isolated || cc->rescan)) {
if (low_pfn == end_pfn && (!nr_isolated || cc->finish_pageblock)) {
if (valid_page && !skip_updated)
set_pageblock_skip(valid_page);
update_cached_migrate(cc, low_pfn);

@@ -1710,6 +1710,13 @@ static unsigned long fast_find_migrateblock(struct compact_control *cc)
if (cc->ignore_skip_hint)
return pfn;
/*
* If the pageblock should be finished then do not select a different
* pageblock.
*/
if (cc->finish_pageblock)
return pfn;
/*
* If the migrate_pfn is not at the start of a zone or the start
* of a pageblock then assume this is a continuation of a previous

@@ -2112,13 +2119,6 @@ static enum compact_result compact_finished(struct compact_control *cc)
return ret;
}
/*
* compaction_suitable: Is this suitable to run compaction on this zone now?
* Returns
* COMPACT_SKIPPED - If there are too few free pages for compaction
* COMPACT_SUCCESS - If the allocation would succeed without compaction
* COMPACT_CONTINUE - If compaction should run now
*/
static enum compact_result __compaction_suitable(struct zone *zone, int order,
unsigned int alloc_flags,
int highest_zoneidx,

@@ -2162,6 +2162,13 @@ static enum compact_result __compaction_suitable(struct zone *zone, int order,
return COMPACT_CONTINUE;
}
/*
* compaction_suitable: Is this suitable to run compaction on this zone now?
* Returns
* COMPACT_SKIPPED - If there are too few free pages for compaction
* COMPACT_SUCCESS - If the allocation would succeed without compaction
* COMPACT_CONTINUE - If compaction should run now
*/
enum compact_result compaction_suitable(struct zone *zone, int order,
unsigned int alloc_flags,
int highest_zoneidx)

@@ -2317,22 +2324,23 @@ compact_zone(struct compact_control *cc, struct capture_control *capc)
while ((ret = compact_finished(cc)) == COMPACT_CONTINUE) {
int err;
unsigned long start_pfn = cc->migrate_pfn;
unsigned long iteration_start_pfn = cc->migrate_pfn;
/*
* Avoid multiple rescans which can happen if a page cannot be
* isolated (dirty/writeback in async mode) or if the migrated
* pages are being allocated before the pageblock is cleared.
* The first rescan will capture the entire pageblock for
* migration. If it fails, it'll be marked skip and scanning
* will proceed as normal.
* Avoid multiple rescans of the same pageblock which can
* happen if a page cannot be isolated (dirty/writeback in
* async mode) or if the migrated pages are being allocated
* before the pageblock is cleared. The first rescan will
* capture the entire pageblock for migration. If it fails,
* it'll be marked skip and scanning will proceed as normal.
*/
cc->rescan = false;
cc->finish_pageblock = false;
if (pageblock_start_pfn(last_migrated_pfn) ==
pageblock_start_pfn(start_pfn)) {
cc->rescan = true;
pageblock_start_pfn(iteration_start_pfn)) {
cc->finish_pageblock = true;
}
rescan:
switch (isolate_migratepages(cc)) {
case ISOLATE_ABORT:
ret = COMPACT_CONTENDED;

@@ -2353,8 +2361,7 @@ compact_zone(struct compact_control *cc, struct capture_control *capc)
goto check_drain;
case ISOLATE_SUCCESS:
update_cached = false;
last_migrated_pfn = start_pfn;
;
last_migrated_pfn = iteration_start_pfn;
}
err = migrate_pages(&cc->migratepages, compaction_alloc,

@@ -2377,18 +2384,37 @@ compact_zone(struct compact_control *cc, struct capture_control *capc)
goto out;
}
/*
* We failed to migrate at least one page in the current
* order-aligned block, so skip the rest of it.
* If an ASYNC or SYNC_LIGHT fails to migrate a page
* within the current order-aligned block, scan the
* remainder of the pageblock. This will mark the
* pageblock "skip" to avoid rescanning in the near
* future. This will isolate more pages than necessary
* for the request but avoid loops due to
* fast_find_migrateblock revisiting blocks that were
* recently partially scanned.
*/
if (cc->direct_compaction &&
(cc->mode == MIGRATE_ASYNC)) {
cc->migrate_pfn = block_end_pfn(
cc->migrate_pfn - 1, cc->order);
/* Draining pcplists is useless in this case */
last_migrated_pfn = 0;
if (cc->direct_compaction && !cc->finish_pageblock &&
(cc->mode < MIGRATE_SYNC)) {
cc->finish_pageblock = true;
/*
* Draining pcplists does not help THP if
* any page failed to migrate. Even after
* drain, the pageblock will not be free.
*/
if (cc->order == COMPACTION_HPAGE_ORDER)
last_migrated_pfn = 0;
goto rescan;
}
}
/* Stop if a page has been captured */
if (capc && capc->page) {
ret = COMPACT_SUCCESS;
break;
}
check_drain:
/*
* Has the migration scanner moved away from the previous

@@ -2407,12 +2433,6 @@ check_drain:
last_migrated_pfn = 0;
}
}
/* Stop if a page has been captured */
if (capc && capc->page) {
ret = COMPACT_SUCCESS;
break;
}
}
out:
mm/filemap.c

@@ -1733,19 +1733,19 @@ EXPORT_SYMBOL(page_cache_prev_miss);
/**
* find_get_entry - find and get a page cache entry
* @mapping: the address_space to search
* @offset: the page cache index
* @index: The page cache index.
*
* Looks up the page cache slot at @mapping & @offset. If there is a
* page cache page, it is returned with an increased refcount.
* page cache page, the head page is returned with an increased refcount.
*
* If the slot holds a shadow entry of a previously evicted page, or a
* swap entry from shmem/tmpfs, it is returned.
*
* Return: the found page or shadow entry, %NULL if nothing is found.
* Return: The head page or shadow entry, %NULL if nothing is found.
*/
struct page *find_get_entry(struct address_space *mapping, pgoff_t offset)
struct page *find_get_entry(struct address_space *mapping, pgoff_t index)
{
XA_STATE(xas, &mapping->i_pages, offset);
XA_STATE(xas, &mapping->i_pages, index);
struct page *page;
rcu_read_lock();

@@ -1773,49 +1773,44 @@ repeat:
put_page(page);
goto repeat;
}
page = find_subpage(page, offset);
out:
rcu_read_unlock();
return page;
}
EXPORT_SYMBOL(find_get_entry);
/**
* find_lock_entry - locate, pin and lock a page cache entry
* @mapping: the address_space to search
* @offset: the page cache index
* find_lock_entry - Locate and lock a page cache entry.
* @mapping: The address_space to search.
* @index: The page cache index.
*
* Looks up the page cache slot at @mapping & @offset. If there is a
* page cache page, it is returned locked and with an increased
* refcount.
* Looks up the page at @mapping & @index. If there is a page in the
* cache, the head page is returned locked and with an increased refcount.
*
* If the slot holds a shadow entry of a previously evicted page, or a
* swap entry from shmem/tmpfs, it is returned.
*
* find_lock_entry() may sleep.
*
* Return: the found page or shadow entry, %NULL if nothing is found.
* Context: May sleep.
* Return: The head page or shadow entry, %NULL if nothing is found.
*/
struct page *find_lock_entry(struct address_space *mapping, pgoff_t offset)
struct page *find_lock_entry(struct address_space *mapping, pgoff_t index)
{
struct page *page;
repeat:
page = find_get_entry(mapping, offset);
page = find_get_entry(mapping, index);
if (page && !xa_is_value(page)) {
lock_page(page);
/* Has the page been truncated? */
if (unlikely(page_mapping(page) != mapping)) {
if (unlikely(page->mapping != mapping)) {
unlock_page(page);
put_page(page);
goto repeat;
}
VM_BUG_ON_PAGE(page_to_pgoff(page) != offset, page);
VM_BUG_ON_PAGE(!thp_contains(page, index), page);
}
return page;
}
EXPORT_SYMBOL(find_lock_entry);
/**
* pagecache_get_page - Find and get a reference to a page.

@@ -1830,6 +1825,8 @@ EXPORT_SYMBOL(find_lock_entry);
*
* * %FGP_ACCESSED - The page will be marked accessed.
* * %FGP_LOCK - The page is returned locked.
* * %FGP_HEAD - If the page is present and a THP, return the head page
* rather than the exact page specified by the index.
* * %FGP_CREAT - If no page is present then a new page is allocated using
* @gfp_mask and added to the page cache and the VM's LRU list.
* The page is returned locked and with an increased refcount.

@@ -1870,12 +1867,12 @@ repeat:
}
/* Has the page been truncated? */
if (unlikely(compound_head(page)->mapping != mapping)) {
if (unlikely(page->mapping != mapping)) {
unlock_page(page);
put_page(page);
goto repeat;
}
VM_BUG_ON_PAGE(page->index != index, page);
VM_BUG_ON_PAGE(!thp_contains(page, index), page);
}
if (page && (fgp_flags & FGP_ACCESSED))

@@ -1885,6 +1882,8 @@ repeat:
if (page_is_idle(page))
clear_page_idle(page);
}
if (!(fgp_flags & FGP_HEAD))
page = find_subpage(page, index);
no_page:
if (!page && (fgp_flags & FGP_CREAT)) {

@@ -1945,6 +1944,11 @@ EXPORT_SYMBOL(pagecache_get_page);
* Any shadow entries of evicted pages, or swap entries from
* shmem/tmpfs, are included in the returned array.
*
* If it finds a Transparent Huge Page, head or tail, find_get_entries()
* stops at that page: the caller is likely to have a better way to handle
* the compound page as a whole, and then skip its extent, than repeatedly
* calling find_get_entries() to return all its tails.
*
* Return: the number of pages and shadow entries which were found.
*/
unsigned find_get_entries(struct address_space *mapping,

@@ -1976,8 +1980,15 @@ unsigned find_get_entries(struct address_space *mapping,
/* Has the page moved or been split? */
if (unlikely(page != xas_reload(&xas)))
goto put_page;
page = find_subpage(page, xas.xa_index);
/*
* Terminate early on finding a THP, to allow the caller to
* handle it all at once; but continue if this is hugetlbfs.
*/
if (PageTransHuge(page) && !PageHuge(page)) {
page = find_subpage(page, xas.xa_index);
nr_entries = ret + 1;
}
export:
indices[ret] = xas.xa_index;
entries[ret] = page;

@@ -2042,7 +2053,7 @@ unsigned find_get_pages_range(struct address_space *mapping, pgoff_t *start,
pages[ret] = find_subpage(page, xas.xa_index);
if (++ret == nr_pages) {
*start = page->index + 1;
*start = xas.xa_index + 1;
goto out;
}
continue;

@@ -2123,7 +2134,7 @@ retry:
EXPORT_SYMBOL(find_get_pages_contig);
/**
* find_get_pages_range_tag - find and return pages in given range matching @tag
* find_get_pages_range_tag - Find and return head pages matching @tag.
* @mapping: the address_space to search
* @index: the starting page index
* @end: The final page index (inclusive)

@@ -2131,8 +2142,9 @@ EXPORT_SYMBOL(find_get_pages_contig);
* @nr_pages: the maximum number of pages
* @pages: where the resulting pages are placed
*
* Like find_get_pages, except we only return pages which are tagged with
* @tag. We update @index to index the next page for the traversal.
* Like find_get_pages(), except we only return head pages which are tagged
* with @tag. @index is updated to the index immediately after the last
* page we return, ready for the next iteration.
*
* Return: the number of pages which were found.
*/

@@ -2166,9 +2178,9 @@ unsigned find_get_pages_range_tag(struct address_space *mapping, pgoff_t *index,
if (unlikely(page != xas_reload(&xas)))
goto put_page;
pages[ret] = find_subpage(page, xas.xa_index);
pages[ret] = page;
if (++ret == nr_pages) {
*index = page->index + 1;
*index = page->index + thp_nr_pages(page);
goto out;
}
continue;
mm/gup.c

@@ -1921,8 +1921,8 @@ size_t fault_in_safe_writeable(const char __user *uaddr, size_t size)
} while (start != end);
mmap_read_unlock(mm);
if (size > (unsigned long)uaddr - start)
return size - ((unsigned long)uaddr - start);
if (size > start - (unsigned long)uaddr)
return size - (start - (unsigned long)uaddr);
return 0;
}
EXPORT_SYMBOL(fault_in_safe_writeable);

@@ -664,9 +664,9 @@ release:
* available
* never: never stall for any thp allocation
*/
static inline gfp_t alloc_hugepage_direct_gfpmask(struct vm_area_struct *vma)
gfp_t vma_thp_gfp_mask(struct vm_area_struct *vma)
{
const bool vma_madvised = !!(vma->vm_flags & VM_HUGEPAGE);
const bool vma_madvised = vma && (vma->vm_flags & VM_HUGEPAGE);
if (test_bit(TRANSPARENT_HUGEPAGE_DEFRAG_DIRECT_FLAG, &transparent_hugepage_flags))
return GFP_TRANSHUGE | (vma_madvised ? 0 : __GFP_NORETRY);

@@ -749,7 +749,7 @@ vm_fault_t do_huge_pmd_anonymous_page(struct vm_fault *vmf)
}
return ret;
}
gfp = alloc_hugepage_direct_gfpmask(vma);
gfp = vma_thp_gfp_mask(vma);
page = alloc_hugepage_vma(gfp, vma, haddr, HPAGE_PMD_ORDER);
if (unlikely(!page)) {
count_vm_event(THP_FAULT_FALLBACK);
mm/hugetlb.c

@@ -28,6 +28,7 @@
#include <linux/jhash.h>
#include <linux/numa.h>
#include <linux/llist.h>
#include <linux/memory.h>
#include <asm/page.h>
#include <asm/pgtable.h>

@@ -2002,6 +2003,14 @@ struct page *alloc_buddy_huge_page_with_mpol(struct hstate *h,
return page;
}
void wait_for_freed_hugetlb_pages(void)
{
if (llist_empty(&hpage_freelist))
return;
flush_work(&free_hpage_work);
}
/* page migration callback function */
struct page *alloc_huge_page_nodemask(struct hstate *h, int preferred_nid,
nodemask_t *nmask, gfp_t gfp_mask)

@@ -3061,24 +3070,8 @@ static int hugetlb_sysfs_add_hstate(struct hstate *h, struct kobject *parent,
return retval;
}
static void __init hugetlb_sysfs_init(void)
{
struct hstate *h;
int err;
hugepages_kobj = kobject_create_and_add("hugepages", mm_kobj);
if (!hugepages_kobj)
return;
for_each_hstate(h) {
err = hugetlb_sysfs_add_hstate(h, hugepages_kobj,
hstate_kobjs, &hstate_attr_group);
if (err)
pr_err("HugeTLB: Unable to add hstate %s", h->name);
}
}
#ifdef CONFIG_NUMA
static bool hugetlb_sysfs_initialized __ro_after_init;
/*
* node_hstate/s - associate per node hstate attributes, via their kobjects,

@@ -3134,7 +3127,7 @@ static struct hstate *kobj_to_node_hstate(struct kobject *kobj, int *nidp)
* Unregister hstate attributes from a single node device.
* No-op if no hstate attributes attached.
*/
static void hugetlb_unregister_node(struct node *node)
void hugetlb_unregister_node(struct node *node)
{
struct hstate *h;
struct node_hstate *nhs = &node_hstates[node->dev.id];

@@ -3159,12 +3152,15 @@ static void hugetlb_unregister_node(struct node *node)
* Register hstate attributes for a single node device.
* No-op if attributes already registered.
*/
static void hugetlb_register_node(struct node *node)
void hugetlb_register_node(struct node *node)
{
struct hstate *h;
struct node_hstate *nhs = &node_hstates[node->dev.id];
int err;
if (!hugetlb_sysfs_initialized)
return;
if (nhs->hugepages_kobj)
return; /* already allocated */

@@ -3195,18 +3191,8 @@ static void __init hugetlb_register_all_nodes(void)
{
int nid;
for_each_node_state(nid, N_MEMORY) {
struct node *node = node_devices[nid];
if (node->dev.id == nid)
hugetlb_register_node(node);
}
/*
* Let the node device driver know we're here so it can
* [un]register hstate attributes on node hotplug.
*/
register_hugetlbfs_with_node(hugetlb_register_node,
hugetlb_unregister_node);
for_each_online_node(nid)
hugetlb_register_node(node_devices[nid]);
}
#else /* !CONFIG_NUMA */

@@ -3222,6 +3208,28 @@ static void hugetlb_register_all_nodes(void) { }
#endif
static void __init hugetlb_sysfs_init(void)
{
struct hstate *h;
int err;
hugepages_kobj = kobject_create_and_add("hugepages", mm_kobj);
if (!hugepages_kobj)
return;
for_each_hstate(h) {
err = hugetlb_sysfs_add_hstate(h, hugepages_kobj,
hstate_kobjs, &hstate_attr_group);
if (err)
pr_err("HugeTLB: Unable to add hstate %s", h->name);
}
#ifdef CONFIG_NUMA
hugetlb_sysfs_initialized = true;
#endif
hugetlb_register_all_nodes();
}
static int __init hugetlb_init(void)
{
int i;

@@ -3271,7 +3279,6 @@ static int __init hugetlb_init(void)
report_hugepages();
hugetlb_sysfs_init();
hugetlb_register_all_nodes();
hugetlb_cgroup_file_init();
#ifdef CONFIG_SMP
@@ -89,6 +89,9 @@ static inline void ra_submit(struct file_ra_state *ra,
ra->start, ra->size, ra->async_size);
}
struct page *find_get_entry(struct address_space *mapping, pgoff_t index);
struct page *find_lock_entry(struct address_space *mapping, pgoff_t index);
/**
* page_evictable - test whether a page is evictable
* @page: the page to test

@@ -272,7 +275,11 @@ struct compact_control {
bool proactive_compaction; /* kcompactd proactive compaction */
bool whole_zone; /* Whole zone should/has been scanned */
bool contended; /* Signal lock or sched contention */
bool rescan; /* Rescanning the same pageblock */
bool finish_pageblock; /* Scan the remainder of a pageblock. Used
* when there are potentially transient
* isolation or migration failures to
* ensure forward progress.
*/
bool alloc_contig; /* alloc_contig_range allocation */
};

@@ -1730,7 +1730,7 @@ static void collapse_file(struct mm_struct *mm,
xas_unlock_irq(&xas);
/* swap in or instantiate fallocated page */
if (shmem_getpage(mapping->host, index, &page,
SGP_NOHUGE)) {
SGP_NOALLOC)) {
result = SCAN_FAIL;
goto xa_unlocked;
}
21
mm/madvise.c
21
mm/madvise.c
@ -244,25 +244,28 @@ static void force_shm_swapin_readahead(struct vm_area_struct *vma,
unsigned long start, unsigned long end,
struct address_space *mapping)
{
pgoff_t index;
XA_STATE(xas, &mapping->i_pages, linear_page_index(vma, start));
pgoff_t end_index = linear_page_index(vma, end + PAGE_SIZE - 1);
struct page *page;
swp_entry_t swap;

for (; start < end; start += PAGE_SIZE) {
index = ((start - vma->vm_start) >> PAGE_SHIFT) + vma->vm_pgoff;
rcu_read_lock();
xas_for_each(&xas, page, end_index) {
swp_entry_t swap;

page = find_get_entry(mapping, index);
if (!xa_is_value(page)) {
if (page)
put_page(page);
if (!xa_is_value(page))
continue;
}
xas_pause(&xas);
rcu_read_unlock();

swap = radix_to_swp_entry(page);
page = read_swap_cache_async(swap, GFP_HIGHUSER_MOVABLE,
NULL, 0, false);
if (page)
put_page(page);

rcu_read_lock();
}
rcu_read_unlock();

lru_add_drain(); /* Push any new pages onto the LRU now */
}
@ -2609,10 +2609,11 @@ static unsigned long calculate_high_delay(struct mem_cgroup *memcg,
}

/*
* Scheduled by try_charge() to be executed from the userland return path
* and reclaims memory over the high limit.
* Reclaims memory over the high limit. Called directly from
* try_charge() (context permitting), as well as from the userland
* return path where reclaim is always able to block.
*/
void mem_cgroup_handle_over_high(void)
void mem_cgroup_handle_over_high(gfp_t gfp_mask)
{
unsigned long penalty_jiffies;
unsigned long pflags;
@ -2629,6 +2630,17 @@ void mem_cgroup_handle_over_high(void)
current->memcg_nr_pages_over_high = 0;

retry_reclaim:
/*
* Bail if the task is already exiting. Unlike memory.max,
* memory.high enforcement isn't as strict, and there is no
* OOM killer involved, which means the excess could already
* be much bigger (and still growing) than it could for
* memory.max; the dying task could get stuck in fruitless
* reclaim for a long time, which isn't desirable.
*/
if (task_is_dying())
goto out;

/*
* The allocating task should reclaim at least the batch size, but for
* subsequent retries we only want to do what's necessary to prevent oom
@ -2640,7 +2652,7 @@ retry_reclaim:
*/
nr_reclaimed = reclaim_high(memcg,
in_retry ? SWAP_CLUSTER_MAX : nr_pages,
GFP_KERNEL);
gfp_mask);

/*
* memory.high is breached and reclaim is unable to keep up. Throttle
@ -2679,6 +2691,9 @@ retry_reclaim:
}

/*
* Reclaim didn't manage to push usage below the limit, slow
* this allocating task down.
*
* If we exit early, we're guaranteed to die (since
* schedule_timeout_killable sets TASK_KILLABLE). This means we don't
* need to account for any ill-begotten jiffies to pay them off later.
@ -2864,11 +2879,17 @@ done_restock:
}
} while ((memcg = parent_mem_cgroup(memcg)));

/*
* Reclaim is set up above to be called from the userland
* return path. But also attempt synchronous reclaim to avoid
* excessive overrun while the task is still inside the
* kernel. If this is successful, the return path will see it
* when it rechecks the overage and simply bail out.
*/
if (current->memcg_nr_pages_over_high > MEMCG_CHARGE_BATCH &&
!(current->flags & PF_MEMALLOC) &&
gfpflags_allow_blocking(gfp_mask)) {
mem_cgroup_handle_over_high();
}
gfpflags_allow_blocking(gfp_mask))
mem_cgroup_handle_over_high(gfp_mask);
return 0;
}

@ -4677,7 +4698,7 @@ void mem_cgroup_wb_stats(struct bdi_writeback *wb, unsigned long *pfilepages,
struct mem_cgroup *memcg = mem_cgroup_from_css(wb->memcg_css);
struct mem_cgroup *parent;

mem_cgroup_flush_stats();
mem_cgroup_flush_stats_delayed();

*pdirty = memcg_page_state(memcg, NR_FILE_DIRTY);
*pwriteback = memcg_page_state(memcg, NR_WRITEBACK);
@ -5548,7 +5569,7 @@ static int mem_cgroup_css_online(struct cgroup_subsys_state *css)
return -ENOMEM;
}

if (unlikely(mem_cgroup_is_root(memcg)))
if (unlikely(mem_cgroup_is_root(memcg)) && !mem_cgroup_disabled())
queue_delayed_work(system_unbound_wq, &stats_flush_dwork,
2UL*HZ);

@ -5850,35 +5871,15 @@ static struct page *mc_handle_swap_pte(struct vm_area_struct *vma,
static struct page *mc_handle_file_pte(struct vm_area_struct *vma,
unsigned long addr, pte_t ptent, swp_entry_t *entry)
{
struct page *page = NULL;
struct address_space *mapping;
pgoff_t pgoff;

if (!vma->vm_file) /* anonymous vma */
return NULL;
if (!(mc.flags & MOVE_FILE))
return NULL;

mapping = vma->vm_file->f_mapping;
pgoff = linear_page_index(vma, addr);

/* page is moved even if it's not RSS of this task(page-faulted). */
#ifdef CONFIG_SWAP
/* shmem/tmpfs may report page out on swap: account for that too. */
if (shmem_mapping(mapping)) {
page = find_get_entry(mapping, pgoff);
if (xa_is_value(page)) {
swp_entry_t swp = radix_to_swp_entry(page);
*entry = swp;
page = find_get_page(swap_address_space(swp),
swp_offset(swp));
}
} else
page = find_get_page(mapping, pgoff);
#else
page = find_get_page(mapping, pgoff);
#endif
return page;
return find_get_incore_page(vma->vm_file->f_mapping,
linear_page_index(vma, addr));
}

/**
@ -6774,6 +6775,8 @@ static ssize_t memory_reclaim(struct kernfs_open_file *of, char *buf,
return err;

while (nr_reclaimed < nr_to_reclaim) {
/* Will converge on zero, but reclaim enforces a minimum */
unsigned long batch_size = (nr_to_reclaim - nr_reclaimed) / 4;
unsigned long reclaimed;

if (signal_pending(current))
@ -6788,8 +6791,7 @@ static ssize_t memory_reclaim(struct kernfs_open_file *of, char *buf,
lru_add_drain_all();

reclaimed = try_to_free_mem_cgroup_pages(memcg,
min(nr_to_reclaim - nr_reclaimed, SWAP_CLUSTER_MAX),
GFP_KERNEL, true);
batch_size, GFP_KERNEL, true);

if (!reclaimed && !nr_retries--)
return -EAGAIN;
@ -1973,16 +1973,6 @@ static int soft_offline_in_use_page(struct page *page)
return __soft_offline_page(page);
}

static int soft_offline_free_page(struct page *page)
{
int rc = 0;

if (!page_handle_poison(page, true, false))
rc = -EBUSY;

return rc;
}

static void put_ref_page(struct page *page)
{
if (page)
@ -2045,10 +2035,13 @@ retry:
if (ret > 0) {
ret = soft_offline_in_use_page(page);
} else if (ret == 0) {
if (soft_offline_free_page(page) && try_again) {
try_again = false;
flags &= ~MF_COUNT_INCREASED;
goto retry;
if (!page_handle_poison(page, true, false)) {
if (try_again) {
try_again = false;
flags &= ~MF_COUNT_INCREASED;
goto retry;
}
ret = -EBUSY;
}
} else if (ret == -EIO) {
pr_info("%s: %#lx: unknown page type: %lx (%pGp)\n",
@ -2235,11 +2235,11 @@ static int apply_to_pte_range(struct mm_struct *mm, pmd_t *pmd,
if (fn) {
do {
if (create || !pte_none(*pte)) {
err = fn(pte++, addr, data);
err = fn(pte, addr, data);
if (err)
break;
}
} while (addr += PAGE_SIZE, addr != end);
} while (pte++, addr += PAGE_SIZE, addr != end);
}

arch_leave_lazy_mmu_mode();
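The apply_to_pte_range hunk above moves the pte++ out of the fn() call and into the loop-advance expression, so the cursor advances even on iterations where fn() is skipped. A minimal user-space sketch of the same iteration pattern, with hypothetical names (visit_slots, visit_fn) that are not from this commit:

#include <stddef.h>

typedef int (*visit_fn)(int *slot, void *data);

/* Walk every slot; invoke fn only on the slots the caller asked for. */
static int visit_slots(int *slot, size_t n, int only_nonzero,
		       visit_fn fn, void *data)
{
	int err = 0;
	size_t i = 0;

	do {
		if (!only_nonzero || *slot != 0) {
			err = fn(slot, data);	/* note: no slot++ here */
			if (err)
				break;
		}
	} while (slot++, ++i < n);	/* cursor advances on every pass */

	return err;
}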
@ -1290,7 +1290,7 @@ static int scan_movable_pages(unsigned long start, unsigned long end,
*/
if (HPageMigratable(head))
goto found;
skip = compound_nr(head) - (page - head);
skip = compound_nr(head) - (pfn - page_to_pfn(head));
pfn += skip - 1;
}
return -ENOENT;
@ -2986,8 +2986,9 @@ out:
* @pol: pointer to mempolicy to be formatted
*
* Convert @pol into a string. If @buffer is too short, truncate the string.
* Recommend a @maxlen of at least 32 for the longest mode, "interleave", the
* longest flag, "relative", and to display at least a few node ids.
* Recommend a @maxlen of at least 51 for the longest mode, "weighted
* interleave", plus the longest flag flags, "relative|balancing", and to
* display at least a few node ids.
*/
void mpol_to_str(char *buffer, int maxlen, struct mempolicy *pol)
{
@ -2996,7 +2997,10 @@ void mpol_to_str(char *buffer, int maxlen, struct mempolicy *pol)
unsigned short mode = MPOL_DEFAULT;
unsigned short flags = 0;

if (pol && pol != &default_policy && !(pol->flags & MPOL_F_MORON)) {
if (pol &&
pol != &default_policy &&
!(pol >= &preferred_node_policy[0] &&
pol <= &preferred_node_policy[ARRAY_SIZE(preferred_node_policy) - 1])) {
mode = pol->mode;
flags = pol->flags;
}
@ -3023,12 +3027,18 @@ void mpol_to_str(char *buffer, int maxlen, struct mempolicy *pol)
p += snprintf(p, buffer + maxlen - p, "=");

/*
* Currently, the only defined flags are mutually exclusive
* Static and relative are mutually exclusive.
*/
if (flags & MPOL_F_STATIC_NODES)
p += snprintf(p, buffer + maxlen - p, "static");
else if (flags & MPOL_F_RELATIVE_NODES)
p += snprintf(p, buffer + maxlen - p, "relative");

if (flags & MPOL_F_NUMA_BALANCING) {
if (!is_power_of_2(flags & MPOL_MODE_FLAGS))
p += snprintf(p, buffer + maxlen - p, "|");
p += snprintf(p, buffer + maxlen - p, "balancing");
}
}

if (!nodes_empty(nodes))
mm/mincore.c
@ -48,7 +48,7 @@ static int mincore_hugetlb(pte_t *pte, unsigned long hmask, unsigned long addr,
* and is up to date; i.e. that no page-in operation would be required
* at this time if an application were to map and access this page.
*/
static unsigned char mincore_page(struct address_space *mapping, pgoff_t pgoff)
static unsigned char mincore_page(struct address_space *mapping, pgoff_t index)
{
unsigned char present = 0;
struct page *page;
@ -59,31 +59,7 @@ static unsigned char mincore_page(struct address_space *mapping, pgoff_t pgoff)
* any other file mapping (ie. marked !present and faulted in with
* tmpfs's .fault). So swapped out tmpfs mappings are tested here.
*/
#ifdef CONFIG_SWAP
if (shmem_mapping(mapping)) {
page = find_get_entry(mapping, pgoff);
/*
* shmem/tmpfs may return swap: account for swapcache
* page too.
*/
if (xa_is_value(page)) {
swp_entry_t swp = radix_to_swp_entry(page);
struct swap_info_struct *si;

/* Prevent swap device to being swapoff under us */
si = get_swap_device(swp);
if (si) {
page = find_get_page(swap_address_space(swp),
swp_offset(swp));
put_swap_device(si);
} else
page = NULL;
}
} else
page = find_get_page(mapping, pgoff);
#else
page = find_get_page(mapping, pgoff);
#endif
page = find_get_incore_page(mapping, index);
if (page) {
present = PageUptodate(page);
put_page(page);
@ -2132,6 +2132,14 @@ void __init page_alloc_init_late(void)
/* Block until all are initialised */
wait_for_completion(&pgdat_init_all_done_comp);

/*
* The number of managed pages has changed due to the initialisation
* so the pcpu batch and high limits needs to be updated or the limits
* will be artificially small.
*/
for_each_populated_zone(zone)
zone_pcp_update(zone);

/*
* We initialized the rest of the deferred pages. Permanently disable
* on-demand struct page initialization.
@ -8892,7 +8900,6 @@ void free_contig_range(unsigned long pfn, unsigned int nr_pages)
}
EXPORT_SYMBOL(free_contig_range);

#ifdef CONFIG_MEMORY_HOTPLUG
/*
* The zone indicated has a new number of managed_pages; batch sizes and percpu
* page high values need to be recalculated.
@ -8903,7 +8910,6 @@ void __meminit zone_pcp_update(struct zone *zone)
zone_set_pageset_high_and_batch(zone);
mutex_unlock(&pcp_batch_high_lock);
}
#endif

/*
* Effectively disable pcplists for the zone by setting the high limit to 0
@ -9034,6 +9040,7 @@ static void break_down_buddy_pages(struct zone *zone, struct page *page,
next_page = page;
current_buddy = page + size;
}
page = next_page;

if (set_page_guard(zone, current_buddy, high, migratetype))
continue;
@ -9041,7 +9048,6 @@ static void break_down_buddy_pages(struct zone *zone, struct page *page,
if (current_buddy != target) {
add_to_free_list(current_buddy, zone, high, migratetype);
set_buddy_order(current_buddy, high);
page = next_page;
}
}
}
@ -282,6 +282,16 @@ int test_pages_isolated(unsigned long start_pfn, unsigned long end_pfn,
struct page *page;
struct zone *zone;

/*
* Due to the deferred freeing of hugetlb folios, the hugepage folios may
* not immediately release to the buddy system. This can cause PageBuddy()
* to fail in __test_page_isolated_in_pageblock(). To ensure that the
* hugetlb folios are properly released back to the buddy system, we
* invoke the wait_for_freed_hugetlb_folios() function to wait for the
* release to complete.
*/
wait_for_freed_hugetlb_pages();

/*
* Note: pageblock_nr_pages != MAX_ORDER. Then, chunks of free pages
* are not aligned to pageblock_nr_pages.
@ -319,7 +319,8 @@ int page_reporting_register(struct page_reporting_dev_info *prdev)
mutex_lock(&page_reporting_mutex);

/* nothing to do if already in use */
if (rcu_access_pointer(pr_dev_info)) {
if (rcu_dereference_protected(pr_dev_info,
lockdep_is_held(&page_reporting_mutex))) {
err = -EBUSY;
goto err_out;
}
@ -350,7 +351,8 @@ void page_reporting_unregister(struct page_reporting_dev_info *prdev)
{
mutex_lock(&page_reporting_mutex);

if (rcu_access_pointer(pr_dev_info) == prdev) {
if (prdev == rcu_dereference_protected(pr_dev_info,
lockdep_is_held(&page_reporting_mutex))) {
/* Disable page reporting notification */
RCU_INIT_POINTER(pr_dev_info, NULL);
synchronize_rcu();
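The page_reporting hunks above replace rcu_access_pointer() with rcu_dereference_protected() plus lockdep_is_held(), stating explicitly that page_reporting_mutex, not an RCU read-side section, protects these reads. A minimal sketch of that pattern, assuming hypothetical demo_* names rather than anything in this commit:

#include <linux/errno.h>
#include <linux/mutex.h>
#include <linux/rcupdate.h>

struct demo_dev {
	int id;
};

static DEFINE_MUTEX(demo_mutex);
static struct demo_dev __rcu *demo_dev_ptr;

static int demo_register(struct demo_dev *dev)
{
	int err = 0;

	mutex_lock(&demo_mutex);
	/* Updates are serialized by demo_mutex, so lockdep can check it. */
	if (rcu_dereference_protected(demo_dev_ptr,
				      lockdep_is_held(&demo_mutex)))
		err = -EBUSY;
	else
		rcu_assign_pointer(demo_dev_ptr, dev);
	mutex_unlock(&demo_mutex);

	return err;
}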
@ -1741,7 +1741,7 @@ static void __percpu *pcpu_alloc(size_t size, size_t align, bool reserved,
gfp = current_gfp_context(gfp);
/* whitelisted flags that can be passed to the backing allocators */
pcpu_gfp = gfp & (GFP_KERNEL | __GFP_NORETRY | __GFP_NOWARN);
is_atomic = (gfp & GFP_KERNEL) != GFP_KERNEL;
is_atomic = !gfpflags_allow_blocking(gfp);
do_warn = !(gfp & __GFP_NOWARN);

/*
@ -2243,7 +2243,12 @@ static void pcpu_balance_workfn(struct work_struct *work)
* to grow other chunks. This then gives pcpu_reclaim_populated() time
* to move fully free chunks to the active list to be freed if
* appropriate.
*
* Enforce GFP_NOIO allocations because we have pcpu_alloc users
* constrained to GFP_NOIO/NOFS contexts and they could form lock
* dependency through pcpu_alloc_mutex
*/
unsigned int flags = memalloc_noio_save();
mutex_lock(&pcpu_alloc_mutex);
spin_lock_irq(&pcpu_lock);

@ -2254,6 +2259,7 @@ static void pcpu_balance_workfn(struct work_struct *work)

spin_unlock_irq(&pcpu_lock);
mutex_unlock(&pcpu_alloc_mutex);
memalloc_noio_restore(flags);
}

/**
@ -673,7 +673,8 @@ ssize_t ksys_readahead(int fd, loff_t offset, size_t count)
*/
ret = -EINVAL;
if (!f.file->f_mapping || !f.file->f_mapping->a_ops ||
!S_ISREG(file_inode(f.file)->i_mode))
(!S_ISREG(file_inode(f.file)->i_mode) &&
!S_ISBLK(file_inode(f.file)->i_mode)))
goto out;

ret = vfs_fadvise(f.file, offset, count, POSIX_FADV_WILLNEED);
mm/shmem.c
@ -413,7 +413,38 @@ static bool shmem_confirm_swap(struct address_space *mapping,
#ifdef CONFIG_TRANSPARENT_HUGEPAGE
/* ifdef here to avoid bloating shmem.o when not necessary */

static int shmem_huge __read_mostly;
static int shmem_huge __read_mostly = SHMEM_HUGE_NEVER;

bool shmem_is_huge(struct vm_area_struct *vma,
struct inode *inode, pgoff_t index)
{
loff_t i_size;

if (shmem_huge == SHMEM_HUGE_DENY)
return false;
if (vma && ((vma->vm_flags & VM_NOHUGEPAGE) ||
test_bit(MMF_DISABLE_THP, &vma->vm_mm->flags)))
return false;
if (shmem_huge == SHMEM_HUGE_FORCE)
return true;

switch (SHMEM_SB(inode->i_sb)->huge) {
case SHMEM_HUGE_ALWAYS:
return true;
case SHMEM_HUGE_WITHIN_SIZE:
index = round_up(index, HPAGE_PMD_NR);
i_size = round_up(i_size_read(inode), PAGE_SIZE);
if (i_size >= HPAGE_PMD_SIZE && (i_size >> PAGE_SHIFT) >= index)
return true;
/* fall through */
case SHMEM_HUGE_ADVISE:
if (vma && (vma->vm_flags & VM_HUGEPAGE))
return true;
fallthrough;
default:
return false;
}
}

#if defined(CONFIG_SYSFS) || defined(CONFIG_TMPFS)
static int shmem_parse_huge(const char *str)
@ -587,6 +618,12 @@ static long shmem_unused_huge_count(struct super_block *sb,

#define shmem_huge SHMEM_HUGE_DENY

bool shmem_is_huge(struct vm_area_struct *vma,
struct inode *inode, pgoff_t index)
{
return false;
}

static unsigned long shmem_unused_huge_shrink(struct shmem_sb_info *sbinfo,
struct shrink_control *sc, unsigned long nr_to_split)
{
@ -594,15 +631,6 @@ static unsigned long shmem_unused_huge_shrink(struct shmem_sb_info *sbinfo,
}
#endif /* CONFIG_TRANSPARENT_HUGEPAGE */

static inline bool is_huge_enabled(struct shmem_sb_info *sbinfo)
{
if (IS_ENABLED(CONFIG_TRANSPARENT_HUGEPAGE) &&
(shmem_huge == SHMEM_HUGE_FORCE || sbinfo->huge) &&
shmem_huge != SHMEM_HUGE_DENY)
return true;
return false;
}

/*
* Like add_to_page_cache_locked, but error if expected item has gone.
*/
@ -786,7 +814,6 @@ unsigned long shmem_swap_usage(struct vm_area_struct *vma)
void shmem_unlock_mapping(struct address_space *mapping)
{
struct pagevec pvec;
pgoff_t indices[PAGEVEC_SIZE];
pgoff_t index = 0;

pagevec_init(&pvec);
@ -794,22 +821,40 @@ void shmem_unlock_mapping(struct address_space *mapping)
* Minor point, but we might as well stop if someone else SHM_LOCKs it.
*/
while (!mapping_unevictable(mapping)) {
/*
* Avoid pagevec_lookup(): find_get_pages() returns 0 as if it
* has finished, if it hits a row of PAGEVEC_SIZE swap entries.
*/
pvec.nr = find_get_entries(mapping, index,
PAGEVEC_SIZE, pvec.pages, indices);
if (!pvec.nr)
if (!pagevec_lookup(&pvec, mapping, &index))
break;
index = indices[pvec.nr - 1] + 1;
pagevec_remove_exceptionals(&pvec);
check_move_unevictable_pages(&pvec);
pagevec_release(&pvec);
cond_resched();
}
}

/*
* Check whether a hole-punch or truncation needs to split a huge page,
* returning true if no split was required, or the split has been successful.
*
* Eviction (or truncation to 0 size) should never need to split a huge page;
* but in rare cases might do so, if shmem_undo_range() failed to trylock on
* head, and then succeeded to trylock on tail.
*
* A split can only succeed when there are no additional references on the
* huge page: so the split below relies upon find_get_entries() having stopped
* when it found a subpage of the huge page, without getting further references.
*/
static bool shmem_punch_compound(struct page *page, pgoff_t start, pgoff_t end)
{
if (!PageTransCompound(page))
return true;

/* Just proceed to delete a huge page wholly within the range punched */
if (PageHead(page) &&
page->index >= start && page->index + HPAGE_PMD_NR <= end)
return true;

/* Try to split huge page, so we can truly punch the hole or truncate */
return split_huge_page(page) >= 0;
}

/*
* Remove range of pages and swap entries from page cache, and free them.
* If !unfalloc, truncate or punch hole; if unfalloc, undo failed fallocate.
@ -863,31 +908,11 @@ static void shmem_undo_range(struct inode *inode, loff_t lstart, loff_t lend,
if (!trylock_page(page))
continue;

if (PageTransTail(page)) {
/* Middle of THP: zero out the page */
clear_highpage(page);
unlock_page(page);
continue;
} else if (PageTransHuge(page)) {
if (index == round_down(end, HPAGE_PMD_NR)) {
/*
* Range ends in the middle of THP:
* zero out the page
*/
clear_highpage(page);
unlock_page(page);
continue;
}
index += HPAGE_PMD_NR - 1;
i += HPAGE_PMD_NR - 1;
}

if (!unfalloc || !PageUptodate(page)) {
VM_BUG_ON_PAGE(PageTail(page), page);
if (page_mapping(page) == mapping) {
VM_BUG_ON_PAGE(PageWriteback(page), page);
if ((!unfalloc || !PageUptodate(page)) &&
page_mapping(page) == mapping) {
VM_BUG_ON_PAGE(PageWriteback(page), page);
if (shmem_punch_compound(page, start, end))
truncate_inode_page(mapping, page);
}
}
unlock_page(page);
}
@ -961,43 +986,25 @@ static void shmem_undo_range(struct inode *inode, loff_t lstart, loff_t lend,

lock_page(page);

if (PageTransTail(page)) {
/* Middle of THP: zero out the page */
clear_highpage(page);
unlock_page(page);
/*
* Partial thp truncate due 'start' in middle
* of THP: don't need to look on these pages
* again on !pvec.nr restart.
*/
if (index != round_down(end, HPAGE_PMD_NR))
start++;
continue;
} else if (PageTransHuge(page)) {
if (index == round_down(end, HPAGE_PMD_NR)) {
/*
* Range ends in the middle of THP:
* zero out the page
*/
clear_highpage(page);
unlock_page(page);
continue;
}
index += HPAGE_PMD_NR - 1;
i += HPAGE_PMD_NR - 1;
}

if (!unfalloc || !PageUptodate(page)) {
VM_BUG_ON_PAGE(PageTail(page), page);
if (page_mapping(page) == mapping) {
VM_BUG_ON_PAGE(PageWriteback(page), page);
truncate_inode_page(mapping, page);
} else {
if (page_mapping(page) != mapping) {
/* Page was replaced by swap: retry */
unlock_page(page);
index--;
break;
}
VM_BUG_ON_PAGE(PageWriteback(page), page);
if (shmem_punch_compound(page, start, end))
truncate_inode_page(mapping, page);
else if (IS_ENABLED(CONFIG_TRANSPARENT_HUGEPAGE)) {
/* Wipe the page and don't get stuck */
clear_highpage(page);
flush_dcache_page(page);
set_page_dirty(page);
if (index <
round_up(start, HPAGE_PMD_NR))
start = index + 1;
}
}
unlock_page(page);
}
@ -1024,7 +1031,6 @@ static int shmem_getattr(const struct path *path, struct kstat *stat,
{
struct inode *inode = path->dentry->d_inode;
struct shmem_inode_info *info = SHMEM_I(inode);
struct shmem_sb_info *sb_info = SHMEM_SB(inode->i_sb);

if (info->alloced - info->swapped != inode->i_mapping->nrpages) {
spin_lock_irq(&info->lock);
@ -1033,7 +1039,7 @@ static int shmem_getattr(const struct path *path, struct kstat *stat,
}
generic_fillattr(inode, stat);

if (is_huge_enabled(sb_info))
if (shmem_is_huge(NULL, inode, 0))
stat->blksize = HPAGE_PMD_SIZE;

return 0;
@ -1043,7 +1049,6 @@ static int shmem_setattr(struct dentry *dentry, struct iattr *attr)
{
struct inode *inode = d_inode(dentry);
struct shmem_inode_info *info = SHMEM_I(inode);
struct shmem_sb_info *sbinfo = SHMEM_SB(inode->i_sb);
int error;

error = setattr_prepare(dentry, attr);
@ -1079,24 +1084,6 @@ static int shmem_setattr(struct dentry *dentry, struct iattr *attr)
if (oldsize > holebegin)
unmap_mapping_range(inode->i_mapping,
holebegin, 0, 1);

/*
* Part of the huge page can be beyond i_size: subject
* to shrink under memory pressure.
*/
if (IS_ENABLED(CONFIG_TRANSPARENT_HUGEPAGE)) {
spin_lock(&sbinfo->shrinklist_lock);
/*
* _careful to defend against unlocked access to
* ->shrink_list in shmem_unused_huge_shrink()
*/
if (list_empty_careful(&info->shrinklist)) {
list_add_tail(&info->shrinklist,
&sbinfo->shrinklist);
sbinfo->shrinklist_len++;
}
spin_unlock(&sbinfo->shrinklist_lock);
}
}
}

@ -1281,6 +1268,7 @@ int shmem_unuse(unsigned int type, bool frontswap,
return 0;

mutex_lock(&shmem_swaplist_mutex);
start_over:
list_for_each_entry_safe(info, next, &shmem_swaplist, swaplist) {
if (!info->swapped) {
list_del_init(&info->swaplist);
@ -1300,13 +1288,15 @@ int shmem_unuse(unsigned int type, bool frontswap,
cond_resched();

mutex_lock(&shmem_swaplist_mutex);
next = list_next_entry(info, swaplist);
if (!info->swapped)
list_del_init(&info->swaplist);
if (atomic_dec_and_test(&info->stop_eviction))
wake_up_var(&info->stop_eviction);
if (error)
break;
if (list_empty(&info->swaplist))
goto start_over;
next = list_next_entry(info, swaplist);
if (!info->swapped)
list_del_init(&info->swaplist);
}
mutex_unlock(&shmem_swaplist_mutex);
@ -1324,7 +1314,19 @@ static int shmem_writepage(struct page *page, struct writeback_control *wbc)
swp_entry_t swap;
pgoff_t index;

VM_BUG_ON_PAGE(PageCompound(page), page);
/*
* If /sys/kernel/mm/transparent_hugepage/shmem_enabled is "always" or
* "force", drivers/gpu/drm/i915/gem/i915_gem_shmem.c gets huge pages,
* and its shmem_writeback() needs them to be split when swapping.
*/
if (PageTransCompound(page)) {
/* Ensure the subpages are still dirty */
SetPageDirty(page);
if (split_huge_page(page) < 0)
goto redirty;
ClearPageDirty(page);
}

BUG_ON(!PageLocked(page));
mapping = page->mapping;
index = page->index;
@ -1492,6 +1494,30 @@ static struct page *shmem_swapin(swp_entry_t swap, gfp_t gfp,
return page;
}

/*
* Make sure huge_gfp is always more limited than limit_gfp.
* Some of the flags set permissions, while others set limitations.
*/
static gfp_t limit_gfp_mask(gfp_t huge_gfp, gfp_t limit_gfp)
{
gfp_t allowflags = __GFP_IO | __GFP_FS | __GFP_RECLAIM;
gfp_t denyflags = __GFP_NOWARN | __GFP_NORETRY;
gfp_t zoneflags = limit_gfp & GFP_ZONEMASK;
gfp_t result = huge_gfp & ~(allowflags | GFP_ZONEMASK);

/* Allow allocations only from the originally specified zones. */
result |= zoneflags;

/*
* Minimize the result gfp by taking the union with the deny flags,
* and the intersection of the allow flags.
*/
result |= (limit_gfp & denyflags);
result |= (huge_gfp & limit_gfp) & allowflags;

return result;
}

static struct page *shmem_alloc_hugepage(gfp_t gfp,
struct shmem_inode_info *info, pgoff_t index)
{
@ -1506,8 +1532,8 @@ static struct page *shmem_alloc_hugepage(gfp_t gfp,
return NULL;

shmem_pseudo_vma_init(&pvma, info, hindex);
page = alloc_pages_vma(gfp | __GFP_COMP | __GFP_NORETRY | __GFP_NOWARN,
HPAGE_PMD_ORDER, &pvma, 0, numa_node_id(), true);
page = alloc_pages_vma(gfp, HPAGE_PMD_ORDER, &pvma, 0, numa_node_id(),
true);
shmem_pseudo_vma_destroy(&pvma);
if (page)
prep_transhuge_page(page);
@ -1755,16 +1781,14 @@ static int shmem_getpage_gfp(struct inode *inode, pgoff_t index,
struct shmem_sb_info *sbinfo;
struct mm_struct *charge_mm;
struct page *page;
enum sgp_type sgp_huge = sgp;
pgoff_t hindex = index;
gfp_t huge_gfp;
int error;
int once = 0;
int alloced = 0;

if (index > (MAX_LFS_FILESIZE >> PAGE_SHIFT))
return -EFBIG;
if (sgp == SGP_NOHUGE || sgp == SGP_HUGE)
sgp = SGP_CACHE;
repeat:
if (sgp <= SGP_CACHE &&
((loff_t)index << PAGE_SHIFT) >= i_size_read(inode)) {
@ -1785,25 +1809,31 @@ repeat:
return error;
}

if (page && sgp == SGP_WRITE)
mark_page_accessed(page);

/* fallocated page? */
if (page && !PageUptodate(page)) {
if (page) {
hindex = page->index;
if (sgp == SGP_WRITE)
mark_page_accessed(page);
if (PageUptodate(page))
goto out;
/* fallocated page */
if (sgp != SGP_READ)
goto clear;
unlock_page(page);
put_page(page);
page = NULL;
}
if (page || sgp == SGP_READ) {
*pagep = page;
return 0;
}

/*
* Fast cache lookup did not find it:
* bring it back from swap or allocate.
* SGP_READ: succeed on hole, with NULL page, letting caller zero.
* SGP_NOALLOC: fail on hole, with NULL page, letting caller fail.
*/
*pagep = NULL;
if (sgp == SGP_READ)
return 0;
if (sgp == SGP_NOALLOC)
return -ENOENT;

/*
* Fast cache lookup and swap lookup did not find it: allocate.
*/

if (vma && userfaultfd_missing(vma)) {
@ -1811,34 +1841,15 @@ repeat:
return 0;
}

/* shmem_symlink() */
if (mapping->a_ops != &shmem_aops)
/* Never use a huge page for shmem_symlink() */
if (S_ISLNK(inode->i_mode))
goto alloc_nohuge;
if (shmem_huge == SHMEM_HUGE_DENY || sgp_huge == SGP_NOHUGE)
if (!shmem_is_huge(vma, inode, index))
goto alloc_nohuge;
if (shmem_huge == SHMEM_HUGE_FORCE)
goto alloc_huge;
switch (sbinfo->huge) {
loff_t i_size;
pgoff_t off;
case SHMEM_HUGE_NEVER:
goto alloc_nohuge;
case SHMEM_HUGE_WITHIN_SIZE:
off = round_up(index, HPAGE_PMD_NR);
i_size = round_up(i_size_read(inode), PAGE_SIZE);
if (i_size >= HPAGE_PMD_SIZE &&
i_size >> PAGE_SHIFT >= off)
goto alloc_huge;
/* fallthrough */
case SHMEM_HUGE_ADVISE:
if (sgp_huge == SGP_HUGE)
goto alloc_huge;
/* TODO: implement fadvise() hints */
goto alloc_nohuge;
}

alloc_huge:
page = shmem_alloc_and_acct_page(gfp, inode, index, true);
huge_gfp = vma_thp_gfp_mask(vma);
huge_gfp = limit_gfp_mask(huge_gfp, gfp);
page = shmem_alloc_and_acct_page(huge_gfp, inode, index, true);
if (IS_ERR(page)) {
alloc_nohuge:
page = shmem_alloc_and_acct_page(gfp, inode,
@ -1921,14 +1932,13 @@ clear:
* it now, lest undo on failure cancel our earlier guarantee.
*/
if (sgp != SGP_WRITE && !PageUptodate(page)) {
struct page *head = compound_head(page);
int i;

for (i = 0; i < compound_nr(head); i++) {
clear_highpage(head + i);
flush_dcache_page(head + i);
for (i = 0; i < compound_nr(page); i++) {
clear_highpage(page + i);
flush_dcache_page(page + i);
}
SetPageUptodate(head);
SetPageUptodate(page);
}

/* Perhaps the file has been truncated since we checked */
@ -1944,6 +1954,7 @@ clear:
error = -EINVAL;
goto unlock;
}
out:
*pagep = page + index - hindex;
return 0;
@ -1991,7 +2002,6 @@ static vm_fault_t shmem_fault(struct vm_fault *vmf)
struct vm_area_struct *vma = vmf->vma;
struct inode *inode = file_inode(vma->vm_file);
gfp_t gfp = mapping_gfp_mask(inode->i_mapping);
enum sgp_type sgp;
int err;
vm_fault_t ret = VM_FAULT_LOCKED;

@ -2054,15 +2064,7 @@ static vm_fault_t shmem_fault(struct vm_fault *vmf)
spin_unlock(&inode->i_lock);
}

sgp = SGP_CACHE;

if ((vma->vm_flags & VM_NOHUGEPAGE) ||
test_bit(MMF_DISABLE_THP, &vma->vm_mm->flags))
sgp = SGP_NOHUGE;
else if (vma->vm_flags & VM_HUGEPAGE)
sgp = SGP_HUGE;

err = shmem_getpage_gfp(inode, vmf->pgoff, &vmf->page, sgp,
err = shmem_getpage_gfp(inode, vmf->pgoff, &vmf->page, SGP_CACHE,
gfp, vma, vmf, &ret);
if (err)
return vmf_error(err);
@ -3138,12 +3140,9 @@ static int shmem_symlink(struct inode *dir, struct dentry *dentry, const char *s

error = security_inode_init_security(inode, dir, &dentry->d_name,
shmem_initxattrs, NULL);
if (error) {
if (error != -EOPNOTSUPP) {
iput(inode);
return error;
}
error = 0;
if (error && error != -EOPNOTSUPP) {
iput(inode);
return error;
}

inode->i_size = len-1;
@ -3441,6 +3440,8 @@ static int shmem_parse_one(struct fs_context *fc, struct fs_parameter *param)
unsigned long long size;
char *rest;
int opt;
kuid_t kuid;
kgid_t kgid;

opt = fs_parse(fc, shmem_fs_parameters, param, &result);
if (opt < 0)
@ -3476,14 +3477,32 @@ static int shmem_parse_one(struct fs_context *fc, struct fs_parameter *param)
ctx->mode = result.uint_32 & 07777;
break;
case Opt_uid:
ctx->uid = make_kuid(current_user_ns(), result.uint_32);
if (!uid_valid(ctx->uid))
kuid = make_kuid(current_user_ns(), result.uint_32);
if (!uid_valid(kuid))
goto bad_value;

/*
* The requested uid must be representable in the
* filesystem's idmapping.
*/
if (!kuid_has_mapping(fc->user_ns, kuid))
goto bad_value;

ctx->uid = kuid;
break;
case Opt_gid:
ctx->gid = make_kgid(current_user_ns(), result.uint_32);
if (!gid_valid(ctx->gid))
kgid = make_kgid(current_user_ns(), result.uint_32);
if (!gid_valid(kgid))
goto bad_value;

/*
* The requested gid must be representable in the
* filesystem's idmapping.
*/
if (!kgid_has_mapping(fc->user_ns, kgid))
goto bad_value;

ctx->gid = kgid;
break;
case Opt_huge:
ctx->huge = result.uint_32;
@ -3953,7 +3972,7 @@ int __init shmem_init(void)
if (has_transparent_hugepage() && shmem_huge > SHMEM_HUGE_DENY)
SHMEM_SB(shm_mnt->mnt_sb)->huge = shmem_huge;
else
shmem_huge = 0; /* just in case it was patched */
shmem_huge = SHMEM_HUGE_NEVER; /* just in case it was patched */
#endif
return 0;
@ -4019,43 +4038,6 @@ struct kobj_attribute shmem_enabled_attr =
__ATTR(shmem_enabled, 0644, shmem_enabled_show, shmem_enabled_store);
#endif /* CONFIG_TRANSPARENT_HUGEPAGE && CONFIG_SYSFS */

#ifdef CONFIG_TRANSPARENT_HUGEPAGE
bool shmem_huge_enabled(struct vm_area_struct *vma)
{
struct inode *inode = file_inode(vma->vm_file);
struct shmem_sb_info *sbinfo = SHMEM_SB(inode->i_sb);
loff_t i_size;
pgoff_t off;

if ((vma->vm_flags & VM_NOHUGEPAGE) ||
test_bit(MMF_DISABLE_THP, &vma->vm_mm->flags))
return false;
if (shmem_huge == SHMEM_HUGE_FORCE)
return true;
if (shmem_huge == SHMEM_HUGE_DENY)
return false;
switch (sbinfo->huge) {
case SHMEM_HUGE_NEVER:
return false;
case SHMEM_HUGE_ALWAYS:
return true;
case SHMEM_HUGE_WITHIN_SIZE:
off = round_up(vma->vm_pgoff, HPAGE_PMD_NR);
i_size = round_up(i_size_read(inode), PAGE_SIZE);
if (i_size >= HPAGE_PMD_SIZE &&
i_size >> PAGE_SHIFT >= off)
return true;
/* fall through */
case SHMEM_HUGE_ADVISE:
/* TODO: implement fadvise() hints */
return (vma->vm_flags & VM_HUGEPAGE);
default:
VM_BUG_ON(1);
return false;
}
}
#endif /* CONFIG_TRANSPARENT_HUGEPAGE */

#else /* !CONFIG_SHMEM */

/*
@ -748,21 +748,23 @@ struct kmem_cache *kmalloc_slab(size_t size, gfp_t flags)

size_t kmalloc_size_roundup(size_t size)
{
struct kmem_cache *c;
if (size && size <= KMALLOC_MAX_CACHE_SIZE) {
/*
* The flags don't matter since size_index is common to all.
* Neither does the caller for just getting ->object_size.
*/
return kmalloc_slab(size, GFP_KERNEL)->object_size;
}

/* Short-circuit the 0 size case. */
if (unlikely(size == 0))
return 0;
/* Short-circuit saturated "too-large" case. */
if (unlikely(size == SIZE_MAX))
return SIZE_MAX;
/* Above the smaller buckets, size is a multiple of page size. */
if (size > KMALLOC_MAX_CACHE_SIZE)
if (size && size <= KMALLOC_MAX_SIZE)
return PAGE_SIZE << get_order(size);

/* The flags don't matter since size_index is common to all. */
c = kmalloc_slab(size, GFP_KERNEL);
return c ? c->object_size : 0;
/*
* Return 'size' for 0 - kmalloc() returns ZERO_SIZE_PTR
* and very large size - kmalloc() may fail.
*/
return size;
}
EXPORT_SYMBOL(kmalloc_size_roundup);

@ -969,6 +969,10 @@ void __pagevec_lru_add(struct pagevec *pvec)
* ascending indexes. There may be holes in the indices due to
* not-present entries.
*
* Only one subpage of a Transparent Huge Page is returned in one call:
* allowing truncate_inode_pages_range() to evict the whole THP without
* cycling through a pagevec of extra references.
*
* pagevec_lookup_entries() returns the number of entries which were
* found.
*/
@ -21,6 +21,7 @@
#include <linux/vmalloc.h>
#include <linux/swap_slots.h>
#include <linux/huge_mm.h>
#include <linux/shmem_fs.h>
#include "internal.h"

#include <asm/pgtable.h>
@ -88,11 +89,9 @@ void *get_shadow_from_swap_cache(swp_entry_t entry)
pgoff_t idx = swp_offset(entry);
struct page *page;

page = find_get_entry(address_space, idx);
page = xa_load(&address_space->i_pages, idx);
if (xa_is_value(page))
return page;
if (page)
put_page(page);
return NULL;
}

@ -392,6 +391,39 @@ struct page *lookup_swap_cache(swp_entry_t entry, struct vm_area_struct *vma,
return page;
}

/**
* find_get_incore_page - Find and get a page from the page or swap caches.
* @mapping: The address_space to search.
* @index: The page cache index.
*
* This differs from find_get_page() in that it will also look for the
* page in the swap cache.
*
* Return: The found page or %NULL.
*/
struct page *find_get_incore_page(struct address_space *mapping, pgoff_t index)
{
swp_entry_t swp;
struct swap_info_struct *si;
struct page *page = find_get_entry(mapping, index);

if (!page)
return page;
if (!xa_is_value(page))
return find_subpage(page, index);
if (!shmem_mapping(mapping))
return NULL;

swp = radix_to_swp_entry(page);
/* Prevent swapoff from happening to us */
si = get_swap_device(swp);
if (!si)
return NULL;
page = find_get_page(swap_address_space(swp), swp_offset(swp));
put_swap_device(si);
return page;
}

struct page *__read_swap_cache_async(swp_entry_t entry, gfp_t gfp_mask,
struct vm_area_struct *vma, unsigned long addr,
bool *new_page_allocated)
mm/vmalloc.c
@ -437,9 +437,9 @@ unsigned long vmalloc_nr_pages(void)
return atomic_long_read(&nr_vmalloc_pages);
}

static struct vmap_area *__find_vmap_area(unsigned long addr)
static struct vmap_area *__find_vmap_area(unsigned long addr, struct rb_root *root)
{
struct rb_node *n = vmap_area_root.rb_node;
struct rb_node *n = root->rb_node;

while (n) {
struct vmap_area *va;
@ -1403,7 +1403,7 @@ static struct vmap_area *find_vmap_area(unsigned long addr)
struct vmap_area *va;

spin_lock(&vmap_area_lock);
va = __find_vmap_area(addr);
va = __find_vmap_area(addr, &vmap_area_root);
spin_unlock(&vmap_area_lock);

return va;
@ -2135,7 +2135,7 @@ struct vm_struct *remove_vm_area(const void *addr)
might_sleep();

spin_lock(&vmap_area_lock);
va = __find_vmap_area((unsigned long)addr);
va = __find_vmap_area((unsigned long)addr, &vmap_area_root);
if (va && va->vm) {
struct vm_struct *vm = va->vm;

@ -2397,10 +2397,16 @@ struct vmap_pfn_data {
static int vmap_pfn_apply(pte_t *pte, unsigned long addr, void *private)
{
struct vmap_pfn_data *data = private;
unsigned long pfn = data->pfns[data->idx];
pte_t ptent;

if (WARN_ON_ONCE(pfn_valid(data->pfns[data->idx])))
if (WARN_ON_ONCE(pfn_valid(pfn)))
return -EINVAL;
*pte = pte_mkspecial(pfn_pte(data->pfns[data->idx++], data->prot));

ptent = pte_mkspecial(pfn_pte(pfn, data->prot));
set_pte_at(&init_mm, addr, pte, ptent);

data->idx++;
return 0;
}

@ -2427,6 +2433,10 @@ void *vmap_pfn(unsigned long *pfns, unsigned int count, pgprot_t prot)
free_vm_area(area);
return NULL;
}

flush_cache_vmap((unsigned long)area->addr,
(unsigned long)area->addr + count * PAGE_SIZE);

return area->addr;
}
EXPORT_SYMBOL_GPL(vmap_pfn);
@ -3533,14 +3543,32 @@ void pcpu_free_vm_areas(struct vm_struct **vms, int nr_vms)
#ifdef CONFIG_PRINTK
bool vmalloc_dump_obj(void *object)
{
struct vm_struct *vm;
void *objp = (void *)PAGE_ALIGN((unsigned long)object);
const void *caller;
struct vm_struct *vm;
struct vmap_area *va;
unsigned long addr;
unsigned int nr_pages;

vm = find_vm_area(objp);
if (!vm)
if (!spin_trylock(&vmap_area_lock))
return false;
va = __find_vmap_area((unsigned long)objp, &vmap_area_root);
if (!va) {
spin_unlock(&vmap_area_lock);
return false;
}

vm = va->vm;
if (!vm) {
spin_unlock(&vmap_area_lock);
return false;
}
addr = (unsigned long)vm->addr;
caller = vm->caller;
nr_pages = vm->nr_pages;
spin_unlock(&vmap_area_lock);
pr_cont(" %u-page vmalloc region starting at %#lx allocated at %pS\n",
vm->nr_pages, (unsigned long)vm->addr, vm->caller);
nr_pages, addr, caller);
return true;
}
#endif
mm/zswap.c
@ -43,6 +43,8 @@
#include <linux/pagemap.h>
#include <linux/workqueue.h>

#include "internal.h"

/*********************************
* statistics
**********************************/
@ -533,9 +535,19 @@ static void shrink_worker(struct work_struct *w)
{
struct zswap_pool *pool = container_of(w, typeof(*pool),
shrink_work);
int ret, failures = 0;

if (zpool_shrink(pool->zpool, 1, NULL))
zswap_reject_reclaim_fail++;
do {
ret = zpool_shrink(pool->zpool, 1, NULL);
if (ret) {
zswap_reject_reclaim_fail++;
if (ret != -EAGAIN)
break;
if (++failures == MAX_RECLAIM_RETRIES)
break;
}
cond_resched();
} while (!zswap_can_accept());
zswap_pool_put(pool);
}
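The new shrink_worker() loop above keeps reclaiming until the pool can accept pages again, stops immediately on a hard error, and caps transient -EAGAIN failures at MAX_RECLAIM_RETRIES. A self-contained user-space sketch of that bounded-retry shape, using hypothetical stand-ins (try_reclaim_one, can_accept, MAX_RETRIES) rather than the zswap internals:

/* Build with: cc -o retry retry.c */
#include <errno.h>
#include <stdbool.h>
#include <stdio.h>

#define MAX_RETRIES 8			/* stand-in for MAX_RECLAIM_RETRIES */

static int calls;
static int budget = 3;			/* pretend three successful passes are needed */

static int try_reclaim_one(void)	/* stand-in for zpool_shrink() */
{
	if (++calls % 3 == 0)
		return -EAGAIN;		/* transient failure every third call */
	if (budget == 0)
		return -EINVAL;		/* hard error once nothing is left */
	budget--;
	return 0;
}

static bool can_accept(void)		/* stand-in for zswap_can_accept() */
{
	return budget == 0;
}

int main(void)
{
	int failures = 0;

	do {
		int ret = try_reclaim_one();

		if (ret) {
			if (ret != -EAGAIN)
				break;		/* hard error: give up now */
			if (++failures == MAX_RETRIES)
				break;		/* bound the transient retries */
		}
	} while (!can_accept());

	printf("stopped with budget=%d, transient failures=%d\n",
	       budget, failures);
	return 0;
}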
@ -1072,9 +1072,9 @@ void hci_uuids_clear(struct hci_dev *hdev)

void hci_link_keys_clear(struct hci_dev *hdev)
{
struct link_key *key;
struct link_key *key, *tmp;

list_for_each_entry(key, &hdev->link_keys, list) {
list_for_each_entry_safe(key, tmp, &hdev->link_keys, list) {
list_del_rcu(&key->list);
kfree_rcu(key, rcu);
}
@ -1082,9 +1082,9 @@ void hci_link_keys_clear(struct hci_dev *hdev)

void hci_smp_ltks_clear(struct hci_dev *hdev)
{
struct smp_ltk *k;
struct smp_ltk *k, *tmp;

list_for_each_entry(k, &hdev->long_term_keys, list) {
list_for_each_entry_safe(k, tmp, &hdev->long_term_keys, list) {
list_del_rcu(&k->list);
kfree_rcu(k, rcu);
}
@ -1092,9 +1092,9 @@ void hci_smp_ltks_clear(struct hci_dev *hdev)

void hci_smp_irks_clear(struct hci_dev *hdev)
{
struct smp_irk *k;
struct smp_irk *k, *tmp;

list_for_each_entry(k, &hdev->identity_resolving_keys, list) {
list_for_each_entry_safe(k, tmp, &hdev->identity_resolving_keys, list) {
list_del_rcu(&k->list);
kfree_rcu(k, rcu);
}
@ -1102,9 +1102,9 @@ void hci_smp_irks_clear(struct hci_dev *hdev)

void hci_blocked_keys_clear(struct hci_dev *hdev)
{
struct blocked_key *b;
struct blocked_key *b, *tmp;

list_for_each_entry(b, &hdev->blocked_keys, list) {
list_for_each_entry_safe(b, tmp, &hdev->blocked_keys, list) {
list_del_rcu(&b->list);
kfree_rcu(b, rcu);
}
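The hci_*_clear() hunks above switch to list_for_each_entry_safe() because each pass unlinks and frees the entry it is standing on. A minimal sketch of why the _safe variant is required, assuming a hypothetical demo_key type rather than the Bluetooth key structures:

#include <linux/list.h>
#include <linux/slab.h>

struct demo_key {
	struct list_head list;
};

static void demo_keys_clear(struct list_head *keys)
{
	struct demo_key *k, *tmp;

	/*
	 * Plain list_for_each_entry() would read k->list.next after k has
	 * been unlinked and freed; the _safe variant caches the next entry
	 * in 'tmp' before the body runs, so deleting k is fine.
	 */
	list_for_each_entry_safe(k, tmp, keys, list) {
		list_del(&k->list);
		kfree(k);
	}
}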
@ -4310,6 +4310,12 @@ static int l2cap_connect_create_rsp(struct l2cap_conn *conn,
}
}

chan = l2cap_chan_hold_unless_zero(chan);
if (!chan) {
err = -EBADSLT;
goto unlock;
}

err = 0;

l2cap_chan_lock(chan);
@ -4339,6 +4345,7 @@ static int l2cap_connect_create_rsp(struct l2cap_conn *conn,
}

l2cap_chan_unlock(chan);
l2cap_chan_put(chan);

unlock:
mutex_unlock(&conn->chan_lock);
@ -4671,7 +4678,6 @@ static inline int l2cap_disconnect_rsp(struct l2cap_conn *conn,

chan = l2cap_get_chan_by_scid(conn, scid);
if (!chan) {
mutex_unlock(&conn->chan_lock);
return 0;
}
@ -898,13 +898,16 @@ void __cfg80211_connect_result(struct net_device *dev,
if (!wdev->u.client.ssid_len) {
rcu_read_lock();
for_each_valid_link(cr, link) {
u32 ssid_len;

ssid = ieee80211_bss_get_elem(cr->links[link].bss,
WLAN_EID_SSID);

if (!ssid || !ssid->datalen)
continue;

memcpy(wdev->u.client.ssid, ssid->data, ssid->datalen);
ssid_len = min(ssid->datalen, IEEE80211_MAX_SSID_LEN);
memcpy(wdev->u.client.ssid, ssid->data, ssid_len);
wdev->u.client.ssid_len = ssid->datalen;
break;
}