diff --git a/COPYING-6.12.0-55.25.1.el10 b/COPYING-6.12.0-55.27.1.el10 similarity index 100% rename from COPYING-6.12.0-55.25.1.el10 rename to COPYING-6.12.0-55.27.1.el10 diff --git a/Documentation/admin-guide/kernel-parameters.txt b/Documentation/admin-guide/kernel-parameters.txt index e2ee121064..543cd80f3c 100644 --- a/Documentation/admin-guide/kernel-parameters.txt +++ b/Documentation/admin-guide/kernel-parameters.txt @@ -4665,7 +4665,7 @@ '1' – force enabled 'x' – unchanged For example, - pci=config_acs=10x + pci=config_acs=10x@pci:0:0 would configure all devices that support ACS to enable P2P Request Redirect, disable Translation Blocking, and leave Source diff --git a/Makefile.rhelver b/Makefile.rhelver index 86c64de273..fdf5c423f9 100644 --- a/Makefile.rhelver +++ b/Makefile.rhelver @@ -12,7 +12,7 @@ RHEL_MINOR = 0 # # Use this spot to avoid future merge conflicts. # Do not trim this comment. -RHEL_RELEASE = 55.25.1 +RHEL_RELEASE = 55.27.1 # # RHEL_REBASE_NUM diff --git a/arch/x86/kernel/cpu/amd.c b/arch/x86/kernel/cpu/amd.c index 823f44f7bc..d8408aafee 100644 --- a/arch/x86/kernel/cpu/amd.c +++ b/arch/x86/kernel/cpu/amd.c @@ -798,6 +798,7 @@ static void init_amd_bd(struct cpuinfo_x86 *c) static const struct x86_cpu_desc erratum_1386_microcode[] = { AMD_CPU_DESC(0x17, 0x1, 0x2, 0x0800126e), AMD_CPU_DESC(0x17, 0x31, 0x0, 0x08301052), + {}, }; static void fix_erratum_1386(struct cpuinfo_x86 *c) diff --git a/crypto/algif_hash.c b/crypto/algif_hash.c index 5498a87249..e3f1a48527 100644 --- a/crypto/algif_hash.c +++ b/crypto/algif_hash.c @@ -265,10 +265,6 @@ static int hash_accept(struct socket *sock, struct socket *newsock, goto out_free_state; err = crypto_ahash_import(&ctx2->req, state); - if (err) { - sock_orphan(sk2); - sock_put(sk2); - } out_free_state: kfree_sensitive(state); diff --git a/drivers/net/ethernet/broadcom/bnxt/bnxt.c b/drivers/net/ethernet/broadcom/bnxt/bnxt.c index 13012f0997..1b87b60019 100644 --- a/drivers/net/ethernet/broadcom/bnxt/bnxt.c +++ b/drivers/net/ethernet/broadcom/bnxt/bnxt.c @@ -11267,6 +11267,26 @@ hwrm_phy_qcaps_exit: return rc; } +static void bnxt_hwrm_mac_qcaps(struct bnxt *bp) +{ + struct hwrm_port_mac_qcaps_output *resp; + struct hwrm_port_mac_qcaps_input *req; + int rc; + + if (bp->hwrm_spec_code < 0x10a03) + return; + + rc = hwrm_req_init(bp, req, HWRM_PORT_MAC_QCAPS); + if (rc) + return; + + resp = hwrm_req_hold(bp, req); + rc = hwrm_req_send_silent(bp, req); + if (!rc) + bp->mac_flags = resp->flags; + hwrm_req_drop(bp, req); +} + static bool bnxt_support_dropped(u16 advertising, u16 supported) { u16 diff = advertising ^ supported; @@ -15389,6 +15409,10 @@ static int bnxt_probe_phy(struct bnxt *bp, bool fw_dflt) bp->dev->priv_flags |= IFF_SUPP_NOFCS; else bp->dev->priv_flags &= ~IFF_SUPP_NOFCS; + + bp->mac_flags = 0; + bnxt_hwrm_mac_qcaps(bp); + if (!fw_dflt) return 0; diff --git a/drivers/net/ethernet/broadcom/bnxt/bnxt.h b/drivers/net/ethernet/broadcom/bnxt/bnxt.h index 9e05704d94..3268adeb19 100644 --- a/drivers/net/ethernet/broadcom/bnxt/bnxt.h +++ b/drivers/net/ethernet/broadcom/bnxt/bnxt.h @@ -2614,6 +2614,11 @@ struct bnxt { #define BNXT_PHY_FL_BANK_SEL (PORT_PHY_QCAPS_RESP_FLAGS2_BANK_ADDR_SUPPORTED << 8) #define BNXT_PHY_FL_SPEEDS2 (PORT_PHY_QCAPS_RESP_FLAGS2_SPEEDS2_SUPPORTED << 8) + /* copied from flags in hwrm_port_mac_qcaps_output */ + u8 mac_flags; +#define BNXT_MAC_FL_NO_MAC_LPBK \ + PORT_MAC_QCAPS_RESP_FLAGS_LOCAL_LPBK_NOT_SUPPORTED + u8 num_tests; struct bnxt_test_info *test_info; diff --git a/drivers/net/ethernet/broadcom/bnxt/bnxt_ethtool.c b/drivers/net/ethernet/broadcom/bnxt/bnxt_ethtool.c index 54a7ff5054..b5ab958e69 100644 --- a/drivers/net/ethernet/broadcom/bnxt/bnxt_ethtool.c +++ b/drivers/net/ethernet/broadcom/bnxt/bnxt_ethtool.c @@ -4894,35 +4894,44 @@ static void bnxt_self_test(struct net_device *dev, struct ethtool_test *etest, bnxt_close_nic(bp, true, false); bnxt_run_fw_tests(bp, test_mask, &test_results); - buf[BNXT_MACLPBK_TEST_IDX] = 1; - bnxt_hwrm_mac_loopback(bp, true); - msleep(250); rc = bnxt_half_open_nic(bp); if (rc) { - bnxt_hwrm_mac_loopback(bp, false); etest->flags |= ETH_TEST_FL_FAILED; return; } + buf[BNXT_MACLPBK_TEST_IDX] = 1; + if (bp->mac_flags & BNXT_MAC_FL_NO_MAC_LPBK) + goto skip_mac_loopback; + + bnxt_hwrm_mac_loopback(bp, true); + msleep(250); if (bnxt_run_loopback(bp)) etest->flags |= ETH_TEST_FL_FAILED; else buf[BNXT_MACLPBK_TEST_IDX] = 0; bnxt_hwrm_mac_loopback(bp, false); +skip_mac_loopback: + buf[BNXT_PHYLPBK_TEST_IDX] = 1; + if (bp->phy_flags & BNXT_PHY_FL_NO_PHY_LPBK) + goto skip_phy_loopback; + bnxt_hwrm_phy_loopback(bp, true, false); msleep(1000); - if (bnxt_run_loopback(bp)) { - buf[BNXT_PHYLPBK_TEST_IDX] = 1; + if (bnxt_run_loopback(bp)) etest->flags |= ETH_TEST_FL_FAILED; - } + else + buf[BNXT_PHYLPBK_TEST_IDX] = 0; +skip_phy_loopback: + buf[BNXT_EXTLPBK_TEST_IDX] = 1; if (do_ext_lpbk) { etest->flags |= ETH_TEST_FL_EXTERNAL_LB_DONE; bnxt_hwrm_phy_loopback(bp, true, true); msleep(1000); - if (bnxt_run_loopback(bp)) { - buf[BNXT_EXTLPBK_TEST_IDX] = 1; + if (bnxt_run_loopback(bp)) etest->flags |= ETH_TEST_FL_FAILED; - } + else + buf[BNXT_EXTLPBK_TEST_IDX] = 0; } bnxt_hwrm_phy_loopback(bp, false, false); bnxt_half_close_nic(bp); diff --git a/drivers/net/wireless/ath/ath12k/dp_rx.c b/drivers/net/wireless/ath/ath12k/dp_rx.c index 4cbba96121..09a08fa94d 100644 --- a/drivers/net/wireless/ath/ath12k/dp_rx.c +++ b/drivers/net/wireless/ath/ath12k/dp_rx.c @@ -1768,6 +1768,7 @@ static int ath12k_dp_rx_msdu_coalesce(struct ath12k *ar, struct hal_rx_desc *ldesc; int space_extra, rem_len, buf_len; u32 hal_rx_desc_sz = ar->ab->hal.hal_desc_sz; + bool is_continuation; /* As the msdu is spread across multiple rx buffers, * find the offset to the start of msdu for computing @@ -1816,7 +1817,8 @@ static int ath12k_dp_rx_msdu_coalesce(struct ath12k *ar, rem_len = msdu_len - buf_first_len; while ((skb = __skb_dequeue(msdu_list)) != NULL && rem_len > 0) { rxcb = ATH12K_SKB_RXCB(skb); - if (rxcb->is_continuation) + is_continuation = rxcb->is_continuation; + if (is_continuation) buf_len = DP_RX_BUFFER_SIZE - hal_rx_desc_sz; else buf_len = rem_len; @@ -1834,7 +1836,7 @@ static int ath12k_dp_rx_msdu_coalesce(struct ath12k *ar, dev_kfree_skb_any(skb); rem_len -= buf_len; - if (!rxcb->is_continuation) + if (!is_continuation) break; } diff --git a/drivers/net/wireless/realtek/rtw88/coex.c b/drivers/net/wireless/realtek/rtw88/coex.c index a99776af56..c476e65c4d 100644 --- a/drivers/net/wireless/realtek/rtw88/coex.c +++ b/drivers/net/wireless/realtek/rtw88/coex.c @@ -309,7 +309,7 @@ static void rtw_coex_tdma_timer_base(struct rtw_dev *rtwdev, u8 type) { struct rtw_coex *coex = &rtwdev->coex; struct rtw_coex_stat *coex_stat = &coex->stat; - u8 para[2] = {0}; + u8 para[6] = {}; u8 times; u16 tbtt_interval = coex_stat->wl_beacon_interval; diff --git a/drivers/pci/pci.c b/drivers/pci/pci.c index 225a6cd2e9..da914f6ce9 100644 --- a/drivers/pci/pci.c +++ b/drivers/pci/pci.c @@ -955,8 +955,10 @@ struct pci_acs { }; static void __pci_config_acs(struct pci_dev *dev, struct pci_acs *caps, - const char *p, u16 mask, u16 flags) + const char *p, const u16 acs_mask, const u16 acs_flags) { + u16 flags = acs_flags; + u16 mask = acs_mask; char *delimit; int ret = 0; @@ -964,7 +966,7 @@ static void __pci_config_acs(struct pci_dev *dev, struct pci_acs *caps, return; while (*p) { - if (!mask) { + if (!acs_mask) { /* Check for ACS flags */ delimit = strstr(p, "@"); if (delimit) { @@ -972,6 +974,8 @@ static void __pci_config_acs(struct pci_dev *dev, struct pci_acs *caps, u32 shift = 0; end = delimit - p - 1; + mask = 0; + flags = 0; while (end > -1) { if (*(p + end) == '0') { @@ -1028,10 +1032,14 @@ static void __pci_config_acs(struct pci_dev *dev, struct pci_acs *caps, pci_dbg(dev, "ACS mask = %#06x\n", mask); pci_dbg(dev, "ACS flags = %#06x\n", flags); + pci_dbg(dev, "ACS control = %#06x\n", caps->ctrl); + pci_dbg(dev, "ACS fw_ctrl = %#06x\n", caps->fw_ctrl); - /* If mask is 0 then we copy the bit from the firmware setting. */ - caps->ctrl = (caps->ctrl & ~mask) | (caps->fw_ctrl & mask); - caps->ctrl |= flags; + /* + * For mask bits that are 0, copy them from the firmware setting + * and apply flags for all the mask bits that are 1. + */ + caps->ctrl = (caps->fw_ctrl & ~mask) | (flags & mask); pci_info(dev, "Configured ACS to %#06x\n", caps->ctrl); } diff --git a/drivers/pci/pwrctl/core.c b/drivers/pci/pwrctl/core.c index 01d913b603..316c1bc05d 100644 --- a/drivers/pci/pwrctl/core.c +++ b/drivers/pci/pwrctl/core.c @@ -110,6 +110,8 @@ EXPORT_SYMBOL_GPL(pci_pwrctl_device_set_ready); */ void pci_pwrctl_device_unset_ready(struct pci_pwrctl *pwrctl) { + cancel_work_sync(&pwrctl->work); + /* * We don't have to delete the link here. Typically, this function * is only called when the power control device is being detached. If diff --git a/drivers/pci/setup-bus.c b/drivers/pci/setup-bus.c index 23082bc0ca..a6e653a4f5 100644 --- a/drivers/pci/setup-bus.c +++ b/drivers/pci/setup-bus.c @@ -2105,8 +2105,7 @@ pci_root_bus_distribute_available_resources(struct pci_bus *bus, * in case of root bus. */ if (bridge && pci_bridge_resources_not_assigned(dev)) - pci_bridge_distribute_available_resources(bridge, - add_list); + pci_bridge_distribute_available_resources(dev, add_list); else pci_root_bus_distribute_available_resources(b, add_list); } diff --git a/fs/smb/client/connect.c b/fs/smb/client/connect.c index 880d7cf8b7..3910bbf16b 100644 --- a/fs/smb/client/connect.c +++ b/fs/smb/client/connect.c @@ -972,13 +972,9 @@ clean_demultiplex_info(struct TCP_Server_Info *server) msleep(125); if (cifs_rdma_enabled(server)) smbd_destroy(server); - if (server->ssocket) { sock_release(server->ssocket); server->ssocket = NULL; - - /* Release netns reference for the socket. */ - put_net(cifs_net_ns(server)); } if (!list_empty(&server->pending_mid_q)) { @@ -1026,7 +1022,6 @@ clean_demultiplex_info(struct TCP_Server_Info *server) */ } - /* Release netns reference for this server. */ put_net(cifs_net_ns(server)); kfree(server->leaf_fullpath); kfree(server->hostname); @@ -1672,8 +1667,6 @@ cifs_get_tcp_session(struct smb3_fs_context *ctx, tcp_ses->ops = ctx->ops; tcp_ses->vals = ctx->vals; - - /* Grab netns reference for this server. */ cifs_set_net_ns(tcp_ses, get_net(current->nsproxy->net_ns)); tcp_ses->conn_id = atomic_inc_return(&tcpSesNextId); @@ -1802,7 +1795,6 @@ smbd_connected: out_err_crypto_release: cifs_crypto_secmech_release(tcp_ses); - /* Release netns reference for this server. */ put_net(cifs_net_ns(tcp_ses)); out_err: @@ -1811,10 +1803,8 @@ out_err: cifs_put_tcp_session(tcp_ses->primary_server, false); kfree(tcp_ses->hostname); kfree(tcp_ses->leaf_fullpath); - if (tcp_ses->ssocket) { + if (tcp_ses->ssocket) sock_release(tcp_ses->ssocket); - put_net(cifs_net_ns(tcp_ses)); - } kfree(tcp_ses); } return ERR_PTR(rc); @@ -3101,20 +3091,20 @@ generic_ip_connect(struct TCP_Server_Info *server) socket = server->ssocket; } else { struct net *net = cifs_net_ns(server); + struct sock *sk; - rc = sock_create_kern(net, sfamily, SOCK_STREAM, IPPROTO_TCP, &server->ssocket); + rc = __sock_create(net, sfamily, SOCK_STREAM, + IPPROTO_TCP, &server->ssocket, 1); if (rc < 0) { cifs_server_dbg(VFS, "Error %d creating socket\n", rc); return rc; } - /* - * Grab netns reference for the socket. - * - * It'll be released here, on error, or in clean_demultiplex_info() upon server - * teardown. - */ - get_net(net); + sk = server->ssocket->sk; + __netns_tracker_free(net, &sk->ns_tracker, false); + sk->sk_net_refcnt = 1; + get_net_track(net, &sk->ns_tracker, GFP_KERNEL); + sock_inuse_add(net, 1); /* BB other socket options to set KEEPALIVE, NODELAY? */ cifs_dbg(FYI, "Socket created\n"); @@ -3128,10 +3118,8 @@ generic_ip_connect(struct TCP_Server_Info *server) } rc = bind_socket(server); - if (rc < 0) { - put_net(cifs_net_ns(server)); + if (rc < 0) return rc; - } /* * Eventually check for other socket options to change from @@ -3168,7 +3156,6 @@ generic_ip_connect(struct TCP_Server_Info *server) if (rc < 0) { cifs_dbg(FYI, "Error %d connecting to server\n", rc); trace_smb3_connect_err(server->hostname, server->conn_id, &server->dstaddr, rc); - put_net(cifs_net_ns(server)); sock_release(socket); server->ssocket = NULL; return rc; @@ -3177,9 +3164,6 @@ generic_ip_connect(struct TCP_Server_Info *server) if (sport == htons(RFC1001_PORT)) rc = ip_rfc1001_connect(server); - if (rc < 0) - put_net(cifs_net_ns(server)); - return rc; } diff --git a/include/linux/hugetlb.h b/include/linux/hugetlb.h index e4697539b6..131459f75e 100644 --- a/include/linux/hugetlb.h +++ b/include/linux/hugetlb.h @@ -272,6 +272,8 @@ long hugetlb_change_protection(struct vm_area_struct *vma, bool is_hugetlb_entry_migration(pte_t pte); bool is_hugetlb_entry_hwpoisoned(pte_t pte); void hugetlb_unshare_all_pmds(struct vm_area_struct *vma); +void fixup_hugetlb_reservations(struct vm_area_struct *vma); +void hugetlb_split(struct vm_area_struct *vma, unsigned long addr); #else /* !CONFIG_HUGETLB_PAGE */ @@ -465,6 +467,12 @@ static inline vm_fault_t hugetlb_fault(struct mm_struct *mm, static inline void hugetlb_unshare_all_pmds(struct vm_area_struct *vma) { } +static inline void fixup_hugetlb_reservations(struct vm_area_struct *vma) +{ +} + +static inline void hugetlb_split(struct vm_area_struct *vma, unsigned long addr) {} + #endif /* !CONFIG_HUGETLB_PAGE */ #ifndef pgd_write diff --git a/mm/hugetlb.c b/mm/hugetlb.c index fefddd7291..7103e9dccd 100644 --- a/mm/hugetlb.c +++ b/mm/hugetlb.c @@ -87,7 +87,7 @@ static void hugetlb_vma_lock_free(struct vm_area_struct *vma); static void hugetlb_vma_lock_alloc(struct vm_area_struct *vma); static void __hugetlb_vma_unlock_write_free(struct vm_area_struct *vma); static void hugetlb_unshare_pmds(struct vm_area_struct *vma, - unsigned long start, unsigned long end); + unsigned long start, unsigned long end, bool take_locks); static struct resv_map *vma_resv_map(struct vm_area_struct *vma); static void hugetlb_free_folio(struct folio *folio) @@ -1218,7 +1218,7 @@ void hugetlb_dup_vma_private(struct vm_area_struct *vma) /* * Reset and decrement one ref on hugepage private reservation. * Called with mm->mmap_lock writer semaphore held. - * This function should be only used by move_vma() and operate on + * This function should be only used by mremap and operate on * same sized vma. It should never come here with last ref on the * reservation. */ @@ -5093,26 +5093,40 @@ static int hugetlb_vm_op_split(struct vm_area_struct *vma, unsigned long addr) { if (addr & ~(huge_page_mask(hstate_vma(vma)))) return -EINVAL; + return 0; +} +void hugetlb_split(struct vm_area_struct *vma, unsigned long addr) +{ /* * PMD sharing is only possible for PUD_SIZE-aligned address ranges * in HugeTLB VMAs. If we will lose PUD_SIZE alignment due to this * split, unshare PMDs in the PUD_SIZE interval surrounding addr now. + * This function is called in the middle of a VMA split operation, with + * MM, VMA and rmap all write-locked to prevent concurrent page table + * walks (except hardware and gup_fast()). */ + vma_assert_write_locked(vma); + i_mmap_assert_write_locked(vma->vm_file->f_mapping); + if (addr & ~PUD_MASK) { - /* - * hugetlb_vm_op_split is called right before we attempt to - * split the VMA. We will need to unshare PMDs in the old and - * new VMAs, so let's unshare before we split. - */ unsigned long floor = addr & PUD_MASK; unsigned long ceil = floor + PUD_SIZE; - if (floor >= vma->vm_start && ceil <= vma->vm_end) - hugetlb_unshare_pmds(vma, floor, ceil); + if (floor >= vma->vm_start && ceil <= vma->vm_end) { + /* + * Locking: + * Use take_locks=false here. + * The file rmap lock is already held. + * The hugetlb VMA lock can't be taken when we already + * hold the file rmap lock, and we don't need it because + * its purpose is to synchronize against concurrent page + * table walks, which are not possible thanks to the + * locks held by our caller. + */ + hugetlb_unshare_pmds(vma, floor, ceil, /* take_locks = */ false); + } } - - return 0; } static unsigned long hugetlb_vm_op_pagesize(struct vm_area_struct *vma) @@ -7265,6 +7279,13 @@ int huge_pmd_unshare(struct mm_struct *mm, struct vm_area_struct *vma, return 0; pud_clear(pud); + /* + * Once our caller drops the rmap lock, some other process might be + * using this page table as a normal, non-hugetlb page table. + * Wait for pending gup_fast() in other threads to finish before letting + * that happen. + */ + tlb_remove_table_sync_one(); put_page(virt_to_page(ptep)); mm_dec_nr_pmds(mm); return 1; @@ -7497,9 +7518,16 @@ void move_hugetlb_state(struct folio *old_folio, struct folio *new_folio, int re } } +/* + * If @take_locks is false, the caller must ensure that no concurrent page table + * access can happen (except for gup_fast() and hardware page walks). + * If @take_locks is true, we take the hugetlb VMA lock (to lock out things like + * concurrent page fault handling) and the file rmap lock. + */ static void hugetlb_unshare_pmds(struct vm_area_struct *vma, unsigned long start, - unsigned long end) + unsigned long end, + bool take_locks) { struct hstate *h = hstate_vma(vma); unsigned long sz = huge_page_size(h); @@ -7523,8 +7551,12 @@ static void hugetlb_unshare_pmds(struct vm_area_struct *vma, mmu_notifier_range_init(&range, MMU_NOTIFY_CLEAR, 0, mm, start, end); mmu_notifier_invalidate_range_start(&range); - hugetlb_vma_lock_write(vma); - i_mmap_lock_write(vma->vm_file->f_mapping); + if (take_locks) { + hugetlb_vma_lock_write(vma); + i_mmap_lock_write(vma->vm_file->f_mapping); + } else { + i_mmap_assert_write_locked(vma->vm_file->f_mapping); + } for (address = start; address < end; address += PUD_SIZE) { ptep = hugetlb_walk(vma, address, sz); if (!ptep) @@ -7534,8 +7566,10 @@ static void hugetlb_unshare_pmds(struct vm_area_struct *vma, spin_unlock(ptl); } flush_hugetlb_tlb_range(vma, start, end); - i_mmap_unlock_write(vma->vm_file->f_mapping); - hugetlb_vma_unlock_write(vma); + if (take_locks) { + i_mmap_unlock_write(vma->vm_file->f_mapping); + hugetlb_vma_unlock_write(vma); + } /* * No need to call mmu_notifier_arch_invalidate_secondary_tlbs(), see * Documentation/mm/mmu_notifier.rst. @@ -7550,7 +7584,22 @@ static void hugetlb_unshare_pmds(struct vm_area_struct *vma, void hugetlb_unshare_all_pmds(struct vm_area_struct *vma) { hugetlb_unshare_pmds(vma, ALIGN(vma->vm_start, PUD_SIZE), - ALIGN_DOWN(vma->vm_end, PUD_SIZE)); + ALIGN_DOWN(vma->vm_end, PUD_SIZE), + /* take_locks = */ true); +} + +/* + * For hugetlb, mremap() is an odd edge case - while the VMA copying is + * performed, we permit both the old and new VMAs to reference the same + * reservation. + * + * We fix this up after the operation succeeds, or if a newly allocated VMA + * is closed as a result of a failure to allocate memory. + */ +void fixup_hugetlb_reservations(struct vm_area_struct *vma) +{ + if (is_vm_hugetlb_page(vma)) + clear_vma_resv_huge_pages(vma); } #ifdef CONFIG_CMA diff --git a/mm/mremap.c b/mm/mremap.c index dee98ff2bb..c2fce8f866 100644 --- a/mm/mremap.c +++ b/mm/mremap.c @@ -743,9 +743,7 @@ static unsigned long move_vma(struct vm_area_struct *vma, mremap_userfaultfd_prep(new_vma, uf); } - if (is_vm_hugetlb_page(vma)) { - clear_vma_resv_huge_pages(vma); - } + fixup_hugetlb_reservations(vma); /* Conceal VM_ACCOUNT so old reservation is not undone */ if (vm_flags & VM_ACCOUNT && !(flags & MREMAP_DONTUNMAP)) { diff --git a/mm/vma.c b/mm/vma.c index 7621384d64..108f383ffe 100644 --- a/mm/vma.c +++ b/mm/vma.c @@ -416,7 +416,14 @@ static int __split_vma(struct vma_iterator *vmi, struct vm_area_struct *vma, init_vma_prep(&vp, vma); vp.insert = new; vma_prepare(&vp); + + /* + * Get rid of huge pages and shared page tables straddling the split + * boundary. + */ vma_adjust_trans_huge(vma, vma->vm_start, addr, 0); + if (is_vm_hugetlb_page(vma)) + hugetlb_split(vma, addr); if (new_below) { vma->vm_start = addr; @@ -1681,6 +1688,7 @@ struct vm_area_struct *copy_vma(struct vm_area_struct **vmap, return new_vma; out_vma_link: + fixup_hugetlb_reservations(new_vma); vma_close(new_vma); if (new_vma->vm_file) diff --git a/mm/vma_internal.h b/mm/vma_internal.h index b930ab12a5..1dd119f266 100644 --- a/mm/vma_internal.h +++ b/mm/vma_internal.h @@ -17,6 +17,7 @@ #include #include #include +#include #include #include #include diff --git a/tools/testing/vma/vma_internal.h b/tools/testing/vma/vma_internal.h index c5b9da0345..5f39c5dc60 100644 --- a/tools/testing/vma/vma_internal.h +++ b/tools/testing/vma/vma_internal.h @@ -735,6 +735,8 @@ static inline void vma_adjust_trans_huge(struct vm_area_struct *vma, (void)adjust_next; } +static inline void hugetlb_split(struct vm_area_struct *, unsigned long) {} + static inline void vma_iter_free(struct vma_iterator *vmi) { mas_destroy(&vmi->mas); @@ -920,4 +922,9 @@ static inline bool signal_pending(void *) return false; } +static inline void fixup_hugetlb_reservations(struct vm_area_struct *vma) +{ + (void)vma; +} + #endif /* __MM_VMA_INTERNAL_H */