Import of kernel-6.12.0-55.27.1.el10_0

eabdullin 2025-09-05 16:58:25 +00:00
parent 051e45fafa
commit f39292704a
20 changed files with 173 additions and 72 deletions

View File

@@ -4665,7 +4665,7 @@
 			'1' force enabled
 			'x' unchanged
 		For example,
-		pci=config_acs=10x
+		pci=config_acs=10x@pci:0:0
 		would configure all devices that support
 		ACS to enable P2P Request Redirect, disable
 		Translation Blocking, and leave Source

View File

@@ -12,7 +12,7 @@ RHEL_MINOR = 0
 #
 # Use this spot to avoid future merge conflicts.
 # Do not trim this comment.
-RHEL_RELEASE = 55.25.1
+RHEL_RELEASE = 55.27.1
 #
 # RHEL_REBASE_NUM

View File

@@ -798,6 +798,7 @@ static void init_amd_bd(struct cpuinfo_x86 *c)
 static const struct x86_cpu_desc erratum_1386_microcode[] = {
     AMD_CPU_DESC(0x17, 0x1, 0x2, 0x0800126e),
     AMD_CPU_DESC(0x17, 0x31, 0x0, 0x08301052),
+    {},
 };
 static void fix_erratum_1386(struct cpuinfo_x86 *c)
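The new `{}` entry terminates the table: x86_cpu_desc arrays are walked until an all-zero sentinel, so without it the walker can read past the end of the array. A simplified sketch of such a walk (hypothetical helper, kernel context assumed; struct x86_cpu_desc is from <asm/cpu_device_id.h>):

    /* Hypothetical, simplified table walk for illustration only. */
    static const struct x86_cpu_desc *
    find_cpu_desc(const struct x86_cpu_desc *table, u8 family, u8 model)
    {
        const struct x86_cpu_desc *d;

        for (d = table; d->x86_family; d++)    /* the {} sentinel stops here */
            if (d->x86_family == family && d->x86_model == model)
                return d;
        return NULL;
    }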

View File

@@ -265,10 +265,6 @@ static int hash_accept(struct socket *sock, struct socket *newsock,
         goto out_free_state;
     err = crypto_ahash_import(&ctx2->req, state);
-    if (err) {
-        sock_orphan(sk2);
-        sock_put(sk2);
-    }
 out_free_state:
     kfree_sensitive(state);

View File

@@ -11267,6 +11267,26 @@ hwrm_phy_qcaps_exit:
     return rc;
 }
+static void bnxt_hwrm_mac_qcaps(struct bnxt *bp)
+{
+    struct hwrm_port_mac_qcaps_output *resp;
+    struct hwrm_port_mac_qcaps_input *req;
+    int rc;
+    if (bp->hwrm_spec_code < 0x10a03)
+        return;
+    rc = hwrm_req_init(bp, req, HWRM_PORT_MAC_QCAPS);
+    if (rc)
+        return;
+    resp = hwrm_req_hold(bp, req);
+    rc = hwrm_req_send_silent(bp, req);
+    if (!rc)
+        bp->mac_flags = resp->flags;
+    hwrm_req_drop(bp, req);
+}
 static bool bnxt_support_dropped(u16 advertising, u16 supported)
 {
     u16 diff = advertising ^ supported;
@@ -15389,6 +15409,10 @@ static int bnxt_probe_phy(struct bnxt *bp, bool fw_dflt)
         bp->dev->priv_flags |= IFF_SUPP_NOFCS;
     else
         bp->dev->priv_flags &= ~IFF_SUPP_NOFCS;
+    bp->mac_flags = 0;
+    bnxt_hwrm_mac_qcaps(bp);
     if (!fw_dflt)
         return 0;
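The new query uses the driver's standard HWRM message lifecycle. As orientation, the same sequence restated with comments (the comments are mine, not from the commit; bnxt driver context assumed):

    rc = hwrm_req_init(bp, req, HWRM_PORT_MAC_QCAPS); /* allocate + prepare request */
    if (rc)
        return;
    resp = hwrm_req_hold(bp, req);      /* pin the response buffer past the send */
    rc = hwrm_req_send_silent(bp, req); /* send; suppress firmware error logging */
    if (!rc)
        bp->mac_flags = resp->flags;    /* consume the response only on success */
    hwrm_req_drop(bp, req);             /* release request and response buffers */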

View File

@@ -2614,6 +2614,11 @@ struct bnxt {
 #define BNXT_PHY_FL_BANK_SEL    (PORT_PHY_QCAPS_RESP_FLAGS2_BANK_ADDR_SUPPORTED << 8)
 #define BNXT_PHY_FL_SPEEDS2     (PORT_PHY_QCAPS_RESP_FLAGS2_SPEEDS2_SUPPORTED << 8)
+    /* copied from flags in hwrm_port_mac_qcaps_output */
+    u8          mac_flags;
+#define BNXT_MAC_FL_NO_MAC_LPBK \
+    PORT_MAC_QCAPS_RESP_FLAGS_LOCAL_LPBK_NOT_SUPPORTED
     u8          num_tests;
     struct bnxt_test_info   *test_info;

View File

@@ -4894,35 +4894,44 @@ static void bnxt_self_test(struct net_device *dev, struct ethtool_test *etest,
     bnxt_close_nic(bp, true, false);
     bnxt_run_fw_tests(bp, test_mask, &test_results);
-    buf[BNXT_MACLPBK_TEST_IDX] = 1;
-    bnxt_hwrm_mac_loopback(bp, true);
-    msleep(250);
     rc = bnxt_half_open_nic(bp);
     if (rc) {
-        bnxt_hwrm_mac_loopback(bp, false);
         etest->flags |= ETH_TEST_FL_FAILED;
         return;
     }
+    buf[BNXT_MACLPBK_TEST_IDX] = 1;
+    if (bp->mac_flags & BNXT_MAC_FL_NO_MAC_LPBK)
+        goto skip_mac_loopback;
+    bnxt_hwrm_mac_loopback(bp, true);
+    msleep(250);
     if (bnxt_run_loopback(bp))
         etest->flags |= ETH_TEST_FL_FAILED;
     else
         buf[BNXT_MACLPBK_TEST_IDX] = 0;
     bnxt_hwrm_mac_loopback(bp, false);
+skip_mac_loopback:
+    buf[BNXT_PHYLPBK_TEST_IDX] = 1;
+    if (bp->phy_flags & BNXT_PHY_FL_NO_PHY_LPBK)
+        goto skip_phy_loopback;
     bnxt_hwrm_phy_loopback(bp, true, false);
     msleep(1000);
-    if (bnxt_run_loopback(bp)) {
-        buf[BNXT_PHYLPBK_TEST_IDX] = 1;
+    if (bnxt_run_loopback(bp))
         etest->flags |= ETH_TEST_FL_FAILED;
-    }
+    else
+        buf[BNXT_PHYLPBK_TEST_IDX] = 0;
+skip_phy_loopback:
+    buf[BNXT_EXTLPBK_TEST_IDX] = 1;
     if (do_ext_lpbk) {
         etest->flags |= ETH_TEST_FL_EXTERNAL_LB_DONE;
         bnxt_hwrm_phy_loopback(bp, true, true);
         msleep(1000);
-        if (bnxt_run_loopback(bp)) {
-            buf[BNXT_EXTLPBK_TEST_IDX] = 1;
+        if (bnxt_run_loopback(bp))
             etest->flags |= ETH_TEST_FL_FAILED;
-        }
+        else
+            buf[BNXT_EXTLPBK_TEST_IDX] = 0;
     }
     bnxt_hwrm_phy_loopback(bp, false, false);
     bnxt_half_close_nic(bp);
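The reworked flow presets each result slot to 1 (failed), skips loopback variants the firmware reports as unsupported, and clears a slot only on a confirmed pass. The same idiom in a standalone sketch (hypothetical test names, not driver code):

    #include <stdbool.h>

    static bool test_a_passes(void) { return true;  }   /* stand-in bodies */
    static bool test_b_passes(void) { return false; }

    static void run_selftests(unsigned long long *buf, bool a_unsupported)
    {
        buf[0] = 1;            /* pessimistic default: failed */
        if (a_unsupported)
            goto skip_a;       /* leave 1: the test never ran */
        if (test_a_passes())
            buf[0] = 0;        /* clear only on a confirmed pass */
    skip_a:
        buf[1] = 1;
        if (test_b_passes())
            buf[1] = 0;
    }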

View File

@@ -1768,6 +1768,7 @@ static int ath12k_dp_rx_msdu_coalesce(struct ath12k *ar,
     struct hal_rx_desc *ldesc;
     int space_extra, rem_len, buf_len;
     u32 hal_rx_desc_sz = ar->ab->hal.hal_desc_sz;
+    bool is_continuation;
     /* As the msdu is spread across multiple rx buffers,
      * find the offset to the start of msdu for computing
@@ -1816,7 +1817,8 @@ static int ath12k_dp_rx_msdu_coalesce(struct ath12k *ar,
     rem_len = msdu_len - buf_first_len;
     while ((skb = __skb_dequeue(msdu_list)) != NULL && rem_len > 0) {
         rxcb = ATH12K_SKB_RXCB(skb);
-        if (rxcb->is_continuation)
+        is_continuation = rxcb->is_continuation;
+        if (is_continuation)
             buf_len = DP_RX_BUFFER_SIZE - hal_rx_desc_sz;
         else
             buf_len = rem_len;
@@ -1834,7 +1836,7 @@ static int ath12k_dp_rx_msdu_coalesce(struct ath12k *ar,
         dev_kfree_skb_any(skb);
         rem_len -= buf_len;
-        if (!rxcb->is_continuation)
+        if (!is_continuation)
             break;
     }
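`rxcb` points into the skb's control block, so testing `rxcb->is_continuation` after `dev_kfree_skb_any(skb)` read freed memory; caching the flag before the free fixes the use-after-free. The idiom in a self-contained sketch (hypothetical types):

    #include <stdbool.h>
    #include <stdlib.h>

    struct buf { bool more; /* control data that dies with the buffer */ };

    static void consume(struct buf *b)
    {
        bool more = b->more;   /* copy out before the buffer is freed */

        free(b);               /* b->more must not be touched after this */
        if (!more)
            return;            /* safe: the decision uses the cached copy */
        /* ... fetch and consume the next buffer ... */
    }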

View File

@@ -309,7 +309,7 @@ static void rtw_coex_tdma_timer_base(struct rtw_dev *rtwdev, u8 type)
 {
     struct rtw_coex *coex = &rtwdev->coex;
     struct rtw_coex_stat *coex_stat = &coex->stat;
-    u8 para[2] = {0};
+    u8 para[6] = {};
     u8 times;
     u16 tbtt_interval = coex_stat->wl_beacon_interval;

View File

@@ -955,8 +955,10 @@ struct pci_acs {
 };
 static void __pci_config_acs(struct pci_dev *dev, struct pci_acs *caps,
-                 const char *p, u16 mask, u16 flags)
+                 const char *p, const u16 acs_mask, const u16 acs_flags)
 {
+    u16 flags = acs_flags;
+    u16 mask = acs_mask;
     char *delimit;
     int ret = 0;
@@ -964,7 +966,7 @@ static void __pci_config_acs(struct pci_dev *dev, struct pci_acs *caps,
         return;
     while (*p) {
-        if (!mask) {
+        if (!acs_mask) {
             /* Check for ACS flags */
             delimit = strstr(p, "@");
             if (delimit) {
@@ -972,6 +974,8 @@ static void __pci_config_acs(struct pci_dev *dev, struct pci_acs *caps,
                 u32 shift = 0;
                 end = delimit - p - 1;
+                mask = 0;
+                flags = 0;
                 while (end > -1) {
                     if (*(p + end) == '0') {
@@ -1028,10 +1032,14 @@ static void __pci_config_acs(struct pci_dev *dev, struct pci_acs *caps,
     pci_dbg(dev, "ACS mask = %#06x\n", mask);
     pci_dbg(dev, "ACS flags = %#06x\n", flags);
+    pci_dbg(dev, "ACS control = %#06x\n", caps->ctrl);
+    pci_dbg(dev, "ACS fw_ctrl = %#06x\n", caps->fw_ctrl);
-    /* If mask is 0 then we copy the bit from the firmware setting. */
-    caps->ctrl = (caps->ctrl & ~mask) | (caps->fw_ctrl & mask);
-    caps->ctrl |= flags;
+    /*
+     * For mask bits that are 0, copy them from the firmware setting
+     * and apply flags for all the mask bits that are 1.
+     */
+    caps->ctrl = (caps->fw_ctrl & ~mask) | (flags & mask);
     pci_info(dev, "Configured ACS to %#06x\n", caps->ctrl);
 }
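The old merge started from the current `caps->ctrl` and OR-ed in `flags`, so a user-requested '0' could never clear a bit; the new expression builds the value from firmware bits where mask is 0 and user bits where mask is 1. A standalone worked example with invented values:

    #include <stdint.h>
    #include <stdio.h>

    int main(void)
    {
        uint16_t fw_ctrl = 0x0003; /* firmware enabled bits 0 and 1 */
        uint16_t mask    = 0x0006; /* user configured bits 1 and 2 ... */
        uint16_t flags   = 0x0004; /* ... as '0' and '1' respectively */

        /* bits outside mask come from firmware, bits inside from the user */
        uint16_t ctrl = (fw_ctrl & ~mask) | (flags & mask);

        printf("ctrl = %#06x\n", ctrl); /* 0x0005: bit 1 forced off */
        return 0;
    }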

View File

@@ -110,6 +110,8 @@ EXPORT_SYMBOL_GPL(pci_pwrctl_device_set_ready);
  */
 void pci_pwrctl_device_unset_ready(struct pci_pwrctl *pwrctl)
 {
+    cancel_work_sync(&pwrctl->work);
     /*
      * We don't have to delete the link here. Typically, this function
      * is only called when the power control device is being detached. If

View File

@@ -2105,8 +2105,7 @@ pci_root_bus_distribute_available_resources(struct pci_bus *bus,
          * in case of root bus.
          */
         if (bridge && pci_bridge_resources_not_assigned(dev))
-            pci_bridge_distribute_available_resources(bridge,
-                                  add_list);
+            pci_bridge_distribute_available_resources(dev, add_list);
         else
             pci_root_bus_distribute_available_resources(b, add_list);
     }

View File

@@ -972,13 +972,9 @@ clean_demultiplex_info(struct TCP_Server_Info *server)
     msleep(125);
     if (cifs_rdma_enabled(server))
         smbd_destroy(server);
     if (server->ssocket) {
         sock_release(server->ssocket);
         server->ssocket = NULL;
-        /* Release netns reference for the socket. */
-        put_net(cifs_net_ns(server));
     }
     if (!list_empty(&server->pending_mid_q)) {
@@ -1026,7 +1022,6 @@ clean_demultiplex_info(struct TCP_Server_Info *server)
          */
     }
-    /* Release netns reference for this server. */
     put_net(cifs_net_ns(server));
     kfree(server->leaf_fullpath);
     kfree(server->hostname);
@@ -1672,8 +1667,6 @@ cifs_get_tcp_session(struct smb3_fs_context *ctx,
     tcp_ses->ops = ctx->ops;
     tcp_ses->vals = ctx->vals;
-    /* Grab netns reference for this server. */
     cifs_set_net_ns(tcp_ses, get_net(current->nsproxy->net_ns));
     tcp_ses->conn_id = atomic_inc_return(&tcpSesNextId);
@@ -1802,7 +1795,6 @@ smbd_connected:
 out_err_crypto_release:
     cifs_crypto_secmech_release(tcp_ses);
-    /* Release netns reference for this server. */
     put_net(cifs_net_ns(tcp_ses));
 out_err:
@@ -1811,10 +1803,8 @@ out_err:
         cifs_put_tcp_session(tcp_ses->primary_server, false);
         kfree(tcp_ses->hostname);
         kfree(tcp_ses->leaf_fullpath);
-        if (tcp_ses->ssocket) {
+        if (tcp_ses->ssocket)
             sock_release(tcp_ses->ssocket);
-            put_net(cifs_net_ns(tcp_ses));
-        }
         kfree(tcp_ses);
     }
     return ERR_PTR(rc);
@@ -3101,20 +3091,20 @@ generic_ip_connect(struct TCP_Server_Info *server)
         socket = server->ssocket;
     } else {
         struct net *net = cifs_net_ns(server);
+        struct sock *sk;
-        rc = sock_create_kern(net, sfamily, SOCK_STREAM, IPPROTO_TCP, &server->ssocket);
+        rc = __sock_create(net, sfamily, SOCK_STREAM,
+                   IPPROTO_TCP, &server->ssocket, 1);
         if (rc < 0) {
             cifs_server_dbg(VFS, "Error %d creating socket\n", rc);
             return rc;
         }
-        /*
-         * Grab netns reference for the socket.
-         *
-         * It'll be released here, on error, or in clean_demultiplex_info() upon server
-         * teardown.
-         */
-        get_net(net);
+        sk = server->ssocket->sk;
+        __netns_tracker_free(net, &sk->ns_tracker, false);
+        sk->sk_net_refcnt = 1;
+        get_net_track(net, &sk->ns_tracker, GFP_KERNEL);
+        sock_inuse_add(net, 1);
         /* BB other socket options to set KEEPALIVE, NODELAY? */
         cifs_dbg(FYI, "Socket created\n");
@@ -3128,10 +3118,8 @@ generic_ip_connect(struct TCP_Server_Info *server)
     }
     rc = bind_socket(server);
-    if (rc < 0) {
-        put_net(cifs_net_ns(server));
+    if (rc < 0)
         return rc;
-    }
     /*
      * Eventually check for other socket options to change from
@@ -3168,7 +3156,6 @@ generic_ip_connect(struct TCP_Server_Info *server)
     if (rc < 0) {
         cifs_dbg(FYI, "Error %d connecting to server\n", rc);
         trace_smb3_connect_err(server->hostname, server->conn_id, &server->dstaddr, rc);
-        put_net(cifs_net_ns(server));
         sock_release(socket);
         server->ssocket = NULL;
         return rc;
@@ -3177,9 +3164,6 @@ generic_ip_connect(struct TCP_Server_Info *server)
     if (sport == htons(RFC1001_PORT))
         rc = ip_rfc1001_connect(server);
-    if (rc < 0)
-        put_net(cifs_net_ns(server));
     return rc;
 }
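The ownership model changes here: rather than the server holding a bare get_net() reference that every error path had to put back, the socket's struct sock now owns a tracked netns reference (the model user sockets use), so sock_release() drops it automatically. The new sequence restated with comments (the comments are mine, kernel context assumed):

    /* After __sock_create(net, ..., 1) the kernel socket does not pin the
     * netns (sk_net_refcnt == 0) and holds only a passive tracker entry. */
    sk = server->ssocket->sk;
    __netns_tracker_free(net, &sk->ns_tracker, false); /* drop passive tracker */
    sk->sk_net_refcnt = 1;                  /* socket now owns a real netns ref */
    get_net_track(net, &sk->ns_tracker, GFP_KERNEL);   /* take the tracked ref */
    sock_inuse_add(net, 1);                 /* account it like a user socket */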

View File

@@ -272,6 +272,8 @@ long hugetlb_change_protection(struct vm_area_struct *vma,
 bool is_hugetlb_entry_migration(pte_t pte);
 bool is_hugetlb_entry_hwpoisoned(pte_t pte);
 void hugetlb_unshare_all_pmds(struct vm_area_struct *vma);
+void fixup_hugetlb_reservations(struct vm_area_struct *vma);
+void hugetlb_split(struct vm_area_struct *vma, unsigned long addr);
 #else /* !CONFIG_HUGETLB_PAGE */
@@ -465,6 +467,12 @@ static inline vm_fault_t hugetlb_fault(struct mm_struct *mm,
 static inline void hugetlb_unshare_all_pmds(struct vm_area_struct *vma) { }
+static inline void fixup_hugetlb_reservations(struct vm_area_struct *vma)
+{
+}
+static inline void hugetlb_split(struct vm_area_struct *vma, unsigned long addr) {}
 #endif /* !CONFIG_HUGETLB_PAGE */
 #ifndef pgd_write

View File

@@ -87,7 +87,7 @@ static void hugetlb_vma_lock_free(struct vm_area_struct *vma);
 static void hugetlb_vma_lock_alloc(struct vm_area_struct *vma);
 static void __hugetlb_vma_unlock_write_free(struct vm_area_struct *vma);
 static void hugetlb_unshare_pmds(struct vm_area_struct *vma,
-        unsigned long start, unsigned long end);
+        unsigned long start, unsigned long end, bool take_locks);
 static struct resv_map *vma_resv_map(struct vm_area_struct *vma);
 static void hugetlb_free_folio(struct folio *folio)
@@ -1218,7 +1218,7 @@ void hugetlb_dup_vma_private(struct vm_area_struct *vma)
 /*
  * Reset and decrement one ref on hugepage private reservation.
  * Called with mm->mmap_lock writer semaphore held.
- * This function should be only used by move_vma() and operate on
+ * This function should be only used by mremap and operate on
  * same sized vma. It should never come here with last ref on the
  * reservation.
  */
@@ -5093,26 +5093,40 @@ static int hugetlb_vm_op_split(struct vm_area_struct *vma, unsigned long addr)
 {
     if (addr & ~(huge_page_mask(hstate_vma(vma))))
         return -EINVAL;
+    return 0;
+}
+void hugetlb_split(struct vm_area_struct *vma, unsigned long addr)
+{
     /*
      * PMD sharing is only possible for PUD_SIZE-aligned address ranges
      * in HugeTLB VMAs. If we will lose PUD_SIZE alignment due to this
      * split, unshare PMDs in the PUD_SIZE interval surrounding addr now.
+     * This function is called in the middle of a VMA split operation, with
+     * MM, VMA and rmap all write-locked to prevent concurrent page table
+     * walks (except hardware and gup_fast()).
      */
+    vma_assert_write_locked(vma);
+    i_mmap_assert_write_locked(vma->vm_file->f_mapping);
     if (addr & ~PUD_MASK) {
-        /*
-         * hugetlb_vm_op_split is called right before we attempt to
-         * split the VMA. We will need to unshare PMDs in the old and
-         * new VMAs, so let's unshare before we split.
-         */
         unsigned long floor = addr & PUD_MASK;
         unsigned long ceil = floor + PUD_SIZE;
-        if (floor >= vma->vm_start && ceil <= vma->vm_end)
-            hugetlb_unshare_pmds(vma, floor, ceil);
+        if (floor >= vma->vm_start && ceil <= vma->vm_end) {
+            /*
+             * Locking:
+             * Use take_locks=false here.
+             * The file rmap lock is already held.
+             * The hugetlb VMA lock can't be taken when we already
+             * hold the file rmap lock, and we don't need it because
+             * its purpose is to synchronize against concurrent page
+             * table walks, which are not possible thanks to the
+             * locks held by our caller.
+             */
+            hugetlb_unshare_pmds(vma, floor, ceil, /* take_locks = */ false);
+        }
     }
-    return 0;
 }
 static unsigned long hugetlb_vm_op_pagesize(struct vm_area_struct *vma)
@@ -7265,6 +7279,13 @@ int huge_pmd_unshare(struct mm_struct *mm, struct vm_area_struct *vma,
         return 0;
     pud_clear(pud);
+    /*
+     * Once our caller drops the rmap lock, some other process might be
+     * using this page table as a normal, non-hugetlb page table.
+     * Wait for pending gup_fast() in other threads to finish before letting
+     * that happen.
+     */
+    tlb_remove_table_sync_one();
     put_page(virt_to_page(ptep));
     mm_dec_nr_pmds(mm);
     return 1;
@@ -7497,9 +7518,16 @@ void move_hugetlb_state(struct folio *old_folio, struct folio *new_folio, int re
     }
 }
+/*
+ * If @take_locks is false, the caller must ensure that no concurrent page table
+ * access can happen (except for gup_fast() and hardware page walks).
+ * If @take_locks is true, we take the hugetlb VMA lock (to lock out things like
+ * concurrent page fault handling) and the file rmap lock.
+ */
 static void hugetlb_unshare_pmds(struct vm_area_struct *vma,
                  unsigned long start,
-                 unsigned long end)
+                 unsigned long end,
+                 bool take_locks)
 {
     struct hstate *h = hstate_vma(vma);
     unsigned long sz = huge_page_size(h);
@@ -7523,8 +7551,12 @@ static void hugetlb_unshare_pmds(struct vm_area_struct *vma,
     mmu_notifier_range_init(&range, MMU_NOTIFY_CLEAR, 0, mm,
                 start, end);
     mmu_notifier_invalidate_range_start(&range);
+    if (take_locks) {
         hugetlb_vma_lock_write(vma);
         i_mmap_lock_write(vma->vm_file->f_mapping);
+    } else {
+        i_mmap_assert_write_locked(vma->vm_file->f_mapping);
+    }
     for (address = start; address < end; address += PUD_SIZE) {
         ptep = hugetlb_walk(vma, address, sz);
         if (!ptep)
@@ -7534,8 +7566,10 @@ static void hugetlb_unshare_pmds(struct vm_area_struct *vma,
         spin_unlock(ptl);
     }
     flush_hugetlb_tlb_range(vma, start, end);
+    if (take_locks) {
         i_mmap_unlock_write(vma->vm_file->f_mapping);
         hugetlb_vma_unlock_write(vma);
+    }
     /*
      * No need to call mmu_notifier_arch_invalidate_secondary_tlbs(), see
      * Documentation/mm/mmu_notifier.rst.
@@ -7550,7 +7584,22 @@ static void hugetlb_unshare_pmds(struct vm_area_struct *vma,
 void hugetlb_unshare_all_pmds(struct vm_area_struct *vma)
 {
     hugetlb_unshare_pmds(vma, ALIGN(vma->vm_start, PUD_SIZE),
-            ALIGN_DOWN(vma->vm_end, PUD_SIZE));
+            ALIGN_DOWN(vma->vm_end, PUD_SIZE),
+            /* take_locks = */ true);
+}
+/*
+ * For hugetlb, mremap() is an odd edge case - while the VMA copying is
+ * performed, we permit both the old and new VMAs to reference the same
+ * reservation.
+ *
+ * We fix this up after the operation succeeds, or if a newly allocated VMA
+ * is closed as a result of a failure to allocate memory.
+ */
+void fixup_hugetlb_reservations(struct vm_area_struct *vma)
+{
+    if (is_vm_hugetlb_page(vma))
+        clear_vma_resv_huge_pages(vma);
 }
 #ifdef CONFIG_CMA
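The take_locks flag lets one worker serve callers that already hold the rmap lock (the VMA-split path via hugetlb_split()) and callers that do not (hugetlb_unshare_all_pmds()). The idiom in a self-contained userspace sketch, with a pthread mutex standing in for the kernel locks (hypothetical, for illustration only):

    #include <pthread.h>

    static pthread_mutex_t rmap_lock = PTHREAD_MUTEX_INITIALIZER;

    /* Worker usable from both locking contexts, as in the diff above. */
    static void unshare_range(long start, long end, int take_locks)
    {
        if (take_locks)
            pthread_mutex_lock(&rmap_lock);
        /* else: the caller is required to hold rmap_lock already */

        /* ... walk [start, end) and unshare shared entries ... */

        if (take_locks)
            pthread_mutex_unlock(&rmap_lock);
    }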

View File

@@ -743,9 +743,7 @@ static unsigned long move_vma(struct vm_area_struct *vma,
         mremap_userfaultfd_prep(new_vma, uf);
     }
-    if (is_vm_hugetlb_page(vma)) {
-        clear_vma_resv_huge_pages(vma);
-    }
+    fixup_hugetlb_reservations(vma);
     /* Conceal VM_ACCOUNT so old reservation is not undone */
     if (vm_flags & VM_ACCOUNT && !(flags & MREMAP_DONTUNMAP)) {

View File

@@ -416,7 +416,14 @@ static int __split_vma(struct vma_iterator *vmi, struct vm_area_struct *vma,
     init_vma_prep(&vp, vma);
     vp.insert = new;
     vma_prepare(&vp);
+    /*
+     * Get rid of huge pages and shared page tables straddling the split
+     * boundary.
+     */
     vma_adjust_trans_huge(vma, vma->vm_start, addr, 0);
+    if (is_vm_hugetlb_page(vma))
+        hugetlb_split(vma, addr);
     if (new_below) {
         vma->vm_start = addr;
@@ -1681,6 +1688,7 @@ struct vm_area_struct *copy_vma(struct vm_area_struct **vmap,
     return new_vma;
 out_vma_link:
+    fixup_hugetlb_reservations(new_vma);
     vma_close(new_vma);
     if (new_vma->vm_file)

View File

@@ -17,6 +17,7 @@
 #include <linux/file.h>
 #include <linux/fs.h>
 #include <linux/huge_mm.h>
+#include <linux/hugetlb.h>
 #include <linux/hugetlb_inline.h>
 #include <linux/kernel.h>
 #include <linux/khugepaged.h>

View File

@@ -735,6 +735,8 @@ static inline void vma_adjust_trans_huge(struct vm_area_struct *vma,
     (void)adjust_next;
 }
+static inline void hugetlb_split(struct vm_area_struct *, unsigned long) {}
 static inline void vma_iter_free(struct vma_iterator *vmi)
 {
     mas_destroy(&vmi->mas);
@@ -920,4 +922,9 @@ static inline bool signal_pending(void *)
     return false;
 }
+static inline void fixup_hugetlb_reservations(struct vm_area_struct *vma)
+{
+    (void)vma;
+}
 #endif /* __MM_VMA_INTERNAL_H */