Import of kernel-4.18.0-553.72.1.el8_10

eabdullin 2025-09-05 13:45:48 +00:00
parent d437976681
commit e45b724fb0
23 changed files with 428 additions and 268 deletions


@ -12,7 +12,7 @@ RHEL_MINOR = 10
#
# Use this spot to avoid future merge conflicts.
# Do not trim this comment.
RHEL_RELEASE = 553.71.1
RHEL_RELEASE = 553.72.1
#
# ZSTREAM


@ -10,6 +10,7 @@
#include <linux/efi.h>
#include <linux/pci.h>
#include <linux/ctype.h>
#include <asm/efi.h>
#include <asm/e820/types.h>
@ -863,6 +864,26 @@ static void error(char *str)
efi_printk(sys_table, "\n");
}
static const char *cmdline_memmap_override;
static efi_status_t parse_options(const char *cmdline)
{
static const char opts[][14] = {
"mem=", "memmap=", "efi_fake_mem=", "hugepages="
};
for (int i = 0; i < ARRAY_SIZE(opts); i++) {
const char *p = strstr(cmdline, opts[i]);
if (p == cmdline || (p > cmdline && isspace(p[-1]))) {
cmdline_memmap_override = opts[i];
break;
}
}
return efi_parse_options(cmdline);
}
static efi_status_t efi_decompress_kernel(unsigned long *kernel_entry)
{
unsigned long virt_addr = LOAD_PHYSICAL_ADDR;
@ -893,6 +914,10 @@ static efi_status_t efi_decompress_kernel(unsigned long *kernel_entry)
if (sys_table->hdr.revision <= EFI_2_00_SYSTEM_TABLE_REVISION &&
!memcmp(efistub_fw_vendor(), ami, sizeof(ami))) {
seed[0] = 0;
} else if (cmdline_memmap_override) {
efi_printk(sys_table,
"cmdline memmap override detected on the kernel command line - disabling physical KASLR\n");
seed[0] = 0;
}
boot_params_ptr->hdr.loadflags |= KASLR_FLAG;
@ -942,7 +967,6 @@ void __noreturn efi_main(struct efi_config *c,
void *handle;
efi_system_table_t *_table;
bool is64;
unsigned long cmdline_paddr;
extern char _bss[], _ebss[];
/*
@ -997,9 +1021,22 @@ void __noreturn efi_main(struct efi_config *c,
* case this is the second time we parse the cmdline. This is ok,
* parsing the cmdline multiple times does not have side-effects.
*/
cmdline_paddr = ((u64)hdr->cmd_line_ptr |
((u64)boot_params->ext_cmd_line_ptr << 32));
efi_parse_options((char *)cmdline_paddr);
#ifdef CONFIG_CMDLINE_BOOL
status = parse_options(CONFIG_CMDLINE);
if (status != EFI_SUCCESS) {
efi_printk(sys_table, "Failed to parse options\n");
goto fail;
}
#endif
if (!IS_ENABLED(CONFIG_CMDLINE_OVERRIDE)) {
unsigned long cmdline_paddr = ((u64)hdr->cmd_line_ptr |
((u64)boot_params->ext_cmd_line_ptr << 32));
status = parse_options((char *)cmdline_paddr);
if (status != EFI_SUCCESS) {
efi_printk(sys_table, "Failed to parse options\n");
goto fail;
}
}
status = efi_decompress_kernel(&kernel_entry);
if (status != EFI_SUCCESS) {

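The eboot.c hunks above make the stub remember whether a memory-layout override was given: parse_options() records "mem=", "memmap=", "efi_fake_mem=" or "hugepages=" only when the option stands on its own (at the start of the command line or preceded by whitespace), and efi_decompress_kernel() then prints a notice and zeroes the KASLR seed when such an override is present. A minimal userspace sketch of that matching rule, with find_memmap_override() as an illustrative name (the stub uses its own strstr()/isspace()):

    #include <ctype.h>
    #include <stdio.h>
    #include <string.h>

    #define ARRAY_SIZE(a) (sizeof(a) / sizeof((a)[0]))

    /* Return the override prefix found as a stand-alone option, or NULL. */
    static const char *find_memmap_override(const char *cmdline)
    {
        static const char opts[][14] = {
            "mem=", "memmap=", "efi_fake_mem=", "hugepages="
        };
        size_t i;

        for (i = 0; i < ARRAY_SIZE(opts); i++) {
            const char *p = strstr(cmdline, opts[i]);

            /* Accept a hit only at the start or right after whitespace. */
            if (p && (p == cmdline || isspace((unsigned char)p[-1])))
                return opts[i];
        }
        return NULL;
    }

    int main(void)
    {
        /* Stand-alone "memmap=": physical KASLR would be disabled. */
        printf("%s\n", find_memmap_override("ro quiet memmap=4G!1G"));
        /* "xen_mem=1G" contains "mem=" but not as its own option: no match. */
        printf("%d\n", find_memmap_override("ro xen_mem=1G") == NULL);
        return 0;
    }

The other half of the change is ordering: the built-in CONFIG_CMDLINE is parsed first, and the boot-protocol command line (cmd_line_ptr in the low 32 bits, ext_cmd_line_ptr in the high 32 bits) is parsed only when CONFIG_CMDLINE_OVERRIDE is not set.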

@ -461,7 +461,7 @@ static void blk_mq_debugfs_tags_show(struct seq_file *m,
seq_printf(m, "nr_tags=%u\n", tags->nr_tags);
seq_printf(m, "nr_reserved_tags=%u\n", tags->nr_reserved_tags);
seq_printf(m, "active_queues=%d\n",
atomic_read(&tags->active_queues));
READ_ONCE(tags->active_queues));
seq_puts(m, "\nbitmap_tags:\n");
sbitmap_queue_show(tags->bitmap_tags, m);


@ -15,6 +15,21 @@
#include "blk-mq-sched.h"
#include "blk-mq-tag.h"
/*
* Recalculate wakeup batch when tag is shared by hctx.
*/
static void blk_mq_update_wake_batch(struct blk_mq_tags *tags,
unsigned int users)
{
if (!users)
return;
sbitmap_queue_recalculate_wake_batch(tags->bitmap_tags,
users);
sbitmap_queue_recalculate_wake_batch(tags->breserved_tags,
users);
}
/*
* If a previously inactive queue goes active, bump the active user count.
* We need to do this before try to allocate driver tag, then even if fail
@ -23,17 +38,33 @@
*/
bool __blk_mq_tag_busy(struct blk_mq_hw_ctx *hctx)
{
unsigned int users;
unsigned long flags;
struct blk_mq_tags *tags = hctx->tags;
if (blk_mq_is_sbitmap_shared(hctx->flags)) {
struct request_queue *q = hctx->queue;
struct blk_mq_tag_set *set = q->tag_set;
if (!test_bit(QUEUE_FLAG_HCTX_ACTIVE, &q->queue_flags) &&
!test_and_set_bit(QUEUE_FLAG_HCTX_ACTIVE, &q->queue_flags))
atomic_inc(&set->active_queues_shared_sbitmap);
if (test_bit(QUEUE_FLAG_HCTX_ACTIVE, &q->queue_flags) ||
test_and_set_bit(QUEUE_FLAG_HCTX_ACTIVE, &q->queue_flags)) {
return true;
}
spin_lock_irqsave(&set->active_queues_lock, flags);
users = set->active_queues_shared_sbitmap + 1;
WRITE_ONCE(set->active_queues_shared_sbitmap, users);
blk_mq_update_wake_batch(hctx->tags, users);
spin_unlock_irqrestore(&set->active_queues_lock, flags);
} else {
if (!test_bit(BLK_MQ_S_TAG_ACTIVE, &hctx->state) &&
!test_and_set_bit(BLK_MQ_S_TAG_ACTIVE, &hctx->state))
atomic_inc(&hctx->tags->active_queues);
if (test_bit(BLK_MQ_S_TAG_ACTIVE, &hctx->state) ||
test_and_set_bit(BLK_MQ_S_TAG_ACTIVE, &hctx->state)) {
return true;
}
spin_lock_irqsave(&tags->lock, flags);
users = hctx->tags->active_queues + 1;
WRITE_ONCE(tags->active_queues, users);
blk_mq_update_wake_batch(tags, users);
spin_unlock_irqrestore(&tags->lock, flags);
}
return true;
@ -58,16 +89,25 @@ void __blk_mq_tag_idle(struct blk_mq_hw_ctx *hctx)
struct blk_mq_tags *tags = hctx->tags;
struct request_queue *q = hctx->queue;
struct blk_mq_tag_set *set = q->tag_set;
unsigned int users;
if (blk_mq_is_sbitmap_shared(hctx->flags)) {
if (!test_and_clear_bit(QUEUE_FLAG_HCTX_ACTIVE,
&q->queue_flags))
return;
atomic_dec(&set->active_queues_shared_sbitmap);
spin_lock_irq(&set->active_queues_lock);
users = set->active_queues_shared_sbitmap - 1;
WRITE_ONCE(set->active_queues_shared_sbitmap, users);
blk_mq_update_wake_batch(tags, users);
spin_unlock_irq(&set->active_queues_lock);
} else {
if (!test_and_clear_bit(BLK_MQ_S_TAG_ACTIVE, &hctx->state))
return;
atomic_dec(&tags->active_queues);
spin_lock_irq(&tags->lock);
users = tags->active_queues - 1;
WRITE_ONCE(tags->active_queues, users);
blk_mq_update_wake_batch(tags, users);
spin_unlock_irq(&tags->lock);
}
blk_mq_tag_wakeup_all(tags, false);

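Together with the debugfs and hctx_may_queue() hunks, this turns active_queues and active_queues_shared_sbitmap from atomic_t into plain counters: they are written only under tags->lock or set->active_queues_lock with WRITE_ONCE(), read locklessly with READ_ONCE(), and holding the lock gives __blk_mq_tag_busy()/__blk_mq_tag_idle() a natural place to recompute the sbitmap wake batch for the new user count. The test_bit()-before-test_and_set_bit() check keeps the common "already active" case read-only and lock-free. A minimal, hypothetical C11 model of that fast path (mark_active(), state and TAG_ACTIVE are illustrative stand-ins):

    #include <stdatomic.h>
    #include <stdbool.h>
    #include <stdio.h>

    #define TAG_ACTIVE (1u << 0)      /* stands in for BLK_MQ_S_TAG_ACTIVE */

    static atomic_uint state;         /* stands in for hctx->state */

    static bool mark_active(unsigned int *users)
    {
        /* Fast path: a plain read, no store, no lock, no cacheline bounce. */
        if (atomic_load(&state) & TAG_ACTIVE)
            return true;

        /* Slow path: exactly one caller wins the 0 -> 1 transition ... */
        if (atomic_fetch_or(&state, TAG_ACTIVE) & TAG_ACTIVE)
            return true;

        /* ... and only the winner bumps the user count (the kernel does
         * this under tags->lock and then recalculates the wake batch). */
        *users += 1;
        return true;
    }

    int main(void)
    {
        unsigned int users = 0;

        mark_active(&users);
        mark_active(&users);          /* second call takes the fast path */
        printf("users=%u\n", users);  /* users=1 */
        return 0;
    }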

@ -9,7 +9,11 @@ struct blk_mq_tags {
unsigned int nr_tags;
unsigned int nr_reserved_tags;
#ifdef __GENKSYMS__
atomic_t active_queues;
#else
unsigned int active_queues;
#endif
struct sbitmap_queue RH_KABI_RENAME(bitmap_tags, __bitmap_tags);
struct sbitmap_queue RH_KABI_RENAME(breserved_tags, __breserved_tags);

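The #ifdef __GENKSYMS__ split lets the symbol-version CRCs still be computed from the old atomic_t member while the compiled kernel uses a plain unsigned int; the swap is kABI-safe only because the two types have identical size and alignment (atomic_t wraps a single int). A generic, hypothetical illustration of the idiom, with old_counter standing in for atomic_t and example_tags for the real structure:

    /* One-member wrapper, the same shape as the kernel's atomic_t. */
    struct old_counter { int counter; };

    struct example_tags {
    #ifdef __GENKSYMS__
        struct old_counter active_queues;   /* what the CRC generator sees */
    #else
        unsigned int active_queues;         /* what the built kernel uses  */
    #endif
    };

    /* The replacement must not change the structure layout. */
    _Static_assert(sizeof(struct old_counter) == sizeof(unsigned int),
                   "replacement type must keep the same size");
    _Static_assert(_Alignof(struct old_counter) == _Alignof(unsigned int),
                   "replacement type must keep the same alignment");

    int main(void) { return 0; }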

@ -3666,8 +3666,6 @@ int blk_mq_alloc_tag_set(struct blk_mq_tag_set *set)
goto out_free_mq_map;
if (blk_mq_is_sbitmap_shared(set->flags)) {
atomic_set(&set->active_queues_shared_sbitmap, 0);
if (blk_mq_init_shared_sbitmap(set)) {
ret = -ENOMEM;
goto out_free_mq_rq_maps;
@ -3676,6 +3674,7 @@ int blk_mq_alloc_tag_set(struct blk_mq_tag_set *set)
mutex_init(&set->tag_list_lock);
INIT_LIST_HEAD(&set->tag_list);
spin_lock_init(&set->active_queues_lock);
return 0;


@ -343,11 +343,11 @@ static inline bool hctx_may_queue(struct blk_mq_hw_ctx *hctx,
if (!test_bit(QUEUE_FLAG_HCTX_ACTIVE, &q->queue_flags))
return true;
users = atomic_read(&set->active_queues_shared_sbitmap);
users = READ_ONCE(set->active_queues_shared_sbitmap);
} else {
if (!test_bit(BLK_MQ_S_TAG_ACTIVE, &hctx->state))
return true;
users = atomic_read(&hctx->tags->active_queues);
users = READ_ONCE(hctx->tags->active_queues);
}
if (!users)


@ -367,8 +367,7 @@ EXPORT_SYMBOL(iw_cm_disconnect);
/*
* CM_ID <-- DESTROYING
*
* Clean up all resources associated with the connection and release
* the initial reference taken by iw_create_cm_id.
* Clean up all resources associated with the connection.
*/
static void destroy_cm_id(struct iw_cm_id *cm_id)
{
@ -439,19 +438,22 @@ static void destroy_cm_id(struct iw_cm_id *cm_id)
iwpm_remove_mapinfo(&cm_id->local_addr, &cm_id->m_local_addr);
iwpm_remove_mapping(&cm_id->local_addr, RDMA_NL_IWCM);
}
(void)iwcm_deref_id(cm_id_priv);
}
/*
* This function is only called by the application thread and cannot
* be called by the event thread. The function will wait for all
* references to be released on the cm_id and then kfree the cm_id
* object.
* Destroy cm_id. If the cm_id still has other references, wait for all
* references to be released on the cm_id and then release the initial
* reference taken by iw_create_cm_id.
*/
void iw_destroy_cm_id(struct iw_cm_id *cm_id)
{
struct iwcm_id_private *cm_id_priv;
cm_id_priv = container_of(cm_id, struct iwcm_id_private, id);
destroy_cm_id(cm_id);
if (refcount_read(&cm_id_priv->refcount) > 1)
flush_workqueue(iwcm_wq);
iwcm_deref_id(cm_id_priv);
}
EXPORT_SYMBOL(iw_destroy_cm_id);
@ -1034,8 +1036,10 @@ static void cm_work_handler(struct work_struct *_work)
if (!test_bit(IWCM_F_DROP_EVENTS, &cm_id_priv->flags)) {
ret = process_event(cm_id_priv, &levent);
if (ret)
if (ret) {
destroy_cm_id(&cm_id_priv->id);
WARN_ON_ONCE(iwcm_deref_id(cm_id_priv));
}
} else
pr_debug("dropping event %d\n", levent.event);
if (iwcm_deref_id(cm_id_priv))
@ -1188,7 +1192,7 @@ static int __init iw_cm_init(void)
if (ret)
return ret;
iwcm_wq = alloc_ordered_workqueue("iw_cm_wq", 0);
iwcm_wq = alloc_ordered_workqueue("iw_cm_wq", WQ_MEM_RECLAIM);
if (!iwcm_wq)
goto err_alloc;


@ -2084,7 +2084,7 @@ void t4_idma_monitor(struct adapter *adapter,
struct sge_idma_monitor_state *idma,
int hz, int ticks);
int t4_set_vf_mac_acl(struct adapter *adapter, unsigned int vf,
unsigned int naddr, u8 *addr);
u8 start, unsigned int naddr, u8 *addr);
void t4_tp_pio_read(struct adapter *adap, u32 *buff, u32 nregs,
u32 start_index, bool sleep_ok);
void t4_tp_tm_pio_read(struct adapter *adap, u32 *buff, u32 nregs,


@ -3248,7 +3248,7 @@ static int cxgb4_mgmt_set_vf_mac(struct net_device *dev, int vf, u8 *mac)
dev_info(pi->adapter->pdev_dev,
"Setting MAC %pM on VF %d\n", mac, vf);
ret = t4_set_vf_mac_acl(adap, vf + 1, 1, mac);
ret = t4_set_vf_mac_acl(adap, vf + 1, pi->lport, 1, mac);
if (!ret)
ether_addr_copy(adap->vfinfo[vf].vf_mac_addr, mac);
return ret;


@ -10213,11 +10213,12 @@ out:
* t4_set_vf_mac_acl - Set MAC address for the specified VF
* @adapter: The adapter
* @vf: one of the VFs instantiated by the specified PF
* @start: The start port id associated with specified VF
* @naddr: the number of MAC addresses
* @addr: the MAC address(es) to be set to the specified VF
*/
int t4_set_vf_mac_acl(struct adapter *adapter, unsigned int vf,
unsigned int naddr, u8 *addr)
u8 start, unsigned int naddr, u8 *addr)
{
struct fw_acl_mac_cmd cmd;
@ -10232,7 +10233,7 @@ int t4_set_vf_mac_acl(struct adapter *adapter, unsigned int vf,
cmd.en_to_len16 = cpu_to_be32((unsigned int)FW_LEN16(cmd));
cmd.nmac = naddr;
switch (adapter->pf) {
switch (start) {
case 3:
memcpy(cmd.macaddr3, addr, sizeof(cmd.macaddr3));
break;


@ -1879,7 +1879,7 @@ static int aac_probe_one(struct pci_dev *pdev, const struct pci_device_id *id)
pci_set_drvdata(pdev, shost);
shost->nr_hw_queues = aac->max_msix;
shost->can_queue = aac->vector_cap;
shost->can_queue = min((int)aac->vector_cap, shost->can_queue);
shost->host_tagset = 1;
error = scsi_add_host(shost, &pdev->dev);


@ -5975,9 +5975,9 @@ lpfc_sli4_get_ctl_attr(struct lpfc_hba *phba)
phba->sli4_hba.flash_id = bf_get(lpfc_cntl_attr_flash_id, cntl_attr);
phba->sli4_hba.asic_rev = bf_get(lpfc_cntl_attr_asic_rev, cntl_attr);
memset(phba->BIOSVersion, 0, sizeof(phba->BIOSVersion));
strlcat(phba->BIOSVersion, (char *)cntl_attr->bios_ver_str,
memcpy(phba->BIOSVersion, cntl_attr->bios_ver_str,
sizeof(phba->BIOSVersion));
phba->BIOSVersion[sizeof(phba->BIOSVersion) - 1] = '\0';
lpfc_printf_log(phba, KERN_INFO, LOG_SLI,
"3086 lnk_type:%d, lnk_numb:%d, bios_ver:%s, "

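The lpfc hunk copies exactly sizeof(phba->BIOSVersion) bytes with memcpy() and then forces NUL termination, rather than using strlcat(), so the copy stays within bounds even if the firmware-supplied bios_ver_str is not NUL-terminated (strlcat() has to walk the source as a string). A minimal sketch of that pattern; the field widths below are illustrative:

    #include <stdio.h>
    #include <string.h>

    #define SRC_LEN 8    /* illustrative width of the firmware field      */
    #define DST_LEN 8    /* illustrative sizeof(phba->BIOSVersion) analog */

    int main(void)
    {
        /* Firmware-style field: exactly SRC_LEN bytes, no terminating NUL. */
        const char fw_field[SRC_LEN] = { 'V', '1', '.', '2', '.', '3', '4', '5' };
        char dst[DST_LEN];

        /* Copy no more than the destination holds, then terminate it. */
        memcpy(dst, fw_field, sizeof(dst));
        dst[sizeof(dst) - 1] = '\0';

        printf("%s\n", dst);    /* V1.2.34 */
        return 0;
    }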

@ -167,9 +167,9 @@ struct blk_mq_tag_set {
struct list_head tag_list;
RH_KABI_USE(1, struct blk_mq_tag_set_aux * aux)
RH_KABI_USE(2, atomic_t active_queues_shared_sbitmap)
RH_KABI_USE(2, unsigned int active_queues_shared_sbitmap)
RH_KABI_USE(3, spinlock_t active_queues_lock)
RH_KABI_RESERVE(3)
RH_KABI_RESERVE(4)
RH_KABI_RESERVE(5)
RH_KABI_RESERVE(6)


@ -29,10 +29,7 @@ struct seq_file;
* struct sbitmap_word - Word in a &struct sbitmap.
*/
struct sbitmap_word {
/**
* @depth: Number of bits being used in @word/@cleared
*/
unsigned long depth;
RH_KABI_DEPRECATE(unsigned long, depth)
/**
* @word: word holding free bits
@ -47,7 +44,11 @@ struct sbitmap_word {
/**
* @swap_lock: Held while swapping word <-> cleared
*/
RH_KABI_DEPRECATE(spinlock_t, swap_lock)
#ifdef __GENKSYMS__
spinlock_t swap_lock;
#else
raw_spinlock_t swap_lock;
#endif
} ____cacheline_aligned_in_smp;
/**
@ -96,7 +97,7 @@ struct sbq_wait_state {
/**
* @wait_cnt: Number of frees remaining before we wake up.
*/
atomic_t wait_cnt;
RH_KABI_DEPRECATE(atomic_t, wait_cnt)
/**
* @wait: Wait queue.
@ -125,7 +126,16 @@ struct sbitmap_queue {
* This is per-cpu, which allows multiple users to stick to different
* cachelines until the map is exhausted.
*/
RH_KABI_DEPRECATE(unsigned int __percpu *, alloc_hint)
RH_KABI_REPLACE_SPLIT(unsigned int __percpu * alloc_hint,
/**
* @completion_cnt: Number of bits cleared passed to the
* wakeup function.
*/
atomic_t completion_cnt,
/**
* @wakeup_cnt: Number of thread wake ups issued.
*/
atomic_t wakeup_cnt)
/**
* @wake_batch: Number of bits which must be freed before we wake up any
@ -179,6 +189,14 @@ struct sbitmap_queue {
int sbitmap_init_node(struct sbitmap *sb, unsigned int depth, int shift,
gfp_t flags, int node, bool round_robin, bool alloc_hint);
/* sbitmap internal helper */
static inline unsigned int __map_depth(const struct sbitmap *sb, int index)
{
if (index == sb->map_nr - 1)
return sb->depth - (index << sb->shift);
return 1U << sb->shift;
}
/**
* sbitmap_free() - Free memory used by a &struct sbitmap.
* @sb: Bitmap to free.
@ -276,7 +294,7 @@ static inline void __sbitmap_for_each_set(struct sbitmap *sb,
while (scanned < sb->depth) {
unsigned long word;
unsigned int depth = min_t(unsigned int,
sb->map[index].depth - nr,
__map_depth(sb, index) - nr,
sb->depth - scanned);
scanned += depth;
@ -442,6 +460,17 @@ static inline void sbitmap_queue_free(struct sbitmap_queue *sbq)
sbitmap_free(&sbq->sb);
}
/**
* sbitmap_queue_recalculate_wake_batch() - Recalculate wake batch
* @sbq: Bitmap queue to recalculate wake batch.
* @users: Number of shares.
*
* Like sbitmap_queue_update_wake_batch(), this will calculate wake batch
* by depth. This interface is for HCTX shared tags or queue shared tags.
*/
void sbitmap_queue_recalculate_wake_batch(struct sbitmap_queue *sbq,
unsigned int users);
/**
* sbitmap_queue_resize() - Resize a &struct sbitmap_queue.
* @sbq: Bitmap queue to resize.
@ -462,6 +491,19 @@ void sbitmap_queue_resize(struct sbitmap_queue *sbq, unsigned int depth);
*/
int __sbitmap_queue_get(struct sbitmap_queue *sbq);
/**
* __sbitmap_queue_get_batch() - Try to allocate a batch of free bits
* @sbq: Bitmap queue to allocate from.
* @nr_tags: number of tags requested
* @offset: offset to add to returned bits
*
* Return: Mask of allocated tags, 0 if none are found. Each tag allocated is
* a bit in the mask returned, and the caller must add @offset to the value to
* get the absolute tag value.
*/
unsigned long __sbitmap_queue_get_batch(struct sbitmap_queue *sbq, int nr_tags,
unsigned int *offset);
/**
* __sbitmap_queue_get_shallow() - Try to allocate a free bit from a &struct
* sbitmap_queue, limiting the depth used from each word, with preemption

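Two of the additions in this header are worth spelling out: __map_depth() derives each word's usable bit count from the total depth instead of the now-deprecated per-word depth field (only the final word can be partial), and __sbitmap_queue_get_batch() returns a mask of bits relative to *offset, which the caller adds back to get absolute tag values. A small userspace model of both; struct sb_model and the numbers are illustrative:

    #include <stdio.h>

    /* Word i holds a full (1 << shift) bits unless it is the last word,
     * which holds whatever is left of the total depth.                  */
    struct sb_model {
        unsigned int depth;     /* total number of bits */
        unsigned int shift;     /* log2(bits per word)  */
        unsigned int map_nr;    /* number of words      */
    };

    static unsigned int map_depth(const struct sb_model *sb, unsigned int index)
    {
        if (index == sb->map_nr - 1)
            return sb->depth - (index << sb->shift);
        return 1U << sb->shift;
    }

    int main(void)
    {
        /* 100 bits over 64-bit words: two words of 64 and 36 bits. */
        struct sb_model sb = { .depth = 100, .shift = 6, .map_nr = 2 };

        printf("word 0: %u bits\n", map_depth(&sb, 0));   /* 64 */
        printf("word 1: %u bits\n", map_depth(&sb, 1));   /* 36 */

        /* Batch-get style result: a relative mask plus an offset. */
        unsigned long mask = 0x7UL;   /* three consecutive free bits */
        unsigned int offset = 64;     /* value returned via *offset  */
        for (unsigned int b = 0; b < 3; b++)
            if (mask & (1UL << b))
                printf("tag %u\n", offset + b);   /* 64, 65, 66 */
        return 0;
    }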

@ -209,6 +209,7 @@ __remove_wait_queue(struct wait_queue_head *wq_head, struct wait_queue_entry *wq
list_del(&wq_entry->entry);
}
int __wake_up_rh(struct wait_queue_head *wq_head, unsigned int mode, int nr, void *key);
void __wake_up(struct wait_queue_head *wq_head, unsigned int mode, int nr, void *key);
void __wake_up_locked_key(struct wait_queue_head *wq_head, unsigned int mode, void *key);
void __wake_up_locked_key_bookmark(struct wait_queue_head *wq_head,
@ -219,6 +220,7 @@ void __wake_up_locked(struct wait_queue_head *wq_head, unsigned int mode, int nr
void __wake_up_sync(struct wait_queue_head *wq_head, unsigned int mode);
void __wake_up_pollfree(struct wait_queue_head *wq_head);
#define wake_up_nr_rh(x, nr) __wake_up_rh(x, TASK_NORMAL, nr, NULL)
#define wake_up(x) __wake_up(x, TASK_NORMAL, 1, NULL)
#define wake_up_nr(x, nr) __wake_up(x, TASK_NORMAL, nr, NULL)
#define wake_up_all(x) __wake_up(x, TASK_NORMAL, 0, NULL)


@ -119,29 +119,46 @@ static int __wake_up_common(struct wait_queue_head *wq_head, unsigned int mode,
return nr_exclusive;
}
static void __wake_up_common_lock(struct wait_queue_head *wq_head, unsigned int mode,
static int __wake_up_common_lock(struct wait_queue_head *wq_head, unsigned int mode,
int nr_exclusive, int wake_flags, void *key)
{
unsigned long flags;
wait_queue_entry_t bookmark;
int remaining = nr_exclusive;
bookmark.flags = 0;
bookmark.private = NULL;
bookmark.func = NULL;
INIT_LIST_HEAD(&bookmark.entry);
spin_lock_irqsave(&wq_head->lock, flags);
nr_exclusive = __wake_up_common(wq_head, mode, nr_exclusive, wake_flags, key, &bookmark);
spin_unlock_irqrestore(&wq_head->lock, flags);
while (bookmark.flags & WQ_FLAG_BOOKMARK) {
do {
spin_lock_irqsave(&wq_head->lock, flags);
nr_exclusive = __wake_up_common(wq_head, mode, nr_exclusive,
remaining = __wake_up_common(wq_head, mode, remaining,
wake_flags, key, &bookmark);
spin_unlock_irqrestore(&wq_head->lock, flags);
}
} while (bookmark.flags & WQ_FLAG_BOOKMARK);
return nr_exclusive - remaining;
}
/**
* __wake_up_rh - wake up threads blocked on a waitqueue.
* @wq_head: the waitqueue
* @mode: which threads
* @nr_exclusive: how many wake-one or wake-many threads to wake up
* @key: is directly passed to the wakeup function
*
* If this function wakes up a task, it executes a full memory barrier
* before accessing the task state. Returns the number of exclusive
* tasks that were awaken.
*/
int __wake_up_rh(struct wait_queue_head *wq_head, unsigned int mode,
int nr_exclusive, void *key)
{
return __wake_up_common_lock(wq_head, mode, nr_exclusive, 0, key);
}
EXPORT_SYMBOL(__wake_up_rh);
/**
* __wake_up - wake up threads blocked on a waitqueue.
* @wq_head: the waitqueue

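__wake_up_common_lock() now carries the remaining wakeup budget across its bookmark passes and returns nr_exclusive minus what is left, and the RHEL-only __wake_up_rh()/wake_up_nr_rh() expose that count to callers; the sbitmap hunks further down use it to spill leftover wakeups onto the next wait queue. A minimal, hypothetical model of that pattern (the queue occupancies are made up):

    #include <stdio.h>

    #define NR_QUEUES 3

    static int waiters[NR_QUEUES] = { 1, 0, 4 };    /* illustrative occupancy */

    /* Wake up to nr waiters on queue q and report how many were woken,
     * which is the extra information wake_up_nr_rh() provides above.    */
    static int wake_queue_nr(int q, int nr)
    {
        int woken = waiters[q] < nr ? waiters[q] : nr;

        waiters[q] -= woken;
        return woken;
    }

    int main(void)
    {
        int nr = 4;    /* wakeups we owe */

        for (int q = 0; q < NR_QUEUES && nr > 0; q++) {
            int woken = wake_queue_nr(q, nr);

            printf("queue %d: woke %d\n", q, woken);
            nr -= woken;    /* carry the shortfall to the next queue */
        }
        return 0;
    }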

@ -91,11 +91,15 @@ static bool watchdog_check_timestamp(void)
__this_cpu_write(last_timestamp, now);
return true;
}
#else
static inline bool watchdog_check_timestamp(void)
static void watchdog_init_timestamp(void)
{
return true;
__this_cpu_write(nmi_rearmed, 0);
__this_cpu_write(last_timestamp, ktime_get_mono_fast_ns());
}
#else
static inline bool watchdog_check_timestamp(void) { return true; }
static inline void watchdog_init_timestamp(void) { }
#endif
static struct perf_event_attr wd_hw_attr = {
@ -195,6 +199,7 @@ void hardlockup_detector_perf_enable(void)
if (!atomic_fetch_inc(&watchdog_cpus))
pr_info("Enabled. Permanently consumes one hw-PMU counter.\n");
watchdog_init_timestamp();
perf_event_enable(this_cpu_read(watchdog_ev));
}


@ -76,12 +76,30 @@ static inline void update_alloc_hint_after_get(struct sbitmap *sb,
/*
* See if we have deferred clears that we can batch move
*/
static inline bool sbitmap_deferred_clear(struct sbitmap_word *map)
static inline bool sbitmap_deferred_clear(struct sbitmap_word *map,
unsigned int depth, unsigned int alloc_hint, bool wrap)
{
unsigned long mask;
unsigned long mask, word_mask;
if (!READ_ONCE(map->cleared))
return false;
guard(raw_spinlock_irqsave)(&map->swap_lock);
if (!map->cleared) {
if (depth == 0)
return false;
word_mask = (~0UL) >> (BITS_PER_LONG - depth);
/*
* The current behavior is to always retry after moving
* ->cleared to word, and we change it to retry in case
* of any free bits. To avoid an infinite loop, we need
* to take wrap & alloc_hint into account, otherwise a
* soft lockup may occur.
*/
if (!wrap && alloc_hint)
word_mask &= ~((1UL << alloc_hint) - 1);
return (READ_ONCE(map->word) & word_mask) != word_mask;
}
/*
* First get a stable cleared mask, setting the old mask to 0.
@ -101,7 +119,7 @@ int sbitmap_init_node(struct sbitmap *sb, unsigned int depth, int shift,
bool alloc_hint)
{
unsigned int bits_per_word;
unsigned int i;
int i;
if (shift < 0)
shift = sbitmap_calculate_shift(depth);
@ -139,10 +157,9 @@ int sbitmap_init_node(struct sbitmap *sb, unsigned int depth, int shift,
*SB_ALLOC_HINT_PTR(sb) = NULL;
}
for (i = 0; i < sb->map_nr; i++) {
sb->map[i].depth = min(depth, bits_per_word);
depth -= sb->map[i].depth;
}
for (i = 0; i < sb->map_nr; i++)
raw_spin_lock_init(&sb->map[i].swap_lock);
return 0;
}
EXPORT_SYMBOL_GPL(sbitmap_init_node);
@ -153,15 +170,10 @@ void sbitmap_resize(struct sbitmap *sb, unsigned int depth)
unsigned int i;
for (i = 0; i < sb->map_nr; i++)
sbitmap_deferred_clear(&sb->map[i]);
sbitmap_deferred_clear(&sb->map[i], 0, 0, 0);
sb->depth = depth;
sb->map_nr = DIV_ROUND_UP(sb->depth, bits_per_word);
for (i = 0; i < sb->map_nr; i++) {
sb->map[i].depth = min(depth, bits_per_word);
depth -= sb->map[i].depth;
}
}
EXPORT_SYMBOL_GPL(sbitmap_resize);
@ -197,28 +209,58 @@ static int __sbitmap_get_word(unsigned long *word, unsigned long depth,
return nr;
}
static int sbitmap_find_bit_in_index(struct sbitmap *sb, int index,
unsigned int alloc_hint)
static int sbitmap_find_bit_in_word(struct sbitmap_word *map,
unsigned int depth,
unsigned int alloc_hint,
bool wrap)
{
struct sbitmap_word *map = &sb->map[index];
int nr;
do {
nr = __sbitmap_get_word(&map->word, map->depth, alloc_hint,
!sb->round_robin);
nr = __sbitmap_get_word(&map->word, depth,
alloc_hint, wrap);
if (nr != -1)
break;
if (!sbitmap_deferred_clear(map))
if (!sbitmap_deferred_clear(map, depth, alloc_hint, wrap))
break;
} while (1);
return nr;
}
static int sbitmap_find_bit(struct sbitmap *sb,
unsigned int depth,
unsigned int index,
unsigned int alloc_hint,
bool wrap)
{
unsigned int i;
int nr = -1;
for (i = 0; i < sb->map_nr; i++) {
nr = sbitmap_find_bit_in_word(&sb->map[index],
min_t(unsigned int,
__map_depth(sb, index),
depth),
alloc_hint, wrap);
if (nr != -1) {
nr += index << sb->shift;
break;
}
/* Jump to next index. */
alloc_hint = 0;
if (++index >= sb->map_nr)
index = 0;
}
return nr;
}
static int __sbitmap_get(struct sbitmap *sb, unsigned int alloc_hint)
{
unsigned int i, index;
int nr = -1;
unsigned int index;
index = SB_NR_TO_INDEX(sb, alloc_hint);
@ -232,20 +274,8 @@ static int __sbitmap_get(struct sbitmap *sb, unsigned int alloc_hint)
else
alloc_hint = 0;
for (i = 0; i < sb->map_nr; i++) {
nr = sbitmap_find_bit_in_index(sb, index, alloc_hint);
if (nr != -1) {
nr += index << sb->shift;
break;
}
/* Jump to next index. */
alloc_hint = 0;
if (++index >= sb->map_nr)
index = 0;
}
return nr;
return sbitmap_find_bit(sb, UINT_MAX, index, alloc_hint,
!sb->round_robin);
}
int sbitmap_get(struct sbitmap *sb)
@ -270,35 +300,12 @@ static int __sbitmap_get_shallow(struct sbitmap *sb,
unsigned int alloc_hint,
unsigned long shallow_depth)
{
unsigned int i, index;
int nr = -1;
unsigned int index;
index = SB_NR_TO_INDEX(sb, alloc_hint);
alloc_hint = SB_NR_TO_BIT(sb, alloc_hint);
for (i = 0; i < sb->map_nr; i++) {
again:
nr = __sbitmap_get_word(&sb->map[index].word,
min(sb->map[index].depth, shallow_depth),
SB_NR_TO_BIT(sb, alloc_hint), true);
if (nr != -1) {
nr += index << sb->shift;
break;
}
if (sbitmap_deferred_clear(&sb->map[index]))
goto again;
/* Jump to next index. */
index++;
alloc_hint = index << sb->shift;
if (index >= sb->map_nr) {
index = 0;
alloc_hint = 0;
}
}
return nr;
return sbitmap_find_bit(sb, shallow_depth, index, alloc_hint, true);
}
int sbitmap_get_shallow(struct sbitmap *sb, unsigned long shallow_depth)
@ -337,11 +344,12 @@ static unsigned int __sbitmap_weight(const struct sbitmap *sb, bool set)
for (i = 0; i < sb->map_nr; i++) {
const struct sbitmap_word *word = &sb->map[i];
unsigned int word_depth = __map_depth(sb, i);
if (set)
weight += bitmap_weight(&word->word, word->depth);
weight += bitmap_weight(&word->word, word_depth);
else
weight += bitmap_weight(&word->cleared, word->depth);
weight += bitmap_weight(&word->cleared, word_depth);
}
return weight;
}
@ -389,7 +397,7 @@ void sbitmap_bitmap_show(struct sbitmap *sb, struct seq_file *m)
for (i = 0; i < sb->map_nr; i++) {
unsigned long word = READ_ONCE(sb->map[i].word);
unsigned long cleared = READ_ONCE(sb->map[i].cleared);
unsigned int word_bits = READ_ONCE(sb->map[i].depth);
unsigned int word_bits = __map_depth(sb, i);
word &= ~cleared;
@ -424,11 +432,6 @@ static unsigned int sbq_calc_wake_batch(struct sbitmap_queue *sbq,
unsigned int shallow_depth;
/*
* For each batch, we wake up one queue. We need to make sure that our
* batch size is small enough that the full depth of the bitmap,
* potentially limited by a shallow depth, is enough to wake up all of
* the queues.
*
* Each full word of the bitmap has bits_per_word bits, and there might
* be a partial word. There are depth / bits_per_word full words and
* depth % bits_per_word bits left over. In bitwise arithmetic:
@ -463,6 +466,8 @@ int sbitmap_queue_init_node(struct sbitmap_queue *sbq, unsigned int depth,
sbq->wake_batch = sbq_calc_wake_batch(sbq, depth);
atomic_set(&sbq->wake_index, 0);
atomic_set(&sbq->ws_active, 0);
atomic_set(&sbq->completion_cnt, 0);
atomic_set(&sbq->wakeup_cnt, 0);
sbq->ws = kzalloc_node(SBQ_WAIT_QUEUES * sizeof(*sbq->ws), flags, node);
if (!sbq->ws) {
@ -470,10 +475,8 @@ int sbitmap_queue_init_node(struct sbitmap_queue *sbq, unsigned int depth,
return -ENOMEM;
}
for (i = 0; i < SBQ_WAIT_QUEUES; i++) {
for (i = 0; i < SBQ_WAIT_QUEUES; i++)
init_waitqueue_head(&sbq->ws[i].wait);
atomic_set(&sbq->ws[i].wait_cnt, sbq->wake_batch);
}
return 0;
}
@ -482,22 +485,26 @@ EXPORT_SYMBOL_GPL(sbitmap_queue_init_node);
static void sbitmap_queue_update_wake_batch(struct sbitmap_queue *sbq,
unsigned int depth)
{
unsigned int wake_batch = sbq_calc_wake_batch(sbq, depth);
int i;
unsigned int wake_batch;
if (sbq->wake_batch != wake_batch) {
wake_batch = sbq_calc_wake_batch(sbq, depth);
if (sbq->wake_batch != wake_batch)
WRITE_ONCE(sbq->wake_batch, wake_batch);
/*
* Pairs with the memory barrier in sbitmap_queue_wake_up()
* to ensure that the batch size is updated before the wait
* counts.
*/
smp_mb();
for (i = 0; i < SBQ_WAIT_QUEUES; i++)
atomic_set(&sbq->ws[i].wait_cnt, 1);
}
}
void sbitmap_queue_recalculate_wake_batch(struct sbitmap_queue *sbq,
unsigned int users)
{
unsigned int wake_batch;
unsigned int depth = (sbq->sb.depth + users - 1) / users;
wake_batch = clamp_val(depth / SBQ_WAIT_QUEUES,
1, SBQ_WAKE_BATCH);
WRITE_ONCE(sbq->wake_batch, wake_batch);
}
EXPORT_SYMBOL_GPL(sbitmap_queue_recalculate_wake_batch);
void sbitmap_queue_resize(struct sbitmap_queue *sbq, unsigned int depth)
{
sbitmap_queue_update_wake_batch(sbq, depth);
@ -511,6 +518,58 @@ int __sbitmap_queue_get(struct sbitmap_queue *sbq)
}
EXPORT_SYMBOL_GPL(__sbitmap_queue_get);
unsigned long __sbitmap_queue_get_batch(struct sbitmap_queue *sbq, int nr_tags,
unsigned int *offset)
{
struct sbitmap *sb = &sbq->sb;
unsigned int hint, depth;
unsigned long index, nr;
int i;
if (unlikely(sb->round_robin))
return 0;
depth = READ_ONCE(sb->depth);
hint = update_alloc_hint_before_get(sb, depth);
index = SB_NR_TO_INDEX(sb, hint);
for (i = 0; i < sb->map_nr; i++) {
struct sbitmap_word *map = &sb->map[index];
unsigned long get_mask;
unsigned int map_depth = __map_depth(sb, index);
unsigned long val;
sbitmap_deferred_clear(map, 0, 0, 0);
val = READ_ONCE(map->word);
if (val == (1UL << (map_depth - 1)) - 1)
goto next;
nr = find_first_zero_bit(&val, map_depth);
if (nr + nr_tags <= map_depth) {
atomic_long_t *ptr = (atomic_long_t *) &map->word;
get_mask = ((1UL << nr_tags) - 1) << nr;
while (!atomic_long_try_cmpxchg(ptr, &val,
get_mask | val))
;
get_mask = (get_mask & ~val) >> nr;
if (get_mask) {
*offset = nr + (index << sb->shift);
update_alloc_hint_after_get(sb, depth, hint,
*offset + nr_tags - 1);
return get_mask;
}
}
next:
/* Jump to next index. */
if (++index >= sb->map_nr)
index = 0;
}
return 0;
}
int __sbitmap_queue_get_shallow(struct sbitmap_queue *sbq,
unsigned int shallow_depth)
{
@ -528,106 +587,55 @@ void sbitmap_queue_min_shallow_depth(struct sbitmap_queue *sbq,
}
EXPORT_SYMBOL_GPL(sbitmap_queue_min_shallow_depth);
static struct sbq_wait_state *sbq_wake_ptr(struct sbitmap_queue *sbq)
static void __sbitmap_queue_wake_up(struct sbitmap_queue *sbq, int nr)
{
int i, wake_index;
int i, wake_index, woken;
if (!atomic_read(&sbq->ws_active))
return NULL;
return;
wake_index = atomic_read(&sbq->wake_index);
for (i = 0; i < SBQ_WAIT_QUEUES; i++) {
struct sbq_wait_state *ws = &sbq->ws[wake_index];
if (waitqueue_active(&ws->wait) && atomic_read(&ws->wait_cnt)) {
if (wake_index != atomic_read(&sbq->wake_index))
atomic_set(&sbq->wake_index, wake_index);
return ws;
}
/*
* Advance the index before checking the current queue.
* It improves fairness, by ensuring the queue doesn't
* need to be fully emptied before trying to wake up
* from the next one.
*/
wake_index = sbq_index_inc(wake_index);
if (waitqueue_active(&ws->wait)) {
woken = wake_up_nr_rh(&ws->wait, nr);
if (woken == nr)
break;
nr -= woken;
}
}
return NULL;
}
static bool __sbq_wake_up(struct sbitmap_queue *sbq, int *nr)
{
struct sbq_wait_state *ws;
unsigned int wake_batch;
int wait_cnt, cur, sub;
bool ret;
if (*nr <= 0)
return false;
ws = sbq_wake_ptr(sbq);
if (!ws)
return false;
cur = atomic_read(&ws->wait_cnt);
do {
/*
* For concurrent callers of this, callers should call this
* function again to wakeup a new batch on a different 'ws'.
*/
if (cur == 0)
return true;
sub = min(*nr, cur);
wait_cnt = cur - sub;
} while (!atomic_try_cmpxchg(&ws->wait_cnt, &cur, wait_cnt));
/*
* If we decremented queue without waiters, retry to avoid lost
* wakeups.
*/
if (wait_cnt > 0)
return !waitqueue_active(&ws->wait);
*nr -= sub;
/*
* When wait_cnt == 0, we have to be particularly careful as we are
* responsible to reset wait_cnt regardless whether we've actually
* woken up anybody. But in case we didn't wakeup anybody, we still
* need to retry.
*/
ret = !waitqueue_active(&ws->wait);
wake_batch = READ_ONCE(sbq->wake_batch);
/*
* Wake up first in case that concurrent callers decrease wait_cnt
* while waitqueue is empty.
*/
wake_up_nr(&ws->wait, wake_batch);
/*
* Pairs with the memory barrier in sbitmap_queue_resize() to
* ensure that we see the batch size update before the wait
* count is reset.
*
* Also pairs with the implicit barrier between decrementing wait_cnt
* and checking for waitqueue_active() to make sure waitqueue_active()
* sees result of the wakeup if atomic_dec_return() has seen the result
* of atomic_set().
*/
smp_mb__before_atomic();
/*
* Increase wake_index before updating wait_cnt, otherwise concurrent
* callers can see valid wait_cnt in old waitqueue, which can cause
* invalid wakeup on the old waitqueue.
*/
sbq_index_atomic_inc(&sbq->wake_index);
atomic_set(&ws->wait_cnt, wake_batch);
return ret || *nr;
if (wake_index != atomic_read(&sbq->wake_index))
atomic_set(&sbq->wake_index, wake_index);
}
void sbitmap_queue_wake_up(struct sbitmap_queue *sbq, int nr)
{
while (__sbq_wake_up(sbq, &nr))
;
unsigned int wake_batch = READ_ONCE(sbq->wake_batch);
unsigned int wakeups;
if (!atomic_read(&sbq->ws_active))
return;
atomic_add(nr, &sbq->completion_cnt);
wakeups = atomic_read(&sbq->wakeup_cnt);
do {
if (atomic_read(&sbq->completion_cnt) - wakeups < wake_batch)
return;
} while (!atomic_try_cmpxchg(&sbq->wakeup_cnt,
&wakeups, wakeups + wake_batch));
__sbitmap_queue_wake_up(sbq, wake_batch);
}
EXPORT_SYMBOL_GPL(sbitmap_queue_wake_up);
@ -748,9 +756,7 @@ void sbitmap_queue_show(struct sbitmap_queue *sbq, struct seq_file *m)
seq_puts(m, "ws={\n");
for (i = 0; i < SBQ_WAIT_QUEUES; i++) {
struct sbq_wait_state *ws = &sbq->ws[i];
seq_printf(m, "\t{.wait_cnt=%d, .wait=%s},\n",
atomic_read(&ws->wait_cnt),
seq_printf(m, "\t{.wait=%s},\n",
waitqueue_active(&ws->wait) ? "active" : "inactive");
}
seq_puts(m, "}\n");

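Two pieces of arithmetic drive the reworked wakeup path above: sbitmap_queue_recalculate_wake_batch() sizes the batch from each user's share of the depth, and sbitmap_queue_wake_up() only issues a wakeup once completion_cnt has run a full wake_batch ahead of wakeup_cnt (in the kernel both counters are atomics and atomic_try_cmpxchg() makes sure each batch is claimed by exactly one caller). A runnable sketch of both, assuming SBQ_WAIT_QUEUES and SBQ_WAKE_BATCH are 8 as in mainline:

    #include <stdio.h>

    #define SBQ_WAIT_QUEUES 8    /* assumed to match the kernel's value */
    #define SBQ_WAKE_BATCH  8    /* assumed to match the kernel's value */

    static unsigned int clamp_uint(unsigned int v, unsigned int lo, unsigned int hi)
    {
        return v < lo ? lo : (v > hi ? hi : v);
    }

    /* sbitmap_queue_recalculate_wake_batch(): per-user share of the depth,
     * spread over the wait queues, clamped to [1, SBQ_WAKE_BATCH].        */
    static unsigned int recalc_wake_batch(unsigned int depth, unsigned int users)
    {
        unsigned int share = (depth + users - 1) / users;

        return clamp_uint(share / SBQ_WAIT_QUEUES, 1, SBQ_WAKE_BATCH);
    }

    int main(void)
    {
        /* 256 tags shared by 1, 4 and 32 active queues -> 8, 8, 1:
         * more sharers means smaller batches, so waiters are woken sooner. */
        printf("%u %u %u\n", recalc_wake_batch(256, 1),
               recalc_wake_batch(256, 4), recalc_wake_batch(256, 32));

        /* sbitmap_queue_wake_up() style batching, with plain counters. */
        unsigned int completion_cnt = 0, wakeup_cnt = 0, wake_batch = 4;
        for (int i = 0; i < 10; i++) {
            completion_cnt += 1;                 /* one tag freed   */
            if (completion_cnt - wakeup_cnt >= wake_batch) {
                wakeup_cnt += wake_batch;        /* claim the batch */
                printf("wake %u waiters\n", wake_batch);
            }
        }
        return 0;
    }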

@ -1138,32 +1138,13 @@ static void wake_up_page_bit(struct page *page, int bit_nr)
wait_queue_head_t *q = page_waitqueue(page);
struct wait_page_key key;
unsigned long flags;
wait_queue_entry_t bookmark;
key.page = page;
key.bit_nr = bit_nr;
key.page_match = 0;
bookmark.flags = 0;
bookmark.private = NULL;
bookmark.func = NULL;
INIT_LIST_HEAD(&bookmark.entry);
spin_lock_irqsave(&q->lock, flags);
__wake_up_locked_key_bookmark(q, TASK_NORMAL, &key, &bookmark);
while (bookmark.flags & WQ_FLAG_BOOKMARK) {
/*
* Take a breather from holding the lock,
* allow pages that finish wake up asynchronously
* to acquire the lock and remove themselves
* from wait queue
*/
spin_unlock_irqrestore(&q->lock, flags);
cpu_relax();
spin_lock_irqsave(&q->lock, flags);
__wake_up_locked_key_bookmark(q, TASK_NORMAL, &key, &bookmark);
}
__wake_up_locked_key(q, TASK_NORMAL, &key);
/*
* It is possible for other pages to have collided on the waitqueue


@ -337,17 +337,22 @@ out:
return q;
}
static struct Qdisc *qdisc_leaf(struct Qdisc *p, u32 classid)
static struct Qdisc *qdisc_leaf(struct Qdisc *p, u32 classid,
struct netlink_ext_ack *extack)
{
unsigned long cl;
const struct Qdisc_class_ops *cops = p->ops->cl_ops;
if (cops == NULL)
return NULL;
if (cops == NULL) {
NL_SET_ERR_MSG(extack, "Parent qdisc is not classful");
return ERR_PTR(-EOPNOTSUPP);
}
cl = cops->find(p, classid);
if (cl == 0)
return NULL;
if (cl == 0) {
NL_SET_ERR_MSG(extack, "Specified class not found");
return ERR_PTR(-ENOENT);
}
return cops->leaf(p, cl);
}
@ -1491,7 +1496,7 @@ static int tc_get_qdisc(struct sk_buff *skb, struct nlmsghdr *n,
NL_SET_ERR_MSG(extack, "Failed to find qdisc with specified classid");
return -ENOENT;
}
q = qdisc_leaf(p, clid);
q = qdisc_leaf(p, clid, extack);
} else if (dev_ingress_queue(dev)) {
q = dev_ingress_queue(dev)->qdisc_sleeping;
}
@ -1502,6 +1507,8 @@ static int tc_get_qdisc(struct sk_buff *skb, struct nlmsghdr *n,
NL_SET_ERR_MSG(extack, "Cannot find specified qdisc on specified device");
return -ENOENT;
}
if (IS_ERR(q))
return PTR_ERR(q);
if (tcm->tcm_handle && q->handle != tcm->tcm_handle) {
NL_SET_ERR_MSG(extack, "Invalid handle");
@ -1595,7 +1602,9 @@ replay:
NL_SET_ERR_MSG(extack, "Failed to find specified qdisc");
return -ENOENT;
}
q = qdisc_leaf(p, clid);
q = qdisc_leaf(p, clid, extack);
if (IS_ERR(q))
return PTR_ERR(q);
} else if (dev_ingress_queue_create(dev)) {
q = dev_ingress_queue(dev)->qdisc_sleeping;
}

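qdisc_leaf() now tells its callers why a lookup failed, returning -EOPNOTSUPP for a non-classful parent and -ENOENT for a missing class (each with a matching extack message), while a NULL return still means the class simply has no leaf qdisc; both call sites gain IS_ERR() checks. A minimal userspace sketch of the error-pointer idiom, assuming the usual kernel-style encoding of small negative errnos in the pointer value:

    #include <errno.h>
    #include <stdio.h>

    #define MAX_ERRNO 4095

    /* Encode a small negative errno in an otherwise invalid pointer value. */
    static inline void *ERR_PTR(long error)     { return (void *)error; }
    static inline long PTR_ERR(const void *ptr) { return (long)ptr; }
    static inline int IS_ERR(const void *ptr)
    {
        return (unsigned long)ptr >= (unsigned long)-MAX_ERRNO;
    }

    struct qdisc;    /* opaque for the purpose of this sketch */

    static struct qdisc *lookup_leaf(int classful, int found)
    {
        if (!classful)
            return ERR_PTR(-EOPNOTSUPP);    /* "Parent qdisc is not classful" */
        if (!found)
            return ERR_PTR(-ENOENT);        /* "Specified class not found"    */
        return NULL;                        /* class exists but has no leaf   */
    }

    int main(void)
    {
        struct qdisc *q = lookup_leaf(1, 0);

        if (IS_ERR(q))
            printf("error %ld\n", PTR_ERR(q));    /* error -2 (ENOENT) */
        return 0;
    }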

@ -412,7 +412,7 @@ static int qfq_change_class(struct Qdisc *sch, u32 classid, u32 parentid,
bool existing = false;
struct nlattr *tb[TCA_QFQ_MAX + 1];
struct qfq_aggregate *new_agg = NULL;
u32 weight, lmax, inv_w;
u32 weight, lmax, inv_w, old_weight, old_lmax;
int err;
int delta_w;
@ -446,12 +446,16 @@ static int qfq_change_class(struct Qdisc *sch, u32 classid, u32 parentid,
inv_w = ONE_FP / weight;
weight = ONE_FP / inv_w;
if (cl != NULL &&
lmax == cl->agg->lmax &&
weight == cl->agg->class_weight)
return 0; /* nothing to change */
if (cl != NULL) {
sch_tree_lock(sch);
old_weight = cl->agg->class_weight;
old_lmax = cl->agg->lmax;
sch_tree_unlock(sch);
if (lmax == old_lmax && weight == old_weight)
return 0; /* nothing to change */
}
delta_w = weight - (cl ? cl->agg->class_weight : 0);
delta_w = weight - (cl ? old_weight : 0);
if (q->wsum + delta_w > QFQ_MAX_WSUM) {
pr_notice("qfq: total weight out of range (%d + %u)\n",
@ -533,9 +537,6 @@ destroy_class:
static void qfq_destroy_class(struct Qdisc *sch, struct qfq_class *cl)
{
struct qfq_sched *q = qdisc_priv(sch);
qfq_rm_from_agg(q, cl);
gen_kill_estimator(&cl->rate_est);
qdisc_put(cl->qdisc);
kfree(cl);
@ -554,6 +555,7 @@ static int qfq_delete_class(struct Qdisc *sch, unsigned long arg,
qdisc_purge_queue(cl->qdisc);
qdisc_class_hash_remove(&q->clhash, &cl->common);
qfq_rm_from_agg(q, cl);
sch_tree_unlock(sch);
@ -624,6 +626,7 @@ static int qfq_dump_class(struct Qdisc *sch, unsigned long arg,
{
struct qfq_class *cl = (struct qfq_class *)arg;
struct nlattr *nest;
u32 class_weight, lmax;
tcm->tcm_parent = TC_H_ROOT;
tcm->tcm_handle = cl->common.classid;
@ -632,8 +635,13 @@ static int qfq_dump_class(struct Qdisc *sch, unsigned long arg,
nest = nla_nest_start_noflag(skb, TCA_OPTIONS);
if (nest == NULL)
goto nla_put_failure;
if (nla_put_u32(skb, TCA_QFQ_WEIGHT, cl->agg->class_weight) ||
nla_put_u32(skb, TCA_QFQ_LMAX, cl->agg->lmax))
sch_tree_lock(sch);
class_weight = cl->agg->class_weight;
lmax = cl->agg->lmax;
sch_tree_unlock(sch);
if (nla_put_u32(skb, TCA_QFQ_WEIGHT, class_weight) ||
nla_put_u32(skb, TCA_QFQ_LMAX, lmax))
goto nla_put_failure;
return nla_nest_end(skb, nest);
@ -650,8 +658,10 @@ static int qfq_dump_class_stats(struct Qdisc *sch, unsigned long arg,
memset(&xstats, 0, sizeof(xstats));
sch_tree_lock(sch);
xstats.weight = cl->agg->class_weight;
xstats.lmax = cl->agg->lmax;
sch_tree_unlock(sch);
if (gnet_stats_copy_basic(d, NULL, &cl->bstats, true) < 0 ||
gnet_stats_copy_rate_est(d, &cl->rate_est) < 0 ||
@ -1490,6 +1500,7 @@ static void qfq_destroy_qdisc(struct Qdisc *sch)
for (i = 0; i < q->clhash.hashsize; i++) {
hlist_for_each_entry_safe(cl, next, &q->clhash.hash[i],
common.hnode) {
qfq_rm_from_agg(q, cl);
qfq_destroy_class(sch, cl);
}
}

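The sch_qfq hunks stop reading cl->agg->class_weight and cl->agg->lmax outside the qdisc tree lock: qfq_change_class(), qfq_dump_class() and qfq_dump_class_stats() snapshot both values under sch_tree_lock() before comparing or reporting them, which guards against a concurrent class change replacing the class's aggregate; qfq_rm_from_agg() likewise moves out of qfq_destroy_class() and into its callers. A minimal, hypothetical pthread model of the snapshot-under-lock pattern:

    #include <pthread.h>
    #include <stdio.h>

    /* Hypothetical stand-ins for the aggregate fields read above. */
    struct agg {
        unsigned int class_weight;
        unsigned int lmax;
    };

    static struct agg current_agg = { .class_weight = 1, .lmax = 2048 };
    static pthread_mutex_t tree_lock = PTHREAD_MUTEX_INITIALIZER;  /* sch_tree_lock() stand-in */

    int main(void)
    {
        unsigned int old_weight, old_lmax;

        /* Read both fields in one critical section so they describe the
         * same aggregate; unlocked reads could mix values from two.      */
        pthread_mutex_lock(&tree_lock);
        old_weight = current_agg.class_weight;
        old_lmax = current_agg.lmax;
        pthread_mutex_unlock(&tree_lock);

        printf("weight=%u lmax=%u\n", old_weight, old_lmax);
        return 0;
    }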

@ -700,8 +700,10 @@ static void tipc_topsrv_stop(struct net *net)
for (id = 0; srv->idr_in_use; id++) {
con = idr_find(&srv->conn_idr, id);
if (con) {
conn_get(con);
spin_unlock_bh(&srv->idr_lock);
tipc_conn_close(con);
conn_put(con);
spin_lock_bh(&srv->idr_lock);
}
}
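
The tipc hunk pins the connection with conn_get() before dropping srv->idr_lock and drops the pin with conn_put() after tipc_conn_close(), so the connection cannot be freed by another thread while the lock is released. A minimal, hypothetical refcount model of that get-before-unlock pattern:

    #include <stdatomic.h>
    #include <stdio.h>
    #include <stdlib.h>

    /* Hypothetical object with kref-style lifetime. */
    struct conn {
        atomic_int ref;
        int id;
    };

    static struct conn *conn_alloc(int id)
    {
        struct conn *c = malloc(sizeof(*c));

        if (!c)
            abort();
        atomic_init(&c->ref, 1);    /* reference owned by the lookup table */
        c->id = id;
        return c;
    }

    static void conn_get(struct conn *c)
    {
        atomic_fetch_add(&c->ref, 1);
    }

    static void conn_put(struct conn *c)
    {
        if (atomic_fetch_sub(&c->ref, 1) == 1) {
            printf("freeing conn %d\n", c->id);
            free(c);
        }
    }

    int main(void)
    {
        struct conn *c = conn_alloc(42);

        conn_get(c);     /* under the lock: pin the object, then unlock   */
        conn_put(c);     /* the table's own reference may go away now     */
        conn_put(c);     /* our pin was the last reference: object freed  */
        return 0;
    }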