diff --git a/COPYING-5.14.0-611.20.1.el9 b/COPYING-5.14.0-611.24.1.el9 similarity index 100% rename from COPYING-5.14.0-611.20.1.el9 rename to COPYING-5.14.0-611.24.1.el9 diff --git a/Documentation/arm64/silicon-errata.rst b/Documentation/arm64/silicon-errata.rst index adba677e35..f559af87f0 100644 --- a/Documentation/arm64/silicon-errata.rst +++ b/Documentation/arm64/silicon-errata.rst @@ -126,17 +126,17 @@ stable kernels. +----------------+-----------------+-----------------+-----------------------------+ | ARM | Cortex-A76 | #1490853 | N/A | +----------------+-----------------+-----------------+-----------------------------+ +| ARM | Cortex-A76 | #3324349 | ARM64_ERRATUM_3194386 | ++----------------+-----------------+-----------------+-----------------------------+ | ARM | Cortex-A77 | #1491015 | N/A | +----------------+-----------------+-----------------+-----------------------------+ | ARM | Cortex-A77 | #1508412 | ARM64_ERRATUM_1508412 | +----------------+-----------------+-----------------+-----------------------------+ -| ARM | Cortex-A510 | #2051678 | ARM64_ERRATUM_2051678 | +| ARM | Cortex-A77 | #3324348 | ARM64_ERRATUM_3194386 | +----------------+-----------------+-----------------+-----------------------------+ -| ARM | Cortex-A510 | #2077057 | ARM64_ERRATUM_2077057 | +| ARM | Cortex-A78 | #3324344 | ARM64_ERRATUM_3194386 | +----------------+-----------------+-----------------+-----------------------------+ -| ARM | Cortex-A510 | #2441009 | ARM64_ERRATUM_2441009 | -+----------------+-----------------+-----------------+-----------------------------+ -| ARM | Cortex-A510 | #2658417 | ARM64_ERRATUM_2658417 | +| ARM | Cortex-A78C | #3324346,3324347| ARM64_ERRATUM_3194386 | +----------------+-----------------+-----------------+-----------------------------+ | ARM | Cortex-A710 | #2119858 | ARM64_ERRATUM_2119858 | +----------------+-----------------+-----------------+-----------------------------+ @@ -148,10 +148,18 @@ stable kernels. +----------------+-----------------+-----------------+-----------------------------+ | ARM | Cortex-A715 | #2645198 | ARM64_ERRATUM_2645198 | +----------------+-----------------+-----------------+-----------------------------+ +| ARM | Cortex-A715 | #3456084 | ARM64_ERRATUM_3194386 | ++----------------+-----------------+-----------------+-----------------------------+ | ARM | Cortex-A720 | #3456091 | ARM64_ERRATUM_3194386 | +----------------+-----------------+-----------------+-----------------------------+ +| ARM | Cortex-A725 | #3456106 | ARM64_ERRATUM_3194386 | ++----------------+-----------------+-----------------+-----------------------------+ | ARM | Cortex-X1 | #1502854 | N/A | +----------------+-----------------+-----------------+-----------------------------+ +| ARM | Cortex-X1 | #3324344 | ARM64_ERRATUM_3194386 | ++----------------+-----------------+-----------------+-----------------------------+ +| ARM | Cortex-X1C | #3324346 | ARM64_ERRATUM_3194386 | ++----------------+-----------------+-----------------+-----------------------------+ | ARM | Cortex-X2 | #2119858 | ARM64_ERRATUM_2119858 | +----------------+-----------------+-----------------+-----------------------------+ | ARM | Cortex-X2 | #2224489 | ARM64_ERRATUM_2224489 | @@ -172,6 +180,8 @@ stable kernels. 
+----------------+-----------------+-----------------+-----------------------------+ | ARM | Neoverse-N1 | #1542419 | ARM64_ERRATUM_1542419 | +----------------+-----------------+-----------------+-----------------------------+ +| ARM | Neoverse-N1 | #3324349 | ARM64_ERRATUM_3194386 | ++----------------+-----------------+-----------------+-----------------------------+ | ARM | Neoverse-N2 | #2139208 | ARM64_ERRATUM_2139208 | +----------------+-----------------+-----------------+-----------------------------+ | ARM | Neoverse-N2 | #2067961 | ARM64_ERRATUM_2067961 | @@ -180,12 +190,16 @@ stable kernels. +----------------+-----------------+-----------------+-----------------------------+ | ARM | Neoverse-N2 | #3324339 | ARM64_ERRATUM_3194386 | +----------------+-----------------+-----------------+-----------------------------+ +| ARM | Neoverse-N3 | #3456111 | ARM64_ERRATUM_3194386 | ++----------------+-----------------+-----------------+-----------------------------+ +| ARM | Neoverse-V1 | #1619801 | N/A | ++----------------+-----------------+-----------------+-----------------------------+ +| ARM | Neoverse-V1 | #3324341 | ARM64_ERRATUM_3194386 | ++----------------+-----------------+-----------------+-----------------------------+ | ARM | Neoverse-V2 | #3324336 | ARM64_ERRATUM_3194386 | +----------------+-----------------+-----------------+-----------------------------+ | ARM | Neoverse-V3 | #3312417 | ARM64_ERRATUM_3194386 | +----------------+-----------------+-----------------+-----------------------------+ -| ARM | Neoverse-V1 | #1619801 | N/A | -+----------------+-----------------+-----------------+-----------------------------+ | ARM | MMU-500 | #841119,826419 | ARM_SMMU_MMU_500_CPRE_ERRATA| | | | #562869,1047329 | | +----------------+-----------------+-----------------+-----------------------------+ @@ -282,3 +296,5 @@ stable kernels. +----------------+-----------------+-----------------+-----------------------------+ | Microsoft | Azure Cobalt 100| #2253138 | ARM64_ERRATUM_2253138 | +----------------+-----------------+-----------------+-----------------------------+ +| Microsoft | Azure Cobalt 100| #3324339 | ARM64_ERRATUM_3194386 | ++----------------+-----------------+-----------------+-----------------------------+ diff --git a/Makefile.rhelver b/Makefile.rhelver index dc11f32df6..6f3c7197b1 100644 --- a/Makefile.rhelver +++ b/Makefile.rhelver @@ -12,7 +12,7 @@ RHEL_MINOR = 7 # # Use this spot to avoid future merge conflicts. # Do not trim this comment. -RHEL_RELEASE = 611.20.1 +RHEL_RELEASE = 611.24.1 # # ZSTREAM diff --git a/arch/arm64/Kconfig b/arch/arm64/Kconfig index 0cec5d8a98..e90888d48e 100644 --- a/arch/arm64/Kconfig +++ b/arch/arm64/Kconfig @@ -1058,18 +1058,30 @@ config ARM64_ERRATUM_3117295 If unsure, say Y. 
config ARM64_ERRATUM_3194386 - bool "Cortex-{A720,X4,X925}/Neoverse-V3: workaround for MSR SSBS not self-synchronizing" + bool "Cortex-*/Neoverse-*: workaround for MSR SSBS not self-synchronizing" default y help This option adds the workaround for the following errata: + * ARM Cortex-A76 erratum 3324349 + * ARM Cortex-A77 erratum 3324348 + * ARM Cortex-A78 erratum 3324344 + * ARM Cortex-A78C erratum 3324346 + * ARM Cortex-A78C erratum 3324347 * ARM Cortex-A710 erratum 3324338 + * ARM Cortex-A715 erratum 3456084 * ARM Cortex-A720 erratum 3456091 + * ARM Cortex-A725 erratum 3456106 + * ARM Cortex-X1 erratum 3324344 + * ARM Cortex-X1C erratum 3324346 * ARM Cortex-X2 erratum 3324338 * ARM Cortex-X3 erratum 3324335 * ARM Cortex-X4 erratum 3194386 * ARM Cortex-X925 erratum 3324334 + * ARM Neoverse-N1 erratum 3324349 * ARM Neoverse N2 erratum 3324339 + * ARM Neoverse-N3 erratum 3456111 + * ARM Neoverse-V1 erratum 3324341 * ARM Neoverse V2 erratum 3324336 * ARM Neoverse-V3 erratum 3312417 @@ -1077,11 +1089,11 @@ config ARM64_ERRATUM_3194386 subsequent speculative instructions, which may permit unexpected speculative store bypassing. - Work around this problem by placing a speculation barrier after - kernel changes to SSBS. The presence of the SSBS special-purpose - register is hidden from hwcaps and EL0 reads of ID_AA64PFR1_EL1, such - that userspace will use the PR_SPEC_STORE_BYPASS prctl to change - SSBS. + Work around this problem by placing a Speculation Barrier (SB) or + Instruction Synchronization Barrier (ISB) after kernel changes to + SSBS. The presence of the SSBS special-purpose register is hidden + from hwcaps and EL0 reads of ID_AA64PFR1_EL1, such that userspace + will use the PR_SPEC_STORE_BYPASS prctl to change SSBS. If unsure, say Y. diff --git a/arch/arm64/include/asm/cputype.h b/arch/arm64/include/asm/cputype.h index c24e8c04e4..751b78cd31 100644 --- a/arch/arm64/include/asm/cputype.h +++ b/arch/arm64/include/asm/cputype.h @@ -75,6 +75,7 @@ #define ARM_CPU_PART_CORTEX_A76 0xD0B #define ARM_CPU_PART_NEOVERSE_N1 0xD0C #define ARM_CPU_PART_CORTEX_A77 0xD0D +#define ARM_CPU_PART_CORTEX_A76AE 0xD0E #define ARM_CPU_PART_NEOVERSE_V1 0xD40 #define ARM_CPU_PART_CORTEX_A78 0xD41 #define ARM_CPU_PART_CORTEX_A78AE 0xD42 @@ -87,12 +88,16 @@ #define ARM_CPU_PART_CORTEX_X2 0xD48 #define ARM_CPU_PART_NEOVERSE_N2 0xD49 #define ARM_CPU_PART_CORTEX_A78C 0xD4B +#define ARM_CPU_PART_CORTEX_X1C 0xD4C #define ARM_CPU_PART_CORTEX_X3 0xD4E #define ARM_CPU_PART_NEOVERSE_V2 0xD4F #define ARM_CPU_PART_CORTEX_A720 0xD81 #define ARM_CPU_PART_CORTEX_X4 0xD82 #define ARM_CPU_PART_NEOVERSE_V3 0xD84 #define ARM_CPU_PART_CORTEX_X925 0xD85 +#define ARM_CPU_PART_CORTEX_A725 0xD87 +#define ARM_CPU_PART_CORTEX_A720AE 0xD89 +#define ARM_CPU_PART_NEOVERSE_N3 0xD8E #define APM_CPU_PART_XGENE 0x000 #define APM_CPU_VAR_POTENZA 0x00 @@ -127,6 +132,7 @@ #define FUJITSU_CPU_PART_A64FX 0x001 #define HISI_CPU_PART_TSV110 0xD01 +#define HISI_CPU_PART_HIP09 0xD02 #define APPLE_CPU_PART_M1_ICESTORM 0x022 #define APPLE_CPU_PART_M1_FIRESTORM 0x023 @@ -156,6 +162,7 @@ #define MIDR_CORTEX_A76 MIDR_CPU_MODEL(ARM_CPU_IMP_ARM, ARM_CPU_PART_CORTEX_A76) #define MIDR_NEOVERSE_N1 MIDR_CPU_MODEL(ARM_CPU_IMP_ARM, ARM_CPU_PART_NEOVERSE_N1) #define MIDR_CORTEX_A77 MIDR_CPU_MODEL(ARM_CPU_IMP_ARM, ARM_CPU_PART_CORTEX_A77) +#define MIDR_CORTEX_A76AE MIDR_CPU_MODEL(ARM_CPU_IMP_ARM, ARM_CPU_PART_CORTEX_A76AE) #define MIDR_NEOVERSE_V1 MIDR_CPU_MODEL(ARM_CPU_IMP_ARM, ARM_CPU_PART_NEOVERSE_V1) #define MIDR_CORTEX_A78 MIDR_CPU_MODEL(ARM_CPU_IMP_ARM,
ARM_CPU_PART_CORTEX_A78) #define MIDR_CORTEX_A78AE MIDR_CPU_MODEL(ARM_CPU_IMP_ARM, ARM_CPU_PART_CORTEX_A78AE) @@ -168,12 +175,16 @@ #define MIDR_CORTEX_X2 MIDR_CPU_MODEL(ARM_CPU_IMP_ARM, ARM_CPU_PART_CORTEX_X2) #define MIDR_NEOVERSE_N2 MIDR_CPU_MODEL(ARM_CPU_IMP_ARM, ARM_CPU_PART_NEOVERSE_N2) #define MIDR_CORTEX_A78C MIDR_CPU_MODEL(ARM_CPU_IMP_ARM, ARM_CPU_PART_CORTEX_A78C) +#define MIDR_CORTEX_X1C MIDR_CPU_MODEL(ARM_CPU_IMP_ARM, ARM_CPU_PART_CORTEX_X1C) #define MIDR_CORTEX_X3 MIDR_CPU_MODEL(ARM_CPU_IMP_ARM, ARM_CPU_PART_CORTEX_X3) #define MIDR_NEOVERSE_V2 MIDR_CPU_MODEL(ARM_CPU_IMP_ARM, ARM_CPU_PART_NEOVERSE_V2) #define MIDR_CORTEX_A720 MIDR_CPU_MODEL(ARM_CPU_IMP_ARM, ARM_CPU_PART_CORTEX_A720) #define MIDR_CORTEX_X4 MIDR_CPU_MODEL(ARM_CPU_IMP_ARM, ARM_CPU_PART_CORTEX_X4) #define MIDR_NEOVERSE_V3 MIDR_CPU_MODEL(ARM_CPU_IMP_ARM, ARM_CPU_PART_NEOVERSE_V3) #define MIDR_CORTEX_X925 MIDR_CPU_MODEL(ARM_CPU_IMP_ARM, ARM_CPU_PART_CORTEX_X925) +#define MIDR_CORTEX_A725 MIDR_CPU_MODEL(ARM_CPU_IMP_ARM, ARM_CPU_PART_CORTEX_A725) +#define MIDR_CORTEX_A720AE MIDR_CPU_MODEL(ARM_CPU_IMP_ARM, ARM_CPU_PART_CORTEX_A720AE) +#define MIDR_NEOVERSE_N3 MIDR_CPU_MODEL(ARM_CPU_IMP_ARM, ARM_CPU_PART_NEOVERSE_N3) #define MIDR_THUNDERX MIDR_CPU_MODEL(ARM_CPU_IMP_CAVIUM, CAVIUM_CPU_PART_THUNDERX) #define MIDR_THUNDERX_81XX MIDR_CPU_MODEL(ARM_CPU_IMP_CAVIUM, CAVIUM_CPU_PART_THUNDERX_81XX) #define MIDR_THUNDERX_83XX MIDR_CPU_MODEL(ARM_CPU_IMP_CAVIUM, CAVIUM_CPU_PART_THUNDERX_83XX) @@ -198,6 +209,7 @@ #define MIDR_NVIDIA_CARMEL MIDR_CPU_MODEL(ARM_CPU_IMP_NVIDIA, NVIDIA_CPU_PART_CARMEL) #define MIDR_FUJITSU_A64FX MIDR_CPU_MODEL(ARM_CPU_IMP_FUJITSU, FUJITSU_CPU_PART_A64FX) #define MIDR_HISI_TSV110 MIDR_CPU_MODEL(ARM_CPU_IMP_HISI, HISI_CPU_PART_TSV110) +#define MIDR_HISI_HIP09 MIDR_CPU_MODEL(ARM_CPU_IMP_HISI, HISI_CPU_PART_HIP09) #define MIDR_APPLE_M1_ICESTORM MIDR_CPU_MODEL(ARM_CPU_IMP_APPLE, APPLE_CPU_PART_M1_ICESTORM) #define MIDR_APPLE_M1_FIRESTORM MIDR_CPU_MODEL(ARM_CPU_IMP_APPLE, APPLE_CPU_PART_M1_FIRESTORM) #define MIDR_APPLE_M1_ICESTORM_PRO MIDR_CPU_MODEL(ARM_CPU_IMP_APPLE, APPLE_CPU_PART_M1_ICESTORM_PRO) diff --git a/arch/arm64/kernel/cpu_errata.c b/arch/arm64/kernel/cpu_errata.c index ed5f411551..e08910c404 100644 --- a/arch/arm64/kernel/cpu_errata.c +++ b/arch/arm64/kernel/cpu_errata.c @@ -434,15 +434,28 @@ static const struct midr_range erratum_spec_unpriv_load_list[] = { #ifdef CONFIG_ARM64_ERRATUM_3194386 static const struct midr_range erratum_spec_ssbs_list[] = { + MIDR_ALL_VERSIONS(MIDR_CORTEX_A76), + MIDR_ALL_VERSIONS(MIDR_CORTEX_A77), + MIDR_ALL_VERSIONS(MIDR_CORTEX_A78), + MIDR_ALL_VERSIONS(MIDR_CORTEX_A78C), MIDR_ALL_VERSIONS(MIDR_CORTEX_A710), + MIDR_ALL_VERSIONS(MIDR_CORTEX_A715), MIDR_ALL_VERSIONS(MIDR_CORTEX_A720), + MIDR_ALL_VERSIONS(MIDR_CORTEX_A720AE), + MIDR_ALL_VERSIONS(MIDR_CORTEX_A725), + MIDR_ALL_VERSIONS(MIDR_CORTEX_X1), + MIDR_ALL_VERSIONS(MIDR_CORTEX_X1C), MIDR_ALL_VERSIONS(MIDR_CORTEX_X2), MIDR_ALL_VERSIONS(MIDR_CORTEX_X3), MIDR_ALL_VERSIONS(MIDR_CORTEX_X4), MIDR_ALL_VERSIONS(MIDR_CORTEX_X925), + MIDR_ALL_VERSIONS(MIDR_MICROSOFT_AZURE_COBALT_100), + MIDR_ALL_VERSIONS(MIDR_NEOVERSE_N1), MIDR_ALL_VERSIONS(MIDR_NEOVERSE_N2), - MIDR_ALL_VERSIONS(MIDR_NEOVERSE_V3), + MIDR_ALL_VERSIONS(MIDR_NEOVERSE_N3), + MIDR_ALL_VERSIONS(MIDR_NEOVERSE_V1), MIDR_ALL_VERSIONS(MIDR_NEOVERSE_V2), + MIDR_ALL_VERSIONS(MIDR_NEOVERSE_V3), {} }; #endif diff --git a/arch/arm64/kernel/proton-pack.c b/arch/arm64/kernel/proton-pack.c index e1735fb3bd..1ca131876f 100644 --- a/arch/arm64/kernel/proton-pack.c +++ 
b/arch/arm64/kernel/proton-pack.c @@ -854,6 +854,9 @@ static bool is_spectre_bhb_safe(int scope) MIDR_ALL_VERSIONS(MIDR_CORTEX_A510), MIDR_ALL_VERSIONS(MIDR_CORTEX_A520), MIDR_ALL_VERSIONS(MIDR_BRAHMA_B53), + MIDR_ALL_VERSIONS(MIDR_QCOM_KRYO_2XX_SILVER), + MIDR_ALL_VERSIONS(MIDR_QCOM_KRYO_3XX_SILVER), + MIDR_ALL_VERSIONS(MIDR_QCOM_KRYO_4XX_SILVER), {}, }; static bool all_safe = true; @@ -873,6 +876,17 @@ static u8 spectre_bhb_loop_affected(void) { u8 k = 0; + static const struct midr_range spectre_bhb_k132_list[] = { + MIDR_ALL_VERSIONS(MIDR_CORTEX_X3), + MIDR_ALL_VERSIONS(MIDR_NEOVERSE_V2), + {}, + }; + static const struct midr_range spectre_bhb_k38_list[] = { + MIDR_ALL_VERSIONS(MIDR_CORTEX_A715), + MIDR_ALL_VERSIONS(MIDR_CORTEX_A720), + MIDR_ALL_VERSIONS(MIDR_CORTEX_A720AE), + {}, + }; static const struct midr_range spectre_bhb_k32_list[] = { MIDR_ALL_VERSIONS(MIDR_CORTEX_A78), MIDR_ALL_VERSIONS(MIDR_CORTEX_A78AE), @@ -887,9 +901,11 @@ static u8 spectre_bhb_loop_affected(void) }; static const struct midr_range spectre_bhb_k24_list[] = { MIDR_ALL_VERSIONS(MIDR_CORTEX_A76), + MIDR_ALL_VERSIONS(MIDR_CORTEX_A76AE), MIDR_ALL_VERSIONS(MIDR_CORTEX_A77), MIDR_ALL_VERSIONS(MIDR_NEOVERSE_N1), MIDR_ALL_VERSIONS(MIDR_QCOM_KRYO_4XX_GOLD), + MIDR_ALL_VERSIONS(MIDR_HISI_HIP09), {}, }; static const struct midr_range spectre_bhb_k11_list[] = { @@ -902,7 +918,11 @@ static u8 spectre_bhb_loop_affected(void) {}, }; - if (is_midr_in_range_list(read_cpuid_id(), spectre_bhb_k32_list)) + if (is_midr_in_range_list(read_cpuid_id(), spectre_bhb_k132_list)) + k = 132; + else if (is_midr_in_range_list(read_cpuid_id(), spectre_bhb_k38_list)) + k = 38; + else if (is_midr_in_range_list(read_cpuid_id(), spectre_bhb_k32_list)) k = 32; else if (is_midr_in_range_list(read_cpuid_id(), spectre_bhb_k24_list)) k = 24; diff --git a/arch/powerpc/include/asm/kfence.h b/arch/powerpc/include/asm/kfence.h index 76df93fd1f..21fe7350ba 100644 --- a/arch/powerpc/include/asm/kfence.h +++ b/arch/powerpc/include/asm/kfence.h @@ -10,6 +10,7 @@ #include <linux/mm.h> #include <asm/pgtable.h> +#include <asm/mmu.h> #ifdef CONFIG_PPC64_ELF_ABI_V1 #define ARCH_FUNC_PREFIX "." @@ -25,6 +26,12 @@ static inline void disable_kfence(void) static inline bool arch_kfence_init_pool(void) { +#ifdef CONFIG_PPC64 + if (!radix_enabled()) { + pr_info("Not supported with Hash MMU; disabled at runtime.\n"); + return false; + } +#endif return !kfence_disabled; } #endif diff --git a/arch/powerpc/mm/book3s64/hash_utils.c b/arch/powerpc/mm/book3s64/hash_utils.c index e50a08b347..f6fd3fedee 100644 --- a/arch/powerpc/mm/book3s64/hash_utils.c +++ b/arch/powerpc/mm/book3s64/hash_utils.c @@ -424,7 +424,7 @@ repeat: break; cond_resched(); - if (debug_pagealloc_enabled_or_kfence() && + if (debug_pagealloc_enabled() && (paddr >> PAGE_SHIFT) < linear_map_hash_count) linear_map_hash_slots[paddr >> PAGE_SHIFT] = ret | 0x80; } @@ -807,7 +807,7 @@ static void __init htab_init_page_sizes(void) bool aligned = true; init_hpte_page_sizes(); - if (!debug_pagealloc_enabled_or_kfence()) { + if (!debug_pagealloc_enabled()) { /* * Pick a size for the linear mapping.
Currently, we only * support 16M, 1M and 4K which is the default @@ -1127,7 +1127,7 @@ static void __init htab_initialize(void) prot = pgprot_val(PAGE_KERNEL); - if (debug_pagealloc_enabled_or_kfence()) { + if (debug_pagealloc_enabled()) { linear_map_hash_count = memblock_end_of_DRAM() >> PAGE_SHIFT; linear_map_hash_slots = memblock_alloc_try_nid( linear_map_hash_count, 1, MEMBLOCK_LOW_LIMIT, @@ -2110,7 +2110,7 @@ void hpt_do_stress(unsigned long ea, unsigned long hpte_group) } } -#if defined(CONFIG_DEBUG_PAGEALLOC) || defined(CONFIG_KFENCE) +#ifdef CONFIG_DEBUG_PAGEALLOC static DEFINE_RAW_SPINLOCK(linear_map_hash_lock); static void kernel_map_linear_page(unsigned long vaddr, unsigned long lmi) @@ -2183,7 +2183,13 @@ void hash__kernel_map_pages(struct page *page, int numpages, int enable) } local_irq_restore(flags); } -#endif /* CONFIG_DEBUG_PAGEALLOC || CONFIG_KFENCE */ +#else /* CONFIG_DEBUG_PAGEALLOC */ +void hash__kernel_map_pages(struct page *page, int numpages, + int enable) +{ + +} +#endif /* CONFIG_DEBUG_PAGEALLOC */ void hash__setup_initial_memory_limit(phys_addr_t first_memblock_base, phys_addr_t first_memblock_size) diff --git a/drivers/gpu/drm/vmwgfx/vmwgfx_execbuf.c b/drivers/gpu/drm/vmwgfx/vmwgfx_execbuf.c index ea741bc4ac..60d644d805 100644 --- a/drivers/gpu/drm/vmwgfx/vmwgfx_execbuf.c +++ b/drivers/gpu/drm/vmwgfx/vmwgfx_execbuf.c @@ -3679,6 +3679,11 @@ static int vmw_cmd_check(struct vmw_private *dev_priv, cmd_id = header->id; + if (header->size > SVGA_CMD_MAX_DATASIZE) { + VMW_DEBUG_USER("SVGA3D command: %d is too big.\n", + cmd_id + SVGA_3D_CMD_BASE); + return -E2BIG; + } *size = header->size + sizeof(SVGA3dCmdHeader); cmd_id -= SVGA_3D_CMD_BASE; diff --git a/drivers/gpu/drm/xe/xe_guc_exec_queue_types.h b/drivers/gpu/drm/xe/xe_guc_exec_queue_types.h index 4c39f01e4f..a3f421e2ad 100644 --- a/drivers/gpu/drm/xe/xe_guc_exec_queue_types.h +++ b/drivers/gpu/drm/xe/xe_guc_exec_queue_types.h @@ -20,6 +20,8 @@ struct xe_exec_queue; struct xe_guc_exec_queue { /** @q: Backpointer to parent xe_exec_queue */ struct xe_exec_queue *q; + /** @rcu: For safe freeing of exported dma fences */ + struct rcu_head rcu; /** @sched: GPU scheduler for this xe_exec_queue */ struct xe_gpu_scheduler sched; /** @entity: Scheduler entity for this xe_exec_queue */ diff --git a/drivers/gpu/drm/xe/xe_guc_submit.c b/drivers/gpu/drm/xe/xe_guc_submit.c index 71ddd26ec3..00ff6197e0 100644 --- a/drivers/gpu/drm/xe/xe_guc_submit.c +++ b/drivers/gpu/drm/xe/xe_guc_submit.c @@ -1282,7 +1282,11 @@ static void __guc_exec_queue_fini_async(struct work_struct *w) xe_sched_entity_fini(&ge->entity); xe_sched_fini(&ge->sched); - kfree(ge); + /* + * RCU free due to sched being exported via DRM scheduler fences + * (timeline name). + */ + kfree_rcu(ge, rcu); xe_exec_queue_fini(q); xe_pm_runtime_put(guc_to_xe(guc)); } @@ -1465,6 +1469,7 @@ static int guc_exec_queue_init(struct xe_exec_queue *q) q->guc = ge; ge->q = q; + init_rcu_head(&ge->rcu); init_waitqueue_head(&ge->suspend_wait); for (i = 0; i < MAX_STATIC_MSG_TYPE; ++i) diff --git a/drivers/gpu/drm/xe/xe_hw_fence.c b/drivers/gpu/drm/xe/xe_hw_fence.c index 0b4f12be36..6e2221b606 100644 --- a/drivers/gpu/drm/xe/xe_hw_fence.c +++ b/drivers/gpu/drm/xe/xe_hw_fence.c @@ -100,6 +100,9 @@ void xe_hw_fence_irq_finish(struct xe_hw_fence_irq *irq) spin_unlock_irqrestore(&irq->lock, flags); dma_fence_end_signalling(tmp); } + + /* Safe release of the irq->lock used in dma_fence_init.
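+ * Fences initialised against this lock are freed by the dma_fence core only after an RCU grace period, and a reader may still dereference fence->lock (this irq->lock) until that period ends; the synchronize_rcu() below is what makes it safe for the caller to free the structure containing the lock.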
*/ + synchronize_rcu(); } void xe_hw_fence_irq_run(struct xe_hw_fence_irq *irq) diff --git a/drivers/tty/n_tty.c b/drivers/tty/n_tty.c index f252d0b5a4..5e9ca4376d 100644 --- a/drivers/tty/n_tty.c +++ b/drivers/tty/n_tty.c @@ -1619,15 +1619,25 @@ static void __receive_buf(struct tty_struct *tty, const u8 *cp, const u8 *fp, else if (ldata->raw || (L_EXTPROC(tty) && !preops)) n_tty_receive_buf_raw(tty, cp, fp, count); else if (tty->closing && !L_EXTPROC(tty)) { - if (la_count > 0) + if (la_count > 0) { n_tty_receive_buf_closing(tty, cp, fp, la_count, true); - if (count > la_count) - n_tty_receive_buf_closing(tty, cp, fp, count - la_count, false); + cp += la_count; + if (fp) + fp += la_count; + count -= la_count; + } + if (count > 0) + n_tty_receive_buf_closing(tty, cp, fp, count, false); } else { - if (la_count > 0) + if (la_count > 0) { n_tty_receive_buf_standard(tty, cp, fp, la_count, true); - if (count > la_count) - n_tty_receive_buf_standard(tty, cp, fp, count - la_count, false); + cp += la_count; + if (fp) + fp += la_count; + count -= la_count; + } + if (count > 0) + n_tty_receive_buf_standard(tty, cp, fp, count, false); flush_echoes(tty); if (tty->ops->flush_chars) diff --git a/drivers/usb/dwc3/ep0.c b/drivers/usb/dwc3/ep0.c index 666ac432f5..c8a1516b26 100644 --- a/drivers/usb/dwc3/ep0.c +++ b/drivers/usb/dwc3/ep0.c @@ -94,6 +94,7 @@ static int __dwc3_gadget_ep0_queue(struct dwc3_ep *dep, req->request.actual = 0; req->request.status = -EINPROGRESS; req->epnum = dep->number; + req->status = DWC3_REQUEST_STATUS_QUEUED; list_add_tail(&req->list, &dep->pending_list); diff --git a/drivers/usb/dwc3/gadget.c b/drivers/usb/dwc3/gadget.c index 3213612889..54b9412b6e 100644 --- a/drivers/usb/dwc3/gadget.c +++ b/drivers/usb/dwc3/gadget.c @@ -228,6 +228,13 @@ void dwc3_gadget_giveback(struct dwc3_ep *dep, struct dwc3_request *req, { struct dwc3 *dwc = dep->dwc; + /* + * The request might have been processed and completed while the + * spinlock was released. Skip processing if already completed. + */ + if (req->status == DWC3_REQUEST_STATUS_COMPLETED) + return; + dwc3_gadget_del_and_unmap_request(dep, req, status); req->status = DWC3_REQUEST_STATUS_COMPLETED; diff --git a/fs/smb/client/cached_dir.c b/fs/smb/client/cached_dir.c index c17ed40508..c8d0d942e0 100644 --- a/fs/smb/client/cached_dir.c +++ b/fs/smb/client/cached_dir.c @@ -36,9 +36,8 @@ static struct cached_fid *find_or_create_cached_dir(struct cached_fids *cfids, * fully cached or it may be in the process of * being deleted due to a lease break. */ - if (!cfid->time || !cfid->has_lease) { + if (!is_valid_cached_dir(cfid)) return NULL; - } kref_get(&cfid->refcount); return cfid; } @@ -193,7 +192,7 @@ replay_again: * Otherwise, it is either a new entry or laundromat worker removed it * from @cfids->entries. Caller will put last reference if the latter. */ - if (cfid->has_lease && cfid->time) { + if (is_valid_cached_dir(cfid)) { cfid->last_access_time = jiffies; spin_unlock(&cfids->cfid_list_lock); *ret_cfid = cfid; @@ -232,7 +231,7 @@ replay_again: list_for_each_entry(parent_cfid, &cfids->entries, entry) { if (parent_cfid->dentry == dentry->d_parent) { cifs_dbg(FYI, "found a parent cached file handle\n"); - if (parent_cfid->has_lease && parent_cfid->time) { + if (is_valid_cached_dir(parent_cfid)) { lease_flags |= SMB2_LEASE_FLAG_PARENT_LEASE_KEY_SET_LE; memcpy(pfid->parent_lease_key, @@ -388,11 +387,11 @@ out: * lease. Release one here, and the second below. 
*/ cfid->has_lease = false; - kref_put(&cfid->refcount, smb2_close_cached_fid); + close_cached_dir(cfid); } spin_unlock(&cfids->cfid_list_lock); - kref_put(&cfid->refcount, smb2_close_cached_fid); + close_cached_dir(cfid); } else { *ret_cfid = cfid; atomic_inc(&tcon->num_remote_opens); @@ -416,12 +415,18 @@ int open_cached_dir_by_dentry(struct cifs_tcon *tcon, if (cfids == NULL) return -EOPNOTSUPP; + if (!dentry) + return -ENOENT; + spin_lock(&cfids->cfid_list_lock); list_for_each_entry(cfid, &cfids->entries, entry) { - if (dentry && cfid->dentry == dentry) { + if (cfid->dentry == dentry) { + if (!is_valid_cached_dir(cfid)) + break; cifs_dbg(FYI, "found a cached file handle by dentry\n"); kref_get(&cfid->refcount); *ret_cfid = cfid; + cfid->last_access_time = jiffies; spin_unlock(&cfids->cfid_list_lock); return 0; } @@ -432,12 +437,14 @@ int open_cached_dir_by_dentry(struct cifs_tcon *tcon, static void smb2_close_cached_fid(struct kref *ref) +__releases(&cfid->cfids->cfid_list_lock) { struct cached_fid *cfid = container_of(ref, struct cached_fid, refcount); int rc; - spin_lock(&cfid->cfids->cfid_list_lock); + lockdep_assert_held(&cfid->cfids->cfid_list_lock); + if (cfid->on_list) { list_del(&cfid->entry); cfid->on_list = false; @@ -472,7 +479,7 @@ void drop_cached_dir_by_name(const unsigned int xid, struct cifs_tcon *tcon, spin_lock(&cfid->cfids->cfid_list_lock); if (cfid->has_lease) { cfid->has_lease = false; - kref_put(&cfid->refcount, smb2_close_cached_fid); + close_cached_dir(cfid); } spin_unlock(&cfid->cfids->cfid_list_lock); close_cached_dir(cfid); @@ -481,7 +488,7 @@ void drop_cached_dir_by_name(const unsigned int xid, struct cifs_tcon *tcon, void close_cached_dir(struct cached_fid *cfid) { - kref_put(&cfid->refcount, smb2_close_cached_fid); + kref_put_lock(&cfid->refcount, smb2_close_cached_fid, &cfid->cfids->cfid_list_lock); } /* @@ -521,10 +528,9 @@ void close_all_cached_dirs(struct cifs_sb_info *cifs_sb) spin_unlock(&cifs_sb->tlink_tree_lock); goto done; } - spin_lock(&cfid->fid_lock); + tmp_list->dentry = cfid->dentry; cfid->dentry = NULL; - spin_unlock(&cfid->fid_lock); list_add_tail(&tmp_list->entry, &entry); } @@ -557,8 +563,8 @@ void invalidate_all_cached_dirs(struct cifs_tcon *tcon) /* * Mark all the cfids as closed, and move them to the cfids->dying list. - * They'll be cleaned up later by cfids_invalidation_worker. Take - * a reference to each cfid during this process. + * They'll be cleaned up by laundromat. Take a reference to each cfid + * during this process. 
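+ * The reference taken here keeps each cfid stable while it sits on the dying list; the laundromat drops it again once the handle has been closed.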
*/ spin_lock(&cfids->cfid_list_lock); list_for_each_entry_safe(cfid, q, &cfids->entries, entry) { @@ -575,12 +581,11 @@ void invalidate_all_cached_dirs(struct cifs_tcon *tcon) } else kref_get(&cfid->refcount); } - /* - * Queue dropping of the dentries once locks have been dropped - */ - if (!list_empty(&cfids->dying)) - queue_work(cfid_put_wq, &cfids->invalidation_work); spin_unlock(&cfids->cfid_list_lock); + + /* run laundromat unconditionally now as there might have been previously queued work */ + mod_delayed_work(cfid_put_wq, &cfids->laundromat_work, 0); + flush_delayed_work(&cfids->laundromat_work); } static void @@ -592,7 +597,7 @@ cached_dir_offload_close(struct work_struct *work) WARN_ON(cfid->on_list); - kref_put(&cfid->refcount, smb2_close_cached_fid); + close_cached_dir(cfid); cifs_put_tcon(tcon, netfs_trace_tcon_ref_put_cached_close); } @@ -607,14 +612,9 @@ static void cached_dir_put_work(struct work_struct *work) { struct cached_fid *cfid = container_of(work, struct cached_fid, put_work); - struct dentry *dentry; - - spin_lock(&cfid->fid_lock); - dentry = cfid->dentry; + dput(cfid->dentry); cfid->dentry = NULL; - spin_unlock(&cfid->fid_lock); - dput(dentry); queue_work(serverclose_wq, &cfid->close_work); } @@ -672,7 +672,6 @@ static struct cached_fid *init_cached_dir(const char *path) INIT_LIST_HEAD(&cfid->entry); INIT_LIST_HEAD(&cfid->dirents.entries); mutex_init(&cfid->dirents.de_mutex); - spin_lock_init(&cfid->fid_lock); kref_init(&cfid->refcount); return cfid; } @@ -696,40 +695,38 @@ static void free_cached_dir(struct cached_fid *cfid) kfree(dirent); } + /* adjust tcon-level counters and reset per-dir accounting */ + if (cfid->cfids) { + if (cfid->dirents.entries_count) + atomic_long_sub((long)cfid->dirents.entries_count, + &cfid->cfids->total_dirents_entries); + if (cfid->dirents.bytes_used) { + atomic64_sub((long long)cfid->dirents.bytes_used, + &cfid->cfids->total_dirents_bytes); + atomic64_sub((long long)cfid->dirents.bytes_used, + &cifs_dircache_bytes_used); + } + } + cfid->dirents.entries_count = 0; + cfid->dirents.bytes_used = 0; + kfree(cfid->path); cfid->path = NULL; kfree(cfid); } -static void cfids_invalidation_worker(struct work_struct *work) -{ - struct cached_fids *cfids = container_of(work, struct cached_fids, - invalidation_work); - struct cached_fid *cfid, *q; - LIST_HEAD(entry); - - spin_lock(&cfids->cfid_list_lock); - /* move cfids->dying to the local list */ - list_cut_before(&entry, &cfids->dying, &cfids->dying); - spin_unlock(&cfids->cfid_list_lock); - - list_for_each_entry_safe(cfid, q, &entry, entry) { - list_del(&cfid->entry); - /* Drop the ref-count acquired in invalidate_all_cached_dirs */ - kref_put(&cfid->refcount, smb2_close_cached_fid); - } -} - static void cfids_laundromat_worker(struct work_struct *work) { struct cached_fids *cfids; struct cached_fid *cfid, *q; - struct dentry *dentry; LIST_HEAD(entry); cfids = container_of(work, struct cached_fids, laundromat_work.work); spin_lock(&cfids->cfid_list_lock); + /* move cfids->dying to the local list */ + list_cut_before(&entry, &cfids->dying, &cfids->dying); + list_for_each_entry_safe(cfid, q, &cfids->entries, entry) { if (cfid->last_access_time && time_after(jiffies, cfid->last_access_time + HZ * dir_cache_timeout)) { @@ -751,12 +748,9 @@ static void cfids_laundromat_worker(struct work_struct *work) list_for_each_entry_safe(cfid, q, &entry, entry) { list_del(&cfid->entry); - spin_lock(&cfid->fid_lock); - dentry = cfid->dentry; + dput(cfid->dentry); cfid->dentry = NULL; - 
spin_unlock(&cfid->fid_lock); - dput(dentry); if (cfid->is_open) { spin_lock(&cifs_tcp_ses_lock); ++cfid->tcon->tc_count; @@ -769,7 +763,7 @@ * Drop the ref-count from above, either the lease-ref (if there * was one) or the extra one acquired. */ - kref_put(&cfid->refcount, smb2_close_cached_fid); + close_cached_dir(cfid); } queue_delayed_work(cfid_put_wq, &cfids->laundromat_work, dir_cache_timeout * HZ); @@ -786,11 +780,13 @@ struct cached_fids *init_cached_dirs(void) INIT_LIST_HEAD(&cfids->entries); INIT_LIST_HEAD(&cfids->dying); - INIT_WORK(&cfids->invalidation_work, cfids_invalidation_worker); INIT_DELAYED_WORK(&cfids->laundromat_work, cfids_laundromat_worker); queue_delayed_work(cfid_put_wq, &cfids->laundromat_work, dir_cache_timeout * HZ); + atomic_long_set(&cfids->total_dirents_entries, 0); + atomic64_set(&cfids->total_dirents_bytes, 0); + return cfids; } @@ -807,7 +803,6 @@ void free_cached_dirs(struct cached_fids *cfids) return; cancel_delayed_work_sync(&cfids->laundromat_work); - cancel_work_sync(&cfids->invalidation_work); spin_lock(&cfids->cfid_list_lock); list_for_each_entry_safe(cfid, q, &cfids->entries, entry) { diff --git a/fs/smb/client/cached_dir.h b/fs/smb/client/cached_dir.h index 46b5a2fdf1..1e383db7c3 100644 --- a/fs/smb/client/cached_dir.h +++ b/fs/smb/client/cached_dir.h @@ -27,6 +27,9 @@ struct cached_dirents { struct mutex de_mutex; loff_t pos; /* Expected ctx->pos */ struct list_head entries; + /* accounting for cached entries in this directory */ + unsigned long entries_count; + unsigned long bytes_used; }; struct cached_fid { @@ -41,7 +44,6 @@ unsigned long last_access_time; /* jiffies of when last accessed */ struct kref refcount; struct cifs_fid fid; - spinlock_t fid_lock; struct cifs_tcon *tcon; struct dentry *dentry; struct work_struct put_work; @@ -60,10 +62,21 @@ struct cached_fids { int num_entries; struct list_head entries; struct list_head dying; - struct work_struct invalidation_work; struct delayed_work laundromat_work; + /* aggregate accounting for all cached dirents under this tcon */ + atomic_long_t total_dirents_entries; + atomic64_t total_dirents_bytes; }; +/* Module-wide directory cache accounting (defined in cifsfs.c) */ +extern atomic64_t cifs_dircache_bytes_used; /* bytes across all mounts */ + +static inline bool +is_valid_cached_dir(struct cached_fid *cfid) +{ + return cfid->time && cfid->has_lease; +} + extern struct cached_fids *init_cached_dirs(void); extern void free_cached_dirs(struct cached_fids *cfids); extern int open_cached_dir(unsigned int xid, struct cifs_tcon *tcon, diff --git a/fs/smb/client/cifs_debug.c b/fs/smb/client/cifs_debug.c index b3fe6d0b5b..86b29be6c6 100644 --- a/fs/smb/client/cifs_debug.c +++ b/fs/smb/client/cifs_debug.c @@ -239,14 +239,18 @@ static int cifs_debug_files_proc_show(struct seq_file *m, void *v) struct cifs_ses *ses; struct cifs_tcon *tcon; struct cifsFileInfo *cfile; + struct inode *inode; + struct cifsInodeInfo *cinode; + char lease[4]; + int n; seq_puts(m, "# Version:1\n"); seq_puts(m, "# Format:\n"); seq_puts(m, "# <tree id> <ses id> <persistent fid> <flags> <count> <pid> <uid>"); #ifdef CONFIG_CIFS_DEBUG2 - seq_printf(m, " <filename> <mid>\n"); + seq_puts(m, " <filename> <lease> <mid>\n"); #else - seq_printf(m, " <filename>\n"); + seq_puts(m, " <filename> <lease>\n"); #endif /* CIFS_DEBUG2 */ spin_lock(&cifs_tcp_ses_lock); list_for_each_entry(server, &cifs_tcp_ses_list, tcp_ses_list) { @@ -266,11 +270,30 @@ static int cifs_debug_files_proc_show(struct seq_file *m, void *v) cfile->pid, from_kuid(&init_user_ns, cfile->uid), cfile->dentry); + + /*
Append lease/oplock caching state as RHW letters */ + inode = d_inode(cfile->dentry); + n = 0; + if (inode) { + cinode = CIFS_I(inode); + if (CIFS_CACHE_READ(cinode)) + lease[n++] = 'R'; + if (CIFS_CACHE_HANDLE(cinode)) + lease[n++] = 'H'; + if (CIFS_CACHE_WRITE(cinode)) + lease[n++] = 'W'; + } + lease[n] = '\0'; + seq_puts(m, " "); + if (n) + seq_printf(m, "%s", lease); + else + seq_puts(m, "NONE"); + #ifdef CONFIG_CIFS_DEBUG2 - seq_printf(m, " %llu\n", cfile->fid.mid); -#else + seq_printf(m, " %llu", cfile->fid.mid); +#endif /* CONFIG_CIFS_DEBUG2 */ seq_printf(m, "\n"); -#endif /* CIFS_DEBUG2 */ } spin_unlock(&tcon->open_file_lock); } @@ -304,8 +327,13 @@ static int cifs_debug_dirs_proc_show(struct seq_file *m, void *v) list_for_each(tmp1, &ses->tcon_list) { tcon = list_entry(tmp1, struct cifs_tcon, tcon_list); cfids = tcon->cfids; + if (!cfids) + continue; spin_lock(&cfids->cfid_list_lock); /* check lock ordering */ - seq_printf(m, "Num entries: %d\n", cfids->num_entries); + seq_printf(m, "Num entries: %d, cached_dirents: %lu entries, %llu bytes\n", + cfids->num_entries, + (unsigned long)atomic_long_read(&cfids->total_dirents_entries), + (unsigned long long)atomic64_read(&cfids->total_dirents_bytes)); list_for_each_entry(cfid, &cfids->entries, entry) { seq_printf(m, "0x%x 0x%llx 0x%llx %s", tcon->tid, @@ -316,11 +344,12 @@ static int cifs_debug_dirs_proc_show(struct seq_file *m, void *v) seq_printf(m, "\tvalid file info"); if (cfid->dirents.is_valid) seq_printf(m, ", valid dirents"); + if (!list_empty(&cfid->dirents.entries)) + seq_printf(m, ", dirents: %lu entries, %lu bytes", + cfid->dirents.entries_count, cfid->dirents.bytes_used); seq_printf(m, "\n"); } spin_unlock(&cfids->cfid_list_lock); - - } } } @@ -347,6 +376,22 @@ static __always_inline const char *compression_alg_str(__le16 alg) } } +static __always_inline const char *cipher_alg_str(__le16 cipher) +{ + switch (cipher) { + case SMB2_ENCRYPTION_AES128_CCM: + return "AES128-CCM"; + case SMB2_ENCRYPTION_AES128_GCM: + return "AES128-GCM"; + case SMB2_ENCRYPTION_AES256_CCM: + return "AES256-CCM"; + case SMB2_ENCRYPTION_AES256_GCM: + return "AES256-GCM"; + default: + return "UNKNOWN"; + } +} + static int cifs_debug_data_proc_show(struct seq_file *m, void *v) { struct mid_q_entry *mid_entry; @@ -536,6 +581,11 @@ skip_rdma: else seq_puts(m, "disabled (not supported by this server)"); + /* Show negotiated encryption cipher, even if not required */ + seq_puts(m, "\nEncryption: "); + if (server->cipher_type) + seq_printf(m, "Negotiated cipher (%s)", cipher_alg_str(server->cipher_type)); + seq_printf(m, "\n\n\tSessions: "); i = 0; list_for_each_entry(ses, &server->smb_ses_list, smb_ses_list) { @@ -573,12 +623,8 @@ skip_rdma: /* dump session id helpful for use with network trace */ seq_printf(m, " SessionId: 0x%llx", ses->Suid); - if (ses->session_flags & SMB2_SESSION_FLAG_ENCRYPT_DATA) { + if (ses->session_flags & SMB2_SESSION_FLAG_ENCRYPT_DATA) seq_puts(m, " encrypted"); - /* can help in debugging to show encryption type */ - if (server->cipher_type == SMB2_ENCRYPTION_AES256_GCM) - seq_puts(m, "(gcm256)"); - } if (ses->sign) seq_puts(m, " signed"); diff --git a/fs/smb/client/cifsfs.c b/fs/smb/client/cifsfs.c index aa3422eebd..b68aa2e449 100644 --- a/fs/smb/client/cifsfs.c +++ b/fs/smb/client/cifsfs.c @@ -120,6 +120,46 @@ unsigned int dir_cache_timeout = 30; module_param(dir_cache_timeout, uint, 0644); MODULE_PARM_DESC(dir_cache_timeout, "Number of seconds to cache directory contents for which we have a lease. 
Default: 30 " "Range: 1 to 65000 seconds, 0 to disable caching dir contents"); +/* Module-wide total cached dirents (in bytes) across all tcons */ +atomic64_t cifs_dircache_bytes_used = ATOMIC64_INIT(0); + +/* + * Write-only module parameter to drop all cached directory entries across + * all CIFS mounts. Echo a non-zero value to trigger. + */ +static void cifs_drop_all_dir_caches(void) +{ + struct TCP_Server_Info *server; + struct cifs_ses *ses; + struct cifs_tcon *tcon; + + spin_lock(&cifs_tcp_ses_lock); + list_for_each_entry(server, &cifs_tcp_ses_list, tcp_ses_list) { + list_for_each_entry(ses, &server->smb_ses_list, smb_ses_list) { + if (cifs_ses_exiting(ses)) + continue; + list_for_each_entry(tcon, &ses->tcon_list, tcon_list) + invalidate_all_cached_dirs(tcon); + } + } + spin_unlock(&cifs_tcp_ses_lock); +} + +static int cifs_param_set_drop_dir_cache(const char *val, const struct kernel_param *kp) +{ + bool bv; + int rc = kstrtobool(val, &bv); + + if (rc) + return rc; + if (bv) + cifs_drop_all_dir_caches(); + return 0; +} + +module_param_call(drop_dir_cache, cifs_param_set_drop_dir_cache, NULL, NULL, 0200); +MODULE_PARM_DESC(drop_dir_cache, "Write 1 to drop all cached directory entries across all CIFS mounts"); + #ifdef CONFIG_CIFS_STATS2 unsigned int slow_rsp_threshold = 1; module_param(slow_rsp_threshold, uint, 0644); diff --git a/fs/smb/client/dir.c b/fs/smb/client/dir.c index bc890c9d36..6831ad2fa2 100644 --- a/fs/smb/client/dir.c +++ b/fs/smb/client/dir.c @@ -322,13 +322,14 @@ retry_open: list_for_each_entry(parent_cfid, &tcon->cfids->entries, entry) { if (parent_cfid->dentry == direntry->d_parent) { cifs_dbg(FYI, "found a parent cached file handle\n"); - if (parent_cfid->has_lease && parent_cfid->time) { + if (is_valid_cached_dir(parent_cfid)) { lease_flags |= SMB2_LEASE_FLAG_PARENT_LEASE_KEY_SET_LE; memcpy(fid->parent_lease_key, parent_cfid->fid.lease_key, SMB2_LEASE_KEY_SIZE); parent_cfid->dirents.is_valid = false; + parent_cfid->dirents.is_failed = true; } break; } @@ -683,6 +684,7 @@ cifs_lookup(struct inode *parent_dir_inode, struct dentry *direntry, const char *full_path; void *page; int retry_count = 0; + struct cached_fid *cfid = NULL; xid = get_xid(); @@ -722,6 +724,28 @@ cifs_lookup(struct inode *parent_dir_inode, struct dentry *direntry, cifs_dbg(FYI, "non-NULL inode in lookup\n"); } else { cifs_dbg(FYI, "NULL inode in lookup\n"); + + /* + * We can only rely on negative dentries having the same + * spelling as the cached dirent if case insensitivity is + * forced on mount. + * + * XXX: if servers correctly announce Case Sensitivity Search + * on GetInfo of FileFSAttributeInformation, then we can take + * correct action even if case insensitive is not forced on + * mount. 
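+ * (Servers advertise this via the FILE_CASE_SENSITIVE_SEARCH bit of the FileSystemAttributes field in that info class.)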
+ */ + if (pTcon->nocase && !open_cached_dir_by_dentry(pTcon, direntry->d_parent, &cfid)) { + /* + * dentry is negative and parent is fully cached: + * we can assume file does not exist + */ + if (cfid->dirents.is_valid) { + close_cached_dir(cfid); + goto out; + } + close_cached_dir(cfid); + } } cifs_dbg(FYI, "Full path: %s inode = 0x%p\n", full_path, d_inode(direntry)); @@ -755,6 +779,8 @@ again: } newInode = ERR_PTR(rc); } + +out: free_dentry_path(page); cifs_put_tlink(tlink); free_xid(xid); @@ -764,7 +790,8 @@ again: static int cifs_d_revalidate(struct dentry *direntry, unsigned int flags) { - struct inode *inode; + struct inode *inode = NULL; + struct cached_fid *cfid; int rc; if (flags & LOOKUP_RCU) @@ -811,6 +838,21 @@ cifs_d_revalidate(struct dentry *direntry, unsigned int flags) return 1; } + } else { + struct cifs_sb_info *cifs_sb = CIFS_SB(d_inode(direntry->d_parent)->i_sb); + struct cifs_tcon *tcon = cifs_sb_master_tcon(cifs_sb); + + if (!open_cached_dir_by_dentry(tcon, direntry->d_parent, &cfid)) { + /* + * dentry is negative and parent is fully cached: + * we can assume file does not exist + */ + if (cfid->dirents.is_valid) { + close_cached_dir(cfid); + return 1; + } + close_cached_dir(cfid); + } } /* diff --git a/fs/smb/client/inode.c b/fs/smb/client/inode.c index 1088e3c244..9cde446efc 100644 --- a/fs/smb/client/inode.c +++ b/fs/smb/client/inode.c @@ -2411,8 +2411,10 @@ cifs_do_rename(const unsigned int xid, struct dentry *from_dentry, tcon = tlink_tcon(tlink); server = tcon->ses->server; - if (!server->ops->rename) - return -ENOSYS; + if (!server->ops->rename) { + rc = -ENOSYS; + goto do_rename_exit; + } /* try path-based rename first */ rc = server->ops->rename(xid, tcon, from_dentry, @@ -2681,7 +2683,7 @@ cifs_dentry_needs_reval(struct dentry *dentry) return true; if (!open_cached_dir_by_dentry(tcon, dentry->d_parent, &cfid)) { - if (cfid->time && cifs_i->time > cfid->time) { + if (cifs_i->time > cfid->time) { close_cached_dir(cfid); return false; } diff --git a/fs/smb/client/misc.c b/fs/smb/client/misc.c index 2b121f6c0b..fb72f5881e 100644 --- a/fs/smb/client/misc.c +++ b/fs/smb/client/misc.c @@ -916,6 +916,14 @@ parse_dfs_referrals(struct get_dfs_referral_rsp *rsp, u32 rsp_size, char *data_end; struct dfs_referral_level_3 *ref; + if (rsp_size < sizeof(*rsp)) { + cifs_dbg(VFS | ONCE, + "%s: header is malformed (size is %u, must be %zu)\n", + __func__, rsp_size, sizeof(*rsp)); + rc = -EINVAL; + goto parse_DFS_referrals_exit; + } + *num_of_nodes = le16_to_cpu(rsp->NumberOfReferrals); if (*num_of_nodes < 1) { @@ -925,6 +933,15 @@ parse_dfs_referrals(struct get_dfs_referral_rsp *rsp, u32 rsp_size, goto parse_DFS_referrals_exit; } + if (sizeof(*rsp) + *num_of_nodes * sizeof(REFERRAL3) > rsp_size) { + cifs_dbg(VFS | ONCE, + "%s: malformed buffer (size is %u, must be at least %zu)\n", + __func__, rsp_size, + sizeof(*rsp) + *num_of_nodes * sizeof(REFERRAL3)); + rc = -EINVAL; + goto parse_DFS_referrals_exit; + } + ref = (struct dfs_referral_level_3 *) &(rsp->referrals); if (ref->VersionNumber != cpu_to_le16(3)) { cifs_dbg(VFS, "Referrals of V%d version are not supported, should be V3\n", diff --git a/fs/smb/client/readdir.c b/fs/smb/client/readdir.c index 7bf3214117..7e194d2352 100644 --- a/fs/smb/client/readdir.c +++ b/fs/smb/client/readdir.c @@ -873,39 +873,42 @@ static void finished_cached_dirents_count(struct cached_dirents *cde, cde->is_valid = 1; } -static void add_cached_dirent(struct cached_dirents *cde, - struct dir_context *ctx, - const char *name, int namelen, - struct 
cifs_fattr *fattr, - struct file *file) +static bool add_cached_dirent(struct cached_dirents *cde, + struct dir_context *ctx, const char *name, + int namelen, struct cifs_fattr *fattr, + struct file *file) { struct cached_dirent *de; if (cde->file != file) - return; + return false; if (cde->is_valid || cde->is_failed) - return; + return false; if (ctx->pos != cde->pos) { cde->is_failed = 1; - return; + return false; } de = kzalloc(sizeof(*de), GFP_ATOMIC); if (de == NULL) { cde->is_failed = 1; - return; + return false; } de->namelen = namelen; de->name = kstrndup(name, namelen, GFP_ATOMIC); if (de->name == NULL) { kfree(de); cde->is_failed = 1; - return; + return false; } de->pos = ctx->pos; memcpy(&de->fattr, fattr, sizeof(struct cifs_fattr)); list_add_tail(&de->entry, &cde->entries); + /* update accounting */ + cde->entries_count++; + cde->bytes_used += sizeof(*de) + (size_t)namelen + 1; + return true; } static bool cifs_dir_emit(struct dir_context *ctx, @@ -914,7 +917,8 @@ static bool cifs_dir_emit(struct dir_context *ctx, struct cached_fid *cfid, struct file *file) { - bool rc; + size_t delta_bytes = 0; + bool rc, added = false; ino_t ino = cifs_uniqueid_to_ino_t(fattr->cf_uniqueid); rc = dir_emit(ctx, name, namelen, ino, fattr->cf_dtype); @@ -922,10 +926,20 @@ static bool cifs_dir_emit(struct dir_context *ctx, return rc; if (cfid) { + /* Cost of this entry */ + delta_bytes = sizeof(struct cached_dirent) + (size_t)namelen + 1; + mutex_lock(&cfid->dirents.de_mutex); - add_cached_dirent(&cfid->dirents, ctx, name, namelen, - fattr, file); + added = add_cached_dirent(&cfid->dirents, ctx, name, namelen, + fattr, file); mutex_unlock(&cfid->dirents.de_mutex); + + if (added) { + /* per-tcon then global for consistency with free path */ + atomic64_add((long long)delta_bytes, &cfid->cfids->total_dirents_bytes); + atomic_long_inc(&cfid->cfids->total_dirents_entries); + atomic64_add((long long)delta_bytes, &cifs_dircache_bytes_used); + } } return rc; diff --git a/fs/smb/client/smb2inode.c b/fs/smb/client/smb2inode.c index c534595ba9..ea85f7d9ab 100644 --- a/fs/smb/client/smb2inode.c +++ b/fs/smb/client/smb2inode.c @@ -1294,6 +1294,8 @@ static int smb2_set_path_attr(const unsigned int xid, struct cifs_tcon *tcon, smb2_to_name = cifs_convert_path_to_utf16(to_name, cifs_sb); if (smb2_to_name == NULL) { rc = -ENOMEM; + if (cfile) + cifsFileInfo_put(cfile); goto smb2_rename_path; } in_iov.iov_base = smb2_to_name; diff --git a/fs/smb/client/smb2misc.c b/fs/smb/client/smb2misc.c index cddf273c14..89d933b4a8 100644 --- a/fs/smb/client/smb2misc.c +++ b/fs/smb/client/smb2misc.c @@ -614,6 +614,15 @@ smb2_is_valid_lease_break(char *buffer, struct TCP_Server_Info *server) struct cifs_tcon *tcon; struct cifs_pending_open *open; + /* Trace receipt of lease break request from server */ + trace_smb3_lease_break_enter(le32_to_cpu(rsp->CurrentLeaseState), + le32_to_cpu(rsp->Flags), + le16_to_cpu(rsp->Epoch), + le32_to_cpu(rsp->hdr.Id.SyncId.TreeId), + le64_to_cpu(rsp->hdr.SessionId), + *((u64 *)rsp->LeaseKey), + *((u64 *)&rsp->LeaseKey[8])); + cifs_dbg(FYI, "Checking for lease break\n"); /* If server is a channel, select the primary channel */ @@ -660,10 +669,12 @@ smb2_is_valid_lease_break(char *buffer, struct TCP_Server_Info *server) spin_unlock(&cifs_tcp_ses_lock); cifs_dbg(FYI, "Can not process lease break - no lease matched\n"); trace_smb3_lease_not_found(le32_to_cpu(rsp->CurrentLeaseState), - le32_to_cpu(rsp->hdr.Id.SyncId.TreeId), - le64_to_cpu(rsp->hdr.SessionId), - *((u64 *)rsp->LeaseKey), - *((u64 
*)&rsp->LeaseKey[8])); + le32_to_cpu(rsp->Flags), + le16_to_cpu(rsp->Epoch), + le32_to_cpu(rsp->hdr.Id.SyncId.TreeId), + le64_to_cpu(rsp->hdr.SessionId), + *((u64 *)rsp->LeaseKey), + *((u64 *)&rsp->LeaseKey[8])); return false; } diff --git a/fs/smb/client/smb2ops.c b/fs/smb/client/smb2ops.c index 364f53a805..cdda54dd39 100644 --- a/fs/smb/client/smb2ops.c +++ b/fs/smb/client/smb2ops.c @@ -912,11 +912,8 @@ smb2_is_path_accessible(const unsigned int xid, struct cifs_tcon *tcon, rc = open_cached_dir(xid, tcon, full_path, cifs_sb, true, &cfid); if (!rc) { - if (cfid->has_lease) { - close_cached_dir(cfid); - return 0; - } close_cached_dir(cfid); + return 0; } utf16_path = cifs_convert_path_to_utf16(full_path, cifs_sb); @@ -2672,11 +2669,12 @@ smb2_query_info_compound(const unsigned int xid, struct cifs_tcon *tcon, struct cifs_fid fid; int rc; __le16 *utf16_path; - struct cached_fid *cfid = NULL; + struct cached_fid *cfid; int retries = 0, cur_sleep = 1; replay_again: /* reinitialize for possible replay */ + cfid = NULL; flags = CIFS_CP_CREATE_CLOSE_OP; oplock = SMB2_OPLOCK_LEVEL_NONE; server = cifs_pick_channel(ses); @@ -3085,8 +3083,7 @@ get_smb2_acl_by_path(struct cifs_sb_info *cifs_sb, utf16_path = cifs_convert_path_to_utf16(path, cifs_sb); if (!utf16_path) { rc = -ENOMEM; - free_xid(xid); - return ERR_PTR(rc); + goto put_tlink; } oparms = (struct cifs_open_parms) { @@ -3118,6 +3115,7 @@ get_smb2_acl_by_path(struct cifs_sb_info *cifs_sb, SMB2_close(xid, tcon, fid.persistent_fid, fid.volatile_fid); } +put_tlink: cifs_put_tlink(tlink); free_xid(xid); @@ -3158,8 +3156,7 @@ set_smb2_acl(struct smb_ntsd *pnntsd, __u32 acllen, utf16_path = cifs_convert_path_to_utf16(path, cifs_sb); if (!utf16_path) { rc = -ENOMEM; - free_xid(xid); - return rc; + goto put_tlink; } oparms = (struct cifs_open_parms) { @@ -3180,6 +3177,7 @@ set_smb2_acl(struct smb_ntsd *pnntsd, __u32 acllen, SMB2_close(xid, tcon, fid.persistent_fid, fid.volatile_fid); } +put_tlink: cifs_put_tlink(tlink); free_xid(xid); return rc; diff --git a/fs/smb/client/smb2pdu.c b/fs/smb/client/smb2pdu.c index a6cf9e632f..7310dc74ad 100644 --- a/fs/smb/client/smb2pdu.c +++ b/fs/smb/client/smb2pdu.c @@ -6111,11 +6111,11 @@ SMB2_lease_break(const unsigned int xid, struct cifs_tcon *tcon, please_key_high = (__u64 *)(lease_key+8); if (rc) { cifs_stats_fail_inc(tcon, SMB2_OPLOCK_BREAK_HE); - trace_smb3_lease_err(le32_to_cpu(lease_state), tcon->tid, + trace_smb3_lease_ack_err(le32_to_cpu(lease_state), tcon->tid, ses->Suid, *please_key_low, *please_key_high, rc); cifs_dbg(FYI, "Send error in Lease Break = %d\n", rc); } else - trace_smb3_lease_done(le32_to_cpu(lease_state), tcon->tid, + trace_smb3_lease_ack_done(le32_to_cpu(lease_state), tcon->tid, ses->Suid, *please_key_low, *please_key_high); return rc; diff --git a/fs/smb/client/smbdirect.c b/fs/smb/client/smbdirect.c index e2e0c9b2c6..7bdb54367b 100644 --- a/fs/smb/client/smbdirect.c +++ b/fs/smb/client/smbdirect.c @@ -446,7 +446,9 @@ static void recv_done(struct ib_cq *cq, struct ib_wc *wc) struct smbd_response *response = container_of(wc->wr_cqe, struct smbd_response, cqe); struct smbd_connection *info = response->info; - int data_length = 0; + u32 data_offset = 0; + u32 data_length = 0; + u32 remaining_data_length = 0; log_rdma_recv(INFO, "response=0x%p type=%d wc status=%d wc opcode %d byte_len=%d pkey_index=%u\n", response, response->type, wc->status, wc->opcode, @@ -478,7 +480,22 @@ static void recv_done(struct ib_cq *cq, struct ib_wc *wc) /* SMBD data transfer packet */ case SMBD_TRANSFER_DATA: 
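+ /* Validate the transfer packet before trusting its length fields: it must be large enough to hold the header, the declared payload must lie entirely within the received bytes, and the lengths must stay within the negotiated reassembly limit. */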
data_transfer = smbd_response_payload(response); + + if (wc->byte_len < + offsetof(struct smbd_data_transfer, padding)) + goto error; + + remaining_data_length = le32_to_cpu(data_transfer->remaining_data_length); + data_offset = le32_to_cpu(data_transfer->data_offset); data_length = le32_to_cpu(data_transfer->data_length); + if (wc->byte_len < data_offset || + (u64)wc->byte_len < (u64)data_offset + data_length) + goto error; + + if (remaining_data_length > info->max_fragmented_recv_size || + data_length > info->max_fragmented_recv_size || + (u64)remaining_data_length + (u64)data_length > (u64)info->max_fragmented_recv_size) + goto error; /* * If this is a packet with data payload place the data in diff --git a/fs/smb/client/trace.h b/fs/smb/client/trace.h index 8a2cf2887e..c1806b9ca6 100644 --- a/fs/smb/client/trace.h +++ b/fs/smb/client/trace.h @@ -1043,8 +1043,54 @@ DEFINE_EVENT(smb3_lease_done_class, smb3_##name, \ __u64 lease_key_high), \ TP_ARGS(lease_state, tid, sesid, lease_key_low, lease_key_high)) -DEFINE_SMB3_LEASE_DONE_EVENT(lease_done); -DEFINE_SMB3_LEASE_DONE_EVENT(lease_not_found); +DEFINE_SMB3_LEASE_DONE_EVENT(lease_ack_done); +/* Tracepoint when a lease break request is received/entered (includes epoch and flags) */ +DECLARE_EVENT_CLASS(smb3_lease_enter_class, + TP_PROTO(__u32 lease_state, + __u32 flags, + __u16 epoch, + __u32 tid, + __u64 sesid, + __u64 lease_key_low, + __u64 lease_key_high), + TP_ARGS(lease_state, flags, epoch, tid, sesid, lease_key_low, lease_key_high), + TP_STRUCT__entry( + __field(__u32, lease_state) + __field(__u32, flags) + __field(__u16, epoch) + __field(__u32, tid) + __field(__u64, sesid) + __field(__u64, lease_key_low) + __field(__u64, lease_key_high) + ), + TP_fast_assign( + __entry->lease_state = lease_state; + __entry->flags = flags; + __entry->epoch = epoch; + __entry->tid = tid; + __entry->sesid = sesid; + __entry->lease_key_low = lease_key_low; + __entry->lease_key_high = lease_key_high; + ), + TP_printk("sid=0x%llx tid=0x%x lease_key=0x%llx%llx lease_state=0x%x flags=0x%x epoch=%u", + __entry->sesid, __entry->tid, __entry->lease_key_high, + __entry->lease_key_low, __entry->lease_state, __entry->flags, __entry->epoch) +) + +#define DEFINE_SMB3_LEASE_ENTER_EVENT(name) \ +DEFINE_EVENT(smb3_lease_enter_class, smb3_##name, \ + TP_PROTO(__u32 lease_state, \ + __u32 flags, \ + __u16 epoch, \ + __u32 tid, \ + __u64 sesid, \ + __u64 lease_key_low, \ + __u64 lease_key_high), \ + TP_ARGS(lease_state, flags, epoch, tid, sesid, lease_key_low, lease_key_high)) + +DEFINE_SMB3_LEASE_ENTER_EVENT(lease_break_enter); +/* Lease not found: reuse lease_enter payload (includes epoch and flags) */ +DEFINE_SMB3_LEASE_ENTER_EVENT(lease_not_found); DECLARE_EVENT_CLASS(smb3_lease_err_class, TP_PROTO(__u32 lease_state, @@ -1085,7 +1131,7 @@ DEFINE_EVENT(smb3_lease_err_class, smb3_##name, \ int rc), \ TP_ARGS(lease_state, tid, sesid, lease_key_low, lease_key_high, rc)) -DEFINE_SMB3_LEASE_ERR_EVENT(lease_err); +DEFINE_SMB3_LEASE_ERR_EVENT(lease_ack_err); DECLARE_EVENT_CLASS(smb3_connect_class, TP_PROTO(char *hostname, diff --git a/fs/xfs/xfs_file.c b/fs/xfs/xfs_file.c index 31205dbaf1..cb84bba3b8 100644 --- a/fs/xfs/xfs_file.c +++ b/fs/xfs/xfs_file.c @@ -73,52 +73,47 @@ xfs_dir_fsync( return xfs_log_force_inode(ip); } -static xfs_csn_t -xfs_fsync_seq( - struct xfs_inode *ip, - bool datasync) -{ - if (!xfs_ipincount(ip)) - return 0; - if (datasync && !(ip->i_itemp->ili_fsync_fields & ~XFS_ILOG_TIMESTAMP)) - return 0; - return ip->i_itemp->ili_commit_seq; -} /* - * All
metadata updates are logged, which means that we just have to flush the - * log up to the latest LSN that touched the inode. + * All metadata updates are logged, which means that we just have to push the + * journal to the required sequence number that holds the updates. We track + * datasync commits separately to full sync commits, and hence only need to + * select the correct sequence number for the log force here. * - * If we have concurrent fsync/fdatasync() calls, we need them to all block on - * the log force before we clear the ili_fsync_fields field. This ensures that - * we don't get a racing sync operation that does not wait for the metadata to - * hit the journal before returning. If we race with clearing ili_fsync_fields, - * then all that will happen is the log force will do nothing as the lsn will - * already be on disk. We can't race with setting ili_fsync_fields because that - * is done under XFS_ILOCK_EXCL, and that can't happen because we hold the lock - * shared until after the ili_fsync_fields is cleared. + * We don't have to serialise against concurrent modifications, as we do not + * have to wait for modifications that have not yet completed. We define a + * transaction commit as completing when the commit sequence number is updated, + * hence if the sequence number has not updated, the sync operation has been + * run before the commit completed and we don't have to wait for it. + * + * If we have concurrent fsync/fdatasync() calls, the sequence numbers remain + * set on the log item until - at least - the journal flush completes. In + * reality, they are only cleared when the inode is fully unpinned (i.e. + * persistent in the journal and not dirty in the CIL), and so we rely on + * xfs_log_force_seq() either skipping sequences that have been persisted or + * waiting on sequences that are still in flight to correctly order concurrent + * sync operations. */ -static int +static int xfs_fsync_flush_log( struct xfs_inode *ip, bool datasync, int *log_flushed) { - int error = 0; - xfs_csn_t seq; + struct xfs_inode_log_item *iip = ip->i_itemp; + xfs_csn_t seq = 0; - xfs_ilock(ip, XFS_ILOCK_SHARED); - seq = xfs_fsync_seq(ip, datasync); - if (seq) { - error = xfs_log_force_seq(ip->i_mount, seq, XFS_LOG_SYNC, + spin_lock(&iip->ili_lock); + if (datasync) + seq = iip->ili_datasync_seq; + else + seq = iip->ili_commit_seq; + spin_unlock(&iip->ili_lock); + + if (!seq) + return 0; + + return xfs_log_force_seq(ip->i_mount, seq, XFS_LOG_SYNC, log_flushed); - - spin_lock(&ip->i_itemp->ili_lock); - ip->i_itemp->ili_fsync_fields = 0; - spin_unlock(&ip->i_itemp->ili_lock); - } - xfs_iunlock(ip, XFS_ILOCK_SHARED); - return error; } STATIC int @@ -156,12 +151,10 @@ xfs_file_fsync( error = blkdev_issue_flush(mp->m_ddev_targp->bt_bdev); /* - * Any inode that has dirty modifications in the log is pinned. The - * racy check here for a pinned inode will not catch modifications - * that happen concurrently to the fsync call, but fsync semantics - * only require to sync previously completed I/O. + * If the inode has an inode log item attached, it may need the journal + * flushed to persist any changes the log item might be tracking.
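+ * The log item only exists once the inode has been logged, and if no relevant modification is pending, xfs_fsync_flush_log() will read a zero sequence number and return without forcing the log.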
*/ - if (xfs_ipincount(ip)) { + if (ip->i_itemp) { err2 = xfs_fsync_flush_log(ip, datasync, &log_flushed); if (err2 && !error) error = err2; diff --git a/fs/xfs/xfs_inode.c b/fs/xfs/xfs_inode.c index 19dcb569a3..b84684577b 100644 --- a/fs/xfs/xfs_inode.c +++ b/fs/xfs/xfs_inode.c @@ -1642,7 +1642,6 @@ retry: spin_lock(&iip->ili_lock); iip->ili_last_fields = iip->ili_fields; iip->ili_fields = 0; - iip->ili_fsync_fields = 0; spin_unlock(&iip->ili_lock); ASSERT(iip->ili_last_fields); @@ -1808,12 +1807,20 @@ static void xfs_iunpin( struct xfs_inode *ip) { - xfs_assert_ilocked(ip, XFS_ILOCK_EXCL | XFS_ILOCK_SHARED); + struct xfs_inode_log_item *iip = ip->i_itemp; + xfs_csn_t seq = 0; trace_xfs_inode_unpin_nowait(ip, _RET_IP_); + xfs_assert_ilocked(ip, XFS_ILOCK_EXCL | XFS_ILOCK_SHARED); + + spin_lock(&iip->ili_lock); + seq = iip->ili_commit_seq; + spin_unlock(&iip->ili_lock); + if (!seq) + return; /* Give the log a push to start the unpinning I/O */ - xfs_log_force_seq(ip->i_mount, ip->i_itemp->ili_commit_seq, 0, NULL); + xfs_log_force_seq(ip->i_mount, seq, 0, NULL); } @@ -2472,7 +2479,6 @@ flush_out: spin_lock(&iip->ili_lock); iip->ili_last_fields = iip->ili_fields; iip->ili_fields = 0; - iip->ili_fsync_fields = 0; set_bit(XFS_LI_FLUSHING, &iip->ili_item.li_flags); spin_unlock(&iip->ili_lock); @@ -2631,12 +2637,15 @@ int xfs_log_force_inode( struct xfs_inode *ip) { + struct xfs_inode_log_item *iip = ip->i_itemp; xfs_csn_t seq = 0; - xfs_ilock(ip, XFS_ILOCK_SHARED); - if (xfs_ipincount(ip)) - seq = ip->i_itemp->ili_commit_seq; - xfs_iunlock(ip, XFS_ILOCK_SHARED); + if (!iip) + return 0; + + spin_lock(&iip->ili_lock); + seq = iip->ili_commit_seq; + spin_unlock(&iip->ili_lock); if (!seq) return 0; diff --git a/fs/xfs/xfs_inode_item.c b/fs/xfs/xfs_inode_item.c index ca407cc591..40e3bc753e 100644 --- a/fs/xfs/xfs_inode_item.c +++ b/fs/xfs/xfs_inode_item.c @@ -131,31 +131,28 @@ xfs_inode_item_precommit( } /* - * Inode verifiers do not check that the extent size hint is an integer - * multiple of the rt extent size on a directory with both rtinherit - * and extszinherit flags set. If we're logging a directory that is - * misconfigured in this way, clear the hint. + * Inode verifiers do not check that the extent size hints are an + * integer multiple of the rt extent size on a directory with + * rtinherit flags set. If we're logging a directory that is + * misconfigured in this way, clear the bad hints. */ - if ((ip->i_diflags & XFS_DIFLAG_RTINHERIT) && - (ip->i_diflags & XFS_DIFLAG_EXTSZINHERIT) && - xfs_extlen_to_rtxmod(ip->i_mount, ip->i_extsize) > 0) { - ip->i_diflags &= ~(XFS_DIFLAG_EXTSIZE | - XFS_DIFLAG_EXTSZINHERIT); - ip->i_extsize = 0; - flags |= XFS_ILOG_CORE; + if (ip->i_diflags & XFS_DIFLAG_RTINHERIT) { + if ((ip->i_diflags & XFS_DIFLAG_EXTSZINHERIT) && + xfs_extlen_to_rtxmod(ip->i_mount, ip->i_extsize) > 0) { + ip->i_diflags &= ~(XFS_DIFLAG_EXTSIZE | + XFS_DIFLAG_EXTSZINHERIT); + ip->i_extsize = 0; + flags |= XFS_ILOG_CORE; + } + if ((ip->i_diflags2 & XFS_DIFLAG2_COWEXTSIZE) && + xfs_extlen_to_rtxmod(ip->i_mount, ip->i_cowextsize) > 0) { + ip->i_diflags2 &= ~XFS_DIFLAG2_COWEXTSIZE; + ip->i_cowextsize = 0; + flags |= XFS_ILOG_CORE; + } } - /* - * Record the specific change for fdatasync optimisation. This allows - * fdatasync to skip log forces for inodes that are only timestamp - * dirty. Once we've processed the XFS_ILOG_IVERSION flag, convert it - * to XFS_ILOG_CORE so that the actual on-disk dirty tracking - * (ili_fields) correctly tracks that the version has changed. 
- */ spin_lock(&iip->ili_lock); - iip->ili_fsync_fields |= (flags & ~XFS_ILOG_IVERSION); - if (flags & XFS_ILOG_IVERSION) - flags = ((flags & ~XFS_ILOG_IVERSION) | XFS_ILOG_CORE); if (!iip->ili_item.li_buf) { struct xfs_buf *bp; @@ -190,6 +187,20 @@ xfs_inode_item_precommit( xfs_trans_brelse(tp, bp); } + /* + * Store the dirty flags back into the inode item as this state is used + * later on in xfs_inode_item_committing() to determine whether the + * transaction is relevant to fsync state or not. + */ + iip->ili_dirty_flags = flags; + + /* + * Convert the flags to the on-disk fields that have been modified in + * the transaction so that ili_fields tracks the changes correctly. + */ + if (flags & XFS_ILOG_IVERSION) + flags = ((flags & ~XFS_ILOG_IVERSION) | XFS_ILOG_CORE); + /* * Always OR in the bits from the ili_last_fields field. This is to * coordinate with the xfs_iflush() and xfs_buf_inode_iodone() routines @@ -200,12 +211,6 @@ xfs_inode_item_precommit( spin_unlock(&iip->ili_lock); xfs_inode_item_precommit_check(ip); - - /* - * We are done with the log item transaction dirty state, so clear it so - * that it doesn't pollute future transactions. - */ - iip->ili_dirty_flags = 0; return 0; } @@ -707,13 +712,24 @@ xfs_inode_item_unpin( struct xfs_log_item *lip, int remove) { - struct xfs_inode *ip = INODE_ITEM(lip)->ili_inode; + struct xfs_inode_log_item *iip = INODE_ITEM(lip); + struct xfs_inode *ip = iip->ili_inode; trace_xfs_inode_unpin(ip, _RET_IP_); ASSERT(lip->li_buf || xfs_iflags_test(ip, XFS_ISTALE)); ASSERT(atomic_read(&ip->i_pincount) > 0); + + /* + * If this is the last unpin, then the inode no longer needs a journal + * flush to persist it. Hence we can clear the commit sequence numbers + * as an fsync/fdatasync operation on the inode at this point is a no-op. + */ + if (atomic_dec_and_lock(&ip->i_pincount, &iip->ili_lock)) { + iip->ili_commit_seq = 0; + iip->ili_datasync_seq = 0; + spin_unlock(&iip->ili_lock); wake_up_bit(&ip->i_flags, __XFS_IPINNED_BIT); + } } STATIC uint @@ -833,12 +849,45 @@ xfs_inode_item_committed( return lsn; } +/* + * The modification is now complete, so before we unlock the inode we need to + * update the commit sequence numbers for data integrity journal flushes. We + * always record the commit sequence number (ili_commit_seq) so that anything + * that needs a full journal sync will capture all of this modification. + * + * We then check if the changes will impact a datasync (O_DSYNC) journal + * flush. If the changes will require a datasync flush, then we also record + * the sequence in ili_datasync_seq. + * + * These commit sequence numbers will get cleared atomically with the inode + * being unpinned (i.e. pin count goes to zero), and so they will only be set + * when the inode is dirty in the journal. This removes the need for checking + * if the inode is pinned to determine if a journal flush is necessary, and + * hence removes the need for holding the ILOCK_SHARED in xfs_file_fsync() to + * serialise pin counts against commit sequence number updates.
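Building on the userspace sketch above (same illustrative struct inode_log_item), the commit side records both sequence numbers under the lock and the final unpin clears them. ILOG_TIMESTAMP/ILOG_IVERSION are stand-ins for the XFS_ILOG_* flags, and the plain mutex only approximates the kernel's atomic_dec_and_lock() on the pin count:

/* Builds on struct inode_log_item from the previous sketch. */
enum {
        ILOG_TIMESTAMP  = 1u << 0,      /* models XFS_ILOG_TIMESTAMP */
        ILOG_IVERSION   = 1u << 1,      /* models XFS_ILOG_IVERSION */
};

static void inode_item_committing(struct inode_log_item *iip,
                                  unsigned int dirty_flags, csn_t seq)
{
        pthread_mutex_lock(&iip->ili_lock);
        iip->ili_commit_seq = seq;
        /* Timestamp- and iversion-only changes are invisible to fdatasync. */
        if (dirty_flags & ~(ILOG_IVERSION | ILOG_TIMESTAMP))
                iip->ili_datasync_seq = seq;
        pthread_mutex_unlock(&iip->ili_lock);
}

static void inode_item_unpin_last(struct inode_log_item *iip)
{
        /*
         * Last unpin: the journal no longer holds anything for this inode,
         * so fsync/fdatasync become no-ops until the next commit. XFS does
         * the decrement and clear atomically via atomic_dec_and_lock().
         */
        pthread_mutex_lock(&iip->ili_lock);
        iip->ili_commit_seq = 0;
        iip->ili_datasync_seq = 0;
        pthread_mutex_unlock(&iip->ili_lock);
}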
+ */ STATIC void xfs_inode_item_committing( struct xfs_log_item *lip, xfs_csn_t seq) { - INODE_ITEM(lip)->ili_commit_seq = seq; + struct xfs_inode_log_item *iip = INODE_ITEM(lip); + + spin_lock(&iip->ili_lock); + iip->ili_commit_seq = seq; + if (iip->ili_dirty_flags & ~(XFS_ILOG_IVERSION | XFS_ILOG_TIMESTAMP)) + iip->ili_datasync_seq = seq; + spin_unlock(&iip->ili_lock); + + /* + * Clear the per-transaction dirty flags now that we have finished + * recording the transaction's inode modifications in the CIL and are + * about to release and (maybe) unlock the inode. + */ + iip->ili_dirty_flags = 0; + return xfs_inode_item_release(lip); } @@ -1042,7 +1091,6 @@ xfs_iflush_abort_clean( { iip->ili_last_fields = 0; iip->ili_fields = 0; - iip->ili_fsync_fields = 0; iip->ili_flush_lsn = 0; iip->ili_item.li_buf = NULL; list_del_init(&iip->ili_item.li_bio_list); diff --git a/fs/xfs/xfs_inode_item.h b/fs/xfs/xfs_inode_item.h index 377e060078..7a6d179aad 100644 --- a/fs/xfs/xfs_inode_item.h +++ b/fs/xfs/xfs_inode_item.h @@ -32,9 +32,17 @@ struct xfs_inode_log_item { spinlock_t ili_lock; /* flush state lock */ unsigned int ili_last_fields; /* fields when flushed */ unsigned int ili_fields; /* fields to be logged */ - unsigned int ili_fsync_fields; /* logged since last fsync */ xfs_lsn_t ili_flush_lsn; /* lsn at last flush */ + + /* + * We record the sequence number for every inode modification, as + * well as for those modifications that an fdatasync must flush for + * data integrity. This allows optimisation of the O_DSYNC/fdatasync + * path without needing to track what modifications the journal is + * currently carrying for the inode. These are protected by the above + * ili_lock. + */ xfs_csn_t ili_commit_seq; /* last transaction commit */ + xfs_csn_t ili_datasync_seq; /* for datasync optimisation */ }; static inline int xfs_inode_clean(struct xfs_inode *ip) diff --git a/fs/xfs/xfs_iomap.c b/fs/xfs/xfs_iomap.c index c128f53abd..63b99eaffa 100644 --- a/fs/xfs/xfs_iomap.c +++ b/fs/xfs/xfs_iomap.c @@ -133,9 +133,18 @@ xfs_bmbt_to_iomap( iomap->bdev = target->bt_bdev; iomap->flags = iomap_flags; - if (xfs_ipincount(ip) && - (ip->i_itemp->ili_fsync_fields & ~XFS_ILOG_TIMESTAMP)) - iomap->flags |= IOMAP_F_DIRTY; + /* + * If the inode is dirty for datasync purposes, let iomap know so it + * doesn't elide the IO completion journal flushes on O_DSYNC IO.
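In the same illustrative model, the iomap hunk above reduces the old pincount-plus-fsync-fields test to a single locked read:

/* Non-zero ili_datasync_seq: an O_DSYNC write still needs a journal
 * flush at I/O completion, so the mapping is flagged dirty. */
static bool mapping_dirty_for_dsync(struct inode_log_item *iip)
{
        bool dirty;

        pthread_mutex_lock(&iip->ili_lock);
        dirty = iip->ili_datasync_seq != 0;
        pthread_mutex_unlock(&iip->ili_lock);
        return dirty;
}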
+ */ + if (ip->i_itemp) { + struct xfs_inode_log_item *iip = ip->i_itemp; + + spin_lock(&iip->ili_lock); + if (iip->ili_datasync_seq) + iomap->flags |= IOMAP_F_DIRTY; + spin_unlock(&iip->ili_lock); + } iomap->validity_cookie = sequence_cookie; iomap->page_ops = &xfs_iomap_page_ops; diff --git a/include/net/inetpeer.h b/include/net/inetpeer.h index 74ff688568..f475757daa 100644 --- a/include/net/inetpeer.h +++ b/include/net/inetpeer.h @@ -96,30 +96,28 @@ static inline struct in6_addr *inetpeer_get_addr_v6(struct inetpeer_addr *iaddr) /* can be called with or without local BH being disabled */ struct inet_peer *inet_getpeer(struct inet_peer_base *base, - const struct inetpeer_addr *daddr, - int create); + const struct inetpeer_addr *daddr); static inline struct inet_peer *inet_getpeer_v4(struct inet_peer_base *base, __be32 v4daddr, - int vif, int create) + int vif) { struct inetpeer_addr daddr; daddr.a4.addr = v4daddr; daddr.a4.vif = vif; daddr.family = AF_INET; - return inet_getpeer(base, &daddr, create); + return inet_getpeer(base, &daddr); } static inline struct inet_peer *inet_getpeer_v6(struct inet_peer_base *base, - const struct in6_addr *v6daddr, - int create) + const struct in6_addr *v6daddr) { struct inetpeer_addr daddr; daddr.a6 = *v6daddr; daddr.family = AF_INET6; - return inet_getpeer(base, &daddr, create); + return inet_getpeer(base, &daddr); } static inline int inetpeer_addr_cmp(const struct inetpeer_addr *a, diff --git a/net/ceph/ceph_common.c b/net/ceph/ceph_common.c index 4c6441536d..285e981730 100644 --- a/net/ceph/ceph_common.c +++ b/net/ceph/ceph_common.c @@ -785,42 +785,53 @@ void ceph_reset_client_addr(struct ceph_client *client) } EXPORT_SYMBOL(ceph_reset_client_addr); -/* - * true if we have the mon map (and have thus joined the cluster) - */ -static bool have_mon_and_osd_map(struct ceph_client *client) -{ - return client->monc.monmap && client->monc.monmap->epoch && - client->osdc.osdmap && client->osdc.osdmap->epoch; -} - /* * mount: join the ceph cluster, and open root directory. 
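The rewritten __ceph_open_session() below re-samples the monmap and osdmap under their owning locks on every wakeup instead of dereferencing them locklessly, which is what made the old have_mon_and_osd_map() racy. A condition-variable analogue of that wait_woken() loop, with illustrative names and the signal_pending() check omitted:

#include <errno.h>
#include <pthread.h>
#include <stdbool.h>
#include <time.h>

struct client_state {
        pthread_mutex_t lock;
        pthread_cond_t cond;
        int auth_err;                   /* models client->auth_err */
        bool have_monmap, have_osdmap;  /* only sampled under the lock */
};

static int open_session_wait(struct client_state *c,
                             const struct timespec *deadline)
{
        int err = 0;

        pthread_mutex_lock(&c->lock);
        /* Re-evaluate the condition under the lock after every wakeup, so
         * we never touch state another thread may have swapped out. */
        while (!c->auth_err && !(c->have_monmap && c->have_osdmap)) {
                if (pthread_cond_timedwait(&c->cond, &c->lock, deadline) ==
                    ETIMEDOUT) {
                        err = -ETIMEDOUT;
                        break;
                }
        }
        if (!err)
                err = c->auth_err;
        pthread_mutex_unlock(&c->lock);
        return err;
}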
*/ int __ceph_open_session(struct ceph_client *client, unsigned long started) { - unsigned long timeout = client->options->mount_timeout; - long err; + DEFINE_WAIT_FUNC(wait, woken_wake_function); + long timeout = ceph_timeout_jiffies(client->options->mount_timeout); + bool have_monmap, have_osdmap; + int err; /* open session, and wait for mon and osd maps */ err = ceph_monc_open_session(&client->monc); if (err < 0) return err; - while (!have_mon_and_osd_map(client)) { - if (timeout && time_after_eq(jiffies, started + timeout)) - return -ETIMEDOUT; + add_wait_queue(&client->auth_wq, &wait); + for (;;) { + mutex_lock(&client->monc.mutex); + err = client->auth_err; + have_monmap = client->monc.monmap && client->monc.monmap->epoch; + mutex_unlock(&client->monc.mutex); + + down_read(&client->osdc.lock); + have_osdmap = client->osdc.osdmap && client->osdc.osdmap->epoch; + up_read(&client->osdc.lock); + + if (err || (have_monmap && have_osdmap)) + break; + + if (signal_pending(current)) { + err = -ERESTARTSYS; + break; + } + + if (!timeout) { + err = -ETIMEDOUT; + break; + } /* wait */ dout("mount waiting for mon_map\n"); - err = wait_event_interruptible_timeout(client->auth_wq, - have_mon_and_osd_map(client) || (client->auth_err < 0), - ceph_timeout_jiffies(timeout)); - if (err < 0) - return err; - if (client->auth_err < 0) - return client->auth_err; + timeout = wait_woken(&wait, TASK_INTERRUPTIBLE, timeout); } + remove_wait_queue(&client->auth_wq, &wait); + + if (err) + return err; pr_info("client%llu fsid %pU\n", ceph_client_gid(client), &client->fsid); diff --git a/net/ceph/debugfs.c b/net/ceph/debugfs.c index 2110439f8a..83c270bce6 100644 --- a/net/ceph/debugfs.c +++ b/net/ceph/debugfs.c @@ -36,8 +36,9 @@ static int monmap_show(struct seq_file *s, void *p) int i; struct ceph_client *client = s->private; + mutex_lock(&client->monc.mutex); if (client->monc.monmap == NULL) - return 0; + goto out_unlock; seq_printf(s, "epoch %d\n", client->monc.monmap->epoch); for (i = 0; i < client->monc.monmap->num_mon; i++) { @@ -48,6 +49,9 @@ static int monmap_show(struct seq_file *s, void *p) ENTITY_NAME(inst->name), ceph_pr_addr(&inst->addr)); } + +out_unlock: + mutex_unlock(&client->monc.mutex); return 0; } @@ -56,13 +60,14 @@ static int osdmap_show(struct seq_file *s, void *p) int i; struct ceph_client *client = s->private; struct ceph_osd_client *osdc = &client->osdc; - struct ceph_osdmap *map = osdc->osdmap; + struct ceph_osdmap *map; struct rb_node *n; - if (map == NULL) - return 0; - down_read(&osdc->lock); + map = osdc->osdmap; + if (map == NULL) + goto out_unlock; + seq_printf(s, "epoch %u barrier %u flags 0x%x\n", map->epoch, osdc->epoch_barrier, map->flags); @@ -131,6 +136,7 @@ static int osdmap_show(struct seq_file *s, void *p) seq_printf(s, "]\n"); } +out_unlock: up_read(&osdc->lock); return 0; } diff --git a/net/ipv4/icmp.c b/net/ipv4/icmp.c index 4040f9c20a..406b8d6880 100644 --- a/net/ipv4/icmp.c +++ b/net/ipv4/icmp.c @@ -316,7 +316,6 @@ static bool icmpv4_xrlim_allow(struct net *net, struct rtable *rt, struct dst_entry *dst = &rt->dst; struct inet_peer *peer; bool rc = true; - int vif; if (!apply_ratelimit) return true; @@ -325,12 +324,12 @@ static bool icmpv4_xrlim_allow(struct net *net, struct rtable *rt, if (dst->dev && (dst->dev->flags&IFF_LOOPBACK)) goto out; - vif = l3mdev_master_ifindex(dst->dev); - peer = inet_getpeer_v4(net->ipv4.peers, fl4->daddr, vif, 1); + rcu_read_lock(); + peer = inet_getpeer_v4(net->ipv4.peers, fl4->daddr, + l3mdev_master_ifindex_rcu(dst->dev)); rc = 
inet_peer_xrlim_allow(peer, READ_ONCE(net->ipv4.sysctl_icmp_ratelimit)); - if (peer) - inet_putpeer(peer); + rcu_read_unlock(); out: if (!rc) __ICMP_INC_STATS(net, ICMP_MIB_RATELIMITHOST); diff --git a/net/ipv4/inetpeer.c b/net/ipv4/inetpeer.c index e9fed83e9b..23896b6b84 100644 --- a/net/ipv4/inetpeer.c +++ b/net/ipv4/inetpeer.c @@ -98,6 +98,7 @@ static struct inet_peer *lookup(const struct inetpeer_addr *daddr, { struct rb_node **pp, *parent, *next; struct inet_peer *p; + u32 now; pp = &base->rb_root.rb_node; parent = NULL; @@ -111,8 +112,9 @@ static struct inet_peer *lookup(const struct inetpeer_addr *daddr, p = rb_entry(parent, struct inet_peer, rb_node); cmp = inetpeer_addr_cmp(daddr, &p->daddr); if (cmp == 0) { - if (!refcount_inc_not_zero(&p->refcnt)) - break; + now = jiffies; + if (READ_ONCE(p->dtime) != now) + WRITE_ONCE(p->dtime, now); return p; } if (gc_stack) { @@ -158,9 +160,6 @@ static void inet_peer_gc(struct inet_peer_base *base, for (i = 0; i < gc_cnt; i++) { p = gc_stack[i]; - /* The READ_ONCE() pairs with the WRITE_ONCE() - * in inet_putpeer() - */ delta = (__u32)jiffies - READ_ONCE(p->dtime); if (delta < ttl || !refcount_dec_if_one(&p->refcnt)) @@ -176,31 +175,23 @@ static void inet_peer_gc(struct inet_peer_base *base, } } +/* Must be called under RCU : No refcount change is done here. */ struct inet_peer *inet_getpeer(struct inet_peer_base *base, - const struct inetpeer_addr *daddr, - int create) + const struct inetpeer_addr *daddr) { struct inet_peer *p, *gc_stack[PEER_MAX_GC]; struct rb_node **pp, *parent; unsigned int gc_cnt, seq; - int invalidated; /* Attempt a lockless lookup first. * Because of a concurrent writer, we might not find an existing entry. */ - rcu_read_lock(); seq = read_seqbegin(&base->lock); p = lookup(daddr, base, seq, NULL, &gc_cnt, &parent, &pp); - invalidated = read_seqretry(&base->lock, seq); - rcu_read_unlock(); if (p) return p; - /* If no writer did a change during our lookup, we can return early. */ - if (!create && !invalidated) - return NULL; - /* retry an exact lookup, taking the lock before. * At least, nodes should be hot in our cache. */ @@ -209,12 +200,12 @@ struct inet_peer *inet_getpeer(struct inet_peer_base *base, gc_cnt = 0; p = lookup(daddr, base, seq, gc_stack, &gc_cnt, &parent, &pp); - if (!p && create) { + if (!p) { p = kmem_cache_alloc(peer_cachep, GFP_ATOMIC); if (p) { p->daddr = *daddr; p->dtime = (__u32)jiffies; - refcount_set(&p->refcnt, 2); + refcount_set(&p->refcnt, 1); atomic_set(&p->rid, 0); p->metrics[RTAX_LOCK-1] = INETPEER_METRICS_NEW; p->rate_tokens = 0; @@ -239,15 +230,9 @@ EXPORT_SYMBOL_GPL(inet_getpeer); void inet_putpeer(struct inet_peer *p) { - /* The WRITE_ONCE() pairs with itself (we run lockless) - * and the READ_ONCE() in inet_peer_gc() - */ - WRITE_ONCE(p->dtime, (__u32)jiffies); - if (refcount_dec_and_test(&p->refcnt)) call_rcu(&p->rcu, inetpeer_free_rcu); } -EXPORT_SYMBOL_GPL(inet_putpeer); /* * Check transmit rate limitation for given message. diff --git a/net/ipv4/ip_fragment.c b/net/ipv4/ip_fragment.c index a4d5b25b77..b1c7dce401 100644 --- a/net/ipv4/ip_fragment.c +++ b/net/ipv4/ip_fragment.c @@ -82,15 +82,20 @@ static int ip_frag_reasm(struct ipq *qp, struct sk_buff *skb, static void ip4_frag_init(struct inet_frag_queue *q, const void *a) { struct ipq *qp = container_of(q, struct ipq, q); - struct net *net = q->fqdir->net; - const struct frag_v4_compare_key *key = a; + struct net *net = q->fqdir->net; + struct inet_peer *p = NULL; q->key.v4 = *key; qp->ecn = 0; - qp->peer = q->fqdir->max_dist ? 
- inet_getpeer_v4(net->ipv4.peers, key->saddr, key->vif, 1) : - NULL; + if (q->fqdir->max_dist) { + rcu_read_lock(); + p = inet_getpeer_v4(net->ipv4.peers, key->saddr, key->vif); + if (p && !refcount_inc_not_zero(&p->refcnt)) + p = NULL; + rcu_read_unlock(); + } + qp->peer = p; } static void ip4_frag_free(struct inet_frag_queue *q) diff --git a/net/ipv4/route.c b/net/ipv4/route.c index c2b69f97a3..dc4db2376a 100644 --- a/net/ipv4/route.c +++ b/net/ipv4/route.c @@ -895,11 +895,11 @@ void ip_rt_send_redirect(struct sk_buff *skb) } log_martians = IN_DEV_LOG_MARTIANS(in_dev); vif = l3mdev_master_ifindex_rcu(rt->dst.dev); - rcu_read_unlock(); net = dev_net(rt->dst.dev); - peer = inet_getpeer_v4(net->ipv4.peers, ip_hdr(skb)->saddr, vif, 1); + peer = inet_getpeer_v4(net->ipv4.peers, ip_hdr(skb)->saddr, vif); if (!peer) { + rcu_read_unlock(); icmp_send(skb, ICMP_REDIRECT, ICMP_REDIR_HOST, rt_nexthop(rt, ip_hdr(skb)->daddr)); return; @@ -918,7 +918,7 @@ void ip_rt_send_redirect(struct sk_buff *skb) */ if (peer->n_redirects >= ip_rt_redirect_number) { peer->rate_last = jiffies; - goto out_put_peer; + goto out_unlock; } /* Check for load limit; set rate_last to the latest sent @@ -933,16 +933,14 @@ void ip_rt_send_redirect(struct sk_buff *skb) icmp_send(skb, ICMP_REDIRECT, ICMP_REDIR_HOST, gw); peer->rate_last = jiffies; ++peer->n_redirects; -#ifdef CONFIG_IP_ROUTE_VERBOSE - if (log_martians && + if (IS_ENABLED(CONFIG_IP_ROUTE_VERBOSE) && log_martians && peer->n_redirects == ip_rt_redirect_number) net_warn_ratelimited("host %pI4/if%d ignores redirects for %pI4 to %pI4\n", &ip_hdr(skb)->saddr, inet_iif(skb), &ip_hdr(skb)->daddr, &gw); -#endif } -out_put_peer: - inet_putpeer(peer); +out_unlock: + rcu_read_unlock(); } static int ip_error(struct sk_buff *skb) @@ -1002,9 +1000,9 @@ static int ip_error(struct sk_buff *skb) break; } + rcu_read_lock(); peer = inet_getpeer_v4(net->ipv4.peers, ip_hdr(skb)->saddr, - l3mdev_master_ifindex(skb->dev), 1); - + l3mdev_master_ifindex_rcu(skb->dev)); send = true; if (peer) { now = jiffies; @@ -1016,8 +1014,9 @@ static int ip_error(struct sk_buff *skb) peer->rate_tokens -= ip_rt_error_cost; else send = false; - inet_putpeer(peer); } + rcu_read_unlock(); + if (send) icmp_send(skb, ICMP_DEST_UNREACH, code, 0); diff --git a/net/ipv6/icmp.c b/net/ipv6/icmp.c index 7bbe668654..d553b15a0c 100644 --- a/net/ipv6/icmp.c +++ b/net/ipv6/icmp.c @@ -230,10 +230,10 @@ static bool icmpv6_xrlim_allow(struct sock *sk, u8 type, if (rt->rt6i_dst.plen < 128) tmo >>= ((128 - rt->rt6i_dst.plen)>>5); - peer = inet_getpeer_v6(net->ipv6.peers, &fl6->daddr, 1); + rcu_read_lock(); + peer = inet_getpeer_v6(net->ipv6.peers, &fl6->daddr); res = inet_peer_xrlim_allow(peer, tmo); - if (peer) - inet_putpeer(peer); + rcu_read_unlock(); } if (!res) __ICMP6_INC_STATS(net, ip6_dst_idev(dst), diff --git a/net/ipv6/ip6_output.c b/net/ipv6/ip6_output.c index 737d43441e..b457d59774 100644 --- a/net/ipv6/ip6_output.c +++ b/net/ipv6/ip6_output.c @@ -612,15 +612,15 @@ int ip6_forward(struct sk_buff *skb) else target = &hdr->daddr; - peer = inet_getpeer_v6(net->ipv6.peers, &hdr->daddr, 1); + rcu_read_lock(); + peer = inet_getpeer_v6(net->ipv6.peers, &hdr->daddr); /* Limit redirects both by destination (here) and by source (inside ndisc_send_redirect) */ if (inet_peer_xrlim_allow(peer, 1*HZ)) ndisc_send_redirect(skb, target); - if (peer) - inet_putpeer(peer); + rcu_read_unlock(); } else { int addrtype = ipv6_addr_type(&hdr->saddr); diff --git a/net/ipv6/ndisc.c b/net/ipv6/ndisc.c index 4a2bf0fb42..7632ccaf49 
100644 --- a/net/ipv6/ndisc.c +++ b/net/ipv6/ndisc.c @@ -1656,10 +1656,12 @@ void ndisc_send_redirect(struct sk_buff *skb, const struct in6_addr *target) "Redirect: destination is not a neighbour\n"); goto release; } - peer = inet_getpeer_v6(net->ipv6.peers, &ipv6_hdr(skb)->saddr, 1); + + rcu_read_lock(); + peer = inet_getpeer_v6(net->ipv6.peers, &ipv6_hdr(skb)->saddr); ret = inet_peer_xrlim_allow(peer, 1*HZ); - if (peer) - inet_putpeer(peer); + rcu_read_unlock(); + if (!ret) goto release; diff --git a/redhat/kernel.changelog-9.7 b/redhat/kernel.changelog-9.7 index 16a88c9b4b..39a966cf09 100644 --- a/redhat/kernel.changelog-9.7 +++ b/redhat/kernel.changelog-9.7 @@ -1,3 +1,65 @@ +* Sat Jan 10 2026 CKI KWF Bot [5.14.0-611.24.1.el9_7] +- gitlab-ci: use rhel9.7 builder image (Michael Hofmann) +- smb: client: let recv_done verify data_offset, data_length and remaining_data_length (Paulo Alcantara) [RHEL-131388] {CVE-2025-39933} +- tty: n_tty: Fix buffer offsets when lookahead is used (Radostin Stoyanov) [RHEL-130039] +Resolves: RHEL-130039, RHEL-131388 + +* Thu Jan 08 2026 CKI KWF Bot [5.14.0-611.23.1.el9_7] +- book3s64/hash: Remove kfence support temporarily (Mamatha Inamdar) [RHEL-135574] +- xfs: rework datasync tracking and execution (CKI Backport Bot) [RHEL-135714] +- xfs: rearrange code in xfs_inode_item_precommit (CKI Backport Bot) [RHEL-135714] +- inetpeer: do not get a refcount in inet_getpeer() (Guillaume Nault) [RHEL-116121] +- inetpeer: update inetpeer timestamp in inet_getpeer() (Guillaume Nault) [RHEL-116121] +- inetpeer: remove create argument of inet_getpeer() (Guillaume Nault) [RHEL-116121] +- inetpeer: remove create argument of inet_getpeer_v[46]() (Guillaume Nault) [RHEL-116121] +- ipv4/route: avoid unused-but-set-variable warning (Guillaume Nault) [RHEL-116121] +- arm64: errata: Expand speculative SSBS workaround for Cortex-A720AE (Waiman Long) [RHEL-130734] +- arm64: cputype: Add Cortex-A720AE definitions (Waiman Long) [RHEL-130734] +- arm64: errata: Add missing sentinels to Spectre-BHB MIDR arrays (Waiman Long) [RHEL-130734] +- arm64: Add support for HIP09 Spectre-BHB mitigation (Waiman Long) [RHEL-130734] +- arm64: errata: Add newer ARM cores to the spectre_bhb_loop_affected() lists (Waiman Long) [RHEL-130734] +- arm64: cputype: Add MIDR_CORTEX_A76AE (Waiman Long) [RHEL-130734] +- arm64: errata: Add KRYO 2XX/3XX/4XX silver cores to Spectre BHB safe list (Waiman Long) [RHEL-130734] +- arm64: Subscribe Microsoft Azure Cobalt 100 to erratum 3194386 (Waiman Long) [RHEL-130734] +- arm64: errata: Expand speculative SSBS workaround once more (Waiman Long) [RHEL-130734] +- arm64: errata: Expand speculative SSBS workaround (again) (Waiman Long) [RHEL-130734] +- tools headers arm64: Sync arm64's cputype.h with the kernel sources (Waiman Long) [RHEL-130734] +- arm64: cputype: Add Neoverse-N3 definitions (Waiman Long) [RHEL-130734] +- arm64: cputype: Add Cortex-A725 definitions (Waiman Long) [RHEL-130734] +- arm64: cputype: Add Cortex-X1C definitions (Waiman Long) [RHEL-130734] +- drm/xe: Make dma-fences compliant with the safe access rules (Mika Penttilä) [RHEL-122263] {CVE-2025-38703} +Resolves: RHEL-116121, RHEL-122263, RHEL-130734, RHEL-135574, RHEL-135714 + +* Wed Jan 07 2026 CKI KWF Bot [5.14.0-611.22.1.el9_7] +- libceph: fix potential use-after-free in have_mon_and_osd_map() (CKI Backport Bot) [RHEL-137400] {CVE-2025-68285} +Resolves: RHEL-137400 + +* Thu Jan 01 2026 CKI KWF Bot [5.14.0-611.21.1.el9_7] +- usb: dwc3: Fix race condition between concurrent dwc3_remove_requests() 
call paths (CKI Backport Bot) [RHEL-137147] {CVE-2025-68287} +- redhat: conflict with unsupported shim on x86/aarch64 (9.7.z) (Li Tian) [RHEL-135775] +- drm/vmwgfx: Validate command header size against SVGA_CMD_MAX_DATASIZE (CKI Backport Bot) [RHEL-134428] {CVE-2025-40277} +- perf tools: Don't set attr.exclude_guest by default (Michael Petlan) [RHEL-131726] +- smb: client: fix refcount leak in smb2_set_path_attr (Paulo Alcantara) [RHEL-127422] +- smb: client: fix potential UAF in smb2_close_cached_fid() (Paulo Alcantara) [RHEL-127422] +- smb: client: fix potential cfid UAF in smb2_query_info_compound (Paulo Alcantara) [RHEL-127422] +- smb: client: Fix refcount leak for cifs_sb_tlink (Paulo Alcantara) [RHEL-127422] +- cifs: parse_dfs_referrals: prevent oob on malformed input (Paulo Alcantara) [RHEL-127422] +- smb: client: remove cfids_invalidation_worker (Paulo Alcantara) [RHEL-127422] +- smb client: fix bug with newly created file in cached dir (Paulo Alcantara) [RHEL-127422] +- smb: client: short-circuit negative lookups when parent dir is fully cached (Paulo Alcantara) [RHEL-127422] +- smb: client: short-circuit in open_cached_dir_by_dentry() if !dentry (Paulo Alcantara) [RHEL-127422] +- smb: client: remove pointless cfid->has_lease check (Paulo Alcantara) [RHEL-127422] +- smb: client: remove unused fid_lock (Paulo Alcantara) [RHEL-127422] +- smb: client: update cfid->last_access_time in open_cached_dir_by_dentry() (Paulo Alcantara) [RHEL-127422] +- smb: client: ensure open_cached_dir_by_dentry() only returns valid cfid (Paulo Alcantara) [RHEL-127422] +- smb: client: account smb directory cache usage and per-tcon totals (Paulo Alcantara) [RHEL-127422] +- smb: client: add drop_dir_cache module parameter to invalidate cached dirents (Paulo Alcantara) [RHEL-127422] +- smb: client: show lease state as R/H/W (or NONE) in open_files (Paulo Alcantara) [RHEL-127422] +- smb: client: show negotiated cipher in DebugData (Paulo Alcantara) [RHEL-127422] +- smb: client: add new tracepoint to trace lease break notification (Paulo Alcantara) [RHEL-127422] +- smb: client: Fix NULL pointer dereference in cifs_debug_dirs_proc_show() (Paulo Alcantara) [RHEL-127422] +Resolves: RHEL-127422, RHEL-131726, RHEL-134428, RHEL-135775, RHEL-137147 + * Sat Dec 20 2025 CKI KWF Bot [5.14.0-611.20.1.el9_7] - HID: multitouch: fix slab out-of-bounds access in mt_report_fixup() (CKI Backport Bot) [RHEL-124607] {CVE-2025-39806} - sctp: avoid NULL dereference when chunk data buffer is missing (CKI Backport Bot) [RHEL-134001] {CVE-2025-40240} diff --git a/redhat/kernel.spec.template b/redhat/kernel.spec.template index bfe2c6810a..968760e155 100644 --- a/redhat/kernel.spec.template +++ b/redhat/kernel.spec.template @@ -971,6 +971,9 @@ Recommends: linux-firmware\ Requires(preun): systemd >= 200\ Conflicts: xfsprogs < 4.3.0-1\ Conflicts: xorg-x11-drv-vmmouse < 13.0.99\ +%ifarch x86_64 aarch64\ +Conflicts: shim < 15.8-1\ +%endif\ %{expand:%%{?kernel%{?1:_%{1}}_conflicts:Conflicts: %%{kernel%{?1:_%{1}}_conflicts}}}\ %{expand:%%{?kernel%{?1:_%{1}}_obsoletes:Obsoletes: %%{kernel%{?1:_%{1}}_obsoletes}}}\ %{expand:%%{?kernel%{?1:_%{1}}_provides:Provides: %%{kernel%{?1:_%{1}}_provides}}}\ diff --git a/tools/arch/arm64/include/asm/cputype.h b/tools/arch/arm64/include/asm/cputype.h index 5a7dfeb8e8..488f8e7513 100644 --- a/tools/arch/arm64/include/asm/cputype.h +++ b/tools/arch/arm64/include/asm/cputype.h @@ -94,6 +94,7 @@ #define ARM_CPU_PART_NEOVERSE_V3 0xD84 #define ARM_CPU_PART_CORTEX_X925 0xD85 #define ARM_CPU_PART_CORTEX_A725 0xD87 
+#define ARM_CPU_PART_NEOVERSE_N3 0xD8E #define APM_CPU_PART_XGENE 0x000 #define APM_CPU_VAR_POTENZA 0x00 @@ -176,6 +177,7 @@ #define MIDR_NEOVERSE_V3 MIDR_CPU_MODEL(ARM_CPU_IMP_ARM, ARM_CPU_PART_NEOVERSE_V3) #define MIDR_CORTEX_X925 MIDR_CPU_MODEL(ARM_CPU_IMP_ARM, ARM_CPU_PART_CORTEX_X925) #define MIDR_CORTEX_A725 MIDR_CPU_MODEL(ARM_CPU_IMP_ARM, ARM_CPU_PART_CORTEX_A725) +#define MIDR_NEOVERSE_N3 MIDR_CPU_MODEL(ARM_CPU_IMP_ARM, ARM_CPU_PART_NEOVERSE_N3) #define MIDR_THUNDERX MIDR_CPU_MODEL(ARM_CPU_IMP_CAVIUM, CAVIUM_CPU_PART_THUNDERX) #define MIDR_THUNDERX_81XX MIDR_CPU_MODEL(ARM_CPU_IMP_CAVIUM, CAVIUM_CPU_PART_THUNDERX_81XX) #define MIDR_THUNDERX_83XX MIDR_CPU_MODEL(ARM_CPU_IMP_CAVIUM, CAVIUM_CPU_PART_THUNDERX_83XX) diff --git a/tools/perf/builtin-kvm.c b/tools/perf/builtin-kvm.c index 55ea17c5ff..099ce3ebf6 100644 --- a/tools/perf/builtin-kvm.c +++ b/tools/perf/builtin-kvm.c @@ -2147,6 +2147,7 @@ int cmd_kvm(int argc, const char **argv) "buildid-list", "stat", NULL }; const char *kvm_usage[] = { NULL, NULL }; + exclude_GH_default = true; perf_host = 0; perf_guest = 1; diff --git a/tools/perf/tests/attr/test-record-dummy-C0 b/tools/perf/tests/attr/test-record-dummy-C0 index 3050298bd6..91499405ff 100644 --- a/tools/perf/tests/attr/test-record-dummy-C0 +++ b/tools/perf/tests/attr/test-record-dummy-C0 @@ -37,7 +37,7 @@ precise_ip=0 mmap_data=0 sample_id_all=1 exclude_host=0 -exclude_guest=1 +exclude_guest=0 exclude_callchain_kernel=0 exclude_callchain_user=0 mmap2=1 diff --git a/tools/perf/tests/parse-events.c b/tools/perf/tests/parse-events.c index 9e3086d021..f4cd1b5d3c 100644 --- a/tools/perf/tests/parse-events.c +++ b/tools/perf/tests/parse-events.c @@ -932,7 +932,7 @@ static int test__group2(struct evlist *evlist) TEST_ASSERT_VAL("wrong exclude_user", !evsel->core.attr.exclude_user); TEST_ASSERT_VAL("wrong exclude_kernel", !evsel->core.attr.exclude_kernel); TEST_ASSERT_VAL("wrong exclude_hv", evsel->core.attr.exclude_hv); - TEST_ASSERT_VAL("wrong exclude guest", evsel->core.attr.exclude_guest); + TEST_ASSERT_VAL("wrong exclude guest", !evsel->core.attr.exclude_guest); TEST_ASSERT_VAL("wrong exclude host", !evsel->core.attr.exclude_host); TEST_ASSERT_VAL("wrong precise_ip", !evsel->core.attr.precise_ip); TEST_ASSERT_VAL("wrong leader", evsel__is_group_leader(evsel)); @@ -947,7 +947,7 @@ static int test__group2(struct evlist *evlist) TEST_ASSERT_VAL("wrong exclude_user", !evsel->core.attr.exclude_user); TEST_ASSERT_VAL("wrong exclude_kernel", evsel->core.attr.exclude_kernel); TEST_ASSERT_VAL("wrong exclude_hv", evsel->core.attr.exclude_hv); - TEST_ASSERT_VAL("wrong exclude guest", evsel->core.attr.exclude_guest); + TEST_ASSERT_VAL("wrong exclude guest", !evsel->core.attr.exclude_guest); TEST_ASSERT_VAL("wrong exclude host", !evsel->core.attr.exclude_host); TEST_ASSERT_VAL("wrong precise_ip", !evsel->core.attr.precise_ip); if (evsel__has_leader(evsel, leader)) @@ -1072,7 +1072,7 @@ static int test__group3(struct evlist *evlist __maybe_unused) TEST_ASSERT_VAL("wrong exclude_user", !evsel->core.attr.exclude_user); TEST_ASSERT_VAL("wrong exclude_kernel", evsel->core.attr.exclude_kernel); TEST_ASSERT_VAL("wrong exclude_hv", evsel->core.attr.exclude_hv); - TEST_ASSERT_VAL("wrong exclude guest", evsel->core.attr.exclude_guest); + TEST_ASSERT_VAL("wrong exclude guest", !evsel->core.attr.exclude_guest); TEST_ASSERT_VAL("wrong exclude host", !evsel->core.attr.exclude_host); TEST_ASSERT_VAL("wrong precise_ip", !evsel->core.attr.precise_ip); TEST_ASSERT_VAL("wrong leader", 
evsel__is_group_leader(evsel)); @@ -1222,7 +1222,7 @@ static int test__group5(struct evlist *evlist __maybe_unused) TEST_ASSERT_VAL("wrong exclude_user", !evsel->core.attr.exclude_user); TEST_ASSERT_VAL("wrong exclude_kernel", !evsel->core.attr.exclude_kernel); TEST_ASSERT_VAL("wrong exclude_hv", !evsel->core.attr.exclude_hv); - TEST_ASSERT_VAL("wrong exclude guest", evsel->core.attr.exclude_guest); + TEST_ASSERT_VAL("wrong exclude guest", !evsel->core.attr.exclude_guest); TEST_ASSERT_VAL("wrong exclude host", !evsel->core.attr.exclude_host); TEST_ASSERT_VAL("wrong precise_ip", !evsel->core.attr.precise_ip); TEST_ASSERT_VAL("wrong leader", evsel__is_group_leader(evsel)); @@ -1437,7 +1437,7 @@ static int test__leader_sample1(struct evlist *evlist) TEST_ASSERT_VAL("wrong exclude_user", !evsel->core.attr.exclude_user); TEST_ASSERT_VAL("wrong exclude_kernel", !evsel->core.attr.exclude_kernel); TEST_ASSERT_VAL("wrong exclude_hv", !evsel->core.attr.exclude_hv); - TEST_ASSERT_VAL("wrong exclude guest", evsel->core.attr.exclude_guest); + TEST_ASSERT_VAL("wrong exclude guest", !evsel->core.attr.exclude_guest); TEST_ASSERT_VAL("wrong exclude host", !evsel->core.attr.exclude_host); TEST_ASSERT_VAL("wrong precise_ip", !evsel->core.attr.precise_ip); TEST_ASSERT_VAL("wrong group name", !evsel->group_name); @@ -1453,7 +1453,7 @@ static int test__leader_sample1(struct evlist *evlist) TEST_ASSERT_VAL("wrong exclude_user", !evsel->core.attr.exclude_user); TEST_ASSERT_VAL("wrong exclude_kernel", !evsel->core.attr.exclude_kernel); TEST_ASSERT_VAL("wrong exclude_hv", !evsel->core.attr.exclude_hv); - TEST_ASSERT_VAL("wrong exclude guest", evsel->core.attr.exclude_guest); + TEST_ASSERT_VAL("wrong exclude guest", !evsel->core.attr.exclude_guest); TEST_ASSERT_VAL("wrong exclude host", !evsel->core.attr.exclude_host); TEST_ASSERT_VAL("wrong precise_ip", !evsel->core.attr.precise_ip); TEST_ASSERT_VAL("wrong leader", evsel__has_leader(evsel, leader)); @@ -1468,7 +1468,7 @@ static int test__leader_sample1(struct evlist *evlist) TEST_ASSERT_VAL("wrong exclude_user", !evsel->core.attr.exclude_user); TEST_ASSERT_VAL("wrong exclude_kernel", !evsel->core.attr.exclude_kernel); TEST_ASSERT_VAL("wrong exclude_hv", !evsel->core.attr.exclude_hv); - TEST_ASSERT_VAL("wrong exclude guest", evsel->core.attr.exclude_guest); + TEST_ASSERT_VAL("wrong exclude guest", !evsel->core.attr.exclude_guest); TEST_ASSERT_VAL("wrong exclude host", !evsel->core.attr.exclude_host); TEST_ASSERT_VAL("wrong precise_ip", !evsel->core.attr.precise_ip); TEST_ASSERT_VAL("wrong group name", !evsel->group_name); @@ -1497,7 +1497,7 @@ static int test__leader_sample2(struct evlist *evlist __maybe_unused) TEST_ASSERT_VAL("wrong exclude_user", !evsel->core.attr.exclude_user); TEST_ASSERT_VAL("wrong exclude_kernel", evsel->core.attr.exclude_kernel); TEST_ASSERT_VAL("wrong exclude_hv", evsel->core.attr.exclude_hv); - TEST_ASSERT_VAL("wrong exclude guest", evsel->core.attr.exclude_guest); + TEST_ASSERT_VAL("wrong exclude guest", !evsel->core.attr.exclude_guest); TEST_ASSERT_VAL("wrong exclude host", !evsel->core.attr.exclude_host); TEST_ASSERT_VAL("wrong precise_ip", !evsel->core.attr.precise_ip); TEST_ASSERT_VAL("wrong group name", !evsel->group_name); @@ -1513,7 +1513,7 @@ static int test__leader_sample2(struct evlist *evlist __maybe_unused) TEST_ASSERT_VAL("wrong exclude_user", !evsel->core.attr.exclude_user); TEST_ASSERT_VAL("wrong exclude_kernel", evsel->core.attr.exclude_kernel); TEST_ASSERT_VAL("wrong exclude_hv", evsel->core.attr.exclude_hv); - 
TEST_ASSERT_VAL("wrong exclude guest", evsel->core.attr.exclude_guest); + TEST_ASSERT_VAL("wrong exclude guest", !evsel->core.attr.exclude_guest); TEST_ASSERT_VAL("wrong exclude host", !evsel->core.attr.exclude_host); TEST_ASSERT_VAL("wrong precise_ip", !evsel->core.attr.precise_ip); TEST_ASSERT_VAL("wrong group name", !evsel->group_name); diff --git a/tools/perf/util/parse-events.c b/tools/perf/util/parse-events.c index 31705b6a41..7b8488e0e4 100644 --- a/tools/perf/util/parse-events.c +++ b/tools/perf/util/parse-events.c @@ -1772,7 +1772,7 @@ static int parse_events__modifier_list(struct parse_events_state *parse_state, if (mod.user) { if (!exclude) exclude = eu = ek = eh = 1; - if (!exclude_GH && !perf_guest) + if (!exclude_GH && !perf_guest && exclude_GH_default) eG = 1; eu = 0; } diff --git a/tools/perf/util/util.c b/tools/perf/util/util.c index 9d55a13787..280c86d61d 100644 --- a/tools/perf/util/util.c +++ b/tools/perf/util/util.c @@ -78,17 +78,23 @@ bool sysctl__nmi_watchdog_enabled(void) bool test_attr__enabled; +bool exclude_GH_default; + bool perf_host = true; bool perf_guest = false; void event_attr_init(struct perf_event_attr *attr) { + /* to capture ABI version */ + attr->size = sizeof(*attr); + + if (!exclude_GH_default) + return; + if (!perf_host) attr->exclude_host = 1; if (!perf_guest) attr->exclude_guest = 1; - /* to capture ABI version */ - attr->size = sizeof(*attr); } int mkdir_p(char *path, mode_t mode) diff --git a/tools/perf/util/util.h b/tools/perf/util/util.h index 9966c21aaf..4920e102ff 100644 --- a/tools/perf/util/util.h +++ b/tools/perf/util/util.h @@ -21,6 +21,9 @@ extern const char perf_more_info_string[]; extern const char *input_name; +/* This controls whether perf_{host,guest} will set attr.exclude_{host,guest}. */ +extern bool exclude_GH_default; + extern bool perf_host; extern bool perf_guest;
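Summing up the perf change: exclude_GH_default defaults to false, so event_attr_init() now leaves attr.exclude_{host,guest} clear unless a tool opts in, as cmd_kvm() does above. A condensed sketch reusing the declarations from these hunks; setup_guest_only_counting() is an illustrative helper, not a real perf function:

#include <linux/perf_event.h>
#include <stdbool.h>

/* Declarations as introduced in tools/perf/util/util.h above. */
extern bool exclude_GH_default;
extern bool perf_host;
extern bool perf_guest;
void event_attr_init(struct perf_event_attr *attr);

/* perf kvm restores the old guest-only defaults before parsing events;
 * every other tool now leaves exclude_guest == 0 by default. */
static void setup_guest_only_counting(void)
{
        exclude_GH_default = true;
        perf_host = false;      /* cmd_kvm() writes perf_host = 0 */
        perf_guest = true;      /* cmd_kvm() writes perf_guest = 1 */
}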