From 9c95bdb7332bb265b4aa9719d95398f6334aeb8f Mon Sep 17 00:00:00 2001 From: almalinux-bot-kernel Date: Sat, 15 Nov 2025 04:06:12 +0000 Subject: [PATCH] Import of kernel-6.12.0-55.43.1.el10_0 --- ...5.42.1.el10 => COPYING-6.12.0-55.43.1.el10 | 0 .../ABI/testing/sysfs-devices-system-cpu | 1 + Documentation/admin-guide/hw-vuln/index.rst | 2 + .../hw-vuln/indirect-target-selection.rst | 168 +++ .../hw-vuln/reg-file-data-sampling.rst | 8 - Documentation/admin-guide/hw-vuln/rsb.rst | 268 ++++ Documentation/admin-guide/hw-vuln/srso.rst | 13 + .../admin-guide/kernel-parameters.txt | 20 + Documentation/admin-guide/sysctl/kernel.rst | 11 + Documentation/core-api/packing.rst | 189 ++- .../device_drivers/ethernet/intel/ice.rst | 31 + MAINTAINERS | 2 + Makefile | 4 + Makefile.rhelver | 2 +- arch/Kconfig | 8 + arch/alpha/include/asm/Kbuild | 1 + arch/arc/include/asm/Kbuild | 1 + .../include/asm/{patch.h => text-patching.h} | 0 arch/arm/kernel/ftrace.c | 2 +- arch/arm/kernel/jump_label.c | 2 +- arch/arm/kernel/kgdb.c | 2 +- arch/arm/kernel/patch.c | 2 +- arch/arm/probes/kprobes/core.c | 2 +- arch/arm/probes/kprobes/opt-arm.c | 2 +- arch/arm64/include/asm/cputype.h | 2 + arch/arm64/include/asm/insn.h | 1 + arch/arm64/include/asm/set_memory.h | 1 + arch/arm64/include/asm/spectre.h | 4 +- .../asm/{patching.h => text-patching.h} | 0 arch/arm64/kernel/ftrace.c | 2 +- arch/arm64/kernel/jump_label.c | 2 +- arch/arm64/kernel/kgdb.c | 2 +- arch/arm64/kernel/patching.c | 2 +- arch/arm64/kernel/probes/kprobes.c | 2 +- arch/arm64/kernel/proton-pack.c | 203 +-- arch/arm64/kernel/traps.c | 2 +- arch/arm64/lib/insn.c | 76 +- arch/arm64/mm/pageattr.c | 10 + arch/arm64/net/bpf_jit_comp.c | 59 +- arch/csky/include/asm/Kbuild | 1 + arch/hexagon/include/asm/Kbuild | 1 + arch/loongarch/include/asm/Kbuild | 1 + arch/loongarch/include/asm/set_memory.h | 1 + arch/loongarch/mm/pageattr.c | 19 + arch/m68k/include/asm/Kbuild | 1 + arch/microblaze/include/asm/Kbuild | 1 + arch/mips/include/asm/Kbuild | 1 + arch/nios2/include/asm/Kbuild | 1 + arch/openrisc/include/asm/Kbuild | 1 + .../include/asm/{patch.h => text-patching.h} | 0 arch/parisc/kernel/ftrace.c | 2 +- arch/parisc/kernel/jump_label.c | 2 +- arch/parisc/kernel/kgdb.c | 2 +- arch/parisc/kernel/kprobes.c | 2 +- arch/parisc/kernel/patch.c | 2 +- arch/powerpc/include/asm/kprobes.h | 2 +- arch/powerpc/include/asm/mmzone.h | 1 + .../asm/{code-patching.h => text-patching.h} | 0 arch/powerpc/kernel/crash_dump.c | 2 +- arch/powerpc/kernel/epapr_paravirt.c | 2 +- arch/powerpc/kernel/jump_label.c | 2 +- arch/powerpc/kernel/kgdb.c | 2 +- arch/powerpc/kernel/kprobes.c | 2 +- arch/powerpc/kernel/module_32.c | 2 +- arch/powerpc/kernel/module_64.c | 2 +- arch/powerpc/kernel/optprobes.c | 2 +- arch/powerpc/kernel/process.c | 2 +- arch/powerpc/kernel/security.c | 2 +- arch/powerpc/kernel/setup_32.c | 2 +- arch/powerpc/kernel/setup_64.c | 2 +- arch/powerpc/kernel/static_call.c | 2 +- arch/powerpc/kernel/trace/ftrace.c | 2 +- arch/powerpc/kernel/trace/ftrace_64_pg.c | 2 +- arch/powerpc/lib/code-patching.c | 2 +- arch/powerpc/lib/feature-fixups.c | 2 +- arch/powerpc/lib/test-code-patching.c | 2 +- arch/powerpc/lib/test_emulate_step.c | 2 +- arch/powerpc/mm/book3s32/mmu.c | 2 +- arch/powerpc/mm/book3s64/hash_utils.c | 2 +- arch/powerpc/mm/book3s64/slb.c | 2 +- arch/powerpc/mm/kasan/init_32.c | 2 +- arch/powerpc/mm/mem.c | 2 +- arch/powerpc/mm/nohash/44x.c | 2 +- arch/powerpc/mm/nohash/book3e_pgtable.c | 2 +- arch/powerpc/mm/nohash/tlb.c | 2 +- arch/powerpc/mm/nohash/tlb_64e.c | 2 +- arch/powerpc/mm/numa.c | 2 +- arch/powerpc/net/bpf_jit_comp.c | 2 +- arch/powerpc/perf/8xx-pmu.c | 2 +- arch/powerpc/perf/core-book3s.c | 2 +- arch/powerpc/platforms/85xx/smp.c | 2 +- arch/powerpc/platforms/86xx/mpc86xx_smp.c | 2 +- arch/powerpc/platforms/cell/smp.c | 2 +- arch/powerpc/platforms/powermac/smp.c | 2 +- arch/powerpc/platforms/powernv/idle.c | 2 +- arch/powerpc/platforms/powernv/smp.c | 2 +- arch/powerpc/platforms/pseries/iommu.c | 29 +- arch/powerpc/platforms/pseries/smp.c | 2 +- arch/powerpc/xmon/xmon.c | 2 +- arch/riscv/errata/andes/errata.c | 2 +- arch/riscv/errata/sifive/errata.c | 2 +- arch/riscv/errata/thead/errata.c | 2 +- arch/riscv/include/asm/set_memory.h | 1 + .../include/asm/{patch.h => text-patching.h} | 0 arch/riscv/include/asm/uprobes.h | 2 +- arch/riscv/kernel/alternative.c | 2 +- arch/riscv/kernel/cpufeature.c | 3 +- arch/riscv/kernel/ftrace.c | 2 +- arch/riscv/kernel/jump_label.c | 2 +- arch/riscv/kernel/patch.c | 2 +- arch/riscv/kernel/probes/kprobes.c | 2 +- arch/riscv/mm/pageattr.c | 15 + arch/riscv/net/bpf_jit_comp64.c | 2 +- arch/riscv/net/bpf_jit_core.c | 2 +- arch/s390/include/asm/set_memory.h | 1 + arch/s390/mm/pageattr.c | 11 + arch/sh/include/asm/Kbuild | 1 + arch/sparc/include/asm/Kbuild | 1 + arch/um/kernel/um_arch.c | 5 + arch/x86/Kconfig | 15 +- arch/x86/entry/entry.S | 9 +- arch/x86/entry/entry_64.S | 20 +- arch/x86/include/asm/alternative.h | 14 + arch/x86/include/asm/cpu_device_id.h | 132 +- arch/x86/include/asm/cpufeatures.h | 11 +- arch/x86/include/asm/intel-family.h | 6 + arch/x86/include/asm/msr-index.h | 9 + arch/x86/include/asm/nospec-branch.h | 24 +- arch/x86/include/asm/pgtable_types.h | 2 + arch/x86/include/asm/processor.h | 27 + arch/x86/include/asm/set_memory.h | 1 + arch/x86/include/asm/text-patching.h | 1 + arch/x86/include/asm/topology.h | 9 + arch/x86/kernel/alternative.c | 72 +- arch/x86/kernel/apic/apic.c | 18 +- arch/x86/kernel/cpu/bugs.c | 439 ++++-- arch/x86/kernel/cpu/common.c | 125 +- arch/x86/kernel/cpu/debugfs.c | 1 + arch/x86/kernel/cpu/match.c | 43 +- arch/x86/kernel/cpu/scattered.c | 1 + arch/x86/kernel/cpu/topology_amd.c | 3 + arch/x86/kernel/cpu/topology_common.c | 34 + arch/x86/kernel/ftrace.c | 2 +- arch/x86/kernel/static_call.c | 4 +- arch/x86/kernel/vmlinux.lds.S | 10 + arch/x86/kvm/cpuid.c | 2 +- arch/x86/kvm/svm/svm.c | 68 +- arch/x86/kvm/svm/svm.h | 2 + arch/x86/kvm/vmx/vmx.c | 3 +- arch/x86/kvm/x86.c | 4 +- arch/x86/lib/msr.c | 2 + arch/x86/lib/retpoline.S | 39 + arch/x86/mm/init.c | 38 +- arch/x86/mm/pat/cpa-test.c | 2 +- arch/x86/mm/pat/set_memory.c | 231 ++- arch/x86/mm/tlb.c | 9 +- arch/x86/net/bpf_jit_comp.c | 7 +- arch/xtensa/include/asm/Kbuild | 1 + .../kernel-6.12.0-aarch64-64k-debug.config | 3 +- configs/kernel-6.12.0-aarch64-64k.config | 3 +- ...nel-6.12.0-aarch64-automotive-debug.config | 3 +- .../kernel-6.12.0-aarch64-automotive.config | 3 +- configs/kernel-6.12.0-aarch64-debug.config | 3 +- .../kernel-6.12.0-aarch64-rt-64k-debug.config | 3 +- configs/kernel-6.12.0-aarch64-rt-64k.config | 3 +- configs/kernel-6.12.0-aarch64-rt-debug.config | 3 +- configs/kernel-6.12.0-aarch64-rt.config | 3 +- configs/kernel-6.12.0-aarch64.config | 3 +- configs/kernel-6.12.0-ppc64le-debug.config | 3 +- configs/kernel-6.12.0-ppc64le.config | 6 +- configs/kernel-6.12.0-s390x-debug.config | 3 +- configs/kernel-6.12.0-s390x-zfcpdump.config | 2 +- configs/kernel-6.12.0-s390x.config | 3 +- ...rnel-6.12.0-x86_64-automotive-debug.config | 5 +- .../kernel-6.12.0-x86_64-automotive.config | 5 +- configs/kernel-6.12.0-x86_64-debug.config | 5 +- configs/kernel-6.12.0-x86_64-rt-debug.config | 5 +- configs/kernel-6.12.0-x86_64-rt.config | 5 +- configs/kernel-6.12.0-x86_64.config | 5 +- configs/kernel-6.12.0-x86_64_v2-debug.config | 5 +- .../kernel-6.12.0-x86_64_v2-rt-debug.config | 5 +- configs/kernel-6.12.0-x86_64_v2-rt.config | 5 +- configs/kernel-6.12.0-x86_64_v2.config | 5 +- drivers/base/cpu.c | 3 + drivers/edac/i10nm_base.c | 21 +- drivers/edac/skx_base.c | 2 +- .../net/dsa/sja1105/sja1105_static_config.c | 8 +- .../net/ethernet/chelsio/cxgb4/cxgb4_main.c | 5 +- drivers/net/ethernet/intel/Kconfig | 3 +- drivers/net/ethernet/intel/ice/Makefile | 5 +- .../net/ethernet/intel/ice/devlink/devlink.c | 5 +- .../net/ethernet/intel/ice/devlink/health.c | 550 +++++++ .../net/ethernet/intel/ice/devlink/health.h | 71 + .../ice/devlink/{devlink_port.c => port.c} | 2 +- .../ice/devlink/{devlink_port.h => port.h} | 0 drivers/net/ethernet/intel/ice/ice.h | 76 +- .../net/ethernet/intel/ice/ice_adminq_cmd.h | 119 +- drivers/net/ethernet/intel/ice/ice_arfs.c | 2 +- drivers/net/ethernet/intel/ice/ice_base.c | 45 +- drivers/net/ethernet/intel/ice/ice_cgu_regs.h | 181 --- drivers/net/ethernet/intel/ice/ice_common.c | 691 +++++---- drivers/net/ethernet/intel/ice/ice_common.h | 58 +- drivers/net/ethernet/intel/ice/ice_ddp.c | 312 ++-- drivers/net/ethernet/intel/ice/ice_ddp.h | 5 +- drivers/net/ethernet/intel/ice/ice_dpll.c | 49 +- drivers/net/ethernet/intel/ice/ice_eswitch.c | 6 - drivers/net/ethernet/intel/ice/ice_eswitch.h | 7 +- drivers/net/ethernet/intel/ice/ice_ethtool.c | 189 ++- drivers/net/ethernet/intel/ice/ice_ethtool.h | 38 +- .../net/ethernet/intel/ice/ice_ethtool_fdir.c | 21 +- .../net/ethernet/intel/ice/ice_flex_pipe.h | 3 - drivers/net/ethernet/intel/ice/ice_gnss.c | 31 +- drivers/net/ethernet/intel/ice/ice_gnss.h | 4 +- .../net/ethernet/intel/ice/ice_hw_autogen.h | 20 + drivers/net/ethernet/intel/ice/ice_lag.c | 33 + .../net/ethernet/intel/ice/ice_lan_tx_rx.h | 58 +- drivers/net/ethernet/intel/ice/ice_lib.c | 33 +- drivers/net/ethernet/intel/ice/ice_lib.h | 6 - drivers/net/ethernet/intel/ice/ice_main.c | 181 ++- drivers/net/ethernet/intel/ice/ice_parser.h | 6 +- .../net/ethernet/intel/ice/ice_parser_rt.c | 12 +- drivers/net/ethernet/intel/ice/ice_ptp.c | 697 +++++---- drivers/net/ethernet/intel/ice/ice_ptp.h | 23 +- .../net/ethernet/intel/ice/ice_ptp_consts.h | 270 +--- drivers/net/ethernet/intel/ice/ice_ptp_hw.c | 1288 ++++++----------- drivers/net/ethernet/intel/ice/ice_ptp_hw.h | 149 +- drivers/net/ethernet/intel/ice/ice_repr.c | 2 +- drivers/net/ethernet/intel/ice/ice_sbq_cmd.h | 11 +- drivers/net/ethernet/intel/ice/ice_sched.c | 181 ++- drivers/net/ethernet/intel/ice/ice_sf_eth.c | 2 +- drivers/net/ethernet/intel/ice/ice_switch.c | 3 +- drivers/net/ethernet/intel/ice/ice_tspll.c | 626 ++++++++ drivers/net/ethernet/intel/ice/ice_tspll.h | 31 + drivers/net/ethernet/intel/ice/ice_txrx.c | 111 +- drivers/net/ethernet/intel/ice/ice_txrx.h | 7 +- drivers/net/ethernet/intel/ice/ice_txrx_lib.c | 26 + drivers/net/ethernet/intel/ice/ice_txrx_lib.h | 1 - drivers/net/ethernet/intel/ice/ice_type.h | 38 +- drivers/net/ethernet/intel/ice/ice_vf_lib.h | 11 + drivers/net/ethernet/intel/ice/ice_virtchnl.c | 528 ++++++- drivers/net/ethernet/intel/ice/ice_virtchnl.h | 17 + .../intel/ice/ice_virtchnl_allowlist.c | 13 + .../ethernet/intel/ice/ice_virtchnl_fdir.c | 29 +- drivers/net/ethernet/intel/ice/ice_xsk.c | 4 +- drivers/net/ethernet/intel/ice/ice_xsk.h | 8 - drivers/net/ethernet/intel/igc/igc.h | 1 + drivers/net/ethernet/intel/igc/igc_defines.h | 6 +- drivers/net/ethernet/intel/igc/igc_main.c | 1 + drivers/net/ethernet/intel/igc/igc_ptp.c | 115 +- fs/coredump.c | 15 +- include/asm-generic/codetag.lds.h | 19 + include/asm-generic/text-patching.h | 5 + include/config/PACKING | 0 include/config/PACKING_KUNIT_TEST | 0 include/config/auto.conf | 2 + include/generated/autoconf.h | 2 + include/generated/rustc_cfg | 4 + include/linux/alloc_tag.h | 13 +- include/linux/avf/virtchnl.h | 254 +++- include/linux/codetag.h | 37 +- include/linux/cpu.h | 2 + include/linux/execmem.h | 68 + include/linux/maple_tree.h | 14 + include/linux/mod_devicetable.h | 4 + include/linux/module.h | 1 + include/linux/packing.h | 457 +++++- include/linux/set_memory.h | 6 + include/linux/text-patching.h | 15 + include/linux/unroll.h | 44 + include/linux/vm_event_item.h | 2 + include/linux/vmalloc.h | 60 +- include/net/devlink.h | 13 + kernel/module/debug_kmemleak.c | 3 +- kernel/module/main.c | 107 +- kernel/module/strict_rwx.c | 6 +- lib/Kconfig | 12 + lib/Makefile | 1 + lib/alloc_tag.c | 284 +++- lib/codetag.c | 100 +- lib/packing.c | 563 +++++-- lib/packing_test.c | 474 ++++++ mm/execmem.c | 349 ++++- mm/internal.h | 1 + mm/vmalloc.c | 14 +- mm/vmstat.c | 2 + net/devlink/health.c | 67 + net/wireless/scan.c | 3 +- .../common/generic/x86/CONFIG_MITIGATION_ITS | 1 + redhat/configs/rhel/generic/CONFIG_PACKING | 2 +- .../rhel/generic/CONFIG_PACKING_KUNIT_TEST | 1 + redhat/kernel.changelog-10.0 | 211 +++ scripts/Makefile | 2 +- scripts/gen_packed_field_checks.c | 37 + scripts/module.lds.S | 5 +- tools/arch/x86/include/asm/cpufeatures.h | 1 - tools/arch/x86/include/asm/msr-index.h | 24 +- tools/testing/selftests/Makefile | 1 + tools/testing/selftests/x86/bugs/Makefile | 3 + tools/testing/selftests/x86/bugs/common.py | 164 +++ .../x86/bugs/its_indirect_alignment.py | 150 ++ .../selftests/x86/bugs/its_permutations.py | 109 ++ .../selftests/x86/bugs/its_ret_alignment.py | 139 ++ tools/testing/selftests/x86/bugs/its_sysfs.py | 65 + 303 files changed, 10233 insertions(+), 3621 deletions(-) rename COPYING-6.12.0-55.42.1.el10 => COPYING-6.12.0-55.43.1.el10 (100%) create mode 100644 Documentation/admin-guide/hw-vuln/indirect-target-selection.rst create mode 100644 Documentation/admin-guide/hw-vuln/rsb.rst rename arch/arm/include/asm/{patch.h => text-patching.h} (100%) rename arch/arm64/include/asm/{patching.h => text-patching.h} (100%) rename arch/parisc/include/asm/{patch.h => text-patching.h} (100%) rename arch/powerpc/include/asm/{code-patching.h => text-patching.h} (100%) rename arch/riscv/include/asm/{patch.h => text-patching.h} (100%) create mode 100644 drivers/net/ethernet/intel/ice/devlink/health.c create mode 100644 drivers/net/ethernet/intel/ice/devlink/health.h rename drivers/net/ethernet/intel/ice/devlink/{devlink_port.c => port.c} (99%) rename drivers/net/ethernet/intel/ice/devlink/{devlink_port.h => port.h} (100%) delete mode 100644 drivers/net/ethernet/intel/ice/ice_cgu_regs.h create mode 100644 drivers/net/ethernet/intel/ice/ice_tspll.c create mode 100644 drivers/net/ethernet/intel/ice/ice_tspll.h create mode 100644 include/asm-generic/text-patching.h create mode 100644 include/config/PACKING create mode 100644 include/config/PACKING_KUNIT_TEST create mode 100644 include/linux/text-patching.h create mode 100644 lib/packing_test.c create mode 100644 redhat/configs/common/generic/x86/CONFIG_MITIGATION_ITS create mode 100644 redhat/configs/rhel/generic/CONFIG_PACKING_KUNIT_TEST create mode 100644 scripts/gen_packed_field_checks.c create mode 100644 tools/testing/selftests/x86/bugs/Makefile create mode 100755 tools/testing/selftests/x86/bugs/common.py create mode 100755 tools/testing/selftests/x86/bugs/its_indirect_alignment.py create mode 100755 tools/testing/selftests/x86/bugs/its_permutations.py create mode 100755 tools/testing/selftests/x86/bugs/its_ret_alignment.py create mode 100755 tools/testing/selftests/x86/bugs/its_sysfs.py diff --git a/COPYING-6.12.0-55.42.1.el10 b/COPYING-6.12.0-55.43.1.el10 similarity index 100% rename from COPYING-6.12.0-55.42.1.el10 rename to COPYING-6.12.0-55.43.1.el10 diff --git a/Documentation/ABI/testing/sysfs-devices-system-cpu b/Documentation/ABI/testing/sysfs-devices-system-cpu index 206079d3bd..6a1acabb29 100644 --- a/Documentation/ABI/testing/sysfs-devices-system-cpu +++ b/Documentation/ABI/testing/sysfs-devices-system-cpu @@ -511,6 +511,7 @@ Description: information about CPUs heterogeneity. What: /sys/devices/system/cpu/vulnerabilities /sys/devices/system/cpu/vulnerabilities/gather_data_sampling + /sys/devices/system/cpu/vulnerabilities/indirect_target_selection /sys/devices/system/cpu/vulnerabilities/itlb_multihit /sys/devices/system/cpu/vulnerabilities/l1tf /sys/devices/system/cpu/vulnerabilities/mds diff --git a/Documentation/admin-guide/hw-vuln/index.rst b/Documentation/admin-guide/hw-vuln/index.rst index ff0b440ef2..ce296b8430 100644 --- a/Documentation/admin-guide/hw-vuln/index.rst +++ b/Documentation/admin-guide/hw-vuln/index.rst @@ -22,3 +22,5 @@ are configurable at compile, boot or run time. srso gather_data_sampling reg-file-data-sampling + rsb + indirect-target-selection diff --git a/Documentation/admin-guide/hw-vuln/indirect-target-selection.rst b/Documentation/admin-guide/hw-vuln/indirect-target-selection.rst new file mode 100644 index 0000000000..d9ca64108d --- /dev/null +++ b/Documentation/admin-guide/hw-vuln/indirect-target-selection.rst @@ -0,0 +1,168 @@ +.. SPDX-License-Identifier: GPL-2.0 + +Indirect Target Selection (ITS) +=============================== + +ITS is a vulnerability in some Intel CPUs that support Enhanced IBRS and were +released before Alder Lake. ITS may allow an attacker to control the prediction +of indirect branches and RETs located in the lower half of a cacheline. + +ITS is assigned CVE-2024-28956 with a CVSS score of 4.7 (Medium). + +Scope of Impact +--------------- +- **eIBRS Guest/Host Isolation**: Indirect branches in KVM/kernel may still be + predicted with unintended target corresponding to a branch in the guest. + +- **Intra-Mode BTI**: In-kernel training such as through cBPF or other native + gadgets. + +- **Indirect Branch Prediction Barrier (IBPB)**: After an IBPB, indirect + branches may still be predicted with targets corresponding to direct branches + executed prior to the IBPB. This is fixed by the IPU 2025.1 microcode, which + should be available via distro updates. Alternatively microcode can be + obtained from Intel's github repository [#f1]_. + +Affected CPUs +------------- +Below is the list of ITS affected CPUs [#f2]_ [#f3]_: + + ======================== ============ ==================== =============== + Common name Family_Model eIBRS Intra-mode BTI + Guest/Host Isolation + ======================== ============ ==================== =============== + SKYLAKE_X (step >= 6) 06_55H Affected Affected + ICELAKE_X 06_6AH Not affected Affected + ICELAKE_D 06_6CH Not affected Affected + ICELAKE_L 06_7EH Not affected Affected + TIGERLAKE_L 06_8CH Not affected Affected + TIGERLAKE 06_8DH Not affected Affected + KABYLAKE_L (step >= 12) 06_8EH Affected Affected + KABYLAKE (step >= 13) 06_9EH Affected Affected + COMETLAKE 06_A5H Affected Affected + COMETLAKE_L 06_A6H Affected Affected + ROCKETLAKE 06_A7H Not affected Affected + ======================== ============ ==================== =============== + +- All affected CPUs enumerate Enhanced IBRS feature. +- IBPB isolation is affected on all ITS affected CPUs, and need a microcode + update for mitigation. +- None of the affected CPUs enumerate BHI_CTRL which was introduced in Golden + Cove (Alder Lake and Sapphire Rapids). This can help guests to determine the + host's affected status. +- Intel Atom CPUs are not affected by ITS. + +Mitigation +---------- +As only the indirect branches and RETs that have their last byte of instruction +in the lower half of the cacheline are vulnerable to ITS, the basic idea behind +the mitigation is to not allow indirect branches in the lower half. + +This is achieved by relying on existing retpoline support in the kernel, and in +compilers. ITS-vulnerable retpoline sites are runtime patched to point to newly +added ITS-safe thunks. These safe thunks consists of indirect branch in the +second half of the cacheline. Not all retpoline sites are patched to thunks, if +a retpoline site is evaluated to be ITS-safe, it is replaced with an inline +indirect branch. + +Dynamic thunks +~~~~~~~~~~~~~~ +From a dynamically allocated pool of safe-thunks, each vulnerable site is +replaced with a new thunk, such that they get a unique address. This could +improve the branch prediction accuracy. Also, it is a defense-in-depth measure +against aliasing. + +Note, for simplicity, indirect branches in eBPF programs are always replaced +with a jump to a static thunk in __x86_indirect_its_thunk_array. If required, +in future this can be changed to use dynamic thunks. + +All vulnerable RETs are replaced with a static thunk, they do not use dynamic +thunks. This is because RETs get their prediction from RSB mostly that does not +depend on source address. RETs that underflow RSB may benefit from dynamic +thunks. But, RETs significantly outnumber indirect branches, and any benefit +from a unique source address could be outweighed by the increased icache +footprint and iTLB pressure. + +Retpoline +~~~~~~~~~ +Retpoline sequence also mitigates ITS-unsafe indirect branches. For this +reason, when retpoline is enabled, ITS mitigation only relocates the RETs to +safe thunks. Unless user requested the RSB-stuffing mitigation. + +RSB Stuffing +~~~~~~~~~~~~ +RSB-stuffing via Call Depth Tracking is a mitigation for Retbleed RSB-underflow +attacks. And it also mitigates RETs that are vulnerable to ITS. + +Mitigation in guests +^^^^^^^^^^^^^^^^^^^^ +All guests deploy ITS mitigation by default, irrespective of eIBRS enumeration +and Family/Model of the guest. This is because eIBRS feature could be hidden +from a guest. One exception to this is when a guest enumerates BHI_DIS_S, which +indicates that the guest is running on an unaffected host. + +To prevent guests from unnecessarily deploying the mitigation on unaffected +platforms, Intel has defined ITS_NO bit(62) in MSR IA32_ARCH_CAPABILITIES. When +a guest sees this bit set, it should not enumerate the ITS bug. Note, this bit +is not set by any hardware, but is **intended for VMMs to synthesize** it for +guests as per the host's affected status. + +Mitigation options +^^^^^^^^^^^^^^^^^^ +The ITS mitigation can be controlled using the "indirect_target_selection" +kernel parameter. The available options are: + + ======== =================================================================== + on (default) Deploy the "Aligned branch/return thunks" mitigation. + If spectre_v2 mitigation enables retpoline, aligned-thunks are only + deployed for the affected RET instructions. Retpoline mitigates + indirect branches. + + off Disable ITS mitigation. + + vmexit Equivalent to "=on" if the CPU is affected by guest/host isolation + part of ITS. Otherwise, mitigation is not deployed. This option is + useful when host userspace is not in the threat model, and only + attacks from guest to host are considered. + + stuff Deploy RSB-fill mitigation when retpoline is also deployed. + Otherwise, deploy the default mitigation. When retpoline mitigation + is enabled, RSB-stuffing via Call-Depth-Tracking also mitigates + ITS. + + force Force the ITS bug and deploy the default mitigation. + ======== =================================================================== + +Sysfs reporting +--------------- + +The sysfs file showing ITS mitigation status is: + + /sys/devices/system/cpu/vulnerabilities/indirect_target_selection + +Note, microcode mitigation status is not reported in this file. + +The possible values in this file are: + +.. list-table:: + + * - Not affected + - The processor is not vulnerable. + * - Vulnerable + - System is vulnerable and no mitigation has been applied. + * - Vulnerable, KVM: Not affected + - System is vulnerable to intra-mode BTI, but not affected by eIBRS + guest/host isolation. + * - Mitigation: Aligned branch/return thunks + - The mitigation is enabled, affected indirect branches and RETs are + relocated to safe thunks. + * - Mitigation: Retpolines, Stuffing RSB + - The mitigation is enabled using retpoline and RSB stuffing. + +References +---------- +.. [#f1] Microcode repository - https://github.com/intel/Intel-Linux-Processor-Microcode-Data-Files + +.. [#f2] Affected Processors list - https://www.intel.com/content/www/us/en/developer/topic-technology/software-security-guidance/processors-affected-consolidated-product-cpu-model.html + +.. [#f3] Affected Processors list (machine readable) - https://github.com/intel/Intel-affected-processor-list diff --git a/Documentation/admin-guide/hw-vuln/reg-file-data-sampling.rst b/Documentation/admin-guide/hw-vuln/reg-file-data-sampling.rst index 0585d02b9a..ad15417d39 100644 --- a/Documentation/admin-guide/hw-vuln/reg-file-data-sampling.rst +++ b/Documentation/admin-guide/hw-vuln/reg-file-data-sampling.rst @@ -29,14 +29,6 @@ Below is the list of affected Intel processors [#f1]_: RAPTORLAKE_S 06_BFH =================== ============ -As an exception to this table, Intel Xeon E family parts ALDERLAKE(06_97H) and -RAPTORLAKE(06_B7H) codenamed Catlow are not affected. They are reported as -vulnerable in Linux because they share the same family/model with an affected -part. Unlike their affected counterparts, they do not enumerate RFDS_CLEAR or -CPUID.HYBRID. This information could be used to distinguish between the -affected and unaffected parts, but it is deemed not worth adding complexity as -the reporting is fixed automatically when these parts enumerate RFDS_NO. - Mitigation ========== Intel released a microcode update that enables software to clear sensitive diff --git a/Documentation/admin-guide/hw-vuln/rsb.rst b/Documentation/admin-guide/hw-vuln/rsb.rst new file mode 100644 index 0000000000..21dbf9cf25 --- /dev/null +++ b/Documentation/admin-guide/hw-vuln/rsb.rst @@ -0,0 +1,268 @@ +.. SPDX-License-Identifier: GPL-2.0 + +======================= +RSB-related mitigations +======================= + +.. warning:: + Please keep this document up-to-date, otherwise you will be + volunteered to update it and convert it to a very long comment in + bugs.c! + +Since 2018 there have been many Spectre CVEs related to the Return Stack +Buffer (RSB) (sometimes referred to as the Return Address Stack (RAS) or +Return Address Predictor (RAP) on AMD). + +Information about these CVEs and how to mitigate them is scattered +amongst a myriad of microarchitecture-specific documents. + +This document attempts to consolidate all the relevant information in +once place and clarify the reasoning behind the current RSB-related +mitigations. It's meant to be as concise as possible, focused only on +the current kernel mitigations: what are the RSB-related attack vectors +and how are they currently being mitigated? + +It's *not* meant to describe how the RSB mechanism operates or how the +exploits work. More details about those can be found in the references +below. + +Rather, this is basically a glorified comment, but too long to actually +be one. So when the next CVE comes along, a kernel developer can +quickly refer to this as a refresher to see what we're actually doing +and why. + +At a high level, there are two classes of RSB attacks: RSB poisoning +(Intel and AMD) and RSB underflow (Intel only). They must each be +considered individually for each attack vector (and microarchitecture +where applicable). + +---- + +RSB poisoning (Intel and AMD) +============================= + +SpectreRSB +~~~~~~~~~~ + +RSB poisoning is a technique used by SpectreRSB [#spectre-rsb]_ where +an attacker poisons an RSB entry to cause a victim's return instruction +to speculate to an attacker-controlled address. This can happen when +there are unbalanced CALLs/RETs after a context switch or VMEXIT. + +* All attack vectors can potentially be mitigated by flushing out any + poisoned RSB entries using an RSB filling sequence + [#intel-rsb-filling]_ [#amd-rsb-filling]_ when transitioning between + untrusted and trusted domains. But this has a performance impact and + should be avoided whenever possible. + + .. DANGER:: + **FIXME**: Currently we're flushing 32 entries. However, some CPU + models have more than 32 entries. The loop count needs to be + increased for those. More detailed information is needed about RSB + sizes. + +* On context switch, the user->user mitigation requires ensuring the + RSB gets filled or cleared whenever IBPB gets written [#cond-ibpb]_ + during a context switch: + + * AMD: + On Zen 4+, IBPB (or SBPB [#amd-sbpb]_ if used) clears the RSB. + This is indicated by IBPB_RET in CPUID [#amd-ibpb-rsb]_. + + On Zen < 4, the RSB filling sequence [#amd-rsb-filling]_ must be + always be done in addition to IBPB [#amd-ibpb-no-rsb]_. This is + indicated by X86_BUG_IBPB_NO_RET. + + * Intel: + IBPB always clears the RSB: + + "Software that executed before the IBPB command cannot control + the predicted targets of indirect branches executed after the + command on the same logical processor. The term indirect branch + in this context includes near return instructions, so these + predicted targets may come from the RSB." [#intel-ibpb-rsb]_ + +* On context switch, user->kernel attacks are prevented by SMEP. User + space can only insert user space addresses into the RSB. Even + non-canonical addresses can't be inserted due to the page gap at the + end of the user canonical address space reserved by TASK_SIZE_MAX. + A SMEP #PF at instruction fetch prevents the kernel from speculatively + executing user space. + + * AMD: + "Finally, branches that are predicted as 'ret' instructions get + their predicted targets from the Return Address Predictor (RAP). + AMD recommends software use a RAP stuffing sequence (mitigation + V2-3 in [2]) and/or Supervisor Mode Execution Protection (SMEP) + to ensure that the addresses in the RAP are safe for + speculation. Collectively, we refer to these mitigations as "RAP + Protection"." [#amd-smep-rsb]_ + + * Intel: + "On processors with enhanced IBRS, an RSB overwrite sequence may + not suffice to prevent the predicted target of a near return + from using an RSB entry created in a less privileged predictor + mode. Software can prevent this by enabling SMEP (for + transitions from user mode to supervisor mode) and by having + IA32_SPEC_CTRL.IBRS set during VM exits." [#intel-smep-rsb]_ + +* On VMEXIT, guest->host attacks are mitigated by eIBRS (and PBRSB + mitigation if needed): + + * AMD: + "When Automatic IBRS is enabled, the internal return address + stack used for return address predictions is cleared on VMEXIT." + [#amd-eibrs-vmexit]_ + + * Intel: + "On processors with enhanced IBRS, an RSB overwrite sequence may + not suffice to prevent the predicted target of a near return + from using an RSB entry created in a less privileged predictor + mode. Software can prevent this by enabling SMEP (for + transitions from user mode to supervisor mode) and by having + IA32_SPEC_CTRL.IBRS set during VM exits. Processors with + enhanced IBRS still support the usage model where IBRS is set + only in the OS/VMM for OSes that enable SMEP. To do this, such + processors will ensure that guest behavior cannot control the + RSB after a VM exit once IBRS is set, even if IBRS was not set + at the time of the VM exit." [#intel-eibrs-vmexit]_ + + Note that some Intel CPUs are susceptible to Post-barrier Return + Stack Buffer Predictions (PBRSB) [#intel-pbrsb]_, where the last + CALL from the guest can be used to predict the first unbalanced RET. + In this case the PBRSB mitigation is needed in addition to eIBRS. + +AMD RETBleed / SRSO / Branch Type Confusion +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + +On AMD, poisoned RSB entries can also be created by the AMD RETBleed +variant [#retbleed-paper]_ [#amd-btc]_ or by Speculative Return Stack +Overflow [#amd-srso]_ (Inception [#inception-paper]_). The kernel +protects itself by replacing every RET in the kernel with a branch to a +single safe RET. + +---- + +RSB underflow (Intel only) +========================== + +RSB Alternate (RSBA) ("Intel Retbleed") +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + +Some Intel Skylake-generation CPUs are susceptible to the Intel variant +of RETBleed [#retbleed-paper]_ (Return Stack Buffer Underflow +[#intel-rsbu]_). If a RET is executed when the RSB buffer is empty due +to mismatched CALLs/RETs or returning from a deep call stack, the branch +predictor can fall back to using the Branch Target Buffer (BTB). If a +user forces a BTB collision then the RET can speculatively branch to a +user-controlled address. + +* Note that RSB filling doesn't fully mitigate this issue. If there + are enough unbalanced RETs, the RSB may still underflow and fall back + to using a poisoned BTB entry. + +* On context switch, user->user underflow attacks are mitigated by the + conditional IBPB [#cond-ibpb]_ on context switch which effectively + clears the BTB: + + * "The indirect branch predictor barrier (IBPB) is an indirect branch + control mechanism that establishes a barrier, preventing software + that executed before the barrier from controlling the predicted + targets of indirect branches executed after the barrier on the same + logical processor." [#intel-ibpb-btb]_ + +* On context switch and VMEXIT, user->kernel and guest->host RSB + underflows are mitigated by IBRS or eIBRS: + + * "Enabling IBRS (including enhanced IBRS) will mitigate the "RSBU" + attack demonstrated by the researchers. As previously documented, + Intel recommends the use of enhanced IBRS, where supported. This + includes any processor that enumerates RRSBA but not RRSBA_DIS_S." + [#intel-rsbu]_ + + However, note that eIBRS and IBRS do not mitigate intra-mode attacks. + Like RRSBA below, this is mitigated by clearing the BHB on kernel + entry. + + As an alternative to classic IBRS, call depth tracking (combined with + retpolines) can be used to track kernel returns and fill the RSB when + it gets close to being empty. + +Restricted RSB Alternate (RRSBA) +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + +Some newer Intel CPUs have Restricted RSB Alternate (RRSBA) behavior, +which, similar to RSBA described above, also falls back to using the BTB +on RSB underflow. The only difference is that the predicted targets are +restricted to the current domain when eIBRS is enabled: + +* "Restricted RSB Alternate (RRSBA) behavior allows alternate branch + predictors to be used by near RET instructions when the RSB is + empty. When eIBRS is enabled, the predicted targets of these + alternate predictors are restricted to those belonging to the + indirect branch predictor entries of the current prediction domain. + [#intel-eibrs-rrsba]_ + +When a CPU with RRSBA is vulnerable to Branch History Injection +[#bhi-paper]_ [#intel-bhi]_, an RSB underflow could be used for an +intra-mode BTI attack. This is mitigated by clearing the BHB on +kernel entry. + +However if the kernel uses retpolines instead of eIBRS, it needs to +disable RRSBA: + +* "Where software is using retpoline as a mitigation for BHI or + intra-mode BTI, and the processor both enumerates RRSBA and + enumerates RRSBA_DIS controls, it should disable this behavior." + [#intel-retpoline-rrsba]_ + +---- + +References +========== + +.. [#spectre-rsb] `Spectre Returns! Speculation Attacks using the Return Stack Buffer `_ + +.. [#intel-rsb-filling] "Empty RSB Mitigation on Skylake-generation" in `Retpoline: A Branch Target Injection Mitigation `_ + +.. [#amd-rsb-filling] "Mitigation V2-3" in `Software Techniques for Managing Speculation `_ + +.. [#cond-ibpb] Whether IBPB is written depends on whether the prev and/or next task is protected from Spectre attacks. It typically requires opting in per task or system-wide. For more details see the documentation for the ``spectre_v2_user`` cmdline option in Documentation/admin-guide/kernel-parameters.txt. + +.. [#amd-sbpb] IBPB without flushing of branch type predictions. Only exists for AMD. + +.. [#amd-ibpb-rsb] "Function 8000_0008h -- Processor Capacity Parameters and Extended Feature Identification" in `AMD64 Architecture Programmer's Manual Volume 3: General-Purpose and System Instructions `_. SBPB behaves the same way according to `this email `_. + +.. [#amd-ibpb-no-rsb] `Spectre Attacks: Exploiting Speculative Execution `_ + +.. [#intel-ibpb-rsb] "Introduction" in `Post-barrier Return Stack Buffer Predictions / CVE-2022-26373 / INTEL-SA-00706 `_ + +.. [#amd-smep-rsb] "Existing Mitigations" in `Technical Guidance for Mitigating Branch Type Confusion `_ + +.. [#intel-smep-rsb] "Enhanced IBRS" in `Indirect Branch Restricted Speculation `_ + +.. [#amd-eibrs-vmexit] "Extended Feature Enable Register (EFER)" in `AMD64 Architecture Programmer's Manual Volume 2: System Programming `_ + +.. [#intel-eibrs-vmexit] "Enhanced IBRS" in `Indirect Branch Restricted Speculation `_ + +.. [#intel-pbrsb] `Post-barrier Return Stack Buffer Predictions / CVE-2022-26373 / INTEL-SA-00706 `_ + +.. [#retbleed-paper] `RETBleed: Arbitrary Speculative Code Execution with Return Instruction `_ + +.. [#amd-btc] `Technical Guidance for Mitigating Branch Type Confusion `_ + +.. [#amd-srso] `Technical Update Regarding Speculative Return Stack Overflow `_ + +.. [#inception-paper] `Inception: Exposing New Attack Surfaces with Training in Transient Execution `_ + +.. [#intel-rsbu] `Return Stack Buffer Underflow / Return Stack Buffer Underflow / CVE-2022-29901, CVE-2022-28693 / INTEL-SA-00702 `_ + +.. [#intel-ibpb-btb] `Indirect Branch Predictor Barrier' `_ + +.. [#intel-eibrs-rrsba] "Guidance for RSBU" in `Return Stack Buffer Underflow / Return Stack Buffer Underflow / CVE-2022-29901, CVE-2022-28693 / INTEL-SA-00702 `_ + +.. [#bhi-paper] `Branch History Injection: On the Effectiveness of Hardware Mitigations Against Cross-Privilege Spectre-v2 Attacks `_ + +.. [#intel-bhi] `Branch History Injection and Intra-mode Branch Target Injection / CVE-2022-0001, CVE-2022-0002 / INTEL-SA-00598 `_ + +.. [#intel-retpoline-rrsba] "Retpoline" in `Branch History Injection and Intra-mode Branch Target Injection / CVE-2022-0001, CVE-2022-0002 / INTEL-SA-00598 `_ diff --git a/Documentation/admin-guide/hw-vuln/srso.rst b/Documentation/admin-guide/hw-vuln/srso.rst index 2ad1c05b8c..66af95251a 100644 --- a/Documentation/admin-guide/hw-vuln/srso.rst +++ b/Documentation/admin-guide/hw-vuln/srso.rst @@ -104,7 +104,20 @@ The possible values in this file are: (spec_rstack_overflow=ibpb-vmexit) + * 'Mitigation: Reduced Speculation': + This mitigation gets automatically enabled when the above one "IBPB on + VMEXIT" has been selected and the CPU supports the BpSpecReduce bit. + + It gets automatically enabled on machines which have the + SRSO_USER_KERNEL_NO=1 CPUID bit. In that case, the code logic is to switch + to the above =ibpb-vmexit mitigation because the user/kernel boundary is + not affected anymore and thus "safe RET" is not needed. + + After enabling the IBPB on VMEXIT mitigation option, the BpSpecReduce bit + is detected (functionality present on all such machines) and that + practically overrides IBPB on VMEXIT as it has a lot less performance + impact and takes care of the guest->host attack vector too. In order to exploit vulnerability, an attacker needs to: diff --git a/Documentation/admin-guide/kernel-parameters.txt b/Documentation/admin-guide/kernel-parameters.txt index 543cd80f3c..c810f13a2c 100644 --- a/Documentation/admin-guide/kernel-parameters.txt +++ b/Documentation/admin-guide/kernel-parameters.txt @@ -2149,6 +2149,23 @@ different crypto accelerators. This option can be used to achieve best performance for particular HW. + indirect_target_selection= [X86,Intel] Mitigation control for Indirect + Target Selection(ITS) bug in Intel CPUs. Updated + microcode is also required for a fix in IBPB. + + on: Enable mitigation (default). + off: Disable mitigation. + force: Force the ITS bug and deploy default + mitigation. + vmexit: Only deploy mitigation if CPU is affected by + guest/host isolation part of ITS. + stuff: Deploy RSB-fill mitigation when retpoline is + also deployed. Otherwise, deploy the default + mitigation. + + For details see: + Documentation/admin-guide/hw-vuln/indirect-target-selection.rst + init= [KNL] Format: Run specified binary instead of /sbin/init as init @@ -3508,6 +3525,7 @@ expose users to several CPU vulnerabilities. Equivalent to: if nokaslr then kpti=0 [ARM64] gather_data_sampling=off [X86] + indirect_target_selection=off [X86] kvm.nx_huge_pages=off [X86] l1tf=off [X86] mds=off [X86] @@ -6252,6 +6270,8 @@ Selecting 'on' will also enable the mitigation against user space to user space task attacks. + Selecting specific mitigation does not force enable + user mitigations. Selecting 'off' will disable both the kernel and the user space protections. diff --git a/Documentation/admin-guide/sysctl/kernel.rst b/Documentation/admin-guide/sysctl/kernel.rst index f8bc1630eb..fa21cdd610 100644 --- a/Documentation/admin-guide/sysctl/kernel.rst +++ b/Documentation/admin-guide/sysctl/kernel.rst @@ -212,6 +212,17 @@ pid>/``). This value defaults to 0. +core_sort_vma +============= + +The default coredump writes VMAs in address order. By setting +``core_sort_vma`` to 1, VMAs will be written from smallest size +to largest size. This is known to break at least elfutils, but +can be handy when dealing with very large (and truncated) +coredumps where the more useful debugging details are included +in the smaller VMAs. + + core_uses_pid ============= diff --git a/Documentation/core-api/packing.rst b/Documentation/core-api/packing.rst index 3ed13bc9a1..0ce2078c8e 100644 --- a/Documentation/core-api/packing.rst +++ b/Documentation/core-api/packing.rst @@ -151,16 +151,195 @@ the more significant 4-byte word. We always think of our offsets as if there were no quirk, and we translate them afterwards, before accessing the memory region. +Note on buffer lengths not multiple of 4 +---------------------------------------- + +To deal with memory layout quirks where groups of 4 bytes are laid out "little +endian" relative to each other, but "big endian" within the group itself, the +concept of groups of 4 bytes is intrinsic to the packing API (not to be +confused with the memory access, which is performed byte by byte, though). + +With buffer lengths not multiple of 4, this means one group will be incomplete. +Depending on the quirks, this may lead to discontinuities in the bit fields +accessible through the buffer. The packing API assumes discontinuities were not +the intention of the memory layout, so it avoids them by effectively logically +shortening the most significant group of 4 octets to the number of octets +actually available. + +Example with a 31 byte sized buffer given below. Physical buffer offsets are +implicit, and increase from left to right within a group, and from top to +bottom within a column. + +No quirks: + +:: + + 31 29 28 | Group 7 (most significant) + 27 26 25 24 | Group 6 + 23 22 21 20 | Group 5 + 19 18 17 16 | Group 4 + 15 14 13 12 | Group 3 + 11 10 9 8 | Group 2 + 7 6 5 4 | Group 1 + 3 2 1 0 | Group 0 (least significant) + +QUIRK_LSW32_IS_FIRST: + +:: + + 3 2 1 0 | Group 0 (least significant) + 7 6 5 4 | Group 1 + 11 10 9 8 | Group 2 + 15 14 13 12 | Group 3 + 19 18 17 16 | Group 4 + 23 22 21 20 | Group 5 + 27 26 25 24 | Group 6 + 30 29 28 | Group 7 (most significant) + +QUIRK_LITTLE_ENDIAN: + +:: + + 30 28 29 | Group 7 (most significant) + 24 25 26 27 | Group 6 + 20 21 22 23 | Group 5 + 16 17 18 19 | Group 4 + 12 13 14 15 | Group 3 + 8 9 10 11 | Group 2 + 4 5 6 7 | Group 1 + 0 1 2 3 | Group 0 (least significant) + +QUIRK_LITTLE_ENDIAN | QUIRK_LSW32_IS_FIRST: + +:: + + 0 1 2 3 | Group 0 (least significant) + 4 5 6 7 | Group 1 + 8 9 10 11 | Group 2 + 12 13 14 15 | Group 3 + 16 17 18 19 | Group 4 + 20 21 22 23 | Group 5 + 24 25 26 27 | Group 6 + 28 29 30 | Group 7 (most significant) + Intended use ------------ Drivers that opt to use this API first need to identify which of the above 3 quirk combinations (for a total of 8) match what the hardware documentation -describes. Then they should wrap the packing() function, creating a new -xxx_packing() that calls it using the proper QUIRK_* one-hot bits set. +describes. + +There are 3 supported usage patterns, detailed below. + +packing() +^^^^^^^^^ + +This API function is deprecated. The packing() function returns an int-encoded error code, which protects the programmer against incorrect API use. The errors are not expected to occur -during runtime, therefore it is reasonable for xxx_packing() to return void -and simply swallow those errors. Optionally it can dump stack or print the -error description. +during runtime, therefore it is reasonable to wrap packing() into a custom +function which returns void and swallows those errors. Optionally it can +dump stack or print the error description. + +.. code-block:: c + + void my_packing(void *buf, u64 *val, int startbit, int endbit, + size_t len, enum packing_op op) + { + int err; + + /* Adjust quirks accordingly */ + err = packing(buf, val, startbit, endbit, len, op, QUIRK_LSW32_IS_FIRST); + if (likely(!err)) + return; + + if (err == -EINVAL) { + pr_err("Start bit (%d) expected to be larger than end (%d)\n", + startbit, endbit); + } else if (err == -ERANGE) { + if ((startbit - endbit + 1) > 64) + pr_err("Field %d-%d too large for 64 bits!\n", + startbit, endbit); + else + pr_err("Cannot store %llx inside bits %d-%d (would truncate)\n", + *val, startbit, endbit); + } + dump_stack(); + } + +pack() and unpack() +^^^^^^^^^^^^^^^^^^^ + +These are const-correct variants of packing(), and eliminate the last "enum +packing_op op" argument. + +Calling pack(...) is equivalent, and preferred, to calling packing(..., PACK). + +Calling unpack(...) is equivalent, and preferred, to calling packing(..., UNPACK). + +pack_fields() and unpack_fields() +^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ + +The library exposes optimized functions for the scenario where there are many +fields represented in a buffer, and it encourages consumer drivers to avoid +repetitive calls to pack() and unpack() for each field, but instead use +pack_fields() and unpack_fields(), which reduces the code footprint. + +These APIs use field definitions in arrays of ``struct packed_field_u8`` or +``struct packed_field_u16``, allowing consumer drivers to minimize the size +of these arrays according to their custom requirements. + +The pack_fields() and unpack_fields() API functions are actually macros which +automatically select the appropriate function at compile time, based on the +type of the fields array passed in. + +An additional benefit over pack() and unpack() is that sanity checks on the +field definitions are handled at compile time with ``BUILD_BUG_ON`` rather +than only when the offending code is executed. These functions return void and +wrapping them to handle unexpected errors is not necessary. + +It is recommended, but not required, that you wrap your packed buffer into a +structured type with a fixed size. This generally makes it easier for the +compiler to enforce that the correct size buffer is used. + +Here is an example of how to use the fields APIs: + +.. code-block:: c + + /* Ordering inside the unpacked structure is flexible and can be different + * from the packed buffer. Here, it is optimized to reduce padding. + */ + struct data { + u64 field3; + u32 field4; + u16 field1; + u8 field2; + }; + + #define SIZE 13 + + typdef struct __packed { u8 buf[SIZE]; } packed_buf_t; + + static const struct packed_field_u8 fields[] = { + PACKED_FIELD(100, 90, struct data, field1), + PACKED_FIELD(90, 87, struct data, field2), + PACKED_FIELD(86, 30, struct data, field3), + PACKED_FIELD(29, 0, struct data, field4), + }; + + void unpack_your_data(const packed_buf_t *buf, struct data *unpacked) + { + BUILD_BUG_ON(sizeof(*buf) != SIZE; + + unpack_fields(buf, sizeof(*buf), unpacked, fields, + QUIRK_LITTLE_ENDIAN); + } + + void pack_your_data(const struct data *unpacked, packed_buf_t *buf) + { + BUILD_BUG_ON(sizeof(*buf) != SIZE; + + pack_fields(buf, sizeof(*buf), unpacked, fields, + QUIRK_LITTLE_ENDIAN); + } diff --git a/Documentation/networking/device_drivers/ethernet/intel/ice.rst b/Documentation/networking/device_drivers/ethernet/intel/ice.rst index 934752f675..3c46a48d99 100644 --- a/Documentation/networking/device_drivers/ethernet/intel/ice.rst +++ b/Documentation/networking/device_drivers/ethernet/intel/ice.rst @@ -101,6 +101,37 @@ example, if Rx packets are 10 and Netdev (software statistics) displays rx_bytes as "X", then ethtool (hardware statistics) will display rx_bytes as "X+40" (4 bytes CRC x 10 packets). +ethtool reset +------------- +The driver supports 3 types of resets: + +- PF reset - resets only components associated with the given PF, does not + impact other PFs + +- CORE reset - whole adapter is affected, reset all PFs + +- GLOBAL reset - same as CORE but mac and phy components are also reinitialized + +These are mapped to ethtool reset flags as follow: + +- PF reset: + + # ethtool --reset irq dma filter offload + +- CORE reset: + + # ethtool --reset irq-shared dma-shared filter-shared offload-shared \ + ram-shared + +- GLOBAL reset: + + # ethtool --reset irq-shared dma-shared filter-shared offload-shared \ + mac-shared phy-shared ram-shared + +In switchdev mode you can reset a VF using port representor: + + # ethtool --reset irq dma filter offload + Viewing Link Messages --------------------- diff --git a/MAINTAINERS b/MAINTAINERS index acccd725fa..4741d47561 100644 --- a/MAINTAINERS +++ b/MAINTAINERS @@ -17427,6 +17427,8 @@ S: Supported F: Documentation/core-api/packing.rst F: include/linux/packing.h F: lib/packing.c +F: lib/packing_test.c +F: scripts/gen_packed_field_checks.c PADATA PARALLEL EXECUTION MECHANISM M: Steffen Klassert diff --git a/Makefile b/Makefile index a8e9ff0e7a..4c9109b7fb 100644 --- a/Makefile +++ b/Makefile @@ -1336,6 +1336,10 @@ PHONY += scripts_unifdef scripts_unifdef: scripts_basic $(Q)$(MAKE) $(build)=scripts scripts/unifdef +PHONY += scripts_gen_packed_field_checks +scripts_gen_packed_field_checks: scripts_basic + $(Q)$(MAKE) $(build)=scripts scripts/gen_packed_field_checks + # --------------------------------------------------------------------------- # Install diff --git a/Makefile.rhelver b/Makefile.rhelver index b1ab65e864..4695070ff2 100644 --- a/Makefile.rhelver +++ b/Makefile.rhelver @@ -12,7 +12,7 @@ RHEL_MINOR = 0 # # Use this spot to avoid future merge conflicts. # Do not trim this comment. -RHEL_RELEASE = 55.42.1 +RHEL_RELEASE = 55.43.1 # # RHEL_REBASE_NUM diff --git a/arch/Kconfig b/arch/Kconfig index bd9f095d69..89b14e4edc 100644 --- a/arch/Kconfig +++ b/arch/Kconfig @@ -1024,6 +1024,14 @@ config ARCH_WANTS_EXECMEM_LATE enough entropy for module space randomization, for instance arm64. +config ARCH_HAS_EXECMEM_ROX + bool + depends on MMU && !HIGHMEM + help + For architectures that support allocations of executable memory + with read-only execute permissions. Architecture must implement + execmem_fill_trapping_insns() callback to enable this. + config HAVE_IRQ_EXIT_ON_IRQ_STACK bool help diff --git a/arch/alpha/include/asm/Kbuild b/arch/alpha/include/asm/Kbuild index 396caece6d..483965c5a4 100644 --- a/arch/alpha/include/asm/Kbuild +++ b/arch/alpha/include/asm/Kbuild @@ -5,3 +5,4 @@ generic-y += agp.h generic-y += asm-offsets.h generic-y += kvm_para.h generic-y += mcs_spinlock.h +generic-y += text-patching.h diff --git a/arch/arc/include/asm/Kbuild b/arch/arc/include/asm/Kbuild index 49285a3ce2..4c69522e03 100644 --- a/arch/arc/include/asm/Kbuild +++ b/arch/arc/include/asm/Kbuild @@ -6,3 +6,4 @@ generic-y += kvm_para.h generic-y += mcs_spinlock.h generic-y += parport.h generic-y += user.h +generic-y += text-patching.h diff --git a/arch/arm/include/asm/patch.h b/arch/arm/include/asm/text-patching.h similarity index 100% rename from arch/arm/include/asm/patch.h rename to arch/arm/include/asm/text-patching.h diff --git a/arch/arm/kernel/ftrace.c b/arch/arm/kernel/ftrace.c index e61591f33a..845acf9ce2 100644 --- a/arch/arm/kernel/ftrace.c +++ b/arch/arm/kernel/ftrace.c @@ -23,7 +23,7 @@ #include #include #include -#include +#include /* * The compiler emitted profiling hook consists of diff --git a/arch/arm/kernel/jump_label.c b/arch/arm/kernel/jump_label.c index eb9c24b6e8..a06a92d0f5 100644 --- a/arch/arm/kernel/jump_label.c +++ b/arch/arm/kernel/jump_label.c @@ -1,7 +1,7 @@ // SPDX-License-Identifier: GPL-2.0 #include #include -#include +#include #include static void __arch_jump_label_transform(struct jump_entry *entry, diff --git a/arch/arm/kernel/kgdb.c b/arch/arm/kernel/kgdb.c index 22f937e6f3..ab76c55fd6 100644 --- a/arch/arm/kernel/kgdb.c +++ b/arch/arm/kernel/kgdb.c @@ -15,7 +15,7 @@ #include #include -#include +#include #include struct dbg_reg_def_t dbg_reg_def[DBG_MAX_REG_NUM] = diff --git a/arch/arm/kernel/patch.c b/arch/arm/kernel/patch.c index e9e828b6bb..4d45e60cd4 100644 --- a/arch/arm/kernel/patch.c +++ b/arch/arm/kernel/patch.c @@ -9,7 +9,7 @@ #include #include #include -#include +#include struct patch { void *addr; diff --git a/arch/arm/probes/kprobes/core.c b/arch/arm/probes/kprobes/core.c index d8238da095..9fd877c87a 100644 --- a/arch/arm/probes/kprobes/core.c +++ b/arch/arm/probes/kprobes/core.c @@ -25,7 +25,7 @@ #include #include #include -#include +#include #include #include "../decode-arm.h" diff --git a/arch/arm/probes/kprobes/opt-arm.c b/arch/arm/probes/kprobes/opt-arm.c index 7f65048380..966c6042c5 100644 --- a/arch/arm/probes/kprobes/opt-arm.c +++ b/arch/arm/probes/kprobes/opt-arm.c @@ -14,7 +14,7 @@ /* for arm_gen_branch */ #include /* for patch_text */ -#include +#include #include "core.h" diff --git a/arch/arm64/include/asm/cputype.h b/arch/arm64/include/asm/cputype.h index 488f8e7513..9fe31bc8dd 100644 --- a/arch/arm64/include/asm/cputype.h +++ b/arch/arm64/include/asm/cputype.h @@ -80,6 +80,7 @@ #define ARM_CPU_PART_CORTEX_A78AE 0xD42 #define ARM_CPU_PART_CORTEX_X1 0xD44 #define ARM_CPU_PART_CORTEX_A510 0xD46 +#define ARM_CPU_PART_CORTEX_X1C 0xD4C #define ARM_CPU_PART_CORTEX_A520 0xD80 #define ARM_CPU_PART_CORTEX_A710 0xD47 #define ARM_CPU_PART_CORTEX_A715 0xD4D @@ -163,6 +164,7 @@ #define MIDR_CORTEX_A78AE MIDR_CPU_MODEL(ARM_CPU_IMP_ARM, ARM_CPU_PART_CORTEX_A78AE) #define MIDR_CORTEX_X1 MIDR_CPU_MODEL(ARM_CPU_IMP_ARM, ARM_CPU_PART_CORTEX_X1) #define MIDR_CORTEX_A510 MIDR_CPU_MODEL(ARM_CPU_IMP_ARM, ARM_CPU_PART_CORTEX_A510) +#define MIDR_CORTEX_X1C MIDR_CPU_MODEL(ARM_CPU_IMP_ARM, ARM_CPU_PART_CORTEX_X1C) #define MIDR_CORTEX_A520 MIDR_CPU_MODEL(ARM_CPU_IMP_ARM, ARM_CPU_PART_CORTEX_A520) #define MIDR_CORTEX_A710 MIDR_CPU_MODEL(ARM_CPU_IMP_ARM, ARM_CPU_PART_CORTEX_A710) #define MIDR_CORTEX_A715 MIDR_CPU_MODEL(ARM_CPU_IMP_ARM, ARM_CPU_PART_CORTEX_A715) diff --git a/arch/arm64/include/asm/insn.h b/arch/arm64/include/asm/insn.h index 8c0a36f72d..f06c20f70e 100644 --- a/arch/arm64/include/asm/insn.h +++ b/arch/arm64/include/asm/insn.h @@ -692,6 +692,7 @@ u32 aarch64_insn_gen_cas(enum aarch64_insn_register result, } #endif u32 aarch64_insn_gen_dmb(enum aarch64_insn_mb_type type); +u32 aarch64_insn_gen_dsb(enum aarch64_insn_mb_type type); u32 aarch64_insn_gen_mrs(enum aarch64_insn_register result, enum aarch64_insn_system_register sysreg); diff --git a/arch/arm64/include/asm/set_memory.h b/arch/arm64/include/asm/set_memory.h index 917761feef..98088c0436 100644 --- a/arch/arm64/include/asm/set_memory.h +++ b/arch/arm64/include/asm/set_memory.h @@ -13,6 +13,7 @@ int set_memory_valid(unsigned long addr, int numpages, int enable); int set_direct_map_invalid_noflush(struct page *page); int set_direct_map_default_noflush(struct page *page); +int set_direct_map_valid_noflush(struct page *page, unsigned nr, bool valid); bool kernel_page_present(struct page *page); #endif /* _ASM_ARM64_SET_MEMORY_H */ diff --git a/arch/arm64/include/asm/spectre.h b/arch/arm64/include/asm/spectre.h index 0c4d9045c3..8fef126260 100644 --- a/arch/arm64/include/asm/spectre.h +++ b/arch/arm64/include/asm/spectre.h @@ -97,7 +97,9 @@ enum mitigation_state arm64_get_meltdown_state(void); enum mitigation_state arm64_get_spectre_bhb_state(void); bool is_spectre_bhb_affected(const struct arm64_cpu_capabilities *entry, int scope); -u8 spectre_bhb_loop_affected(int scope); +extern bool __nospectre_bhb; +u8 get_spectre_bhb_loop_value(void); +bool is_spectre_bhb_fw_mitigated(void); void spectre_bhb_enable_mitigation(const struct arm64_cpu_capabilities *__unused); bool try_emulate_el1_ssbs(struct pt_regs *regs, u32 instr); diff --git a/arch/arm64/include/asm/patching.h b/arch/arm64/include/asm/text-patching.h similarity index 100% rename from arch/arm64/include/asm/patching.h rename to arch/arm64/include/asm/text-patching.h diff --git a/arch/arm64/kernel/ftrace.c b/arch/arm64/kernel/ftrace.c index a650f5e11f..3575d03d60 100644 --- a/arch/arm64/kernel/ftrace.c +++ b/arch/arm64/kernel/ftrace.c @@ -15,7 +15,7 @@ #include #include #include -#include +#include #ifdef CONFIG_DYNAMIC_FTRACE_WITH_ARGS struct fregs_offset { diff --git a/arch/arm64/kernel/jump_label.c b/arch/arm64/kernel/jump_label.c index f63ea915d6..b345425193 100644 --- a/arch/arm64/kernel/jump_label.c +++ b/arch/arm64/kernel/jump_label.c @@ -9,7 +9,7 @@ #include #include #include -#include +#include bool arch_jump_label_transform_queue(struct jump_entry *entry, enum jump_label_type type) diff --git a/arch/arm64/kernel/kgdb.c b/arch/arm64/kernel/kgdb.c index 4e1f983df3..f3c4d3a8a2 100644 --- a/arch/arm64/kernel/kgdb.c +++ b/arch/arm64/kernel/kgdb.c @@ -17,7 +17,7 @@ #include #include -#include +#include #include struct dbg_reg_def_t dbg_reg_def[DBG_MAX_REG_NUM] = { diff --git a/arch/arm64/kernel/patching.c b/arch/arm64/kernel/patching.c index 945df74005..7f99723fbb 100644 --- a/arch/arm64/kernel/patching.c +++ b/arch/arm64/kernel/patching.c @@ -10,7 +10,7 @@ #include #include #include -#include +#include #include static DEFINE_RAW_SPINLOCK(patch_lock); diff --git a/arch/arm64/kernel/probes/kprobes.c b/arch/arm64/kernel/probes/kprobes.c index 4268678d0e..01dbe9a569 100644 --- a/arch/arm64/kernel/probes/kprobes.c +++ b/arch/arm64/kernel/probes/kprobes.c @@ -27,7 +27,7 @@ #include #include #include -#include +#include #include #include #include diff --git a/arch/arm64/kernel/proton-pack.c b/arch/arm64/kernel/proton-pack.c index da53722f95..e1735fb3bd 100644 --- a/arch/arm64/kernel/proton-pack.c +++ b/arch/arm64/kernel/proton-pack.c @@ -845,52 +845,71 @@ static unsigned long system_bhb_mitigations; * This must be called with SCOPE_LOCAL_CPU for each type of CPU, before any * SCOPE_SYSTEM call will give the right answer. */ -u8 spectre_bhb_loop_affected(int scope) +static bool is_spectre_bhb_safe(int scope) +{ + static const struct midr_range spectre_bhb_safe_list[] = { + MIDR_ALL_VERSIONS(MIDR_CORTEX_A35), + MIDR_ALL_VERSIONS(MIDR_CORTEX_A53), + MIDR_ALL_VERSIONS(MIDR_CORTEX_A55), + MIDR_ALL_VERSIONS(MIDR_CORTEX_A510), + MIDR_ALL_VERSIONS(MIDR_CORTEX_A520), + MIDR_ALL_VERSIONS(MIDR_BRAHMA_B53), + {}, + }; + static bool all_safe = true; + + if (scope != SCOPE_LOCAL_CPU) + return all_safe; + + if (is_midr_in_range_list(read_cpuid_id(), spectre_bhb_safe_list)) + return true; + + all_safe = false; + + return false; +} + +static u8 spectre_bhb_loop_affected(void) { u8 k = 0; - static u8 max_bhb_k; - if (scope == SCOPE_LOCAL_CPU) { - static const struct midr_range spectre_bhb_k32_list[] = { - MIDR_ALL_VERSIONS(MIDR_CORTEX_A78), - MIDR_ALL_VERSIONS(MIDR_CORTEX_A78AE), - MIDR_ALL_VERSIONS(MIDR_CORTEX_A78C), - MIDR_ALL_VERSIONS(MIDR_CORTEX_X1), - MIDR_ALL_VERSIONS(MIDR_CORTEX_A710), - MIDR_ALL_VERSIONS(MIDR_CORTEX_X2), - MIDR_ALL_VERSIONS(MIDR_NEOVERSE_N2), - MIDR_ALL_VERSIONS(MIDR_NEOVERSE_V1), - {}, - }; - static const struct midr_range spectre_bhb_k24_list[] = { - MIDR_ALL_VERSIONS(MIDR_CORTEX_A76), - MIDR_ALL_VERSIONS(MIDR_CORTEX_A77), - MIDR_ALL_VERSIONS(MIDR_NEOVERSE_N1), - {}, - }; - static const struct midr_range spectre_bhb_k11_list[] = { - MIDR_ALL_VERSIONS(MIDR_AMPERE1), - {}, - }; - static const struct midr_range spectre_bhb_k8_list[] = { - MIDR_ALL_VERSIONS(MIDR_CORTEX_A72), - MIDR_ALL_VERSIONS(MIDR_CORTEX_A57), - {}, - }; + static const struct midr_range spectre_bhb_k32_list[] = { + MIDR_ALL_VERSIONS(MIDR_CORTEX_A78), + MIDR_ALL_VERSIONS(MIDR_CORTEX_A78AE), + MIDR_ALL_VERSIONS(MIDR_CORTEX_A78C), + MIDR_ALL_VERSIONS(MIDR_CORTEX_X1), + MIDR_ALL_VERSIONS(MIDR_CORTEX_X1C), + MIDR_ALL_VERSIONS(MIDR_CORTEX_A710), + MIDR_ALL_VERSIONS(MIDR_CORTEX_X2), + MIDR_ALL_VERSIONS(MIDR_NEOVERSE_N2), + MIDR_ALL_VERSIONS(MIDR_NEOVERSE_V1), + {}, + }; + static const struct midr_range spectre_bhb_k24_list[] = { + MIDR_ALL_VERSIONS(MIDR_CORTEX_A76), + MIDR_ALL_VERSIONS(MIDR_CORTEX_A77), + MIDR_ALL_VERSIONS(MIDR_NEOVERSE_N1), + MIDR_ALL_VERSIONS(MIDR_QCOM_KRYO_4XX_GOLD), + {}, + }; + static const struct midr_range spectre_bhb_k11_list[] = { + MIDR_ALL_VERSIONS(MIDR_AMPERE1), + {}, + }; + static const struct midr_range spectre_bhb_k8_list[] = { + MIDR_ALL_VERSIONS(MIDR_CORTEX_A72), + MIDR_ALL_VERSIONS(MIDR_CORTEX_A57), + {}, + }; - if (is_midr_in_range_list(read_cpuid_id(), spectre_bhb_k32_list)) - k = 32; - else if (is_midr_in_range_list(read_cpuid_id(), spectre_bhb_k24_list)) - k = 24; - else if (is_midr_in_range_list(read_cpuid_id(), spectre_bhb_k11_list)) - k = 11; - else if (is_midr_in_range_list(read_cpuid_id(), spectre_bhb_k8_list)) - k = 8; - - max_bhb_k = max(max_bhb_k, k); - } else { - k = max_bhb_k; - } + if (is_midr_in_range_list(read_cpuid_id(), spectre_bhb_k32_list)) + k = 32; + else if (is_midr_in_range_list(read_cpuid_id(), spectre_bhb_k24_list)) + k = 24; + else if (is_midr_in_range_list(read_cpuid_id(), spectre_bhb_k11_list)) + k = 11; + else if (is_midr_in_range_list(read_cpuid_id(), spectre_bhb_k8_list)) + k = 8; return k; } @@ -916,29 +935,13 @@ static enum mitigation_state spectre_bhb_get_cpu_fw_mitigation_state(void) } } -static bool is_spectre_bhb_fw_affected(int scope) +static bool has_spectre_bhb_fw_mitigation(void) { - static bool system_affected; enum mitigation_state fw_state; bool has_smccc = arm_smccc_1_1_get_conduit() != SMCCC_CONDUIT_NONE; - static const struct midr_range spectre_bhb_firmware_mitigated_list[] = { - MIDR_ALL_VERSIONS(MIDR_CORTEX_A73), - MIDR_ALL_VERSIONS(MIDR_CORTEX_A75), - {}, - }; - bool cpu_in_list = is_midr_in_range_list(read_cpuid_id(), - spectre_bhb_firmware_mitigated_list); - - if (scope != SCOPE_LOCAL_CPU) - return system_affected; fw_state = spectre_bhb_get_cpu_fw_mitigation_state(); - if (cpu_in_list || (has_smccc && fw_state == SPECTRE_MITIGATED)) { - system_affected = true; - return true; - } - - return false; + return has_smccc && fw_state == SPECTRE_MITIGATED; } static bool supports_ecbhb(int scope) @@ -954,6 +957,8 @@ static bool supports_ecbhb(int scope) ID_AA64MMFR1_EL1_ECBHB_SHIFT); } +static u8 max_bhb_k; + bool is_spectre_bhb_affected(const struct arm64_cpu_capabilities *entry, int scope) { @@ -962,16 +967,23 @@ bool is_spectre_bhb_affected(const struct arm64_cpu_capabilities *entry, if (supports_csv2p3(scope)) return false; - if (supports_clearbhb(scope)) - return true; + if (is_spectre_bhb_safe(scope)) + return false; - if (spectre_bhb_loop_affected(scope)) - return true; + /* + * At this point the core isn't known to be "safe" so we're going to + * assume it's vulnerable. We still need to update `max_bhb_k` though, + * but only if we aren't mitigating with clearbhb though. + */ + if (scope == SCOPE_LOCAL_CPU && !supports_clearbhb(SCOPE_LOCAL_CPU)) + max_bhb_k = max(max_bhb_k, spectre_bhb_loop_affected()); - if (is_spectre_bhb_fw_affected(scope)) - return true; + return true; +} - return false; +u8 get_spectre_bhb_loop_value(void) +{ + return max_bhb_k; } static void this_cpu_set_vectors(enum arm64_bp_harden_el1_vectors slot) @@ -991,7 +1003,7 @@ static void this_cpu_set_vectors(enum arm64_bp_harden_el1_vectors slot) isb(); } -static bool __read_mostly __nospectre_bhb; +bool __read_mostly __nospectre_bhb; static int __init parse_spectre_bhb_param(char *str) { __nospectre_bhb = true; @@ -1002,7 +1014,7 @@ early_param("nospectre_bhb", parse_spectre_bhb_param); void spectre_bhb_enable_mitigation(const struct arm64_cpu_capabilities *entry) { bp_hardening_cb_t cpu_cb; - enum mitigation_state fw_state, state = SPECTRE_VULNERABLE; + enum mitigation_state state = SPECTRE_VULNERABLE; struct bp_hardening_data *data = this_cpu_ptr(&bp_hardening_data); if (!is_spectre_bhb_affected(entry, SCOPE_LOCAL_CPU)) @@ -1028,7 +1040,7 @@ void spectre_bhb_enable_mitigation(const struct arm64_cpu_capabilities *entry) this_cpu_set_vectors(EL1_VECTOR_BHB_CLEAR_INSN); state = SPECTRE_MITIGATED; set_bit(BHB_INSN, &system_bhb_mitigations); - } else if (spectre_bhb_loop_affected(SCOPE_LOCAL_CPU)) { + } else if (spectre_bhb_loop_affected()) { /* * Ensure KVM uses the indirect vector which will have the * branchy-loop added. A57/A72-r0 will already have selected @@ -1041,37 +1053,39 @@ void spectre_bhb_enable_mitigation(const struct arm64_cpu_capabilities *entry) this_cpu_set_vectors(EL1_VECTOR_BHB_LOOP); state = SPECTRE_MITIGATED; set_bit(BHB_LOOP, &system_bhb_mitigations); - } else if (is_spectre_bhb_fw_affected(SCOPE_LOCAL_CPU)) { - fw_state = spectre_bhb_get_cpu_fw_mitigation_state(); - if (fw_state == SPECTRE_MITIGATED) { - /* - * Ensure KVM uses one of the spectre bp_hardening - * vectors. The indirect vector doesn't include the EL3 - * call, so needs upgrading to - * HYP_VECTOR_SPECTRE_INDIRECT. - */ - if (!data->slot || data->slot == HYP_VECTOR_INDIRECT) - data->slot += 1; + } else if (has_spectre_bhb_fw_mitigation()) { + /* + * Ensure KVM uses one of the spectre bp_hardening + * vectors. The indirect vector doesn't include the EL3 + * call, so needs upgrading to + * HYP_VECTOR_SPECTRE_INDIRECT. + */ + if (!data->slot || data->slot == HYP_VECTOR_INDIRECT) + data->slot += 1; - this_cpu_set_vectors(EL1_VECTOR_BHB_FW); + this_cpu_set_vectors(EL1_VECTOR_BHB_FW); - /* - * The WA3 call in the vectors supersedes the WA1 call - * made during context-switch. Uninstall any firmware - * bp_hardening callback. - */ - cpu_cb = spectre_v2_get_sw_mitigation_cb(); - if (__this_cpu_read(bp_hardening_data.fn) != cpu_cb) - __this_cpu_write(bp_hardening_data.fn, NULL); + /* + * The WA3 call in the vectors supersedes the WA1 call + * made during context-switch. Uninstall any firmware + * bp_hardening callback. + */ + cpu_cb = spectre_v2_get_sw_mitigation_cb(); + if (__this_cpu_read(bp_hardening_data.fn) != cpu_cb) + __this_cpu_write(bp_hardening_data.fn, NULL); - state = SPECTRE_MITIGATED; - set_bit(BHB_FW, &system_bhb_mitigations); - } + state = SPECTRE_MITIGATED; + set_bit(BHB_FW, &system_bhb_mitigations); } update_mitigation_state(&spectre_bhb_state, state); } +bool is_spectre_bhb_fw_mitigated(void) +{ + return test_bit(BHB_FW, &system_bhb_mitigations); +} + /* Patched to NOP when enabled */ void noinstr spectre_bhb_patch_loop_mitigation_enable(struct alt_instr *alt, __le32 *origptr, @@ -1100,7 +1114,6 @@ void noinstr spectre_bhb_patch_loop_iter(struct alt_instr *alt, { u8 rd; u32 insn; - u16 loop_count = spectre_bhb_loop_affected(SCOPE_SYSTEM); BUG_ON(nr_inst != 1); /* MOV -> MOV */ @@ -1109,7 +1122,7 @@ void noinstr spectre_bhb_patch_loop_iter(struct alt_instr *alt, insn = le32_to_cpu(*origptr); rd = aarch64_insn_decode_register(AARCH64_INSN_REGTYPE_RD, insn); - insn = aarch64_insn_gen_movewide(rd, loop_count, 0, + insn = aarch64_insn_gen_movewide(rd, max_bhb_k, 0, AARCH64_INSN_VARIANT_64BIT, AARCH64_INSN_MOVEWIDE_ZERO); *updptr++ = cpu_to_le32(insn); diff --git a/arch/arm64/kernel/traps.c b/arch/arm64/kernel/traps.c index 563cbce111..7d81998040 100644 --- a/arch/arm64/kernel/traps.c +++ b/arch/arm64/kernel/traps.c @@ -41,7 +41,7 @@ #include #include #include -#include +#include #include #include #include diff --git a/arch/arm64/lib/insn.c b/arch/arm64/lib/insn.c index b008a9b46a..36d33e064e 100644 --- a/arch/arm64/lib/insn.c +++ b/arch/arm64/lib/insn.c @@ -5,6 +5,7 @@ * * Copyright (C) 2014-2016 Zi Shen Lim */ +#include #include #include #include @@ -1471,43 +1472,41 @@ u32 aarch64_insn_gen_extr(enum aarch64_insn_variant variant, return aarch64_insn_encode_register(AARCH64_INSN_REGTYPE_RM, insn, Rm); } +static u32 __get_barrier_crm_val(enum aarch64_insn_mb_type type) +{ + switch (type) { + case AARCH64_INSN_MB_SY: + return 0xf; + case AARCH64_INSN_MB_ST: + return 0xe; + case AARCH64_INSN_MB_LD: + return 0xd; + case AARCH64_INSN_MB_ISH: + return 0xb; + case AARCH64_INSN_MB_ISHST: + return 0xa; + case AARCH64_INSN_MB_ISHLD: + return 0x9; + case AARCH64_INSN_MB_NSH: + return 0x7; + case AARCH64_INSN_MB_NSHST: + return 0x6; + case AARCH64_INSN_MB_NSHLD: + return 0x5; + default: + pr_err("%s: unknown barrier type %d\n", __func__, type); + return AARCH64_BREAK_FAULT; + } +} + u32 aarch64_insn_gen_dmb(enum aarch64_insn_mb_type type) { u32 opt; u32 insn; - switch (type) { - case AARCH64_INSN_MB_SY: - opt = 0xf; - break; - case AARCH64_INSN_MB_ST: - opt = 0xe; - break; - case AARCH64_INSN_MB_LD: - opt = 0xd; - break; - case AARCH64_INSN_MB_ISH: - opt = 0xb; - break; - case AARCH64_INSN_MB_ISHST: - opt = 0xa; - break; - case AARCH64_INSN_MB_ISHLD: - opt = 0x9; - break; - case AARCH64_INSN_MB_NSH: - opt = 0x7; - break; - case AARCH64_INSN_MB_NSHST: - opt = 0x6; - break; - case AARCH64_INSN_MB_NSHLD: - opt = 0x5; - break; - default: - pr_err("%s: unknown dmb type %d\n", __func__, type); + opt = __get_barrier_crm_val(type); + if (opt == AARCH64_BREAK_FAULT) return AARCH64_BREAK_FAULT; - } insn = aarch64_insn_get_dmb_value(); insn &= ~GENMASK(11, 8); @@ -1516,6 +1515,21 @@ u32 aarch64_insn_gen_dmb(enum aarch64_insn_mb_type type) return insn; } +u32 aarch64_insn_gen_dsb(enum aarch64_insn_mb_type type) +{ + u32 opt, insn; + + opt = __get_barrier_crm_val(type); + if (opt == AARCH64_BREAK_FAULT) + return AARCH64_BREAK_FAULT; + + insn = aarch64_insn_get_dsb_base_value(); + insn &= ~GENMASK(11, 8); + insn |= (opt << 8); + + return insn; +} + u32 aarch64_insn_gen_mrs(enum aarch64_insn_register result, enum aarch64_insn_system_register sysreg) { diff --git a/arch/arm64/mm/pageattr.c b/arch/arm64/mm/pageattr.c index 0e270a1c51..0122590029 100644 --- a/arch/arm64/mm/pageattr.c +++ b/arch/arm64/mm/pageattr.c @@ -192,6 +192,16 @@ int set_direct_map_default_noflush(struct page *page) PAGE_SIZE, change_page_range, &data); } +int set_direct_map_valid_noflush(struct page *page, unsigned nr, bool valid) +{ + unsigned long addr = (unsigned long)page_address(page); + + if (!can_set_direct_map()) + return 0; + + return set_memory_valid(addr, nr, valid); +} + #ifdef CONFIG_DEBUG_PAGEALLOC void __kernel_map_pages(struct page *page, int numpages, int enable) { diff --git a/arch/arm64/net/bpf_jit_comp.c b/arch/arm64/net/bpf_jit_comp.c index 5db82bfc9d..6f04b562f6 100644 --- a/arch/arm64/net/bpf_jit_comp.c +++ b/arch/arm64/net/bpf_jit_comp.c @@ -7,6 +7,7 @@ #define pr_fmt(fmt) "bpf_jit: " fmt +#include #include #include #include @@ -17,9 +18,10 @@ #include #include #include +#include #include #include -#include +#include #include #include "bpf_jit.h" @@ -857,7 +859,51 @@ static void build_plt(struct jit_ctx *ctx) plt->target = (u64)&dummy_tramp; } -static void build_epilogue(struct jit_ctx *ctx) +/* Clobbers BPF registers 1-4, aka x0-x3 */ +static void __maybe_unused build_bhb_mitigation(struct jit_ctx *ctx) +{ + const u8 r1 = bpf2a64[BPF_REG_1]; /* aka x0 */ + u8 k = get_spectre_bhb_loop_value(); + + if (!IS_ENABLED(CONFIG_MITIGATE_SPECTRE_BRANCH_HISTORY) || + cpu_mitigations_off() || __nospectre_bhb || + arm64_get_spectre_v2_state() == SPECTRE_VULNERABLE) + return; + + if (capable(CAP_SYS_ADMIN)) + return; + + if (supports_clearbhb(SCOPE_SYSTEM)) { + emit(aarch64_insn_gen_hint(AARCH64_INSN_HINT_CLEARBHB), ctx); + return; + } + + if (k) { + emit_a64_mov_i64(r1, k, ctx); + emit(A64_B(1), ctx); + emit(A64_SUBS_I(true, r1, r1, 1), ctx); + emit(A64_B_(A64_COND_NE, -2), ctx); + emit(aarch64_insn_gen_dsb(AARCH64_INSN_MB_ISH), ctx); + emit(aarch64_insn_get_isb_value(), ctx); + } + + if (is_spectre_bhb_fw_mitigated()) { + emit(A64_ORR_I(false, r1, AARCH64_INSN_REG_ZR, + ARM_SMCCC_ARCH_WORKAROUND_3), ctx); + switch (arm_smccc_1_1_get_conduit()) { + case SMCCC_CONDUIT_HVC: + emit(aarch64_insn_get_hvc_value(), ctx); + break; + case SMCCC_CONDUIT_SMC: + emit(aarch64_insn_get_smc_value(), ctx); + break; + default: + pr_err_once("Firmware mitigation enabled with unknown conduit\n"); + } + } +} + +static void build_epilogue(struct jit_ctx *ctx, bool was_classic) { const u8 r0 = bpf2a64[BPF_REG_0]; const u8 ptr = bpf2a64[TCCNT_PTR]; @@ -870,10 +916,13 @@ static void build_epilogue(struct jit_ctx *ctx) emit(A64_POP(A64_ZR, ptr, A64_SP), ctx); + if (was_classic) + build_bhb_mitigation(ctx); + /* Restore FP/LR registers */ emit(A64_POP(A64_FP, A64_LR, A64_SP), ctx); - /* Set return value */ + /* Move the return value from bpf:r0 (aka x7) to x0 */ emit(A64_MOV(1, A64_R(0), r0), ctx); /* Authenticate lr */ @@ -1817,7 +1866,7 @@ struct bpf_prog *bpf_int_jit_compile(struct bpf_prog *prog) } ctx.epilogue_offset = ctx.idx; - build_epilogue(&ctx); + build_epilogue(&ctx, was_classic); build_plt(&ctx); extable_align = __alignof__(struct exception_table_entry); @@ -1880,7 +1929,7 @@ skip_init_ctx: goto out_free_hdr; } - build_epilogue(&ctx); + build_epilogue(&ctx, was_classic); build_plt(&ctx); /* Extra pass to validate JITed code. */ diff --git a/arch/csky/include/asm/Kbuild b/arch/csky/include/asm/Kbuild index 9a9bc65b57..3a5c7f6e5a 100644 --- a/arch/csky/include/asm/Kbuild +++ b/arch/csky/include/asm/Kbuild @@ -11,3 +11,4 @@ generic-y += qspinlock.h generic-y += parport.h generic-y += user.h generic-y += vmlinux.lds.h +generic-y += text-patching.h diff --git a/arch/hexagon/include/asm/Kbuild b/arch/hexagon/include/asm/Kbuild index 8c1a78c8f5..1efa1e993d 100644 --- a/arch/hexagon/include/asm/Kbuild +++ b/arch/hexagon/include/asm/Kbuild @@ -5,3 +5,4 @@ generic-y += extable.h generic-y += iomap.h generic-y += kvm_para.h generic-y += mcs_spinlock.h +generic-y += text-patching.h diff --git a/arch/loongarch/include/asm/Kbuild b/arch/loongarch/include/asm/Kbuild index 5b5a6c90e6..80ddb5edb8 100644 --- a/arch/loongarch/include/asm/Kbuild +++ b/arch/loongarch/include/asm/Kbuild @@ -11,3 +11,4 @@ generic-y += ioctl.h generic-y += mmzone.h generic-y += statfs.h generic-y += param.h +generic-y += text-patching.h diff --git a/arch/loongarch/include/asm/set_memory.h b/arch/loongarch/include/asm/set_memory.h index d70505b667..55dfaefd02 100644 --- a/arch/loongarch/include/asm/set_memory.h +++ b/arch/loongarch/include/asm/set_memory.h @@ -17,5 +17,6 @@ int set_memory_rw(unsigned long addr, int numpages); bool kernel_page_present(struct page *page); int set_direct_map_default_noflush(struct page *page); int set_direct_map_invalid_noflush(struct page *page); +int set_direct_map_valid_noflush(struct page *page, unsigned nr, bool valid); #endif /* _ASM_LOONGARCH_SET_MEMORY_H */ diff --git a/arch/loongarch/mm/pageattr.c b/arch/loongarch/mm/pageattr.c index ffd8d76021..bf86782484 100644 --- a/arch/loongarch/mm/pageattr.c +++ b/arch/loongarch/mm/pageattr.c @@ -216,3 +216,22 @@ int set_direct_map_invalid_noflush(struct page *page) return __set_memory(addr, 1, __pgprot(0), __pgprot(_PAGE_PRESENT | _PAGE_VALID)); } + +int set_direct_map_valid_noflush(struct page *page, unsigned nr, bool valid) +{ + unsigned long addr = (unsigned long)page_address(page); + pgprot_t set, clear; + + if (addr < vm_map_base) + return 0; + + if (valid) { + set = PAGE_KERNEL; + clear = __pgprot(0); + } else { + set = __pgprot(0); + clear = __pgprot(_PAGE_PRESENT | _PAGE_VALID); + } + + return __set_memory(addr, 1, set, clear); +} diff --git a/arch/m68k/include/asm/Kbuild b/arch/m68k/include/asm/Kbuild index 0dbf9c5c6f..b282e0dd8d 100644 --- a/arch/m68k/include/asm/Kbuild +++ b/arch/m68k/include/asm/Kbuild @@ -4,3 +4,4 @@ generic-y += extable.h generic-y += kvm_para.h generic-y += mcs_spinlock.h generic-y += spinlock.h +generic-y += text-patching.h diff --git a/arch/microblaze/include/asm/Kbuild b/arch/microblaze/include/asm/Kbuild index a055f5dbe0..7178f990e8 100644 --- a/arch/microblaze/include/asm/Kbuild +++ b/arch/microblaze/include/asm/Kbuild @@ -8,3 +8,4 @@ generic-y += parport.h generic-y += syscalls.h generic-y += tlb.h generic-y += user.h +generic-y += text-patching.h diff --git a/arch/mips/include/asm/Kbuild b/arch/mips/include/asm/Kbuild index 7ba67a0d6c..684569b2ec 100644 --- a/arch/mips/include/asm/Kbuild +++ b/arch/mips/include/asm/Kbuild @@ -13,3 +13,4 @@ generic-y += parport.h generic-y += qrwlock.h generic-y += qspinlock.h generic-y += user.h +generic-y += text-patching.h diff --git a/arch/nios2/include/asm/Kbuild b/arch/nios2/include/asm/Kbuild index 0d09829ed1..28004301c2 100644 --- a/arch/nios2/include/asm/Kbuild +++ b/arch/nios2/include/asm/Kbuild @@ -7,3 +7,4 @@ generic-y += kvm_para.h generic-y += mcs_spinlock.h generic-y += spinlock.h generic-y += user.h +generic-y += text-patching.h diff --git a/arch/openrisc/include/asm/Kbuild b/arch/openrisc/include/asm/Kbuild index cef49d60d7..2b1a6b00cd 100644 --- a/arch/openrisc/include/asm/Kbuild +++ b/arch/openrisc/include/asm/Kbuild @@ -9,3 +9,4 @@ generic-y += spinlock.h generic-y += qrwlock_types.h generic-y += qrwlock.h generic-y += user.h +generic-y += text-patching.h diff --git a/arch/parisc/include/asm/patch.h b/arch/parisc/include/asm/text-patching.h similarity index 100% rename from arch/parisc/include/asm/patch.h rename to arch/parisc/include/asm/text-patching.h diff --git a/arch/parisc/kernel/ftrace.c b/arch/parisc/kernel/ftrace.c index c91f9c2e61..3e34b4473d 100644 --- a/arch/parisc/kernel/ftrace.c +++ b/arch/parisc/kernel/ftrace.c @@ -20,7 +20,7 @@ #include #include #include -#include +#include #define __hot __section(".text.hot") diff --git a/arch/parisc/kernel/jump_label.c b/arch/parisc/kernel/jump_label.c index e253b13450..ea51f15bf0 100644 --- a/arch/parisc/kernel/jump_label.c +++ b/arch/parisc/kernel/jump_label.c @@ -8,7 +8,7 @@ #include #include #include -#include +#include static inline int reassemble_17(int as17) { diff --git a/arch/parisc/kernel/kgdb.c b/arch/parisc/kernel/kgdb.c index b16fa9bac5..fee81f8775 100644 --- a/arch/parisc/kernel/kgdb.c +++ b/arch/parisc/kernel/kgdb.c @@ -16,7 +16,7 @@ #include #include #include -#include +#include #include const struct kgdb_arch arch_kgdb_ops = { diff --git a/arch/parisc/kernel/kprobes.c b/arch/parisc/kernel/kprobes.c index 6e0b86652f..9255adba67 100644 --- a/arch/parisc/kernel/kprobes.c +++ b/arch/parisc/kernel/kprobes.c @@ -12,7 +12,7 @@ #include #include #include -#include +#include DEFINE_PER_CPU(struct kprobe *, current_kprobe) = NULL; DEFINE_PER_CPU(struct kprobe_ctlblk, kprobe_ctlblk); diff --git a/arch/parisc/kernel/patch.c b/arch/parisc/kernel/patch.c index e59574f65e..35dd764b87 100644 --- a/arch/parisc/kernel/patch.c +++ b/arch/parisc/kernel/patch.c @@ -13,7 +13,7 @@ #include #include -#include +#include struct patch { void *addr; diff --git a/arch/powerpc/include/asm/kprobes.h b/arch/powerpc/include/asm/kprobes.h index 4525a9c682..dfe2e5ad3b 100644 --- a/arch/powerpc/include/asm/kprobes.h +++ b/arch/powerpc/include/asm/kprobes.h @@ -21,7 +21,7 @@ #include #include #include -#include +#include #ifdef CONFIG_KPROBES #define __ARCH_WANT_KPROBES_INSN_SLOT diff --git a/arch/powerpc/include/asm/mmzone.h b/arch/powerpc/include/asm/mmzone.h index d99863cd6c..049152f8d5 100644 --- a/arch/powerpc/include/asm/mmzone.h +++ b/arch/powerpc/include/asm/mmzone.h @@ -29,6 +29,7 @@ extern cpumask_var_t node_to_cpumask_map[]; #ifdef CONFIG_MEMORY_HOTPLUG extern unsigned long max_pfn; u64 memory_hotplug_max(void); +u64 hot_add_drconf_memory_max(void); #else #define memory_hotplug_max() memblock_end_of_DRAM() #endif diff --git a/arch/powerpc/include/asm/code-patching.h b/arch/powerpc/include/asm/text-patching.h similarity index 100% rename from arch/powerpc/include/asm/code-patching.h rename to arch/powerpc/include/asm/text-patching.h diff --git a/arch/powerpc/kernel/crash_dump.c b/arch/powerpc/kernel/crash_dump.c index 2086fa6cdc..103b6605dd 100644 --- a/arch/powerpc/kernel/crash_dump.c +++ b/arch/powerpc/kernel/crash_dump.c @@ -13,7 +13,7 @@ #include #include #include -#include +#include #include #include #include diff --git a/arch/powerpc/kernel/epapr_paravirt.c b/arch/powerpc/kernel/epapr_paravirt.c index d4b8aff208..247ab2acac 100644 --- a/arch/powerpc/kernel/epapr_paravirt.c +++ b/arch/powerpc/kernel/epapr_paravirt.c @@ -9,7 +9,7 @@ #include #include #include -#include +#include #include #include diff --git a/arch/powerpc/kernel/jump_label.c b/arch/powerpc/kernel/jump_label.c index 5277cf582c..2659e1ac86 100644 --- a/arch/powerpc/kernel/jump_label.c +++ b/arch/powerpc/kernel/jump_label.c @@ -5,7 +5,7 @@ #include #include -#include +#include #include void arch_jump_label_transform(struct jump_entry *entry, diff --git a/arch/powerpc/kernel/kgdb.c b/arch/powerpc/kernel/kgdb.c index 7a8bc03a00..5081334b7b 100644 --- a/arch/powerpc/kernel/kgdb.c +++ b/arch/powerpc/kernel/kgdb.c @@ -21,7 +21,7 @@ #include #include #include -#include +#include #include #include diff --git a/arch/powerpc/kernel/kprobes.c b/arch/powerpc/kernel/kprobes.c index f8aa91bc3b..9c85bbcc52 100644 --- a/arch/powerpc/kernel/kprobes.c +++ b/arch/powerpc/kernel/kprobes.c @@ -21,7 +21,7 @@ #include #include #include -#include +#include #include #include #include diff --git a/arch/powerpc/kernel/module_32.c b/arch/powerpc/kernel/module_32.c index 816a63fd71..f930e3395a 100644 --- a/arch/powerpc/kernel/module_32.c +++ b/arch/powerpc/kernel/module_32.c @@ -18,7 +18,7 @@ #include #include #include -#include +#include /* Count how many different relocations (different symbol, different addend) */ diff --git a/arch/powerpc/kernel/module_64.c b/arch/powerpc/kernel/module_64.c index e9bab599d0..135960918d 100644 --- a/arch/powerpc/kernel/module_64.c +++ b/arch/powerpc/kernel/module_64.c @@ -17,7 +17,7 @@ #include #include #include -#include +#include #include #include #include diff --git a/arch/powerpc/kernel/optprobes.c b/arch/powerpc/kernel/optprobes.c index c0b351d610..2e83702bf9 100644 --- a/arch/powerpc/kernel/optprobes.c +++ b/arch/powerpc/kernel/optprobes.c @@ -13,7 +13,7 @@ #include #include #include -#include +#include #include #include #include diff --git a/arch/powerpc/kernel/process.c b/arch/powerpc/kernel/process.c index ff61a3e798..7b739b9a91 100644 --- a/arch/powerpc/kernel/process.c +++ b/arch/powerpc/kernel/process.c @@ -54,7 +54,7 @@ #include #include #endif -#include +#include #include #include #include diff --git a/arch/powerpc/kernel/security.c b/arch/powerpc/kernel/security.c index 4856e1a516..fbb7ebd8aa 100644 --- a/arch/powerpc/kernel/security.c +++ b/arch/powerpc/kernel/security.c @@ -14,7 +14,7 @@ #include #include -#include +#include #include #include #include diff --git a/arch/powerpc/kernel/setup_32.c b/arch/powerpc/kernel/setup_32.c index e515c1f7d8..75dbf3e0d9 100644 --- a/arch/powerpc/kernel/setup_32.c +++ b/arch/powerpc/kernel/setup_32.c @@ -40,7 +40,7 @@ #include #include #include -#include +#include #include #include #include diff --git a/arch/powerpc/kernel/setup_64.c b/arch/powerpc/kernel/setup_64.c index 22f83fbbc7..3ebf5b9fbe 100644 --- a/arch/powerpc/kernel/setup_64.c +++ b/arch/powerpc/kernel/setup_64.c @@ -60,7 +60,7 @@ #include #include #include -#include +#include #include #include #include diff --git a/arch/powerpc/kernel/static_call.c b/arch/powerpc/kernel/static_call.c index 1502b7e439..7cfd0710e7 100644 --- a/arch/powerpc/kernel/static_call.c +++ b/arch/powerpc/kernel/static_call.c @@ -2,7 +2,7 @@ #include #include -#include +#include void arch_static_call_transform(void *site, void *tramp, void *func, bool tail) { diff --git a/arch/powerpc/kernel/trace/ftrace.c b/arch/powerpc/kernel/trace/ftrace.c index d8d6b4fd9a..be1a245241 100644 --- a/arch/powerpc/kernel/trace/ftrace.c +++ b/arch/powerpc/kernel/trace/ftrace.c @@ -23,7 +23,7 @@ #include #include -#include +#include #include #include #include diff --git a/arch/powerpc/kernel/trace/ftrace_64_pg.c b/arch/powerpc/kernel/trace/ftrace_64_pg.c index 12fab1803b..9e862ba552 100644 --- a/arch/powerpc/kernel/trace/ftrace_64_pg.c +++ b/arch/powerpc/kernel/trace/ftrace_64_pg.c @@ -23,7 +23,7 @@ #include #include -#include +#include #include #include #include diff --git a/arch/powerpc/lib/code-patching.c b/arch/powerpc/lib/code-patching.c index acdab294b3..af97fbb3c2 100644 --- a/arch/powerpc/lib/code-patching.c +++ b/arch/powerpc/lib/code-patching.c @@ -17,7 +17,7 @@ #include #include #include -#include +#include #include static int __patch_mem(void *exec_addr, unsigned long val, void *patch_addr, bool is_dword) diff --git a/arch/powerpc/lib/feature-fixups.c b/arch/powerpc/lib/feature-fixups.c index b7201ba50b..587c8cf123 100644 --- a/arch/powerpc/lib/feature-fixups.c +++ b/arch/powerpc/lib/feature-fixups.c @@ -16,7 +16,7 @@ #include #include #include -#include +#include #include #include #include diff --git a/arch/powerpc/lib/test-code-patching.c b/arch/powerpc/lib/test-code-patching.c index 8cd3b32f80..1440d99630 100644 --- a/arch/powerpc/lib/test-code-patching.c +++ b/arch/powerpc/lib/test-code-patching.c @@ -6,7 +6,7 @@ #include #include -#include +#include static int __init instr_is_branch_to_addr(const u32 *instr, unsigned long addr) { diff --git a/arch/powerpc/lib/test_emulate_step.c b/arch/powerpc/lib/test_emulate_step.c index 23c7805fb7..66b5b4fa16 100644 --- a/arch/powerpc/lib/test_emulate_step.c +++ b/arch/powerpc/lib/test_emulate_step.c @@ -11,7 +11,7 @@ #include #include #include -#include +#include #include #define MAX_SUBTESTS 16 diff --git a/arch/powerpc/mm/book3s32/mmu.c b/arch/powerpc/mm/book3s32/mmu.c index 2db167f423..6978344edc 100644 --- a/arch/powerpc/mm/book3s32/mmu.c +++ b/arch/powerpc/mm/book3s32/mmu.c @@ -25,7 +25,7 @@ #include #include -#include +#include #include #include diff --git a/arch/powerpc/mm/book3s64/hash_utils.c b/arch/powerpc/mm/book3s64/hash_utils.c index e1eadd03f1..47b2228226 100644 --- a/arch/powerpc/mm/book3s64/hash_utils.c +++ b/arch/powerpc/mm/book3s64/hash_utils.c @@ -57,7 +57,7 @@ #include #include #include -#include +#include #include #include #include diff --git a/arch/powerpc/mm/book3s64/slb.c b/arch/powerpc/mm/book3s64/slb.c index f2708c8629..6b78355240 100644 --- a/arch/powerpc/mm/book3s64/slb.c +++ b/arch/powerpc/mm/book3s64/slb.c @@ -24,7 +24,7 @@ #include #include -#include +#include #include "internal.h" diff --git a/arch/powerpc/mm/kasan/init_32.c b/arch/powerpc/mm/kasan/init_32.c index aa9aa11927..03666d790a 100644 --- a/arch/powerpc/mm/kasan/init_32.c +++ b/arch/powerpc/mm/kasan/init_32.c @@ -7,7 +7,7 @@ #include #include #include -#include +#include #include static pgprot_t __init kasan_prot_ro(void) diff --git a/arch/powerpc/mm/mem.c b/arch/powerpc/mm/mem.c index 1221c561b4..c7708c8fad 100644 --- a/arch/powerpc/mm/mem.c +++ b/arch/powerpc/mm/mem.c @@ -26,7 +26,7 @@ #include #include #include -#include +#include #include #include diff --git a/arch/powerpc/mm/nohash/44x.c b/arch/powerpc/mm/nohash/44x.c index 1beae802bb..6d10c6d8be 100644 --- a/arch/powerpc/mm/nohash/44x.c +++ b/arch/powerpc/mm/nohash/44x.c @@ -24,7 +24,7 @@ #include #include #include -#include +#include #include #include diff --git a/arch/powerpc/mm/nohash/book3e_pgtable.c b/arch/powerpc/mm/nohash/book3e_pgtable.c index ad2a7c26f2..062e8785c1 100644 --- a/arch/powerpc/mm/nohash/book3e_pgtable.c +++ b/arch/powerpc/mm/nohash/book3e_pgtable.c @@ -10,7 +10,7 @@ #include #include #include -#include +#include #include diff --git a/arch/powerpc/mm/nohash/tlb.c b/arch/powerpc/mm/nohash/tlb.c index b653a7be4c..0a650742f3 100644 --- a/arch/powerpc/mm/nohash/tlb.c +++ b/arch/powerpc/mm/nohash/tlb.c @@ -37,7 +37,7 @@ #include #include #include -#include +#include #include #include #include diff --git a/arch/powerpc/mm/nohash/tlb_64e.c b/arch/powerpc/mm/nohash/tlb_64e.c index d26656b07b..4f925adf26 100644 --- a/arch/powerpc/mm/nohash/tlb_64e.c +++ b/arch/powerpc/mm/nohash/tlb_64e.c @@ -24,7 +24,7 @@ #include #include #include -#include +#include #include #include diff --git a/arch/powerpc/mm/numa.c b/arch/powerpc/mm/numa.c index 3c1da08304..603a0f652b 100644 --- a/arch/powerpc/mm/numa.c +++ b/arch/powerpc/mm/numa.c @@ -1336,7 +1336,7 @@ int hot_add_scn_to_nid(unsigned long scn_addr) return nid; } -static u64 hot_add_drconf_memory_max(void) +u64 hot_add_drconf_memory_max(void) { struct device_node *memory = NULL; struct device_node *dn = NULL; diff --git a/arch/powerpc/net/bpf_jit_comp.c b/arch/powerpc/net/bpf_jit_comp.c index 2a36cc2e7e..68c6a13e6a 100644 --- a/arch/powerpc/net/bpf_jit_comp.c +++ b/arch/powerpc/net/bpf_jit_comp.c @@ -18,7 +18,7 @@ #include #include -#include +#include #include "bpf_jit.h" diff --git a/arch/powerpc/perf/8xx-pmu.c b/arch/powerpc/perf/8xx-pmu.c index 308a2e40d7..1d2972229e 100644 --- a/arch/powerpc/perf/8xx-pmu.c +++ b/arch/powerpc/perf/8xx-pmu.c @@ -14,7 +14,7 @@ #include #include #include -#include +#include #include #define PERF_8xx_ID_CPU_CYCLES 1 diff --git a/arch/powerpc/perf/core-book3s.c b/arch/powerpc/perf/core-book3s.c index 4286746975..a727cd111c 100644 --- a/arch/powerpc/perf/core-book3s.c +++ b/arch/powerpc/perf/core-book3s.c @@ -16,7 +16,7 @@ #include #include #include -#include +#include #include #include diff --git a/arch/powerpc/platforms/85xx/smp.c b/arch/powerpc/platforms/85xx/smp.c index e52b848b64..32fa5fb557 100644 --- a/arch/powerpc/platforms/85xx/smp.c +++ b/arch/powerpc/platforms/85xx/smp.c @@ -23,7 +23,7 @@ #include #include #include -#include +#include #include #include diff --git a/arch/powerpc/platforms/86xx/mpc86xx_smp.c b/arch/powerpc/platforms/86xx/mpc86xx_smp.c index 8a7e55acf0..9be33e41af 100644 --- a/arch/powerpc/platforms/86xx/mpc86xx_smp.c +++ b/arch/powerpc/platforms/86xx/mpc86xx_smp.c @@ -12,7 +12,7 @@ #include #include -#include +#include #include #include #include diff --git a/arch/powerpc/platforms/cell/smp.c b/arch/powerpc/platforms/cell/smp.c index fee638fd89..0e8f20ecca 100644 --- a/arch/powerpc/platforms/cell/smp.c +++ b/arch/powerpc/platforms/cell/smp.c @@ -35,7 +35,7 @@ #include #include #include -#include +#include #include "interrupt.h" #include diff --git a/arch/powerpc/platforms/powermac/smp.c b/arch/powerpc/platforms/powermac/smp.c index d21b681f52..09e7fe24fa 100644 --- a/arch/powerpc/platforms/powermac/smp.c +++ b/arch/powerpc/platforms/powermac/smp.c @@ -35,7 +35,7 @@ #include #include -#include +#include #include #include #include diff --git a/arch/powerpc/platforms/powernv/idle.c b/arch/powerpc/platforms/powernv/idle.c index ad41dffe4d..d98b933e49 100644 --- a/arch/powerpc/platforms/powernv/idle.c +++ b/arch/powerpc/platforms/powernv/idle.c @@ -18,7 +18,7 @@ #include #include #include -#include +#include #include #include #include diff --git a/arch/powerpc/platforms/powernv/smp.c b/arch/powerpc/platforms/powernv/smp.c index 8f14f0581a..6b746feeab 100644 --- a/arch/powerpc/platforms/powernv/smp.c +++ b/arch/powerpc/platforms/powernv/smp.c @@ -28,7 +28,7 @@ #include #include #include -#include +#include #include #include #include diff --git a/arch/powerpc/platforms/pseries/iommu.c b/arch/powerpc/platforms/pseries/iommu.c index 3313dafb1f..293d4a18d9 100644 --- a/arch/powerpc/platforms/pseries/iommu.c +++ b/arch/powerpc/platforms/pseries/iommu.c @@ -1293,17 +1293,13 @@ static LIST_HEAD(failed_ddw_pdn_list); static phys_addr_t ddw_memory_hotplug_max(void) { - resource_size_t max_addr = memory_hotplug_max(); - struct device_node *memory; + resource_size_t max_addr; - for_each_node_by_type(memory, "memory") { - struct resource res; - - if (of_address_to_resource(memory, 0, &res)) - continue; - - max_addr = max_t(resource_size_t, max_addr, res.end + 1); - } +#if defined(CONFIG_NUMA) && defined(CONFIG_MEMORY_HOTPLUG) + max_addr = hot_add_drconf_memory_max(); +#else + max_addr = memblock_end_of_DRAM(); +#endif return max_addr; } @@ -1609,7 +1605,7 @@ static bool enable_ddw(struct pci_dev *dev, struct device_node *pdn) if (direct_mapping) { /* DDW maps the whole partition, so enable direct DMA mapping */ - ret = walk_system_ram_range(0, memblock_end_of_DRAM() >> PAGE_SHIFT, + ret = walk_system_ram_range(0, ddw_memory_hotplug_max() >> PAGE_SHIFT, win64->value, tce_setrange_multi_pSeriesLP_walk); if (ret) { dev_info(&dev->dev, "failed to map DMA window for %pOF: %d\n", @@ -2355,11 +2351,17 @@ static int iommu_mem_notifier(struct notifier_block *nb, unsigned long action, struct memory_notify *arg = data; int ret = 0; + /* This notifier can get called when onlining persistent memory as well. + * TCEs are not pre-mapped for persistent memory. Persistent memory will + * always be above ddw_memory_hotplug_max() + */ + switch (action) { case MEM_GOING_ONLINE: spin_lock(&dma_win_list_lock); list_for_each_entry(window, &dma_win_list, list) { - if (window->direct) { + if (window->direct && (arg->start_pfn << PAGE_SHIFT) < + ddw_memory_hotplug_max()) { ret |= tce_setrange_multi_pSeriesLP(arg->start_pfn, arg->nr_pages, window->prop); } @@ -2371,7 +2373,8 @@ static int iommu_mem_notifier(struct notifier_block *nb, unsigned long action, case MEM_OFFLINE: spin_lock(&dma_win_list_lock); list_for_each_entry(window, &dma_win_list, list) { - if (window->direct) { + if (window->direct && (arg->start_pfn << PAGE_SHIFT) < + ddw_memory_hotplug_max()) { ret |= tce_clearrange_multi_pSeriesLP(arg->start_pfn, arg->nr_pages, window->prop); } diff --git a/arch/powerpc/platforms/pseries/smp.c b/arch/powerpc/platforms/pseries/smp.c index c597711ef2..db99725e75 100644 --- a/arch/powerpc/platforms/pseries/smp.c +++ b/arch/powerpc/platforms/pseries/smp.c @@ -39,7 +39,7 @@ #include #include #include -#include +#include #include #include diff --git a/arch/powerpc/xmon/xmon.c b/arch/powerpc/xmon/xmon.c index e6cddbb230..e76e1d5d06 100644 --- a/arch/powerpc/xmon/xmon.c +++ b/arch/powerpc/xmon/xmon.c @@ -50,7 +50,7 @@ #include #include #include -#include +#include #include #include #include diff --git a/arch/riscv/errata/andes/errata.c b/arch/riscv/errata/andes/errata.c index fc1a34faa5..dcc9d1ee5f 100644 --- a/arch/riscv/errata/andes/errata.c +++ b/arch/riscv/errata/andes/errata.c @@ -13,7 +13,7 @@ #include #include #include -#include +#include #include #include #include diff --git a/arch/riscv/errata/sifive/errata.c b/arch/riscv/errata/sifive/errata.c index cea3b96ade..38aac2c478 100644 --- a/arch/riscv/errata/sifive/errata.c +++ b/arch/riscv/errata/sifive/errata.c @@ -8,7 +8,7 @@ #include #include #include -#include +#include #include #include #include diff --git a/arch/riscv/errata/thead/errata.c b/arch/riscv/errata/thead/errata.c index f5120e07c3..e24770a779 100644 --- a/arch/riscv/errata/thead/errata.c +++ b/arch/riscv/errata/thead/errata.c @@ -16,7 +16,7 @@ #include #include #include -#include +#include #include #include diff --git a/arch/riscv/include/asm/set_memory.h b/arch/riscv/include/asm/set_memory.h index ab92fc84e1..ea263d3683 100644 --- a/arch/riscv/include/asm/set_memory.h +++ b/arch/riscv/include/asm/set_memory.h @@ -42,6 +42,7 @@ static inline int set_kernel_memory(char *startp, char *endp, int set_direct_map_invalid_noflush(struct page *page); int set_direct_map_default_noflush(struct page *page); +int set_direct_map_valid_noflush(struct page *page, unsigned nr, bool valid); bool kernel_page_present(struct page *page); #endif /* __ASSEMBLY__ */ diff --git a/arch/riscv/include/asm/patch.h b/arch/riscv/include/asm/text-patching.h similarity index 100% rename from arch/riscv/include/asm/patch.h rename to arch/riscv/include/asm/text-patching.h diff --git a/arch/riscv/include/asm/uprobes.h b/arch/riscv/include/asm/uprobes.h index 3fc7deda91..5008f76cdc 100644 --- a/arch/riscv/include/asm/uprobes.h +++ b/arch/riscv/include/asm/uprobes.h @@ -4,7 +4,7 @@ #define _ASM_RISCV_UPROBES_H #include -#include +#include #include #define MAX_UINSN_BYTES 8 diff --git a/arch/riscv/kernel/alternative.c b/arch/riscv/kernel/alternative.c index 0128b161bf..7eb3cb1215 100644 --- a/arch/riscv/kernel/alternative.c +++ b/arch/riscv/kernel/alternative.c @@ -18,7 +18,7 @@ #include #include #include -#include +#include struct cpu_manufacturer_info_t { unsigned long vendor_id; diff --git a/arch/riscv/kernel/cpufeature.c b/arch/riscv/kernel/cpufeature.c index 3a8eeaa931..826f46b21f 100644 --- a/arch/riscv/kernel/cpufeature.c +++ b/arch/riscv/kernel/cpufeature.c @@ -20,7 +20,8 @@ #include #include #include -#include +#include +#include #include #include #include diff --git a/arch/riscv/kernel/ftrace.c b/arch/riscv/kernel/ftrace.c index 4b95c574fd..a7620ef93b 100644 --- a/arch/riscv/kernel/ftrace.c +++ b/arch/riscv/kernel/ftrace.c @@ -10,7 +10,7 @@ #include #include #include -#include +#include #ifdef CONFIG_DYNAMIC_FTRACE void ftrace_arch_code_modify_prepare(void) __acquires(&text_mutex) diff --git a/arch/riscv/kernel/jump_label.c b/arch/riscv/kernel/jump_label.c index 11ad789c60..6eee6f736f 100644 --- a/arch/riscv/kernel/jump_label.c +++ b/arch/riscv/kernel/jump_label.c @@ -10,7 +10,7 @@ #include #include #include -#include +#include #define RISCV_INSN_NOP 0x00000013U #define RISCV_INSN_JAL 0x0000006fU diff --git a/arch/riscv/kernel/patch.c b/arch/riscv/kernel/patch.c index 34ef522f07..db13c9ddf9 100644 --- a/arch/riscv/kernel/patch.c +++ b/arch/riscv/kernel/patch.c @@ -13,7 +13,7 @@ #include #include #include -#include +#include #include struct patch_insn { diff --git a/arch/riscv/kernel/probes/kprobes.c b/arch/riscv/kernel/probes/kprobes.c index 474a652136..380a0e8cec 100644 --- a/arch/riscv/kernel/probes/kprobes.c +++ b/arch/riscv/kernel/probes/kprobes.c @@ -12,7 +12,7 @@ #include #include #include -#include +#include #include "decode-insn.h" diff --git a/arch/riscv/mm/pageattr.c b/arch/riscv/mm/pageattr.c index 271d01a5ba..d815448758 100644 --- a/arch/riscv/mm/pageattr.c +++ b/arch/riscv/mm/pageattr.c @@ -386,6 +386,21 @@ int set_direct_map_default_noflush(struct page *page) PAGE_KERNEL, __pgprot(_PAGE_EXEC)); } +int set_direct_map_valid_noflush(struct page *page, unsigned nr, bool valid) +{ + pgprot_t set, clear; + + if (valid) { + set = PAGE_KERNEL; + clear = __pgprot(_PAGE_EXEC); + } else { + set = __pgprot(0); + clear = __pgprot(_PAGE_PRESENT); + } + + return __set_memory((unsigned long)page_address(page), nr, set, clear); +} + #ifdef CONFIG_DEBUG_PAGEALLOC static int debug_pagealloc_set_page(pte_t *pte, unsigned long addr, void *data) { diff --git a/arch/riscv/net/bpf_jit_comp64.c b/arch/riscv/net/bpf_jit_comp64.c index 4cc631fa70..ca60db7519 100644 --- a/arch/riscv/net/bpf_jit_comp64.c +++ b/arch/riscv/net/bpf_jit_comp64.c @@ -10,7 +10,7 @@ #include #include #include -#include +#include #include #include #include "bpf_jit.h" diff --git a/arch/riscv/net/bpf_jit_core.c b/arch/riscv/net/bpf_jit_core.c index 6de753c667..f8cd2f70a7 100644 --- a/arch/riscv/net/bpf_jit_core.c +++ b/arch/riscv/net/bpf_jit_core.c @@ -9,7 +9,7 @@ #include #include #include -#include +#include #include #include "bpf_jit.h" diff --git a/arch/s390/include/asm/set_memory.h b/arch/s390/include/asm/set_memory.h index 06fbabe2f6..240bcfbdcd 100644 --- a/arch/s390/include/asm/set_memory.h +++ b/arch/s390/include/asm/set_memory.h @@ -62,5 +62,6 @@ __SET_MEMORY_FUNC(set_memory_4k, SET_MEMORY_4K) int set_direct_map_invalid_noflush(struct page *page); int set_direct_map_default_noflush(struct page *page); +int set_direct_map_valid_noflush(struct page *page, unsigned nr, bool valid); #endif diff --git a/arch/s390/mm/pageattr.c b/arch/s390/mm/pageattr.c index 5f805ad42d..4c7ee74aa1 100644 --- a/arch/s390/mm/pageattr.c +++ b/arch/s390/mm/pageattr.c @@ -406,6 +406,17 @@ int set_direct_map_default_noflush(struct page *page) return __set_memory((unsigned long)page_to_virt(page), 1, SET_MEMORY_DEF); } +int set_direct_map_valid_noflush(struct page *page, unsigned nr, bool valid) +{ + unsigned long flags; + + if (valid) + flags = SET_MEMORY_DEF; + else + flags = SET_MEMORY_INV; + + return __set_memory((unsigned long)page_to_virt(page), nr, flags); +} #if defined(CONFIG_DEBUG_PAGEALLOC) || defined(CONFIG_KFENCE) static void ipte_range(pte_t *pte, unsigned long address, int nr) diff --git a/arch/sh/include/asm/Kbuild b/arch/sh/include/asm/Kbuild index fc44d9c88b..4d3f10ed82 100644 --- a/arch/sh/include/asm/Kbuild +++ b/arch/sh/include/asm/Kbuild @@ -3,3 +3,4 @@ generated-y += syscall_table.h generic-y += kvm_para.h generic-y += mcs_spinlock.h generic-y += parport.h +generic-y += text-patching.h diff --git a/arch/sparc/include/asm/Kbuild b/arch/sparc/include/asm/Kbuild index 43b0ae4c2c..17ee8a273a 100644 --- a/arch/sparc/include/asm/Kbuild +++ b/arch/sparc/include/asm/Kbuild @@ -4,3 +4,4 @@ generated-y += syscall_table_64.h generic-y += agp.h generic-y += kvm_para.h generic-y += mcs_spinlock.h +generic-y += text-patching.h diff --git a/arch/um/kernel/um_arch.c b/arch/um/kernel/um_arch.c index 8e594cda6d..f8de31a0c5 100644 --- a/arch/um/kernel/um_arch.c +++ b/arch/um/kernel/um_arch.c @@ -468,6 +468,11 @@ void *text_poke(void *addr, const void *opcode, size_t len) return memcpy(addr, opcode, len); } +void *text_poke_copy(void *addr, const void *opcode, size_t len) +{ + return text_poke(addr, opcode, len); +} + void text_poke_sync(void) { } diff --git a/arch/x86/Kconfig b/arch/x86/Kconfig index 35d5354c8d..a06c01048f 100644 --- a/arch/x86/Kconfig +++ b/arch/x86/Kconfig @@ -83,6 +83,7 @@ config X86 select ARCH_HAS_DMA_OPS if GART_IOMMU || XEN select ARCH_HAS_EARLY_DEBUG if KGDB select ARCH_HAS_ELF_RANDOMIZE + select ARCH_HAS_EXECMEM_ROX if X86_64 && STRICT_MODULE_RWX select ARCH_HAS_FAST_MULTIPLIER select ARCH_HAS_FORTIFY_SOURCE select ARCH_HAS_GCOV_PROFILE_ALL @@ -2584,7 +2585,8 @@ config MITIGATION_IBPB_ENTRY depends on CPU_SUP_AMD && X86_64 default y help - Compile the kernel with support for the retbleed=ibpb mitigation. + Compile the kernel with support for the retbleed=ibpb and + spec_rstack_overflow={ibpb,ibpb-vmexit} mitigations. config MITIGATION_IBRS_ENTRY bool "Enable IBRS on kernel entry" @@ -2744,6 +2746,17 @@ config MITIGATION_SSB of speculative execution in a similar way to the Meltdown and Spectre security vulnerabilities. +config MITIGATION_ITS + bool "Enable Indirect Target Selection mitigation" + depends on CPU_SUP_INTEL && X86_64 + depends on MITIGATION_RETPOLINE && MITIGATION_RETHUNK + default y + help + Enable Indirect Target Selection (ITS) mitigation. ITS is a bug in + BPU on some Intel CPUs that may allow Spectre V2 style attacks. If + disabled, mitigation cannot be enabled via cmdline. + See + endif config ARCH_HAS_ADD_PAGES diff --git a/arch/x86/entry/entry.S b/arch/x86/entry/entry.S index b7ea3e8e9e..207b6a0f9c 100644 --- a/arch/x86/entry/entry.S +++ b/arch/x86/entry/entry.S @@ -16,18 +16,19 @@ .pushsection .noinstr.text, "ax" -SYM_FUNC_START(entry_ibpb) +/* Clobbers AX, CX, DX */ +SYM_FUNC_START(write_ibpb) movl $MSR_IA32_PRED_CMD, %ecx - movl $PRED_CMD_IBPB, %eax + movl _ASM_RIP(x86_pred_cmd), %eax xorl %edx, %edx wrmsr /* Make sure IBPB clears return stack preductions too. */ FILL_RETURN_BUFFER %rax, RSB_CLEAR_LOOPS, X86_BUG_IBPB_NO_RET RET -SYM_FUNC_END(entry_ibpb) +SYM_FUNC_END(write_ibpb) /* For KVM */ -EXPORT_SYMBOL_GPL(entry_ibpb); +EXPORT_SYMBOL_GPL(write_ibpb); .popsection diff --git a/arch/x86/entry/entry_64.S b/arch/x86/entry/entry_64.S index 1b5be07f86..9c6a110a52 100644 --- a/arch/x86/entry/entry_64.S +++ b/arch/x86/entry/entry_64.S @@ -1524,7 +1524,9 @@ SYM_CODE_END(rewind_stack_and_make_dead) * ORC to unwind properly. * * The alignment is for performance and not for safety, and may be safely - * refactored in the future if needed. + * refactored in the future if needed. The .skips are for safety, to ensure + * that all RETs are in the second half of a cacheline to mitigate Indirect + * Target Selection, rather than taking the slowpath via its_return_thunk. */ SYM_FUNC_START(clear_bhb_loop) push %rbp @@ -1534,10 +1536,22 @@ SYM_FUNC_START(clear_bhb_loop) call 1f jmp 5f .align 64, 0xcc + /* + * Shift instructions so that the RET is in the upper half of the + * cacheline and don't take the slowpath to its_return_thunk. + */ + .skip 32 - (.Lret1 - 1f), 0xcc ANNOTATE_INTRA_FUNCTION_CALL 1: call 2f - RET +.Lret1: RET .align 64, 0xcc + /* + * As above shift instructions for RET at .Lret2 as well. + * + * This should be ideally be: .skip 32 - (.Lret2 - 2f), 0xcc + * but some Clang versions (e.g. 18) don't like this. + */ + .skip 32 - 18, 0xcc 2: movl $5, %eax 3: jmp 4f nop @@ -1545,7 +1559,7 @@ SYM_FUNC_START(clear_bhb_loop) jnz 3b sub $1, %ecx jnz 1b - RET +.Lret2: RET 5: lfence pop %rbp RET diff --git a/arch/x86/include/asm/alternative.h b/arch/x86/include/asm/alternative.h index ca9ae606aa..5398d3fdc2 100644 --- a/arch/x86/include/asm/alternative.h +++ b/arch/x86/include/asm/alternative.h @@ -134,6 +134,20 @@ static __always_inline int x86_call_depth_emit_accounting(u8 **pprog, } #endif +#if defined(CONFIG_MITIGATION_RETHUNK) && defined(CONFIG_OBJTOOL) +extern bool cpu_wants_rethunk(void); +extern bool cpu_wants_rethunk_at(void *addr); +#else +static __always_inline bool cpu_wants_rethunk(void) +{ + return false; +} +static __always_inline bool cpu_wants_rethunk_at(void *addr) +{ + return false; +} +#endif + #ifdef CONFIG_SMP extern void alternatives_smp_module_add(struct module *mod, char *name, void *locks, void *locks_end, diff --git a/arch/x86/include/asm/cpu_device_id.h b/arch/x86/include/asm/cpu_device_id.h index e4121d9aa9..e754fce9b0 100644 --- a/arch/x86/include/asm/cpu_device_id.h +++ b/arch/x86/include/asm/cpu_device_id.h @@ -56,9 +56,8 @@ /* x86_cpu_id::flags */ #define X86_CPU_ID_FLAG_ENTRY_VALID BIT(0) -#define X86_STEPPINGS(mins, maxs) GENMASK(maxs, mins) /** - * X86_MATCH_VENDOR_FAM_MODEL_STEPPINGS_FEATURE - Base macro for CPU matching + * X86_MATCH_CPU - Base macro for CPU matching * @_vendor: The vendor name, e.g. INTEL, AMD, HYGON, ..., ANY * The name is expanded to X86_VENDOR_@_vendor * @_family: The family number or X86_FAMILY_ANY @@ -75,46 +74,17 @@ * into another macro at the usage site for good reasons, then please * start this local macro with X86_MATCH to allow easy grepping. */ -#define X86_MATCH_VENDOR_FAM_MODEL_STEPPINGS_FEATURE(_vendor, _family, _model, \ - _steppings, _feature, _data) { \ - .vendor = X86_VENDOR_##_vendor, \ - .family = _family, \ - .model = _model, \ - .steppings = _steppings, \ - .feature = _feature, \ - .flags = X86_CPU_ID_FLAG_ENTRY_VALID, \ - .driver_data = (unsigned long) _data \ -} - -#define X86_MATCH_VENDORID_FAM_MODEL_STEPPINGS_FEATURE(_vendor, _family, _model, \ - _steppings, _feature, _data) { \ +#define X86_MATCH_CPU(_vendor, _family, _model, _steppings, _feature, _type, _data) { \ .vendor = _vendor, \ .family = _family, \ .model = _model, \ .steppings = _steppings, \ .feature = _feature, \ .flags = X86_CPU_ID_FLAG_ENTRY_VALID, \ + .type = _type, \ .driver_data = (unsigned long) _data \ } -/** - * X86_MATCH_VENDOR_FAM_MODEL_FEATURE - Macro for CPU matching - * @_vendor: The vendor name, e.g. INTEL, AMD, HYGON, ..., ANY - * The name is expanded to X86_VENDOR_@_vendor - * @_family: The family number or X86_FAMILY_ANY - * @_model: The model number, model constant or X86_MODEL_ANY - * @_feature: A X86_FEATURE bit or X86_FEATURE_ANY - * @_data: Driver specific data or NULL. The internal storage - * format is unsigned long. The supplied value, pointer - * etc. is casted to unsigned long internally. - * - * The steppings arguments of X86_MATCH_VENDOR_FAM_MODEL_STEPPINGS_FEATURE() is - * set to wildcards. - */ -#define X86_MATCH_VENDOR_FAM_MODEL_FEATURE(vendor, family, model, feature, data) \ - X86_MATCH_VENDOR_FAM_MODEL_STEPPINGS_FEATURE(vendor, family, model, \ - X86_STEPPING_ANY, feature, data) - /** * X86_MATCH_VENDOR_FAM_FEATURE - Macro for matching vendor, family and CPU feature * @vendor: The vendor name, e.g. INTEL, AMD, HYGON, ..., ANY @@ -124,13 +94,10 @@ * @data: Driver specific data or NULL. The internal storage * format is unsigned long. The supplied value, pointer * etc. is casted to unsigned long internally. - * - * All other missing arguments of X86_MATCH_VENDOR_FAM_MODEL_FEATURE() are - * set to wildcards. */ -#define X86_MATCH_VENDOR_FAM_FEATURE(vendor, family, feature, data) \ - X86_MATCH_VENDOR_FAM_MODEL_FEATURE(vendor, family, \ - X86_MODEL_ANY, feature, data) +#define X86_MATCH_VENDOR_FAM_FEATURE(vendor, family, feature, data) \ + X86_MATCH_CPU(X86_VENDOR_##vendor, family, X86_MODEL_ANY, \ + X86_STEPPING_ANY, feature, X86_CPU_TYPE_ANY, data) /** * X86_MATCH_VENDOR_FEATURE - Macro for matching vendor and CPU feature @@ -140,12 +107,10 @@ * @data: Driver specific data or NULL. The internal storage * format is unsigned long. The supplied value, pointer * etc. is casted to unsigned long internally. - * - * All other missing arguments of X86_MATCH_VENDOR_FAM_MODEL_FEATURE() are - * set to wildcards. */ -#define X86_MATCH_VENDOR_FEATURE(vendor, feature, data) \ - X86_MATCH_VENDOR_FAM_FEATURE(vendor, X86_FAMILY_ANY, feature, data) +#define X86_MATCH_VENDOR_FEATURE(vendor, feature, data) \ + X86_MATCH_CPU(X86_VENDOR_##vendor, X86_FAMILY_ANY, X86_MODEL_ANY, \ + X86_STEPPING_ANY, feature, X86_CPU_TYPE_ANY, data) /** * X86_MATCH_FEATURE - Macro for matching a CPU feature @@ -153,12 +118,10 @@ * @data: Driver specific data or NULL. The internal storage * format is unsigned long. The supplied value, pointer * etc. is casted to unsigned long internally. - * - * All other missing arguments of X86_MATCH_VENDOR_FAM_MODEL_FEATURE() are - * set to wildcards. */ -#define X86_MATCH_FEATURE(feature, data) \ - X86_MATCH_VENDOR_FEATURE(ANY, feature, data) +#define X86_MATCH_FEATURE(feature, data) \ + X86_MATCH_CPU(X86_VENDOR_ANY, X86_FAMILY_ANY, X86_MODEL_ANY, \ + X86_STEPPING_ANY, feature, X86_CPU_TYPE_ANY, data) /** * X86_MATCH_VENDOR_FAM_MODEL - Match vendor, family and model @@ -169,13 +132,10 @@ * @data: Driver specific data or NULL. The internal storage * format is unsigned long. The supplied value, pointer * etc. is casted to unsigned long internally. - * - * All other missing arguments of X86_MATCH_VENDOR_FAM_MODEL_FEATURE() are - * set to wildcards. */ -#define X86_MATCH_VENDOR_FAM_MODEL(vendor, family, model, data) \ - X86_MATCH_VENDOR_FAM_MODEL_FEATURE(vendor, family, model, \ - X86_FEATURE_ANY, data) +#define X86_MATCH_VENDOR_FAM_MODEL(vendor, family, model, data) \ + X86_MATCH_CPU(X86_VENDOR_##vendor, family, model, X86_STEPPING_ANY, \ + X86_FEATURE_ANY, X86_CPU_TYPE_ANY, data) /** * X86_MATCH_VENDOR_FAM - Match vendor and family @@ -185,12 +145,10 @@ * @data: Driver specific data or NULL. The internal storage * format is unsigned long. The supplied value, pointer * etc. is casted to unsigned long internally. - * - * All other missing arguments to X86_MATCH_VENDOR_FAM_MODEL_FEATURE() are - * set of wildcards. */ -#define X86_MATCH_VENDOR_FAM(vendor, family, data) \ - X86_MATCH_VENDOR_FAM_MODEL(vendor, family, X86_MODEL_ANY, data) +#define X86_MATCH_VENDOR_FAM(vendor, family, data) \ + X86_MATCH_CPU(X86_VENDOR_##vendor, family, X86_MODEL_ANY, \ + X86_STEPPING_ANY, X86_FEATURE_ANY, X86_CPU_TYPE_ANY, data) /** * X86_MATCH_VFM - Match encoded vendor/family/model @@ -198,32 +156,26 @@ * @data: Driver specific data or NULL. The internal storage * format is unsigned long. The supplied value, pointer * etc. is cast to unsigned long internally. - * - * Stepping and feature are set to wildcards */ -#define X86_MATCH_VFM(vfm, data) \ - X86_MATCH_VENDORID_FAM_MODEL_STEPPINGS_FEATURE( \ - VFM_VENDOR(vfm), \ - VFM_FAMILY(vfm), \ - VFM_MODEL(vfm), \ - X86_STEPPING_ANY, X86_FEATURE_ANY, data) +#define X86_MATCH_VFM(vfm, data) \ + X86_MATCH_CPU(VFM_VENDOR(vfm), VFM_FAMILY(vfm), VFM_MODEL(vfm), \ + X86_STEPPING_ANY, X86_FEATURE_ANY, X86_CPU_TYPE_ANY, data) +#define __X86_STEPPINGS(mins, maxs) GENMASK(maxs, mins) /** - * X86_MATCH_VFM_STEPPINGS - Match encoded vendor/family/model/stepping + * X86_MATCH_VFM_STEPS - Match encoded vendor/family/model and steppings + * range. * @vfm: Encoded 8-bits each for vendor, family, model - * @steppings: Bitmask of steppings to match + * @min_step: Lowest stepping number to match + * @max_step: Highest stepping number to match * @data: Driver specific data or NULL. The internal storage * format is unsigned long. The supplied value, pointer * etc. is cast to unsigned long internally. - * - * feature is set to wildcard */ -#define X86_MATCH_VFM_STEPPINGS(vfm, steppings, data) \ - X86_MATCH_VENDORID_FAM_MODEL_STEPPINGS_FEATURE( \ - VFM_VENDOR(vfm), \ - VFM_FAMILY(vfm), \ - VFM_MODEL(vfm), \ - steppings, X86_FEATURE_ANY, data) +#define X86_MATCH_VFM_STEPS(vfm, min_step, max_step, data) \ + X86_MATCH_CPU(VFM_VENDOR(vfm), VFM_FAMILY(vfm), VFM_MODEL(vfm), \ + __X86_STEPPINGS(min_step, max_step), X86_FEATURE_ANY, \ + X86_CPU_TYPE_ANY, data) /** * X86_MATCH_VFM_FEATURE - Match encoded vendor/family/model/feature @@ -232,15 +184,22 @@ * @data: Driver specific data or NULL. The internal storage * format is unsigned long. The supplied value, pointer * etc. is cast to unsigned long internally. - * - * Steppings is set to wildcard */ -#define X86_MATCH_VFM_FEATURE(vfm, feature, data) \ - X86_MATCH_VENDORID_FAM_MODEL_STEPPINGS_FEATURE( \ - VFM_VENDOR(vfm), \ - VFM_FAMILY(vfm), \ - VFM_MODEL(vfm), \ - X86_STEPPING_ANY, feature, data) +#define X86_MATCH_VFM_FEATURE(vfm, feature, data) \ + X86_MATCH_CPU(VFM_VENDOR(vfm), VFM_FAMILY(vfm), VFM_MODEL(vfm), \ + X86_STEPPING_ANY, feature, X86_CPU_TYPE_ANY, data) + +/** + * X86_MATCH_VFM_CPU_TYPE - Match encoded vendor/family/model/type + * @vfm: Encoded 8-bits each for vendor, family, model + * @type: CPU type e.g. P-core, E-core + * @data: Driver specific data or NULL. The internal storage + * format is unsigned long. The supplied value, pointer + * etc. is cast to unsigned long internally. + */ +#define X86_MATCH_VFM_CPU_TYPE(vfm, type, data) \ + X86_MATCH_CPU(VFM_VENDOR(vfm), VFM_FAMILY(vfm), VFM_MODEL(vfm), \ + X86_STEPPING_ANY, X86_FEATURE_ANY, type, data) /* * Match specific microcode revisions. @@ -278,5 +237,6 @@ struct x86_cpu_desc { extern const struct x86_cpu_id *x86_match_cpu(const struct x86_cpu_id *match); extern bool x86_cpu_has_min_microcode_rev(const struct x86_cpu_desc *table); +extern bool x86_match_min_microcode_rev(const struct x86_cpu_id *table); #endif /* _ASM_X86_CPU_DEVICE_ID */ diff --git a/arch/x86/include/asm/cpufeatures.h b/arch/x86/include/asm/cpufeatures.h index d96277dcea..45f78d74ec 100644 --- a/arch/x86/include/asm/cpufeatures.h +++ b/arch/x86/include/asm/cpufeatures.h @@ -210,7 +210,6 @@ #define X86_FEATURE_MBA ( 7*32+18) /* "mba" Memory Bandwidth Allocation */ #define X86_FEATURE_RSB_CTXSW ( 7*32+19) /* Fill RSB on context switches */ #define X86_FEATURE_PERFMON_V2 ( 7*32+20) /* "perfmon_v2" AMD Performance Monitoring Version 2 */ -#define X86_FEATURE_USE_IBPB ( 7*32+21) /* Indirect Branch Prediction Barrier enabled */ #define X86_FEATURE_USE_IBRS_FW ( 7*32+22) /* Use IBRS during runtime firmware calls */ #define X86_FEATURE_SPEC_STORE_BYPASS_DISABLE ( 7*32+23) /* Disable Speculative Store Bypass. */ #define X86_FEATURE_LS_CFG_SSBD ( 7*32+24) /* AMD SSBD implementation via LS_CFG MSR */ @@ -464,6 +463,11 @@ #define X86_FEATURE_SBPB (20*32+27) /* Selective Branch Prediction Barrier */ #define X86_FEATURE_IBPB_BRTYPE (20*32+28) /* MSR_PRED_CMD[IBPB] flushes all branch type predictions */ #define X86_FEATURE_SRSO_NO (20*32+29) /* CPU is not affected by SRSO */ +#define X86_FEATURE_SRSO_USER_KERNEL_NO (20*32+30) /* CPU is not affected by SRSO across user/kernel boundaries */ +#define X86_FEATURE_SRSO_BP_SPEC_REDUCE (20*32+31) /* + * BP_CFG[BpSpecReduce] can be used to mitigate SRSO for VMs. + * (SRSO_MSR_FIX in the official doc). + */ /* * Extended auxiliary flags: Linux defined - for features scattered in various @@ -477,6 +481,8 @@ #define X86_FEATURE_CLEAR_BHB_HW (21*32+ 3) /* BHI_DIS_S HW control enabled */ #define X86_FEATURE_CLEAR_BHB_LOOP_ON_VMEXIT (21*32+ 4) /* Clear branch history at vmexit using SW loop */ #define X86_FEATURE_FAST_CPPC (21*32 + 5) /* AMD Fast CPPC */ +#define X86_FEATURE_AMD_HETEROGENEOUS_CORES (21*32 + 6) /* Heterogeneous Core Topology */ +#define X86_FEATURE_INDIRECT_THUNK_ITS (21*32 + 9) /* Use thunk for indirect branches in lower half of cacheline */ /* * BUG word(s) @@ -528,4 +534,7 @@ #define X86_BUG_RFDS X86_BUG(1*32 + 2) /* "rfds" CPU is vulnerable to Register File Data Sampling */ #define X86_BUG_BHI X86_BUG(1*32 + 3) /* "bhi" CPU is affected by Branch History Injection */ #define X86_BUG_IBPB_NO_RET X86_BUG(1*32 + 4) /* "ibpb_no_ret" IBPB omits return target predictions */ +#define X86_BUG_SPECTRE_V2_USER X86_BUG(1*32 + 5) /* "spectre_v2_user" CPU is affected by Spectre variant 2 attack between user processes */ +#define X86_BUG_ITS X86_BUG(1*32 + 6) /* "its" CPU is affected by Indirect Target Selection */ +#define X86_BUG_ITS_NATIVE_ONLY X86_BUG(1*32 + 7) /* "its_native_only" CPU is affected by ITS, VMX is not affected */ #endif /* _ASM_X86_CPUFEATURES_H */ diff --git a/arch/x86/include/asm/intel-family.h b/arch/x86/include/asm/intel-family.h index 1a42f82966..7367644720 100644 --- a/arch/x86/include/asm/intel-family.h +++ b/arch/x86/include/asm/intel-family.h @@ -183,4 +183,10 @@ /* Family 19 */ #define INTEL_PANTHERCOVE_X IFM(19, 0x01) /* Diamond Rapids */ +/* CPU core types */ +enum intel_cpu_type { + INTEL_CPU_TYPE_ATOM = 0x20, + INTEL_CPU_TYPE_CORE = 0x40, +}; + #endif /* _ASM_X86_INTEL_FAMILY_H */ diff --git a/arch/x86/include/asm/msr-index.h b/arch/x86/include/asm/msr-index.h index 3ae84c3b8e..8afb902aaa 100644 --- a/arch/x86/include/asm/msr-index.h +++ b/arch/x86/include/asm/msr-index.h @@ -209,6 +209,14 @@ * VERW clears CPU Register * File. */ +#define ARCH_CAP_ITS_NO BIT_ULL(62) /* + * Not susceptible to + * Indirect Target Selection. + * This bit is not set by + * HW, but is synthesized by + * VMMs for guests to know + * their affected status. + */ #define MSR_IA32_FLUSH_CMD 0x0000010b #define L1D_FLUSH BIT(0) /* @@ -717,6 +725,7 @@ /* Zen4 */ #define MSR_ZEN4_BP_CFG 0xc001102e +#define MSR_ZEN4_BP_CFG_BP_SPEC_REDUCE_BIT 4 #define MSR_ZEN4_BP_CFG_SHARED_BTB_FIX_BIT 5 /* Fam 19h MSRs */ diff --git a/arch/x86/include/asm/nospec-branch.h b/arch/x86/include/asm/nospec-branch.h index 96b410b1d4..41b53a9e43 100644 --- a/arch/x86/include/asm/nospec-branch.h +++ b/arch/x86/include/asm/nospec-branch.h @@ -283,7 +283,7 @@ * typically has NO_MELTDOWN). * * While retbleed_untrain_ret() doesn't clobber anything but requires stack, - * entry_ibpb() will clobber AX, CX, DX. + * write_ibpb() will clobber AX, CX, DX. * * As such, this must be placed after every *SWITCH_TO_KERNEL_CR3 at a point * where we have a stack but before any RET instruction. @@ -293,7 +293,7 @@ VALIDATE_UNRET_END CALL_UNTRAIN_RET ALTERNATIVE_2 "", \ - "call entry_ibpb", \ibpb_feature, \ + "call write_ibpb", \ibpb_feature, \ __stringify(\call_depth_insns), X86_FEATURE_CALL_DEPTH #endif .endm @@ -356,10 +356,14 @@ ".long 999b\n\t" \ ".popsection\n\t" +#define ITS_THUNK_SIZE 64 + typedef u8 retpoline_thunk_t[RETPOLINE_THUNK_SIZE]; +typedef u8 its_thunk_t[ITS_THUNK_SIZE]; extern retpoline_thunk_t __x86_indirect_thunk_array[]; extern retpoline_thunk_t __x86_indirect_call_thunk_array[]; extern retpoline_thunk_t __x86_indirect_jump_thunk_array[]; +extern its_thunk_t __x86_indirect_its_thunk_array[]; #ifdef CONFIG_MITIGATION_RETHUNK extern void __x86_return_thunk(void); @@ -383,12 +387,18 @@ static inline void srso_return_thunk(void) {} static inline void srso_alias_return_thunk(void) {} #endif +#ifdef CONFIG_MITIGATION_ITS +extern void its_return_thunk(void); +#else +static inline void its_return_thunk(void) {} +#endif + extern void retbleed_return_thunk(void); extern void srso_return_thunk(void); extern void srso_alias_return_thunk(void); extern void entry_untrain_ret(void); -extern void entry_ibpb(void); +extern void write_ibpb(void); #ifdef CONFIG_X86_64 extern void clear_bhb_loop(void); @@ -529,11 +539,11 @@ void alternative_msr_write(unsigned int msr, u64 val, unsigned int feature) : "memory"); } -extern u64 x86_pred_cmd; - static inline void indirect_branch_prediction_barrier(void) { - alternative_msr_write(MSR_IA32_PRED_CMD, x86_pred_cmd, X86_FEATURE_USE_IBPB); + asm_inline volatile(ALTERNATIVE("", "call write_ibpb", X86_FEATURE_IBPB) + : ASM_CALL_CONSTRAINT + :: "rax", "rcx", "rdx", "memory"); } /* The Intel SPEC CTRL MSR base value cache */ @@ -570,6 +580,8 @@ DECLARE_STATIC_KEY_FALSE(switch_to_cond_stibp); DECLARE_STATIC_KEY_FALSE(switch_mm_cond_ibpb); DECLARE_STATIC_KEY_FALSE(switch_mm_always_ibpb); +DECLARE_STATIC_KEY_FALSE(switch_vcpu_ibpb); + DECLARE_STATIC_KEY_FALSE(mds_idle_clear); DECLARE_STATIC_KEY_FALSE(switch_mm_cond_l1d_flush); diff --git a/arch/x86/include/asm/pgtable_types.h b/arch/x86/include/asm/pgtable_types.h index 6f82e75b61..bfee9fda3e 100644 --- a/arch/x86/include/asm/pgtable_types.h +++ b/arch/x86/include/asm/pgtable_types.h @@ -33,6 +33,7 @@ #define _PAGE_BIT_CPA_TEST _PAGE_BIT_SOFTW1 #define _PAGE_BIT_UFFD_WP _PAGE_BIT_SOFTW2 /* userfaultfd wrprotected */ #define _PAGE_BIT_SOFT_DIRTY _PAGE_BIT_SOFTW3 /* software dirty tracking */ +#define _PAGE_BIT_KERNEL_4K _PAGE_BIT_SOFTW3 /* page must not be converted to large */ #define _PAGE_BIT_DEVMAP _PAGE_BIT_SOFTW4 #ifdef CONFIG_X86_64 @@ -62,6 +63,7 @@ #define _PAGE_PAT_LARGE (_AT(pteval_t, 1) << _PAGE_BIT_PAT_LARGE) #define _PAGE_SPECIAL (_AT(pteval_t, 1) << _PAGE_BIT_SPECIAL) #define _PAGE_CPA_TEST (_AT(pteval_t, 1) << _PAGE_BIT_CPA_TEST) +#define _PAGE_KERNEL_4K (_AT(pteval_t, 1) << _PAGE_BIT_KERNEL_4K) #ifdef CONFIG_X86_INTEL_MEMORY_PROTECTION_KEYS #define _PAGE_PKEY_BIT0 (_AT(pteval_t, 1) << _PAGE_BIT_PKEY_BIT0) #define _PAGE_PKEY_BIT1 (_AT(pteval_t, 1) << _PAGE_BIT_PKEY_BIT1) diff --git a/arch/x86/include/asm/processor.h b/arch/x86/include/asm/processor.h index 4a686f0e5d..301cf01af5 100644 --- a/arch/x86/include/asm/processor.h +++ b/arch/x86/include/asm/processor.h @@ -36,6 +36,7 @@ struct vm86; #include #include #include +#include /* * We handle most unaligned accesses in hardware. On the other hand @@ -139,6 +140,31 @@ struct cpuinfo_x86 { __u32 extended_cpuid_level; /* Maximum supported CPUID level, -1=no CPUID: */ int cpuid_level; + + /* + * Insert the 4-byte cpu_type into a 4-byte hole in the cpuinfo_x86 + * structure to avoid breaking kABI. + */ + RH_KABI_FILL_HOLE( + // Hardware defined CPU-type + union { + u32 topo_cpu_type; + struct { + // CPUID.1A.EAX[23-0] + u32 intel_native_model_id :24; + // CPUID.1A.EAX[31-24] + u32 topo_intel_type :8; + }; + struct { + // CPUID 0x80000026.EBX + u32 amd_num_processors :16; + u32 amd_power_eff_ranking :8; + u32 amd_native_model_id :4; + u32 topo_amd_type :4; + }; + }; + ) + /* * Align to size of unsigned long because the x86_capability array * is passed to bitops which require the alignment. Use unnamed @@ -736,6 +762,7 @@ extern enum l1tf_mitigations l1tf_mitigation; enum mds_mitigations { MDS_MITIGATION_OFF, + MDS_MITIGATION_AUTO, MDS_MITIGATION_FULL, MDS_MITIGATION_VMWERV, }; diff --git a/arch/x86/include/asm/set_memory.h b/arch/x86/include/asm/set_memory.h index 4b2abce2e3..cc62ef70cc 100644 --- a/arch/x86/include/asm/set_memory.h +++ b/arch/x86/include/asm/set_memory.h @@ -89,6 +89,7 @@ int set_pages_rw(struct page *page, int numpages); int set_direct_map_invalid_noflush(struct page *page); int set_direct_map_default_noflush(struct page *page); +int set_direct_map_valid_noflush(struct page *page, unsigned nr, bool valid); bool kernel_page_present(struct page *page); extern int kernel_set_to_readonly; diff --git a/arch/x86/include/asm/text-patching.h b/arch/x86/include/asm/text-patching.h index 6259f1937f..ab9e143ec9 100644 --- a/arch/x86/include/asm/text-patching.h +++ b/arch/x86/include/asm/text-patching.h @@ -35,6 +35,7 @@ extern void *text_poke(void *addr, const void *opcode, size_t len); extern void text_poke_sync(void); extern void *text_poke_kgdb(void *addr, const void *opcode, size_t len); extern void *text_poke_copy(void *addr, const void *opcode, size_t len); +#define text_poke_copy text_poke_copy extern void *text_poke_copy_locked(void *addr, const void *opcode, size_t len, bool core_ok); extern void *text_poke_set(void *addr, int c, size_t len); extern int poke_int3_handler(struct pt_regs *regs); diff --git a/arch/x86/include/asm/topology.h b/arch/x86/include/asm/topology.h index 92f3664dd9..fd41103ad3 100644 --- a/arch/x86/include/asm/topology.h +++ b/arch/x86/include/asm/topology.h @@ -114,6 +114,12 @@ enum x86_topology_domains { TOPO_MAX_DOMAIN, }; +enum x86_topology_cpu_type { + TOPO_CPU_TYPE_PERFORMANCE, + TOPO_CPU_TYPE_EFFICIENCY, + TOPO_CPU_TYPE_UNKNOWN, +}; + struct x86_topology_system { unsigned int dom_shifts[TOPO_MAX_DOMAIN]; unsigned int dom_size[TOPO_MAX_DOMAIN]; @@ -149,6 +155,9 @@ extern unsigned int __max_threads_per_core; extern unsigned int __num_threads_per_package; extern unsigned int __num_cores_per_package; +const char *get_topology_cpu_type_name(struct cpuinfo_x86 *c); +enum x86_topology_cpu_type get_topology_cpu_type(struct cpuinfo_x86 *c); + static inline unsigned int topology_max_packages(void) { return __max_logical_packages; diff --git a/arch/x86/kernel/alternative.c b/arch/x86/kernel/alternative.c index d17518ca19..cee33d0710 100644 --- a/arch/x86/kernel/alternative.c +++ b/arch/x86/kernel/alternative.c @@ -31,6 +31,7 @@ #include #include #include +#include int __read_mostly alternatives_patched; @@ -581,7 +582,8 @@ static int emit_indirect(int op, int reg, u8 *bytes) return i; } -static int emit_call_track_retpoline(void *addr, struct insn *insn, int reg, u8 *bytes) +static int __emit_trampoline(void *addr, struct insn *insn, u8 *bytes, + void *call_dest, void *jmp_dest) { u8 op = insn->opcode.bytes[0]; int i = 0; @@ -602,7 +604,7 @@ static int emit_call_track_retpoline(void *addr, struct insn *insn, int reg, u8 switch (op) { case CALL_INSN_OPCODE: __text_gen_insn(bytes+i, op, addr+i, - __x86_indirect_call_thunk_array[reg], + call_dest, CALL_INSN_SIZE); i += CALL_INSN_SIZE; break; @@ -610,7 +612,7 @@ static int emit_call_track_retpoline(void *addr, struct insn *insn, int reg, u8 case JMP32_INSN_OPCODE: clang_jcc: __text_gen_insn(bytes+i, op, addr+i, - __x86_indirect_jump_thunk_array[reg], + jmp_dest, JMP32_INSN_SIZE); i += JMP32_INSN_SIZE; break; @@ -625,6 +627,35 @@ clang_jcc: return i; } +static int emit_call_track_retpoline(void *addr, struct insn *insn, int reg, u8 *bytes) +{ + return __emit_trampoline(addr, insn, bytes, + __x86_indirect_call_thunk_array[reg], + __x86_indirect_jump_thunk_array[reg]); +} + +#ifdef CONFIG_MITIGATION_ITS +static int emit_its_trampoline(void *addr, struct insn *insn, int reg, u8 *bytes) +{ + return __emit_trampoline(addr, insn, bytes, + __x86_indirect_its_thunk_array[reg], + __x86_indirect_its_thunk_array[reg]); +} + +/* Check if an indirect branch is at ITS-unsafe address */ +static bool cpu_wants_indirect_its_thunk_at(unsigned long addr, int reg) +{ + if (!cpu_feature_enabled(X86_FEATURE_INDIRECT_THUNK_ITS)) + return false; + + /* Indirect branch opcode is 2 or 3 bytes depending on reg */ + addr += 1 + reg / 8; + + /* Lower-half of the cacheline? */ + return !(addr & 0x20); +} +#endif + /* * Rewrite the compiler generated retpoline thunk calls. * @@ -699,6 +730,15 @@ static int patch_retpoline(void *addr, struct insn *insn, u8 *bytes) bytes[i++] = 0xe8; /* LFENCE */ } +#ifdef CONFIG_MITIGATION_ITS + /* + * Check if the address of last byte of emitted-indirect is in + * lower-half of the cacheline. Such branches need ITS mitigation. + */ + if (cpu_wants_indirect_its_thunk_at((unsigned long)addr + i, reg)) + return emit_its_trampoline(addr, insn, reg, bytes); +#endif + ret = emit_indirect(op, reg, bytes + i); if (ret < 0) return ret; @@ -770,6 +810,21 @@ void __init_or_module noinline apply_retpolines(s32 *start, s32 *end) #ifdef CONFIG_MITIGATION_RETHUNK +bool cpu_wants_rethunk(void) +{ + return cpu_feature_enabled(X86_FEATURE_RETHUNK); +} + +bool cpu_wants_rethunk_at(void *addr) +{ + if (!cpu_feature_enabled(X86_FEATURE_RETHUNK)) + return false; + if (x86_return_thunk != its_return_thunk) + return true; + + return !((unsigned long)addr & 0x20); +} + /* * Rewrite the compiler generated return thunk tail-calls. * @@ -786,7 +841,7 @@ static int patch_return(void *addr, struct insn *insn, u8 *bytes) int i = 0; /* Patch the custom return thunks... */ - if (cpu_feature_enabled(X86_FEATURE_RETHUNK)) { + if (cpu_wants_rethunk_at(addr)) { i = JMP32_INSN_SIZE; __text_gen_insn(bytes, JMP32_INSN_OPCODE, addr, x86_return_thunk, i); } else { @@ -803,7 +858,7 @@ void __init_or_module noinline apply_returns(s32 *start, s32 *end) { s32 *s; - if (cpu_feature_enabled(X86_FEATURE_RETHUNK)) + if (cpu_wants_rethunk()) static_call_force_reinit(); for (s = start; s < end; s++) { @@ -1665,6 +1720,8 @@ static noinline void __init alt_reloc_selftest(void) void __init alternative_instructions(void) { + u64 ibt; + int3_selftest(); /* @@ -1691,6 +1748,9 @@ void __init alternative_instructions(void) */ paravirt_set_cap(); + /* Keep CET-IBT disabled until caller/callee are patched */ + ibt = ibt_save(/*disable*/ true); + __apply_fineibt(__retpoline_sites, __retpoline_sites_end, __cfi_sites, __cfi_sites_end, true); @@ -1714,6 +1774,8 @@ void __init alternative_instructions(void) */ apply_seal_endbr(__ibt_endbr_seal, __ibt_endbr_seal_end); + ibt_restore(ibt); + #ifdef CONFIG_SMP /* Patch to UP if other cpus not imminent. */ if (!noreplace_smp && (num_present_cpus() == 1 || setup_max_cpus <= 1)) { diff --git a/arch/x86/kernel/apic/apic.c b/arch/x86/kernel/apic/apic.c index c5fb28e645..b16bda1ffa 100644 --- a/arch/x86/kernel/apic/apic.c +++ b/arch/x86/kernel/apic/apic.c @@ -509,19 +509,19 @@ static struct clock_event_device lapic_clockevent = { static DEFINE_PER_CPU(struct clock_event_device, lapic_events); static const struct x86_cpu_id deadline_match[] __initconst = { - X86_MATCH_VFM_STEPPINGS(INTEL_HASWELL_X, X86_STEPPINGS(0x2, 0x2), 0x3a), /* EP */ - X86_MATCH_VFM_STEPPINGS(INTEL_HASWELL_X, X86_STEPPINGS(0x4, 0x4), 0x0f), /* EX */ + X86_MATCH_VFM_STEPS(INTEL_HASWELL_X, 0x2, 0x2, 0x3a), /* EP */ + X86_MATCH_VFM_STEPS(INTEL_HASWELL_X, 0x4, 0x4, 0x0f), /* EX */ X86_MATCH_VFM(INTEL_BROADWELL_X, 0x0b000020), - X86_MATCH_VFM_STEPPINGS(INTEL_BROADWELL_D, X86_STEPPINGS(0x2, 0x2), 0x00000011), - X86_MATCH_VFM_STEPPINGS(INTEL_BROADWELL_D, X86_STEPPINGS(0x3, 0x3), 0x0700000e), - X86_MATCH_VFM_STEPPINGS(INTEL_BROADWELL_D, X86_STEPPINGS(0x4, 0x4), 0x0f00000c), - X86_MATCH_VFM_STEPPINGS(INTEL_BROADWELL_D, X86_STEPPINGS(0x5, 0x5), 0x0e000003), + X86_MATCH_VFM_STEPS(INTEL_BROADWELL_D, 0x2, 0x2, 0x00000011), + X86_MATCH_VFM_STEPS(INTEL_BROADWELL_D, 0x3, 0x3, 0x0700000e), + X86_MATCH_VFM_STEPS(INTEL_BROADWELL_D, 0x4, 0x4, 0x0f00000c), + X86_MATCH_VFM_STEPS(INTEL_BROADWELL_D, 0x5, 0x5, 0x0e000003), - X86_MATCH_VFM_STEPPINGS(INTEL_SKYLAKE_X, X86_STEPPINGS(0x3, 0x3), 0x01000136), - X86_MATCH_VFM_STEPPINGS(INTEL_SKYLAKE_X, X86_STEPPINGS(0x4, 0x4), 0x02000014), - X86_MATCH_VFM_STEPPINGS(INTEL_SKYLAKE_X, X86_STEPPINGS(0x5, 0xf), 0), + X86_MATCH_VFM_STEPS(INTEL_SKYLAKE_X, 0x3, 0x3, 0x01000136), + X86_MATCH_VFM_STEPS(INTEL_SKYLAKE_X, 0x4, 0x4, 0x02000014), + X86_MATCH_VFM_STEPS(INTEL_SKYLAKE_X, 0x5, 0xf, 0), X86_MATCH_VFM(INTEL_HASWELL, 0x22), X86_MATCH_VFM(INTEL_HASWELL_L, 0x20), diff --git a/arch/x86/kernel/cpu/bugs.c b/arch/x86/kernel/cpu/bugs.c index 47a01d4028..c6dcc03a43 100644 --- a/arch/x86/kernel/cpu/bugs.c +++ b/arch/x86/kernel/cpu/bugs.c @@ -49,6 +49,7 @@ static void __init srbds_select_mitigation(void); static void __init l1d_flush_select_mitigation(void); static void __init srso_select_mitigation(void); static void __init gds_select_mitigation(void); +static void __init its_select_mitigation(void); /* The base value of the SPEC_CTRL MSR without task-specific bits set */ u64 x86_spec_ctrl_base; @@ -59,7 +60,6 @@ DEFINE_PER_CPU(u64, x86_spec_ctrl_current); EXPORT_PER_CPU_SYMBOL_GPL(x86_spec_ctrl_current); u64 x86_pred_cmd __ro_after_init = PRED_CMD_IBPB; -EXPORT_SYMBOL_GPL(x86_pred_cmd); static u64 __ro_after_init x86_arch_cap_msr; @@ -67,6 +67,14 @@ static DEFINE_MUTEX(spec_ctrl_mutex); void (*x86_return_thunk)(void) __ro_after_init = __x86_return_thunk; +static void __init set_return_thunk(void *thunk) +{ + if (x86_return_thunk != __x86_return_thunk) + pr_warn("x86/bugs: return thunk changed\n"); + + x86_return_thunk = thunk; +} + /* Update SPEC_CTRL MSR and its cached copy unconditionally */ static void update_spec_ctrl(u64 val) { @@ -113,6 +121,10 @@ DEFINE_STATIC_KEY_FALSE(switch_mm_cond_ibpb); /* Control unconditional IBPB in switch_mm() */ DEFINE_STATIC_KEY_FALSE(switch_mm_always_ibpb); +/* Control IBPB on vCPU load */ +DEFINE_STATIC_KEY_FALSE(switch_vcpu_ibpb); +EXPORT_SYMBOL_GPL(switch_vcpu_ibpb); + /* Control MDS CPU buffer clear before idling (halt, mwait) */ DEFINE_STATIC_KEY_FALSE(mds_idle_clear); EXPORT_SYMBOL_GPL(mds_idle_clear); @@ -175,6 +187,7 @@ void __init cpu_select_mitigations(void) */ srso_select_mitigation(); gds_select_mitigation(); + its_select_mitigation(); } /* @@ -234,7 +247,7 @@ static void x86_amd_ssb_disable(void) /* Default mitigation for MDS-affected CPUs */ static enum mds_mitigations mds_mitigation __ro_after_init = - IS_ENABLED(CONFIG_MITIGATION_MDS) ? MDS_MITIGATION_FULL : MDS_MITIGATION_OFF; + IS_ENABLED(CONFIG_MITIGATION_MDS) ? MDS_MITIGATION_AUTO : MDS_MITIGATION_OFF; static bool mds_nosmt __ro_after_init = false; static const char * const mds_strings[] = { @@ -243,6 +256,40 @@ static const char * const mds_strings[] = { [MDS_MITIGATION_VMWERV] = "Vulnerable: Clear CPU buffers attempted, no microcode", }; +enum taa_mitigations { + TAA_MITIGATION_OFF, + TAA_MITIGATION_AUTO, + TAA_MITIGATION_UCODE_NEEDED, + TAA_MITIGATION_VERW, + TAA_MITIGATION_TSX_DISABLED, +}; + +/* Default mitigation for TAA-affected CPUs */ +static enum taa_mitigations taa_mitigation __ro_after_init = + IS_ENABLED(CONFIG_MITIGATION_TAA) ? TAA_MITIGATION_AUTO : TAA_MITIGATION_OFF; + +enum mmio_mitigations { + MMIO_MITIGATION_OFF, + MMIO_MITIGATION_AUTO, + MMIO_MITIGATION_UCODE_NEEDED, + MMIO_MITIGATION_VERW, +}; + +/* Default mitigation for Processor MMIO Stale Data vulnerabilities */ +static enum mmio_mitigations mmio_mitigation __ro_after_init = + IS_ENABLED(CONFIG_MITIGATION_MMIO_STALE_DATA) ? MMIO_MITIGATION_AUTO : MMIO_MITIGATION_OFF; + +enum rfds_mitigations { + RFDS_MITIGATION_OFF, + RFDS_MITIGATION_AUTO, + RFDS_MITIGATION_VERW, + RFDS_MITIGATION_UCODE_NEEDED, +}; + +/* Default mitigation for Register File Data Sampling */ +static enum rfds_mitigations rfds_mitigation __ro_after_init = + IS_ENABLED(CONFIG_MITIGATION_RFDS) ? RFDS_MITIGATION_AUTO : RFDS_MITIGATION_OFF; + static void __init mds_select_mitigation(void) { if (!boot_cpu_has_bug(X86_BUG_MDS) || cpu_mitigations_off()) { @@ -250,6 +297,9 @@ static void __init mds_select_mitigation(void) return; } + if (mds_mitigation == MDS_MITIGATION_AUTO) + mds_mitigation = MDS_MITIGATION_FULL; + if (mds_mitigation == MDS_MITIGATION_FULL) { if (!boot_cpu_has(X86_FEATURE_MD_CLEAR)) mds_mitigation = MDS_MITIGATION_VMWERV; @@ -286,16 +336,6 @@ early_param("mds", mds_cmdline); #undef pr_fmt #define pr_fmt(fmt) "TAA: " fmt -enum taa_mitigations { - TAA_MITIGATION_OFF, - TAA_MITIGATION_UCODE_NEEDED, - TAA_MITIGATION_VERW, - TAA_MITIGATION_TSX_DISABLED, -}; - -/* Default mitigation for TAA-affected CPUs */ -static enum taa_mitigations taa_mitigation __ro_after_init = - IS_ENABLED(CONFIG_MITIGATION_TAA) ? TAA_MITIGATION_VERW : TAA_MITIGATION_OFF; static bool taa_nosmt __ro_after_init; static const char * const taa_strings[] = { @@ -386,15 +426,6 @@ early_param("tsx_async_abort", tsx_async_abort_parse_cmdline); #undef pr_fmt #define pr_fmt(fmt) "MMIO Stale Data: " fmt -enum mmio_mitigations { - MMIO_MITIGATION_OFF, - MMIO_MITIGATION_UCODE_NEEDED, - MMIO_MITIGATION_VERW, -}; - -/* Default mitigation for Processor MMIO Stale Data vulnerabilities */ -static enum mmio_mitigations mmio_mitigation __ro_after_init = - IS_ENABLED(CONFIG_MITIGATION_MMIO_STALE_DATA) ? MMIO_MITIGATION_VERW : MMIO_MITIGATION_OFF; static bool mmio_nosmt __ro_after_init = false; static const char * const mmio_strings[] = { @@ -483,16 +514,6 @@ early_param("mmio_stale_data", mmio_stale_data_parse_cmdline); #undef pr_fmt #define pr_fmt(fmt) "Register File Data Sampling: " fmt -enum rfds_mitigations { - RFDS_MITIGATION_OFF, - RFDS_MITIGATION_VERW, - RFDS_MITIGATION_UCODE_NEEDED, -}; - -/* Default mitigation for Register File Data Sampling */ -static enum rfds_mitigations rfds_mitigation __ro_after_init = - IS_ENABLED(CONFIG_MITIGATION_RFDS) ? RFDS_MITIGATION_VERW : RFDS_MITIGATION_OFF; - static const char * const rfds_strings[] = { [RFDS_MITIGATION_OFF] = "Vulnerable", [RFDS_MITIGATION_VERW] = "Mitigation: Clear Register File", @@ -508,6 +529,9 @@ static void __init rfds_select_mitigation(void) if (rfds_mitigation == RFDS_MITIGATION_OFF) return; + if (rfds_mitigation == RFDS_MITIGATION_AUTO) + rfds_mitigation = RFDS_MITIGATION_VERW; + if (x86_arch_cap_msr & ARCH_CAP_RFDS_CLEAR) setup_force_cpu_cap(X86_FEATURE_CLEAR_CPU_BUF); else @@ -1104,7 +1128,7 @@ do_cmd_auto: setup_force_cpu_cap(X86_FEATURE_RETHUNK); setup_force_cpu_cap(X86_FEATURE_UNRET); - x86_return_thunk = retbleed_return_thunk; + set_return_thunk(retbleed_return_thunk); if (boot_cpu_data.x86_vendor != X86_VENDOR_AMD && boot_cpu_data.x86_vendor != X86_VENDOR_HYGON) @@ -1115,6 +1139,8 @@ do_cmd_auto: case RETBLEED_MITIGATION_IBPB: setup_force_cpu_cap(X86_FEATURE_ENTRY_IBPB); + setup_force_cpu_cap(X86_FEATURE_IBPB_ON_VMEXIT); + mitigate_smt = true; /* * IBPB on entry already obviates the need for @@ -1124,11 +1150,8 @@ do_cmd_auto: setup_clear_cpu_cap(X86_FEATURE_UNRET); setup_clear_cpu_cap(X86_FEATURE_RETHUNK); - setup_force_cpu_cap(X86_FEATURE_IBPB_ON_VMEXIT); - mitigate_smt = true; - /* - * There is no need for RSB filling: entry_ibpb() ensures + * There is no need for RSB filling: write_ibpb() ensures * all predictions, including the RSB, are invalidated, * regardless of IBPB implementation. */ @@ -1140,7 +1163,7 @@ do_cmd_auto: setup_force_cpu_cap(X86_FEATURE_RETHUNK); setup_force_cpu_cap(X86_FEATURE_CALL_DEPTH); - x86_return_thunk = call_depth_return_thunk; + set_return_thunk(call_depth_return_thunk); break; default: @@ -1174,6 +1197,145 @@ do_cmd_auto: pr_info("%s\n", retbleed_strings[retbleed_mitigation]); } +#undef pr_fmt +#define pr_fmt(fmt) "ITS: " fmt + +enum its_mitigation_cmd { + ITS_CMD_OFF, + ITS_CMD_ON, + ITS_CMD_VMEXIT, + ITS_CMD_RSB_STUFF, +}; + +enum its_mitigation { + ITS_MITIGATION_OFF, + ITS_MITIGATION_VMEXIT_ONLY, + ITS_MITIGATION_ALIGNED_THUNKS, + ITS_MITIGATION_RETPOLINE_STUFF, +}; + +static const char * const its_strings[] = { + [ITS_MITIGATION_OFF] = "Vulnerable", + [ITS_MITIGATION_VMEXIT_ONLY] = "Mitigation: Vulnerable, KVM: Not affected", + [ITS_MITIGATION_ALIGNED_THUNKS] = "Mitigation: Aligned branch/return thunks", + [ITS_MITIGATION_RETPOLINE_STUFF] = "Mitigation: Retpolines, Stuffing RSB", +}; + +static enum its_mitigation its_mitigation __ro_after_init = ITS_MITIGATION_ALIGNED_THUNKS; + +static enum its_mitigation_cmd its_cmd __ro_after_init = + IS_ENABLED(CONFIG_MITIGATION_ITS) ? ITS_CMD_ON : ITS_CMD_OFF; + +static int __init its_parse_cmdline(char *str) +{ + if (!str) + return -EINVAL; + + if (!IS_ENABLED(CONFIG_MITIGATION_ITS)) { + pr_err("Mitigation disabled at compile time, ignoring option (%s)", str); + return 0; + } + + if (!strcmp(str, "off")) { + its_cmd = ITS_CMD_OFF; + } else if (!strcmp(str, "on")) { + its_cmd = ITS_CMD_ON; + } else if (!strcmp(str, "force")) { + its_cmd = ITS_CMD_ON; + setup_force_cpu_bug(X86_BUG_ITS); + } else if (!strcmp(str, "vmexit")) { + its_cmd = ITS_CMD_VMEXIT; + } else if (!strcmp(str, "stuff")) { + its_cmd = ITS_CMD_RSB_STUFF; + } else { + pr_err("Ignoring unknown indirect_target_selection option (%s).", str); + } + + return 0; +} +early_param("indirect_target_selection", its_parse_cmdline); + +static void __init its_select_mitigation(void) +{ + enum its_mitigation_cmd cmd = its_cmd; + + if (!boot_cpu_has_bug(X86_BUG_ITS) || cpu_mitigations_off()) { + its_mitigation = ITS_MITIGATION_OFF; + return; + } + + /* Retpoline+CDT mitigates ITS, bail out */ + if (boot_cpu_has(X86_FEATURE_RETPOLINE) && + boot_cpu_has(X86_FEATURE_CALL_DEPTH)) { + its_mitigation = ITS_MITIGATION_RETPOLINE_STUFF; + goto out; + } + + /* Exit early to avoid irrelevant warnings */ + if (cmd == ITS_CMD_OFF) { + its_mitigation = ITS_MITIGATION_OFF; + goto out; + } + if (spectre_v2_enabled == SPECTRE_V2_NONE) { + pr_err("WARNING: Spectre-v2 mitigation is off, disabling ITS\n"); + its_mitigation = ITS_MITIGATION_OFF; + goto out; + } + if (!IS_ENABLED(CONFIG_MITIGATION_RETPOLINE) || + !IS_ENABLED(CONFIG_MITIGATION_RETHUNK)) { + pr_err("WARNING: ITS mitigation depends on retpoline and rethunk support\n"); + its_mitigation = ITS_MITIGATION_OFF; + goto out; + } + if (IS_ENABLED(CONFIG_DEBUG_FORCE_FUNCTION_ALIGN_64B)) { + pr_err("WARNING: ITS mitigation is not compatible with CONFIG_DEBUG_FORCE_FUNCTION_ALIGN_64B\n"); + its_mitigation = ITS_MITIGATION_OFF; + goto out; + } + if (boot_cpu_has(X86_FEATURE_RETPOLINE_LFENCE)) { + pr_err("WARNING: ITS mitigation is not compatible with lfence mitigation\n"); + its_mitigation = ITS_MITIGATION_OFF; + goto out; + } + + if (cmd == ITS_CMD_RSB_STUFF && + (!boot_cpu_has(X86_FEATURE_RETPOLINE) || !IS_ENABLED(CONFIG_MITIGATION_CALL_DEPTH_TRACKING))) { + pr_err("RSB stuff mitigation not supported, using default\n"); + cmd = ITS_CMD_ON; + } + + switch (cmd) { + case ITS_CMD_OFF: + its_mitigation = ITS_MITIGATION_OFF; + break; + case ITS_CMD_VMEXIT: + if (boot_cpu_has_bug(X86_BUG_ITS_NATIVE_ONLY)) { + its_mitigation = ITS_MITIGATION_VMEXIT_ONLY; + goto out; + } + fallthrough; + case ITS_CMD_ON: + its_mitigation = ITS_MITIGATION_ALIGNED_THUNKS; + if (!boot_cpu_has(X86_FEATURE_RETPOLINE)) + setup_force_cpu_cap(X86_FEATURE_INDIRECT_THUNK_ITS); + setup_force_cpu_cap(X86_FEATURE_RETHUNK); + set_return_thunk(its_return_thunk); + break; + case ITS_CMD_RSB_STUFF: + its_mitigation = ITS_MITIGATION_RETPOLINE_STUFF; + setup_force_cpu_cap(X86_FEATURE_RETHUNK); + setup_force_cpu_cap(X86_FEATURE_CALL_DEPTH); + set_return_thunk(call_depth_return_thunk); + if (retbleed_mitigation == RETBLEED_MITIGATION_NONE) { + retbleed_mitigation = RETBLEED_MITIGATION_STUFF; + pr_info("Retbleed mitigation updated to stuffing\n"); + } + break; + } +out: + pr_info("%s\n", its_strings[its_mitigation]); +} + #undef pr_fmt #define pr_fmt(fmt) "Spectre V2 : " fmt @@ -1294,9 +1456,13 @@ static __ro_after_init enum spectre_v2_mitigation_cmd spectre_v2_cmd; static enum spectre_v2_user_cmd __init spectre_v2_parse_user_cmdline(void) { + enum spectre_v2_user_cmd mode; char arg[20]; int ret, i; + mode = IS_ENABLED(CONFIG_MITIGATION_SPECTRE_V2) ? + SPECTRE_V2_USER_CMD_AUTO : SPECTRE_V2_USER_CMD_NONE; + switch (spectre_v2_cmd) { case SPECTRE_V2_CMD_NONE: return SPECTRE_V2_USER_CMD_NONE; @@ -1309,7 +1475,7 @@ spectre_v2_parse_user_cmdline(void) ret = cmdline_find_option(boot_command_line, "spectre_v2_user", arg, sizeof(arg)); if (ret < 0) - return SPECTRE_V2_USER_CMD_AUTO; + return mode; for (i = 0; i < ARRAY_SIZE(v2_user_options); i++) { if (match_option(arg, ret, v2_user_options[i].option)) { @@ -1319,8 +1485,8 @@ spectre_v2_parse_user_cmdline(void) } } - pr_err("Unknown user space protection option (%s). Switching to AUTO select\n", arg); - return SPECTRE_V2_USER_CMD_AUTO; + pr_err("Unknown user space protection option (%s). Switching to default\n", arg); + return mode; } static inline bool spectre_v2_in_ibrs_mode(enum spectre_v2_mitigation mode) @@ -1332,16 +1498,11 @@ static void __init spectre_v2_user_select_mitigation(void) { enum spectre_v2_user_mitigation mode = SPECTRE_V2_USER_NONE; - bool smt_possible = IS_ENABLED(CONFIG_SMP); enum spectre_v2_user_cmd cmd; if (!boot_cpu_has(X86_FEATURE_IBPB) && !boot_cpu_has(X86_FEATURE_STIBP)) return; - if (cpu_smt_control == CPU_SMT_FORCE_DISABLED || - cpu_smt_control == CPU_SMT_NOT_SUPPORTED) - smt_possible = false; - cmd = spectre_v2_parse_user_cmdline(); switch (cmd) { case SPECTRE_V2_USER_CMD_NONE: @@ -1365,7 +1526,7 @@ spectre_v2_user_select_mitigation(void) /* Initialize Indirect Branch Prediction Barrier */ if (boot_cpu_has(X86_FEATURE_IBPB)) { - setup_force_cpu_cap(X86_FEATURE_USE_IBPB); + static_branch_enable(&switch_vcpu_ibpb); spectre_v2_user_ibpb = mode; switch (cmd) { @@ -1402,7 +1563,7 @@ spectre_v2_user_select_mitigation(void) * so allow for STIBP to be selected in those cases. */ if (!boot_cpu_has(X86_FEATURE_STIBP) || - !smt_possible || + !cpu_smt_possible() || (spectre_v2_in_eibrs_mode(spectre_v2_enabled) && !boot_cpu_has(X86_FEATURE_AUTOIBRS))) return; @@ -1579,51 +1740,54 @@ static void __init spec_ctrl_disable_kernel_rrsba(void) rrsba_disabled = true; } -static void __init spectre_v2_determine_rsb_fill_type_at_vmexit(enum spectre_v2_mitigation mode) +static void __init spectre_v2_select_rsb_mitigation(enum spectre_v2_mitigation mode) { /* - * Similar to context switches, there are two types of RSB attacks - * after VM exit: + * WARNING! There are many subtleties to consider when changing *any* + * code related to RSB-related mitigations. Before doing so, carefully + * read the following document, and update if necessary: * - * 1) RSB underflow + * Documentation/admin-guide/hw-vuln/rsb.rst * - * 2) Poisoned RSB entry + * In an overly simplified nutshell: * - * When retpoline is enabled, both are mitigated by filling/clearing - * the RSB. + * - User->user RSB attacks are conditionally mitigated during + * context switches by cond_mitigation -> write_ibpb(). * - * When IBRS is enabled, while #1 would be mitigated by the IBRS branch - * prediction isolation protections, RSB still needs to be cleared - * because of #2. Note that SMEP provides no protection here, unlike - * user-space-poisoned RSB entries. + * - User->kernel and guest->host attacks are mitigated by eIBRS or + * RSB filling. * - * eIBRS should protect against RSB poisoning, but if the EIBRS_PBRSB - * bug is present then a LITE version of RSB protection is required, - * just a single call needs to retire before a RET is executed. + * Though, depending on config, note that other alternative + * mitigations may end up getting used instead, e.g., IBPB on + * entry/vmexit, call depth tracking, or return thunks. */ + switch (mode) { case SPECTRE_V2_NONE: - return; + break; - case SPECTRE_V2_EIBRS_LFENCE: case SPECTRE_V2_EIBRS: - if (boot_cpu_has_bug(X86_BUG_EIBRS_PBRSB)) { - setup_force_cpu_cap(X86_FEATURE_RSB_VMEXIT_LITE); - pr_info("Spectre v2 / PBRSB-eIBRS: Retire a single CALL on VMEXIT\n"); - } - return; - + case SPECTRE_V2_EIBRS_LFENCE: case SPECTRE_V2_EIBRS_RETPOLINE: + if (boot_cpu_has_bug(X86_BUG_EIBRS_PBRSB)) { + pr_info("Spectre v2 / PBRSB-eIBRS: Retire a single CALL on VMEXIT\n"); + setup_force_cpu_cap(X86_FEATURE_RSB_VMEXIT_LITE); + } + break; + case SPECTRE_V2_RETPOLINE: case SPECTRE_V2_LFENCE: case SPECTRE_V2_IBRS: + pr_info("Spectre v2 / SpectreRSB: Filling RSB on context switch and VMEXIT\n"); + setup_force_cpu_cap(X86_FEATURE_RSB_CTXSW); setup_force_cpu_cap(X86_FEATURE_RSB_VMEXIT); - pr_info("Spectre v2 / SpectreRSB : Filling RSB on VMEXIT\n"); - return; - } + break; - pr_warn_once("Unknown Spectre v2 mode, disabling RSB mitigation at VM exit"); - dump_stack(); + default: + pr_warn_once("Unknown Spectre v2 mode, disabling RSB mitigation\n"); + dump_stack(); + break; + } } /* @@ -1817,48 +1981,7 @@ static void __init spectre_v2_select_mitigation(void) spectre_v2_enabled = mode; pr_info("%s\n", spectre_v2_strings[mode]); - /* - * If Spectre v2 protection has been enabled, fill the RSB during a - * context switch. In general there are two types of RSB attacks - * across context switches, for which the CALLs/RETs may be unbalanced. - * - * 1) RSB underflow - * - * Some Intel parts have "bottomless RSB". When the RSB is empty, - * speculated return targets may come from the branch predictor, - * which could have a user-poisoned BTB or BHB entry. - * - * AMD has it even worse: *all* returns are speculated from the BTB, - * regardless of the state of the RSB. - * - * When IBRS or eIBRS is enabled, the "user -> kernel" attack - * scenario is mitigated by the IBRS branch prediction isolation - * properties, so the RSB buffer filling wouldn't be necessary to - * protect against this type of attack. - * - * The "user -> user" attack scenario is mitigated by RSB filling. - * - * 2) Poisoned RSB entry - * - * If the 'next' in-kernel return stack is shorter than 'prev', - * 'next' could be tricked into speculating with a user-poisoned RSB - * entry. - * - * The "user -> kernel" attack scenario is mitigated by SMEP and - * eIBRS. - * - * The "user -> user" scenario, also known as SpectreBHB, requires - * RSB clearing. - * - * So to mitigate all cases, unconditionally fill RSB on context - * switches. - * - * FIXME: Is this pointless for retbleed-affected AMD? - */ - setup_force_cpu_cap(X86_FEATURE_RSB_CTXSW); - pr_info("Spectre v2 / SpectreRSB mitigation: Filling RSB on context switch\n"); - - spectre_v2_determine_rsb_fill_type_at_vmexit(mode); + spectre_v2_select_rsb_mitigation(mode); /* * Retpoline protects the kernel, but doesn't protect firmware. IBRS @@ -1974,6 +2097,7 @@ void cpu_bugs_smt_update(void) switch (mds_mitigation) { case MDS_MITIGATION_FULL: + case MDS_MITIGATION_AUTO: case MDS_MITIGATION_VMWERV: if (sched_smt_active() && !boot_cpu_has(X86_BUG_MSBDS_ONLY)) pr_warn_once(MDS_MSG_SMT); @@ -1985,6 +2109,7 @@ void cpu_bugs_smt_update(void) switch (taa_mitigation) { case TAA_MITIGATION_VERW: + case TAA_MITIGATION_AUTO: case TAA_MITIGATION_UCODE_NEEDED: if (sched_smt_active()) pr_warn_once(TAA_MSG_SMT); @@ -1996,6 +2121,7 @@ void cpu_bugs_smt_update(void) switch (mmio_mitigation) { case MMIO_MITIGATION_VERW: + case MMIO_MITIGATION_AUTO: case MMIO_MITIGATION_UCODE_NEEDED: if (sched_smt_active()) pr_warn_once(MMIO_MSG_SMT); @@ -2523,6 +2649,7 @@ enum srso_mitigation { SRSO_MITIGATION_SAFE_RET, SRSO_MITIGATION_IBPB, SRSO_MITIGATION_IBPB_ON_VMEXIT, + SRSO_MITIGATION_BP_SPEC_REDUCE, }; enum srso_mitigation_cmd { @@ -2540,7 +2667,8 @@ static const char * const srso_strings[] = { [SRSO_MITIGATION_MICROCODE] = "Vulnerable: Microcode, no safe RET", [SRSO_MITIGATION_SAFE_RET] = "Mitigation: Safe RET", [SRSO_MITIGATION_IBPB] = "Mitigation: IBPB", - [SRSO_MITIGATION_IBPB_ON_VMEXIT] = "Mitigation: IBPB on VMEXIT only" + [SRSO_MITIGATION_IBPB_ON_VMEXIT] = "Mitigation: IBPB on VMEXIT only", + [SRSO_MITIGATION_BP_SPEC_REDUCE] = "Mitigation: Reduced Speculation" }; static enum srso_mitigation srso_mitigation __ro_after_init = SRSO_MITIGATION_NONE; @@ -2579,7 +2707,7 @@ static void __init srso_select_mitigation(void) srso_cmd == SRSO_CMD_OFF) { if (boot_cpu_has(X86_FEATURE_SBPB)) x86_pred_cmd = PRED_CMD_SBPB; - return; + goto out; } if (has_microcode) { @@ -2591,7 +2719,7 @@ static void __init srso_select_mitigation(void) */ if (boot_cpu_data.x86 < 0x19 && !cpu_smt_possible()) { setup_force_cpu_cap(X86_FEATURE_SRSO_NO); - return; + goto out; } if (retbleed_mitigation == RETBLEED_MITIGATION_IBPB) { @@ -2615,6 +2743,9 @@ static void __init srso_select_mitigation(void) break; case SRSO_CMD_SAFE_RET: + if (boot_cpu_has(X86_FEATURE_SRSO_USER_KERNEL_NO)) + goto ibpb_on_vmexit; + if (IS_ENABLED(CONFIG_MITIGATION_SRSO)) { /* * Enable the return thunk for generated code @@ -2625,10 +2756,10 @@ static void __init srso_select_mitigation(void) if (boot_cpu_data.x86 == 0x19) { setup_force_cpu_cap(X86_FEATURE_SRSO_ALIAS); - x86_return_thunk = srso_alias_return_thunk; + set_return_thunk(srso_alias_return_thunk); } else { setup_force_cpu_cap(X86_FEATURE_SRSO); - x86_return_thunk = srso_return_thunk; + set_return_thunk(srso_return_thunk); } if (has_microcode) srso_mitigation = SRSO_MITIGATION_SAFE_RET; @@ -2643,6 +2774,7 @@ static void __init srso_select_mitigation(void) if (IS_ENABLED(CONFIG_MITIGATION_IBPB_ENTRY)) { if (has_microcode) { setup_force_cpu_cap(X86_FEATURE_ENTRY_IBPB); + setup_force_cpu_cap(X86_FEATURE_IBPB_ON_VMEXIT); srso_mitigation = SRSO_MITIGATION_IBPB; /* @@ -2652,35 +2784,57 @@ static void __init srso_select_mitigation(void) */ setup_clear_cpu_cap(X86_FEATURE_UNRET); setup_clear_cpu_cap(X86_FEATURE_RETHUNK); - } - } else { - pr_err("WARNING: kernel not compiled with MITIGATION_IBPB_ENTRY.\n"); - } - break; - - case SRSO_CMD_IBPB_ON_VMEXIT: - if (IS_ENABLED(CONFIG_MITIGATION_SRSO)) { - if (!boot_cpu_has(X86_FEATURE_ENTRY_IBPB) && has_microcode) { - setup_force_cpu_cap(X86_FEATURE_IBPB_ON_VMEXIT); - srso_mitigation = SRSO_MITIGATION_IBPB_ON_VMEXIT; /* - * There is no need for RSB filling: entry_ibpb() ensures + * There is no need for RSB filling: write_ibpb() ensures * all predictions, including the RSB, are invalidated, * regardless of IBPB implementation. */ setup_clear_cpu_cap(X86_FEATURE_RSB_VMEXIT); } } else { - pr_err("WARNING: kernel not compiled with MITIGATION_SRSO.\n"); - } + pr_err("WARNING: kernel not compiled with MITIGATION_IBPB_ENTRY.\n"); + } + break; + +ibpb_on_vmexit: + case SRSO_CMD_IBPB_ON_VMEXIT: + if (boot_cpu_has(X86_FEATURE_SRSO_BP_SPEC_REDUCE)) { + pr_notice("Reducing speculation to address VM/HV SRSO attack vector.\n"); + srso_mitigation = SRSO_MITIGATION_BP_SPEC_REDUCE; + break; + } + + if (IS_ENABLED(CONFIG_MITIGATION_IBPB_ENTRY)) { + if (has_microcode) { + setup_force_cpu_cap(X86_FEATURE_IBPB_ON_VMEXIT); + srso_mitigation = SRSO_MITIGATION_IBPB_ON_VMEXIT; + + /* + * There is no need for RSB filling: write_ibpb() ensures + * all predictions, including the RSB, are invalidated, + * regardless of IBPB implementation. + */ + setup_clear_cpu_cap(X86_FEATURE_RSB_VMEXIT); + } + } else { + pr_err("WARNING: kernel not compiled with MITIGATION_IBPB_ENTRY.\n"); + } break; default: break; } out: - pr_info("%s\n", srso_strings[srso_mitigation]); + /* + * Clear the feature flag if this mitigation is not selected as that + * feature flag controls the BpSpecReduce MSR bit toggling in KVM. + */ + if (srso_mitigation != SRSO_MITIGATION_BP_SPEC_REDUCE) + setup_clear_cpu_cap(X86_FEATURE_SRSO_BP_SPEC_REDUCE); + + if (srso_mitigation != SRSO_MITIGATION_NONE) + pr_info("%s\n", srso_strings[srso_mitigation]); } #undef pr_fmt @@ -2795,6 +2949,11 @@ static ssize_t rfds_show_state(char *buf) return sysfs_emit(buf, "%s\n", rfds_strings[rfds_mitigation]); } +static ssize_t its_show_state(char *buf) +{ + return sysfs_emit(buf, "%s\n", its_strings[its_mitigation]); +} + static char *stibp_state(void) { if (spectre_v2_in_eibrs_mode(spectre_v2_enabled) && @@ -2977,6 +3136,9 @@ static ssize_t cpu_show_common(struct device *dev, struct device_attribute *attr case X86_BUG_RFDS: return rfds_show_state(buf); + case X86_BUG_ITS: + return its_show_state(buf); + default: break; } @@ -3056,6 +3218,11 @@ ssize_t cpu_show_reg_file_data_sampling(struct device *dev, struct device_attrib { return cpu_show_common(dev, attr, buf, X86_BUG_RFDS); } + +ssize_t cpu_show_indirect_target_selection(struct device *dev, struct device_attribute *attr, char *buf) +{ + return cpu_show_common(dev, attr, buf, X86_BUG_ITS); +} #endif void __warn_thunk(void) diff --git a/arch/x86/kernel/cpu/common.c b/arch/x86/kernel/cpu/common.c index 49c5c308de..4b47615929 100644 --- a/arch/x86/kernel/cpu/common.c +++ b/arch/x86/kernel/cpu/common.c @@ -1204,8 +1204,11 @@ static const __initconst struct x86_cpu_id cpu_vuln_whitelist[] = { #define VULNBL(vendor, family, model, blacklist) \ X86_MATCH_VENDOR_FAM_MODEL(vendor, family, model, blacklist) -#define VULNBL_INTEL_STEPPINGS(vfm, steppings, issues) \ - X86_MATCH_VFM_STEPPINGS(vfm, steppings, issues) +#define VULNBL_INTEL_STEPS(vfm, max_stepping, issues) \ + X86_MATCH_VFM_STEPS(vfm, X86_STEP_MIN, max_stepping, issues) + +#define VULNBL_INTEL_TYPE(vfm, cpu_type, issues) \ + X86_MATCH_VFM_CPU_TYPE(vfm, INTEL_CPU_TYPE_##cpu_type, issues) #define VULNBL_AMD(family, blacklist) \ VULNBL(AMD, family, X86_MODEL_ANY, blacklist) @@ -1228,51 +1231,59 @@ static const __initconst struct x86_cpu_id cpu_vuln_whitelist[] = { #define GDS BIT(6) /* CPU is affected by Register File Data Sampling */ #define RFDS BIT(7) +/* CPU is affected by Indirect Target Selection */ +#define ITS BIT(8) +/* CPU is affected by Indirect Target Selection, but guest-host isolation is not affected */ +#define ITS_NATIVE_ONLY BIT(9) static const struct x86_cpu_id cpu_vuln_blacklist[] __initconst = { - VULNBL_INTEL_STEPPINGS(INTEL_IVYBRIDGE, X86_STEPPING_ANY, SRBDS), - VULNBL_INTEL_STEPPINGS(INTEL_HASWELL, X86_STEPPING_ANY, SRBDS), - VULNBL_INTEL_STEPPINGS(INTEL_HASWELL_L, X86_STEPPING_ANY, SRBDS), - VULNBL_INTEL_STEPPINGS(INTEL_HASWELL_G, X86_STEPPING_ANY, SRBDS), - VULNBL_INTEL_STEPPINGS(INTEL_HASWELL_X, X86_STEPPING_ANY, MMIO), - VULNBL_INTEL_STEPPINGS(INTEL_BROADWELL_D, X86_STEPPING_ANY, MMIO), - VULNBL_INTEL_STEPPINGS(INTEL_BROADWELL_G, X86_STEPPING_ANY, SRBDS), - VULNBL_INTEL_STEPPINGS(INTEL_BROADWELL_X, X86_STEPPING_ANY, MMIO), - VULNBL_INTEL_STEPPINGS(INTEL_BROADWELL, X86_STEPPING_ANY, SRBDS), - VULNBL_INTEL_STEPPINGS(INTEL_SKYLAKE_X, X86_STEPPING_ANY, MMIO | RETBLEED | GDS), - VULNBL_INTEL_STEPPINGS(INTEL_SKYLAKE_L, X86_STEPPING_ANY, MMIO | RETBLEED | GDS | SRBDS), - VULNBL_INTEL_STEPPINGS(INTEL_SKYLAKE, X86_STEPPING_ANY, MMIO | RETBLEED | GDS | SRBDS), - VULNBL_INTEL_STEPPINGS(INTEL_KABYLAKE_L, X86_STEPPING_ANY, MMIO | RETBLEED | GDS | SRBDS), - VULNBL_INTEL_STEPPINGS(INTEL_KABYLAKE, X86_STEPPING_ANY, MMIO | RETBLEED | GDS | SRBDS), - VULNBL_INTEL_STEPPINGS(INTEL_CANNONLAKE_L, X86_STEPPING_ANY, RETBLEED), - VULNBL_INTEL_STEPPINGS(INTEL_ICELAKE_L, X86_STEPPING_ANY, MMIO | MMIO_SBDS | RETBLEED | GDS), - VULNBL_INTEL_STEPPINGS(INTEL_ICELAKE_D, X86_STEPPING_ANY, MMIO | GDS), - VULNBL_INTEL_STEPPINGS(INTEL_ICELAKE_X, X86_STEPPING_ANY, MMIO | GDS), - VULNBL_INTEL_STEPPINGS(INTEL_COMETLAKE, X86_STEPPING_ANY, MMIO | MMIO_SBDS | RETBLEED | GDS), - VULNBL_INTEL_STEPPINGS(INTEL_COMETLAKE_L, X86_STEPPINGS(0x0, 0x0), MMIO | RETBLEED), - VULNBL_INTEL_STEPPINGS(INTEL_COMETLAKE_L, X86_STEPPING_ANY, MMIO | MMIO_SBDS | RETBLEED | GDS), - VULNBL_INTEL_STEPPINGS(INTEL_TIGERLAKE_L, X86_STEPPING_ANY, GDS), - VULNBL_INTEL_STEPPINGS(INTEL_TIGERLAKE, X86_STEPPING_ANY, GDS), - VULNBL_INTEL_STEPPINGS(INTEL_LAKEFIELD, X86_STEPPING_ANY, MMIO | MMIO_SBDS | RETBLEED), - VULNBL_INTEL_STEPPINGS(INTEL_ROCKETLAKE, X86_STEPPING_ANY, MMIO | RETBLEED | GDS), - VULNBL_INTEL_STEPPINGS(INTEL_ALDERLAKE, X86_STEPPING_ANY, RFDS), - VULNBL_INTEL_STEPPINGS(INTEL_ALDERLAKE_L, X86_STEPPING_ANY, RFDS), - VULNBL_INTEL_STEPPINGS(INTEL_RAPTORLAKE, X86_STEPPING_ANY, RFDS), - VULNBL_INTEL_STEPPINGS(INTEL_RAPTORLAKE_P, X86_STEPPING_ANY, RFDS), - VULNBL_INTEL_STEPPINGS(INTEL_RAPTORLAKE_S, X86_STEPPING_ANY, RFDS), - VULNBL_INTEL_STEPPINGS(INTEL_ATOM_GRACEMONT, X86_STEPPING_ANY, RFDS), - VULNBL_INTEL_STEPPINGS(INTEL_ATOM_TREMONT, X86_STEPPING_ANY, MMIO | MMIO_SBDS | RFDS), - VULNBL_INTEL_STEPPINGS(INTEL_ATOM_TREMONT_D, X86_STEPPING_ANY, MMIO | RFDS), - VULNBL_INTEL_STEPPINGS(INTEL_ATOM_TREMONT_L, X86_STEPPING_ANY, MMIO | MMIO_SBDS | RFDS), - VULNBL_INTEL_STEPPINGS(INTEL_ATOM_GOLDMONT, X86_STEPPING_ANY, RFDS), - VULNBL_INTEL_STEPPINGS(INTEL_ATOM_GOLDMONT_D, X86_STEPPING_ANY, RFDS), - VULNBL_INTEL_STEPPINGS(INTEL_ATOM_GOLDMONT_PLUS, X86_STEPPING_ANY, RFDS), + VULNBL_INTEL_STEPS(INTEL_IVYBRIDGE, X86_STEP_MAX, SRBDS), + VULNBL_INTEL_STEPS(INTEL_HASWELL, X86_STEP_MAX, SRBDS), + VULNBL_INTEL_STEPS(INTEL_HASWELL_L, X86_STEP_MAX, SRBDS), + VULNBL_INTEL_STEPS(INTEL_HASWELL_G, X86_STEP_MAX, SRBDS), + VULNBL_INTEL_STEPS(INTEL_HASWELL_X, X86_STEP_MAX, MMIO), + VULNBL_INTEL_STEPS(INTEL_BROADWELL_D, X86_STEP_MAX, MMIO), + VULNBL_INTEL_STEPS(INTEL_BROADWELL_G, X86_STEP_MAX, SRBDS), + VULNBL_INTEL_STEPS(INTEL_BROADWELL_X, X86_STEP_MAX, MMIO), + VULNBL_INTEL_STEPS(INTEL_BROADWELL, X86_STEP_MAX, SRBDS), + VULNBL_INTEL_STEPS(INTEL_SKYLAKE_X, 0x5, MMIO | RETBLEED | GDS), + VULNBL_INTEL_STEPS(INTEL_SKYLAKE_X, X86_STEP_MAX, MMIO | RETBLEED | GDS | ITS), + VULNBL_INTEL_STEPS(INTEL_SKYLAKE_L, X86_STEP_MAX, MMIO | RETBLEED | GDS | SRBDS), + VULNBL_INTEL_STEPS(INTEL_SKYLAKE, X86_STEP_MAX, MMIO | RETBLEED | GDS | SRBDS), + VULNBL_INTEL_STEPS(INTEL_KABYLAKE_L, 0xb, MMIO | RETBLEED | GDS | SRBDS), + VULNBL_INTEL_STEPS(INTEL_KABYLAKE_L, X86_STEP_MAX, MMIO | RETBLEED | GDS | SRBDS | ITS), + VULNBL_INTEL_STEPS(INTEL_KABYLAKE, 0xc, MMIO | RETBLEED | GDS | SRBDS), + VULNBL_INTEL_STEPS(INTEL_KABYLAKE, X86_STEP_MAX, MMIO | RETBLEED | GDS | SRBDS | ITS), + VULNBL_INTEL_STEPS(INTEL_CANNONLAKE_L, X86_STEP_MAX, RETBLEED), + VULNBL_INTEL_STEPS(INTEL_ICELAKE_L, X86_STEP_MAX, MMIO | MMIO_SBDS | RETBLEED | GDS | ITS | ITS_NATIVE_ONLY), + VULNBL_INTEL_STEPS(INTEL_ICELAKE_D, X86_STEP_MAX, MMIO | GDS | ITS | ITS_NATIVE_ONLY), + VULNBL_INTEL_STEPS(INTEL_ICELAKE_X, X86_STEP_MAX, MMIO | GDS | ITS | ITS_NATIVE_ONLY), + VULNBL_INTEL_STEPS(INTEL_COMETLAKE, X86_STEP_MAX, MMIO | MMIO_SBDS | RETBLEED | GDS | ITS), + VULNBL_INTEL_STEPS(INTEL_COMETLAKE_L, 0x0, MMIO | RETBLEED | ITS), + VULNBL_INTEL_STEPS(INTEL_COMETLAKE_L, X86_STEP_MAX, MMIO | MMIO_SBDS | RETBLEED | GDS | ITS), + VULNBL_INTEL_STEPS(INTEL_TIGERLAKE_L, X86_STEP_MAX, GDS | ITS | ITS_NATIVE_ONLY), + VULNBL_INTEL_STEPS(INTEL_TIGERLAKE, X86_STEP_MAX, GDS | ITS | ITS_NATIVE_ONLY), + VULNBL_INTEL_STEPS(INTEL_LAKEFIELD, X86_STEP_MAX, MMIO | MMIO_SBDS | RETBLEED), + VULNBL_INTEL_STEPS(INTEL_ROCKETLAKE, X86_STEP_MAX, MMIO | RETBLEED | GDS | ITS | ITS_NATIVE_ONLY), + VULNBL_INTEL_TYPE(INTEL_ALDERLAKE, ATOM, RFDS), + VULNBL_INTEL_STEPS(INTEL_ALDERLAKE_L, X86_STEP_MAX, RFDS), + VULNBL_INTEL_TYPE(INTEL_RAPTORLAKE, ATOM, RFDS), + VULNBL_INTEL_STEPS(INTEL_RAPTORLAKE_P, X86_STEP_MAX, RFDS), + VULNBL_INTEL_STEPS(INTEL_RAPTORLAKE_S, X86_STEP_MAX, RFDS), + VULNBL_INTEL_STEPS(INTEL_ATOM_GRACEMONT, X86_STEP_MAX, RFDS), + VULNBL_INTEL_STEPS(INTEL_ATOM_TREMONT, X86_STEP_MAX, MMIO | MMIO_SBDS | RFDS), + VULNBL_INTEL_STEPS(INTEL_ATOM_TREMONT_D, X86_STEP_MAX, MMIO | RFDS), + VULNBL_INTEL_STEPS(INTEL_ATOM_TREMONT_L, X86_STEP_MAX, MMIO | MMIO_SBDS | RFDS), + VULNBL_INTEL_STEPS(INTEL_ATOM_GOLDMONT, X86_STEP_MAX, RFDS), + VULNBL_INTEL_STEPS(INTEL_ATOM_GOLDMONT_D, X86_STEP_MAX, RFDS), + VULNBL_INTEL_STEPS(INTEL_ATOM_GOLDMONT_PLUS, X86_STEP_MAX, RFDS), VULNBL_AMD(0x15, RETBLEED), VULNBL_AMD(0x16, RETBLEED), VULNBL_AMD(0x17, RETBLEED | SMT_RSB | SRSO), VULNBL_HYGON(0x18, RETBLEED | SMT_RSB | SRSO), VULNBL_AMD(0x19, SRSO), + VULNBL_AMD(0x1a, SRSO), {} }; @@ -1318,6 +1329,32 @@ static bool __init vulnerable_to_rfds(u64 x86_arch_cap_msr) return cpu_matches(cpu_vuln_blacklist, RFDS); } +static bool __init vulnerable_to_its(u64 x86_arch_cap_msr) +{ + /* The "immunity" bit trumps everything else: */ + if (x86_arch_cap_msr & ARCH_CAP_ITS_NO) + return false; + if (boot_cpu_data.x86_vendor != X86_VENDOR_INTEL) + return false; + + /* None of the affected CPUs have BHI_CTRL */ + if (boot_cpu_has(X86_FEATURE_BHI_CTRL)) + return false; + + /* + * If a VMM did not expose ITS_NO, assume that a guest could + * be running on a vulnerable hardware or may migrate to such + * hardware. + */ + if (boot_cpu_has(X86_FEATURE_HYPERVISOR)) + return true; + + if (cpu_matches(cpu_vuln_blacklist, ITS)) + return true; + + return false; +} + static void __init cpu_set_bug_bits(struct cpuinfo_x86 *c) { u64 x86_arch_cap_msr = x86_read_arch_cap_msr(); @@ -1332,8 +1369,10 @@ static void __init cpu_set_bug_bits(struct cpuinfo_x86 *c) setup_force_cpu_bug(X86_BUG_SPECTRE_V1); - if (!cpu_matches(cpu_vuln_whitelist, NO_SPECTRE_V2)) + if (!cpu_matches(cpu_vuln_whitelist, NO_SPECTRE_V2)) { setup_force_cpu_bug(X86_BUG_SPECTRE_V2); + setup_force_cpu_bug(X86_BUG_SPECTRE_V2_USER); + } if (!cpu_matches(cpu_vuln_whitelist, NO_SSB) && !(x86_arch_cap_msr & ARCH_CAP_SSB_NO) && @@ -1447,6 +1486,12 @@ static void __init cpu_set_bug_bits(struct cpuinfo_x86 *c) if (cpu_has(c, X86_FEATURE_AMD_IBPB) && !cpu_has(c, X86_FEATURE_AMD_IBPB_RET)) setup_force_cpu_bug(X86_BUG_IBPB_NO_RET); + if (vulnerable_to_its(x86_arch_cap_msr)) { + setup_force_cpu_bug(X86_BUG_ITS); + if (cpu_matches(cpu_vuln_blacklist, ITS_NATIVE_ONLY)) + setup_force_cpu_bug(X86_BUG_ITS_NATIVE_ONLY); + } + if (cpu_matches(cpu_vuln_whitelist, NO_MELTDOWN)) return; diff --git a/arch/x86/kernel/cpu/debugfs.c b/arch/x86/kernel/cpu/debugfs.c index 3baf3e4358..10719aba62 100644 --- a/arch/x86/kernel/cpu/debugfs.c +++ b/arch/x86/kernel/cpu/debugfs.c @@ -22,6 +22,7 @@ static int cpu_debug_show(struct seq_file *m, void *p) seq_printf(m, "die_id: %u\n", c->topo.die_id); seq_printf(m, "cu_id: %u\n", c->topo.cu_id); seq_printf(m, "core_id: %u\n", c->topo.core_id); + seq_printf(m, "cpu_type: %s\n", get_topology_cpu_type_name(c)); seq_printf(m, "logical_pkg_id: %u\n", c->topo.logical_pkg_id); seq_printf(m, "logical_die_id: %u\n", c->topo.logical_die_id); seq_printf(m, "llc_id: %u\n", c->topo.llc_id); diff --git a/arch/x86/kernel/cpu/match.c b/arch/x86/kernel/cpu/match.c index 8e7de73332..37d02544d0 100644 --- a/arch/x86/kernel/cpu/match.c +++ b/arch/x86/kernel/cpu/match.c @@ -6,7 +6,35 @@ #include /** - * x86_match_cpu - match current CPU again an array of x86_cpu_ids + * x86_match_vendor_cpu_type - helper function to match the hardware defined + * cpu-type for a single entry in the x86_cpu_id + * table. Note, this function does not match the + * generic cpu-types TOPO_CPU_TYPE_EFFICIENCY and + * TOPO_CPU_TYPE_PERFORMANCE. + * @c: Pointer to the cpuinfo_x86 structure of the CPU to match. + * @m: Pointer to the x86_cpu_id entry to match against. + * + * Return: true if the cpu-type matches, false otherwise. + */ +static bool x86_match_vendor_cpu_type(struct cpuinfo_x86 *c, const struct x86_cpu_id *m) +{ + if (m->type == X86_CPU_TYPE_ANY) + return true; + + /* Hybrid CPUs are special, they are assumed to match all cpu-types */ + if (cpu_feature_enabled(X86_FEATURE_HYBRID_CPU)) + return true; + + if (c->x86_vendor == X86_VENDOR_INTEL) + return m->type == c->topo_intel_type; + if (c->x86_vendor == X86_VENDOR_AMD) + return m->type == c->topo_amd_type; + + return false; +} + +/** + * x86_match_cpu - match current CPU against an array of x86_cpu_ids * @match: Pointer to array of x86_cpu_ids. Last entry terminated with * {}. * @@ -50,6 +78,8 @@ const struct x86_cpu_id *x86_match_cpu(const struct x86_cpu_id *match) continue; if (m->feature != X86_FEATURE_ANY && !cpu_has(c, m->feature)) continue; + if (!x86_match_vendor_cpu_type(c, m)) + continue; return m; } return NULL; @@ -86,3 +116,14 @@ bool x86_cpu_has_min_microcode_rev(const struct x86_cpu_desc *table) return true; } EXPORT_SYMBOL_GPL(x86_cpu_has_min_microcode_rev); + +bool x86_match_min_microcode_rev(const struct x86_cpu_id *table) +{ + const struct x86_cpu_id *res = x86_match_cpu(table); + + if (!res || res->driver_data > boot_cpu_data.microcode) + return false; + + return true; +} +EXPORT_SYMBOL_GPL(x86_match_min_microcode_rev); diff --git a/arch/x86/kernel/cpu/scattered.c b/arch/x86/kernel/cpu/scattered.c index c84c30188f..2792fa29db 100644 --- a/arch/x86/kernel/cpu/scattered.c +++ b/arch/x86/kernel/cpu/scattered.c @@ -52,6 +52,7 @@ static const struct cpuid_bit cpuid_bits[] = { { X86_FEATURE_PERFMON_V2, CPUID_EAX, 0, 0x80000022, 0 }, { X86_FEATURE_AMD_LBR_V2, CPUID_EAX, 1, 0x80000022, 0 }, { X86_FEATURE_AMD_LBR_PMC_FREEZE, CPUID_EAX, 2, 0x80000022, 0 }, + { X86_FEATURE_AMD_HETEROGENEOUS_CORES, CPUID_EAX, 30, 0x80000026, 0 }, { 0, 0, 0, 0, 0 } }; diff --git a/arch/x86/kernel/cpu/topology_amd.c b/arch/x86/kernel/cpu/topology_amd.c index 7d476fa697..121ffb0037 100644 --- a/arch/x86/kernel/cpu/topology_amd.c +++ b/arch/x86/kernel/cpu/topology_amd.c @@ -182,6 +182,9 @@ static void parse_topology_amd(struct topo_scan *tscan) if (cpu_feature_enabled(X86_FEATURE_TOPOEXT)) has_topoext = cpu_parse_topology_ext(tscan); + if (cpu_feature_enabled(X86_FEATURE_AMD_HETEROGENEOUS_CORES)) + tscan->c->topo_cpu_type = cpuid_ebx(0x80000026); + if (!has_topoext && !parse_8000_0008(tscan)) return; diff --git a/arch/x86/kernel/cpu/topology_common.c b/arch/x86/kernel/cpu/topology_common.c index 9a6069e713..218ca2c767 100644 --- a/arch/x86/kernel/cpu/topology_common.c +++ b/arch/x86/kernel/cpu/topology_common.c @@ -3,6 +3,7 @@ #include +#include #include #include #include @@ -27,6 +28,36 @@ void topology_set_dom(struct topo_scan *tscan, enum x86_topology_domains dom, } } +enum x86_topology_cpu_type get_topology_cpu_type(struct cpuinfo_x86 *c) +{ + if (c->x86_vendor == X86_VENDOR_INTEL) { + switch (c->topo_intel_type) { + case INTEL_CPU_TYPE_ATOM: return TOPO_CPU_TYPE_EFFICIENCY; + case INTEL_CPU_TYPE_CORE: return TOPO_CPU_TYPE_PERFORMANCE; + } + } + if (c->x86_vendor == X86_VENDOR_AMD) { + switch (c->topo_amd_type) { + case 0: return TOPO_CPU_TYPE_PERFORMANCE; + case 1: return TOPO_CPU_TYPE_EFFICIENCY; + } + } + + return TOPO_CPU_TYPE_UNKNOWN; +} + +const char *get_topology_cpu_type_name(struct cpuinfo_x86 *c) +{ + switch (get_topology_cpu_type(c)) { + case TOPO_CPU_TYPE_PERFORMANCE: + return "performance"; + case TOPO_CPU_TYPE_EFFICIENCY: + return "efficiency"; + default: + return "unknown"; + } +} + static unsigned int __maybe_unused parse_num_cores_legacy(struct cpuinfo_x86 *c) { struct { @@ -96,6 +127,7 @@ static void parse_topology(struct topo_scan *tscan, bool early) } ebx; c->topo = topo_defaults; + c->topo_cpu_type = TOPO_CPU_TYPE_UNKNOWN; if (fake_topology(tscan)) return; @@ -132,6 +164,8 @@ static void parse_topology(struct topo_scan *tscan, bool early) case X86_VENDOR_INTEL: if (!IS_ENABLED(CONFIG_CPU_SUP_INTEL) || !cpu_parse_topology_ext(tscan)) parse_legacy(tscan); + if (c->cpuid_level >= 0x1a) + c->topo_cpu_type = cpuid_eax(0x1a); break; case X86_VENDOR_HYGON: if (IS_ENABLED(CONFIG_CPU_SUP_HYGON)) diff --git a/arch/x86/kernel/ftrace.c b/arch/x86/kernel/ftrace.c index 8da0e66ca2..bfab966ea5 100644 --- a/arch/x86/kernel/ftrace.c +++ b/arch/x86/kernel/ftrace.c @@ -354,7 +354,7 @@ create_trampoline(struct ftrace_ops *ops, unsigned int *tramp_size) goto fail; ip = trampoline + size; - if (cpu_feature_enabled(X86_FEATURE_RETHUNK)) + if (cpu_wants_rethunk_at(ip)) __text_gen_insn(ip, JMP32_INSN_OPCODE, ip, x86_return_thunk, JMP32_INSN_SIZE); else memcpy(ip, retq, sizeof(retq)); diff --git a/arch/x86/kernel/static_call.c b/arch/x86/kernel/static_call.c index 4eefaac64c..c321b1873c 100644 --- a/arch/x86/kernel/static_call.c +++ b/arch/x86/kernel/static_call.c @@ -81,7 +81,7 @@ static void __ref __static_call_transform(void *insn, enum insn_type type, break; case RET: - if (cpu_feature_enabled(X86_FEATURE_RETHUNK)) + if (cpu_wants_rethunk_at(insn)) code = text_gen_insn(JMP32_INSN_OPCODE, insn, x86_return_thunk); else code = &retinsn; @@ -90,7 +90,7 @@ static void __ref __static_call_transform(void *insn, enum insn_type type, case JCC: if (!func) { func = __static_call_return; - if (cpu_feature_enabled(X86_FEATURE_RETHUNK)) + if (cpu_wants_rethunk()) func = x86_return_thunk; } diff --git a/arch/x86/kernel/vmlinux.lds.S b/arch/x86/kernel/vmlinux.lds.S index feb8102a9c..e2567c8f6b 100644 --- a/arch/x86/kernel/vmlinux.lds.S +++ b/arch/x86/kernel/vmlinux.lds.S @@ -530,4 +530,14 @@ INIT_PER_CPU(irq_stack_backing_store); "SRSO function pair won't alias"); #endif +#if defined(CONFIG_MITIGATION_ITS) && !defined(CONFIG_DEBUG_FORCE_FUNCTION_ALIGN_64B) +. = ASSERT(__x86_indirect_its_thunk_rax & 0x20, "__x86_indirect_thunk_rax not in second half of cacheline"); +. = ASSERT(((__x86_indirect_its_thunk_rcx - __x86_indirect_its_thunk_rax) % 64) == 0, "Indirect thunks are not cacheline apart"); +. = ASSERT(__x86_indirect_its_thunk_array == __x86_indirect_its_thunk_rax, "Gap in ITS thunk array"); +#endif + +#if defined(CONFIG_MITIGATION_ITS) && !defined(CONFIG_DEBUG_FORCE_FUNCTION_ALIGN_64B) +. = ASSERT(its_return_thunk & 0x20, "its_return_thunk not in second half of cacheline"); +#endif + #endif /* CONFIG_X86_64 */ diff --git a/arch/x86/kvm/cpuid.c b/arch/x86/kvm/cpuid.c index 9f6b15485f..6fbd9b749d 100644 --- a/arch/x86/kvm/cpuid.c +++ b/arch/x86/kvm/cpuid.c @@ -815,7 +815,7 @@ void kvm_set_cpu_caps(void) kvm_cpu_cap_mask(CPUID_8000_0021_EAX, F(NO_NESTED_DATA_BP) | F(LFENCE_RDTSC) | 0 /* SmmPgCfgLock */ | F(NULL_SEL_CLR_BASE) | F(AUTOIBRS) | 0 /* PrefetchCtlMsr */ | - F(WRMSR_XX_BASE_NS) + F(WRMSR_XX_BASE_NS) | F(SRSO_USER_KERNEL_NO) ); kvm_cpu_cap_check_and_set(X86_FEATURE_SBPB); diff --git a/arch/x86/kvm/svm/svm.c b/arch/x86/kvm/svm/svm.c index 06ae075e31..0fb4747a4f 100644 --- a/arch/x86/kvm/svm/svm.c +++ b/arch/x86/kvm/svm/svm.c @@ -1507,6 +1507,63 @@ static void svm_vcpu_free(struct kvm_vcpu *vcpu) __free_pages(virt_to_page(svm->msrpm), get_order(MSRPM_SIZE)); } +#ifdef CONFIG_CPU_MITIGATIONS +static DEFINE_SPINLOCK(srso_lock); +static atomic_t srso_nr_vms; + +static void svm_srso_clear_bp_spec_reduce(void *ign) +{ + struct svm_cpu_data *sd = this_cpu_ptr(&svm_data); + + if (!sd->bp_spec_reduce_set) + return; + + msr_clear_bit(MSR_ZEN4_BP_CFG, MSR_ZEN4_BP_CFG_BP_SPEC_REDUCE_BIT); + sd->bp_spec_reduce_set = false; +} + +static void svm_srso_vm_destroy(void) +{ + if (!cpu_feature_enabled(X86_FEATURE_SRSO_BP_SPEC_REDUCE)) + return; + + if (atomic_dec_return(&srso_nr_vms)) + return; + + guard(spinlock)(&srso_lock); + + /* + * Verify a new VM didn't come along, acquire the lock, and increment + * the count before this task acquired the lock. + */ + if (atomic_read(&srso_nr_vms)) + return; + + on_each_cpu(svm_srso_clear_bp_spec_reduce, NULL, 1); +} + +static void svm_srso_vm_init(void) +{ + if (!cpu_feature_enabled(X86_FEATURE_SRSO_BP_SPEC_REDUCE)) + return; + + /* + * Acquire the lock on 0 => 1 transitions to ensure a potential 1 => 0 + * transition, i.e. destroying the last VM, is fully complete, e.g. so + * that a delayed IPI doesn't clear BP_SPEC_REDUCE after a vCPU runs. + */ + if (atomic_inc_not_zero(&srso_nr_vms)) + return; + + guard(spinlock)(&srso_lock); + + atomic_inc(&srso_nr_vms); +} +#else +static void svm_srso_vm_init(void) { } +static void svm_srso_vm_destroy(void) { } +#endif + static void svm_prepare_switch_to_guest(struct kvm_vcpu *vcpu) { struct vcpu_svm *svm = to_svm(vcpu); @@ -1539,6 +1596,11 @@ static void svm_prepare_switch_to_guest(struct kvm_vcpu *vcpu) (!boot_cpu_has(X86_FEATURE_V_TSC_AUX) || !sev_es_guest(vcpu->kvm))) kvm_set_user_return_msr(tsc_aux_uret_slot, svm->tsc_aux, -1ull); + if (cpu_feature_enabled(X86_FEATURE_SRSO_BP_SPEC_REDUCE) && + !sd->bp_spec_reduce_set) { + sd->bp_spec_reduce_set = true; + msr_set_bit(MSR_ZEN4_BP_CFG, MSR_ZEN4_BP_CFG_BP_SPEC_REDUCE_BIT); + } svm->guest_state_loaded = true; } @@ -1558,7 +1620,8 @@ static void svm_vcpu_load(struct kvm_vcpu *vcpu, int cpu) if (sd->current_vmcb != svm->vmcb) { sd->current_vmcb = svm->vmcb; - if (!cpu_feature_enabled(X86_FEATURE_IBPB_ON_VMEXIT)) + if (!cpu_feature_enabled(X86_FEATURE_IBPB_ON_VMEXIT) && + static_branch_likely(&switch_vcpu_ibpb)) indirect_branch_prediction_barrier(); } if (kvm_vcpu_apicv_active(vcpu)) @@ -4951,6 +5014,8 @@ static void svm_vm_destroy(struct kvm *kvm) { avic_vm_destroy(kvm); sev_vm_destroy(kvm); + + svm_srso_vm_destroy(); } static int svm_vm_init(struct kvm *kvm) @@ -4976,6 +5041,7 @@ static int svm_vm_init(struct kvm *kvm) return ret; } + svm_srso_vm_init(); return 0; } diff --git a/arch/x86/kvm/svm/svm.h b/arch/x86/kvm/svm/svm.h index 43fa6a16eb..708f7fc3b1 100644 --- a/arch/x86/kvm/svm/svm.h +++ b/arch/x86/kvm/svm/svm.h @@ -335,6 +335,8 @@ struct svm_cpu_data { u32 next_asid; u32 min_asid; + bool bp_spec_reduce_set; + struct vmcb *save_area; unsigned long save_area_pa; diff --git a/arch/x86/kvm/vmx/vmx.c b/arch/x86/kvm/vmx/vmx.c index 0bc4a4bb00..600a75dbd8 100644 --- a/arch/x86/kvm/vmx/vmx.c +++ b/arch/x86/kvm/vmx/vmx.c @@ -1478,7 +1478,8 @@ void vmx_vcpu_load_vmcs(struct kvm_vcpu *vcpu, int cpu, * performs IBPB on nested VM-Exit (a single nested transition * may switch the active VMCS multiple times). */ - if (!buddy || WARN_ON_ONCE(buddy->vmcs != prev)) + if (static_branch_likely(&switch_vcpu_ibpb) && + (!buddy || WARN_ON_ONCE(buddy->vmcs != prev))) indirect_branch_prediction_barrier(); } diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c index cb138fade6..ef7aed3fe9 100644 --- a/arch/x86/kvm/x86.c +++ b/arch/x86/kvm/x86.c @@ -1602,7 +1602,7 @@ EXPORT_SYMBOL_GPL(kvm_emulate_rdpmc); ARCH_CAP_PSCHANGE_MC_NO | ARCH_CAP_TSX_CTRL_MSR | ARCH_CAP_TAA_NO | \ ARCH_CAP_SBDR_SSDP_NO | ARCH_CAP_FBSDP_NO | ARCH_CAP_PSDP_NO | \ ARCH_CAP_FB_CLEAR | ARCH_CAP_RRSBA | ARCH_CAP_PBRSB_NO | ARCH_CAP_GDS_NO | \ - ARCH_CAP_RFDS_NO | ARCH_CAP_RFDS_CLEAR | ARCH_CAP_BHI_NO) + ARCH_CAP_RFDS_NO | ARCH_CAP_RFDS_CLEAR | ARCH_CAP_BHI_NO | ARCH_CAP_ITS_NO) static u64 kvm_get_arch_capabilities(void) { @@ -1636,6 +1636,8 @@ static u64 kvm_get_arch_capabilities(void) data |= ARCH_CAP_MDS_NO; if (!boot_cpu_has_bug(X86_BUG_RFDS)) data |= ARCH_CAP_RFDS_NO; + if (!boot_cpu_has_bug(X86_BUG_ITS)) + data |= ARCH_CAP_ITS_NO; if (!boot_cpu_has(X86_FEATURE_RTM)) { /* diff --git a/arch/x86/lib/msr.c b/arch/x86/lib/msr.c index 4bf4fad5b1..5a18ecc04a 100644 --- a/arch/x86/lib/msr.c +++ b/arch/x86/lib/msr.c @@ -103,6 +103,7 @@ int msr_set_bit(u32 msr, u8 bit) { return __flip_bit(msr, bit, true); } +EXPORT_SYMBOL_GPL(msr_set_bit); /** * msr_clear_bit - Clear @bit in a MSR @msr. @@ -118,6 +119,7 @@ int msr_clear_bit(u32 msr, u8 bit) { return __flip_bit(msr, bit, false); } +EXPORT_SYMBOL_GPL(msr_clear_bit); #ifdef CONFIG_TRACEPOINTS void do_trace_write_msr(unsigned int msr, u64 val, int failed) diff --git a/arch/x86/lib/retpoline.S b/arch/x86/lib/retpoline.S index 391059b2c6..614fb9aee2 100644 --- a/arch/x86/lib/retpoline.S +++ b/arch/x86/lib/retpoline.S @@ -366,6 +366,45 @@ SYM_FUNC_END(call_depth_return_thunk) #endif /* CONFIG_MITIGATION_CALL_DEPTH_TRACKING */ +#ifdef CONFIG_MITIGATION_ITS + +.macro ITS_THUNK reg + +SYM_INNER_LABEL(__x86_indirect_its_thunk_\reg, SYM_L_GLOBAL) + UNWIND_HINT_UNDEFINED + ANNOTATE_NOENDBR + ANNOTATE_RETPOLINE_SAFE + jmp *%\reg + int3 + .align 32, 0xcc /* fill to the end of the line */ + .skip 32, 0xcc /* skip to the next upper half */ +.endm + +/* ITS mitigation requires thunks be aligned to upper half of cacheline */ +.align 64, 0xcc +.skip 32, 0xcc +SYM_CODE_START(__x86_indirect_its_thunk_array) + +#define GEN(reg) ITS_THUNK reg +#include +#undef GEN + + .align 64, 0xcc +SYM_CODE_END(__x86_indirect_its_thunk_array) + +.align 64, 0xcc +.skip 32, 0xcc +SYM_CODE_START(its_return_thunk) + UNWIND_HINT_FUNC + ANNOTATE_NOENDBR + ANNOTATE_UNRET_SAFE + ret + int3 +SYM_CODE_END(its_return_thunk) +EXPORT_SYMBOL(its_return_thunk) + +#endif /* CONFIG_MITIGATION_ITS */ + /* * This function name is magical and is used by -mfunction-return=thunk-extern * for the compiler to generate JMPs to it. diff --git a/arch/x86/mm/init.c b/arch/x86/mm/init.c index eb503f53c3..58f0c0c922 100644 --- a/arch/x86/mm/init.c +++ b/arch/x86/mm/init.c @@ -1053,18 +1053,54 @@ unsigned long arch_max_swapfile_size(void) #ifdef CONFIG_EXECMEM static struct execmem_info execmem_info __ro_after_init; +#ifdef CONFIG_ARCH_HAS_EXECMEM_ROX +void execmem_fill_trapping_insns(void *ptr, size_t size, bool writeable) +{ + /* fill memory with INT3 instructions */ + if (writeable) + memset(ptr, INT3_INSN_OPCODE, size); + else + text_poke_set(ptr, INT3_INSN_OPCODE, size); +} +#endif + struct execmem_info __init *execmem_arch_setup(void) { unsigned long start, offset = 0; + enum execmem_range_flags flags; + pgprot_t pgprot; if (kaslr_enabled()) offset = get_random_u32_inclusive(1, 1024) * PAGE_SIZE; start = MODULES_VADDR + offset; + if (IS_ENABLED(CONFIG_ARCH_HAS_EXECMEM_ROX) && + cpu_feature_enabled(X86_FEATURE_PSE)) { + pgprot = PAGE_KERNEL_ROX; + flags = EXECMEM_KASAN_SHADOW | EXECMEM_ROX_CACHE; + } else { + pgprot = PAGE_KERNEL; + flags = EXECMEM_KASAN_SHADOW; + } + execmem_info = (struct execmem_info){ .ranges = { - [EXECMEM_DEFAULT] = { + [EXECMEM_MODULE_TEXT] = { + .flags = flags, + .start = start, + .end = MODULES_END, + .pgprot = pgprot, + .alignment = MODULE_ALIGN, + }, + [EXECMEM_KPROBES ... EXECMEM_BPF] = { + .flags = EXECMEM_KASAN_SHADOW, + .start = start, + .end = MODULES_END, + .pgprot = PAGE_KERNEL, + .alignment = MODULE_ALIGN, + }, + [EXECMEM_MODULE_DATA] = { .flags = EXECMEM_KASAN_SHADOW, .start = start, .end = MODULES_END, diff --git a/arch/x86/mm/pat/cpa-test.c b/arch/x86/mm/pat/cpa-test.c index 3d2f7f0a6e..ad3c1feec9 100644 --- a/arch/x86/mm/pat/cpa-test.c +++ b/arch/x86/mm/pat/cpa-test.c @@ -183,7 +183,7 @@ static int pageattr_test(void) break; case 1: - err = change_page_attr_set(addrs, len[1], PAGE_CPA_TEST, 1); + err = change_page_attr_set(addrs, len[i], PAGE_CPA_TEST, 1); break; case 2: diff --git a/arch/x86/mm/pat/set_memory.c b/arch/x86/mm/pat/set_memory.c index 44f7b2ea6a..8742e54752 100644 --- a/arch/x86/mm/pat/set_memory.c +++ b/arch/x86/mm/pat/set_memory.c @@ -75,6 +75,7 @@ static DEFINE_SPINLOCK(cpa_lock); #define CPA_ARRAY 2 #define CPA_PAGES_ARRAY 4 #define CPA_NO_CHECK_ALIAS 8 /* Do not search for aliases */ +#define CPA_COLLAPSE 16 /* try to collapse large pages */ static inline pgprot_t cachemode2pgprot(enum page_cache_mode pcm) { @@ -107,6 +108,18 @@ static void split_page_count(int level) direct_pages_count[level - 1] += PTRS_PER_PTE; } +static void collapse_page_count(int level) +{ + direct_pages_count[level]++; + if (system_state == SYSTEM_RUNNING) { + if (level == PG_LEVEL_2M) + count_vm_event(DIRECT_MAP_LEVEL2_COLLAPSE); + else if (level == PG_LEVEL_1G) + count_vm_event(DIRECT_MAP_LEVEL3_COLLAPSE); + } + direct_pages_count[level - 1] -= PTRS_PER_PTE; +} + void arch_report_meminfo(struct seq_file *m) { seq_printf(m, "DirectMap4k: %8lu kB\n", @@ -124,6 +137,7 @@ void arch_report_meminfo(struct seq_file *m) } #else static inline void split_page_count(int level) { } +static inline void collapse_page_count(int level) { } #endif #ifdef CONFIG_X86_CPA_STATISTICS @@ -396,16 +410,49 @@ static void __cpa_flush_tlb(void *data) flush_tlb_one_kernel(fix_addr(__cpa_addr(cpa, i))); } -static void cpa_flush(struct cpa_data *data, int cache) +static int collapse_large_pages(unsigned long addr, struct list_head *pgtables); + +static void cpa_collapse_large_pages(struct cpa_data *cpa) +{ + unsigned long start, addr, end; + struct ptdesc *ptdesc, *tmp; + LIST_HEAD(pgtables); + int collapsed = 0; + int i; + + if (cpa->flags & (CPA_PAGES_ARRAY | CPA_ARRAY)) { + for (i = 0; i < cpa->numpages; i++) + collapsed += collapse_large_pages(__cpa_addr(cpa, i), + &pgtables); + } else { + addr = __cpa_addr(cpa, 0); + start = addr & PMD_MASK; + end = addr + PAGE_SIZE * cpa->numpages; + + for (addr = start; within(addr, start, end); addr += PMD_SIZE) + collapsed += collapse_large_pages(addr, &pgtables); + } + + if (!collapsed) + return; + + flush_tlb_all(); + + list_for_each_entry_safe(ptdesc, tmp, &pgtables, pt_list) { + list_del(&ptdesc->pt_list); + __free_page(ptdesc_page(ptdesc)); + } +} + +static void cpa_flush(struct cpa_data *cpa, int cache) { - struct cpa_data *cpa = data; unsigned int i; BUG_ON(irqs_disabled() && !early_boot_irqs_disabled); if (cache && !static_cpu_has(X86_FEATURE_CLFLUSH)) { cpa_flush_all(cache); - return; + goto collapse_large_pages; } if (cpa->force_flush_all || cpa->numpages > tlb_single_page_flush_ceiling) @@ -414,7 +461,7 @@ static void cpa_flush(struct cpa_data *data, int cache) on_each_cpu(__cpa_flush_tlb, cpa, 1); if (!cache) - return; + goto collapse_large_pages; mb(); for (i = 0; i < cpa->numpages; i++) { @@ -430,6 +477,10 @@ static void cpa_flush(struct cpa_data *data, int cache) clflush_cache_range_opt((void *)fix_addr(addr), PAGE_SIZE); } mb(); + +collapse_large_pages: + if (cpa->flags & CPA_COLLAPSE) + cpa_collapse_large_pages(cpa); } static bool overlaps(unsigned long r1_start, unsigned long r1_end, @@ -1199,6 +1250,164 @@ static int split_large_page(struct cpa_data *cpa, pte_t *kpte, return 0; } +static int collapse_pmd_page(pmd_t *pmd, unsigned long addr, + struct list_head *pgtables) +{ + pmd_t _pmd, old_pmd; + pte_t *pte, first; + unsigned long pfn; + pgprot_t pgprot; + int i = 0; + + if (!cpu_feature_enabled(X86_FEATURE_PSE)) + return 0; + + addr &= PMD_MASK; + pte = pte_offset_kernel(pmd, addr); + first = *pte; + pfn = pte_pfn(first); + + /* Make sure alignment is suitable */ + if (PFN_PHYS(pfn) & ~PMD_MASK) + return 0; + + /* The page is 4k intentionally */ + if (pte_flags(first) & _PAGE_KERNEL_4K) + return 0; + + /* Check that the rest of PTEs are compatible with the first one */ + for (i = 1, pte++; i < PTRS_PER_PTE; i++, pte++) { + pte_t entry = *pte; + + if (!pte_present(entry)) + return 0; + if (pte_flags(entry) != pte_flags(first)) + return 0; + if (pte_pfn(entry) != pte_pfn(first) + i) + return 0; + } + + old_pmd = *pmd; + + /* Success: set up a large page */ + pgprot = pgprot_4k_2_large(pte_pgprot(first)); + pgprot_val(pgprot) |= _PAGE_PSE; + _pmd = pfn_pmd(pfn, pgprot); + set_pmd(pmd, _pmd); + + /* Queue the page table to be freed after TLB flush */ + list_add(&page_ptdesc(pmd_page(old_pmd))->pt_list, pgtables); + + if (IS_ENABLED(CONFIG_X86_32) && !SHARED_KERNEL_PMD) { + struct page *page; + + /* Update all PGD tables to use the same large page */ + list_for_each_entry(page, &pgd_list, lru) { + pgd_t *pgd = (pgd_t *)page_address(page) + pgd_index(addr); + p4d_t *p4d = p4d_offset(pgd, addr); + pud_t *pud = pud_offset(p4d, addr); + pmd_t *pmd = pmd_offset(pud, addr); + /* Something is wrong if entries doesn't match */ + if (WARN_ON(pmd_val(old_pmd) != pmd_val(*pmd))) + continue; + set_pmd(pmd, _pmd); + } + } + + if (virt_addr_valid(addr) && pfn_range_is_mapped(pfn, pfn + 1)) + collapse_page_count(PG_LEVEL_2M); + + return 1; +} + +static int collapse_pud_page(pud_t *pud, unsigned long addr, + struct list_head *pgtables) +{ + unsigned long pfn; + pmd_t *pmd, first; + int i; + + if (!direct_gbpages) + return 0; + + addr &= PUD_MASK; + pmd = pmd_offset(pud, addr); + first = *pmd; + + /* + * To restore PUD page all PMD entries must be large and + * have suitable alignment + */ + pfn = pmd_pfn(first); + if (!pmd_leaf(first) || (PFN_PHYS(pfn) & ~PUD_MASK)) + return 0; + + /* + * To restore PUD page, all following PMDs must be compatible with the + * first one. + */ + for (i = 1, pmd++; i < PTRS_PER_PMD; i++, pmd++) { + pmd_t entry = *pmd; + + if (!pmd_present(entry) || !pmd_leaf(entry)) + return 0; + if (pmd_flags(entry) != pmd_flags(first)) + return 0; + if (pmd_pfn(entry) != pmd_pfn(first) + i * PTRS_PER_PTE) + return 0; + } + + /* Restore PUD page and queue page table to be freed after TLB flush */ + list_add(&page_ptdesc(pud_page(*pud))->pt_list, pgtables); + set_pud(pud, pfn_pud(pfn, pmd_pgprot(first))); + + if (virt_addr_valid(addr) && pfn_range_is_mapped(pfn, pfn + 1)) + collapse_page_count(PG_LEVEL_1G); + + return 1; +} + +/* + * Collapse PMD and PUD pages in the kernel mapping around the address where + * possible. + * + * Caller must flush TLB and free page tables queued on the list before + * touching the new entries. CPU must not see TLB entries of different size + * with different attributes. + */ +static int collapse_large_pages(unsigned long addr, struct list_head *pgtables) +{ + int collapsed = 0; + pgd_t *pgd; + p4d_t *p4d; + pud_t *pud; + pmd_t *pmd; + + addr &= PMD_MASK; + + spin_lock(&pgd_lock); + pgd = pgd_offset_k(addr); + if (pgd_none(*pgd)) + goto out; + p4d = p4d_offset(pgd, addr); + if (p4d_none(*p4d)) + goto out; + pud = pud_offset(p4d, addr); + if (!pud_present(*pud) || pud_leaf(*pud)) + goto out; + pmd = pmd_offset(pud, addr); + if (!pmd_present(*pmd) || pmd_leaf(*pmd)) + goto out; + + collapsed = collapse_pmd_page(pmd, addr, pgtables); + if (collapsed) + collapsed += collapse_pud_page(pud, addr, pgtables); + +out: + spin_unlock(&pgd_lock); + return collapsed; +} + static bool try_to_free_pte_page(pte_t *pte) { int i; @@ -2122,7 +2331,8 @@ int set_memory_rox(unsigned long addr, int numpages) if (__supported_pte_mask & _PAGE_NX) clr.pgprot |= _PAGE_NX; - return change_page_attr_clear(&addr, numpages, clr, 0); + return change_page_attr_set_clr(&addr, numpages, __pgprot(0), clr, 0, + CPA_COLLAPSE, NULL); } int set_memory_rw(unsigned long addr, int numpages) @@ -2149,7 +2359,8 @@ int set_memory_p(unsigned long addr, int numpages) int set_memory_4k(unsigned long addr, int numpages) { - return change_page_attr_set_clr(&addr, numpages, __pgprot(0), + return change_page_attr_set_clr(&addr, numpages, + __pgprot(_PAGE_KERNEL_4K), __pgprot(0), 1, 0, NULL); } @@ -2444,6 +2655,14 @@ int set_direct_map_default_noflush(struct page *page) return __set_pages_p(page, 1); } +int set_direct_map_valid_noflush(struct page *page, unsigned nr, bool valid) +{ + if (valid) + return __set_pages_p(page, nr); + + return __set_pages_np(page, nr); +} + #ifdef CONFIG_DEBUG_PAGEALLOC void __kernel_map_pages(struct page *page, int numpages, int enable) { diff --git a/arch/x86/mm/tlb.c b/arch/x86/mm/tlb.c index 86593d1b78..4b040dbbba 100644 --- a/arch/x86/mm/tlb.c +++ b/arch/x86/mm/tlb.c @@ -388,9 +388,9 @@ static void cond_mitigation(struct task_struct *next) prev_mm = this_cpu_read(cpu_tlbstate.last_user_mm_spec); /* - * Avoid user/user BTB poisoning by flushing the branch predictor - * when switching between processes. This stops one process from - * doing Spectre-v2 attacks on another. + * Avoid user->user BTB/RSB poisoning by flushing them when switching + * between processes. This stops one process from doing Spectre-v2 + * attacks on another. * * Both, the conditional and the always IBPB mode use the mm * pointer to avoid the IBPB when switching between tasks of the @@ -446,8 +446,7 @@ static void cond_mitigation(struct task_struct *next) * different context than the user space task which ran * last on this CPU. */ - if ((prev_mm & ~LAST_USER_MM_SPEC_MASK) != - (unsigned long)next->mm) + if ((prev_mm & ~LAST_USER_MM_SPEC_MASK) != (unsigned long)next->mm) indirect_branch_prediction_barrier(); } diff --git a/arch/x86/net/bpf_jit_comp.c b/arch/x86/net/bpf_jit_comp.c index 06b080b61a..87f22f8d72 100644 --- a/arch/x86/net/bpf_jit_comp.c +++ b/arch/x86/net/bpf_jit_comp.c @@ -637,7 +637,10 @@ static void emit_indirect_jump(u8 **pprog, int reg, u8 *ip) { u8 *prog = *pprog; - if (cpu_feature_enabled(X86_FEATURE_RETPOLINE_LFENCE)) { + if (cpu_feature_enabled(X86_FEATURE_INDIRECT_THUNK_ITS)) { + OPTIMIZER_HIDE_VAR(reg); + emit_jump(&prog, &__x86_indirect_its_thunk_array[reg], ip); + } else if (cpu_feature_enabled(X86_FEATURE_RETPOLINE_LFENCE)) { EMIT_LFENCE(); EMIT2(0xFF, 0xE0 + reg); } else if (cpu_feature_enabled(X86_FEATURE_RETPOLINE)) { @@ -659,7 +662,7 @@ static void emit_return(u8 **pprog, u8 *ip) { u8 *prog = *pprog; - if (cpu_feature_enabled(X86_FEATURE_RETHUNK)) { + if (cpu_wants_rethunk()) { emit_jump(&prog, x86_return_thunk, ip); } else { EMIT1(0xC3); /* ret */ diff --git a/arch/xtensa/include/asm/Kbuild b/arch/xtensa/include/asm/Kbuild index fa07c686cb..cc5dba7383 100644 --- a/arch/xtensa/include/asm/Kbuild +++ b/arch/xtensa/include/asm/Kbuild @@ -8,3 +8,4 @@ generic-y += parport.h generic-y += qrwlock.h generic-y += qspinlock.h generic-y += user.h +generic-y += text-patching.h diff --git a/configs/kernel-6.12.0-aarch64-64k-debug.config b/configs/kernel-6.12.0-aarch64-64k-debug.config index f753dd3878..724806df21 100644 --- a/configs/kernel-6.12.0-aarch64-64k-debug.config +++ b/configs/kernel-6.12.0-aarch64-64k-debug.config @@ -8234,7 +8234,8 @@ CONFIG_BINARY_PRINTF=y CONFIG_RAID6_PQ=m # CONFIG_RAID6_PQ_BENCHMARK is not set CONFIG_LINEAR_RANGES=y -# CONFIG_PACKING is not set +CONFIG_PACKING=y +CONFIG_PACKING_KUNIT_TEST=m CONFIG_BITREVERSE=y CONFIG_HAVE_ARCH_BITREVERSE=y CONFIG_GENERIC_STRNCPY_FROM_USER=y diff --git a/configs/kernel-6.12.0-aarch64-64k.config b/configs/kernel-6.12.0-aarch64-64k.config index 66ac8e372f..d5e0212d7a 100644 --- a/configs/kernel-6.12.0-aarch64-64k.config +++ b/configs/kernel-6.12.0-aarch64-64k.config @@ -8212,7 +8212,8 @@ CONFIG_BINARY_PRINTF=y CONFIG_RAID6_PQ=m # CONFIG_RAID6_PQ_BENCHMARK is not set CONFIG_LINEAR_RANGES=y -# CONFIG_PACKING is not set +CONFIG_PACKING=y +CONFIG_PACKING_KUNIT_TEST=m CONFIG_BITREVERSE=y CONFIG_HAVE_ARCH_BITREVERSE=y CONFIG_GENERIC_STRNCPY_FROM_USER=y diff --git a/configs/kernel-6.12.0-aarch64-automotive-debug.config b/configs/kernel-6.12.0-aarch64-automotive-debug.config index f2a4b38de9..1f74cb563b 100644 --- a/configs/kernel-6.12.0-aarch64-automotive-debug.config +++ b/configs/kernel-6.12.0-aarch64-automotive-debug.config @@ -7909,7 +7909,8 @@ CONFIG_BINARY_PRINTF=y # Library routines # CONFIG_LINEAR_RANGES=y -# CONFIG_PACKING is not set +CONFIG_PACKING=y +CONFIG_PACKING_KUNIT_TEST=m CONFIG_BITREVERSE=y CONFIG_HAVE_ARCH_BITREVERSE=y CONFIG_GENERIC_STRNCPY_FROM_USER=y diff --git a/configs/kernel-6.12.0-aarch64-automotive.config b/configs/kernel-6.12.0-aarch64-automotive.config index ac69e51588..8d3f106dde 100644 --- a/configs/kernel-6.12.0-aarch64-automotive.config +++ b/configs/kernel-6.12.0-aarch64-automotive.config @@ -7893,7 +7893,8 @@ CONFIG_BINARY_PRINTF=y # Library routines # CONFIG_LINEAR_RANGES=y -# CONFIG_PACKING is not set +CONFIG_PACKING=y +CONFIG_PACKING_KUNIT_TEST=m CONFIG_BITREVERSE=y CONFIG_HAVE_ARCH_BITREVERSE=y CONFIG_GENERIC_STRNCPY_FROM_USER=y diff --git a/configs/kernel-6.12.0-aarch64-debug.config b/configs/kernel-6.12.0-aarch64-debug.config index 97ba8fe09a..d6b09250ae 100644 --- a/configs/kernel-6.12.0-aarch64-debug.config +++ b/configs/kernel-6.12.0-aarch64-debug.config @@ -8240,7 +8240,8 @@ CONFIG_BINARY_PRINTF=y CONFIG_RAID6_PQ=m # CONFIG_RAID6_PQ_BENCHMARK is not set CONFIG_LINEAR_RANGES=y -# CONFIG_PACKING is not set +CONFIG_PACKING=y +CONFIG_PACKING_KUNIT_TEST=m CONFIG_BITREVERSE=y CONFIG_HAVE_ARCH_BITREVERSE=y CONFIG_GENERIC_STRNCPY_FROM_USER=y diff --git a/configs/kernel-6.12.0-aarch64-rt-64k-debug.config b/configs/kernel-6.12.0-aarch64-rt-64k-debug.config index c1fe5e835d..e4e4db71ae 100644 --- a/configs/kernel-6.12.0-aarch64-rt-64k-debug.config +++ b/configs/kernel-6.12.0-aarch64-rt-64k-debug.config @@ -8216,7 +8216,8 @@ CONFIG_BINARY_PRINTF=y CONFIG_RAID6_PQ=m # CONFIG_RAID6_PQ_BENCHMARK is not set CONFIG_LINEAR_RANGES=y -# CONFIG_PACKING is not set +CONFIG_PACKING=y +CONFIG_PACKING_KUNIT_TEST=m CONFIG_BITREVERSE=y CONFIG_HAVE_ARCH_BITREVERSE=y CONFIG_GENERIC_STRNCPY_FROM_USER=y diff --git a/configs/kernel-6.12.0-aarch64-rt-64k.config b/configs/kernel-6.12.0-aarch64-rt-64k.config index 33493067aa..dc5b21934a 100644 --- a/configs/kernel-6.12.0-aarch64-rt-64k.config +++ b/configs/kernel-6.12.0-aarch64-rt-64k.config @@ -8194,7 +8194,8 @@ CONFIG_BINARY_PRINTF=y CONFIG_RAID6_PQ=m # CONFIG_RAID6_PQ_BENCHMARK is not set CONFIG_LINEAR_RANGES=y -# CONFIG_PACKING is not set +CONFIG_PACKING=y +CONFIG_PACKING_KUNIT_TEST=m CONFIG_BITREVERSE=y CONFIG_HAVE_ARCH_BITREVERSE=y CONFIG_GENERIC_STRNCPY_FROM_USER=y diff --git a/configs/kernel-6.12.0-aarch64-rt-debug.config b/configs/kernel-6.12.0-aarch64-rt-debug.config index 91521e91e9..8beda36950 100644 --- a/configs/kernel-6.12.0-aarch64-rt-debug.config +++ b/configs/kernel-6.12.0-aarch64-rt-debug.config @@ -8221,7 +8221,8 @@ CONFIG_BINARY_PRINTF=y CONFIG_RAID6_PQ=m # CONFIG_RAID6_PQ_BENCHMARK is not set CONFIG_LINEAR_RANGES=y -# CONFIG_PACKING is not set +CONFIG_PACKING=y +CONFIG_PACKING_KUNIT_TEST=m CONFIG_BITREVERSE=y CONFIG_HAVE_ARCH_BITREVERSE=y CONFIG_GENERIC_STRNCPY_FROM_USER=y diff --git a/configs/kernel-6.12.0-aarch64-rt.config b/configs/kernel-6.12.0-aarch64-rt.config index c354492431..891a4c222b 100644 --- a/configs/kernel-6.12.0-aarch64-rt.config +++ b/configs/kernel-6.12.0-aarch64-rt.config @@ -8199,7 +8199,8 @@ CONFIG_BINARY_PRINTF=y CONFIG_RAID6_PQ=m # CONFIG_RAID6_PQ_BENCHMARK is not set CONFIG_LINEAR_RANGES=y -# CONFIG_PACKING is not set +CONFIG_PACKING=y +CONFIG_PACKING_KUNIT_TEST=m CONFIG_BITREVERSE=y CONFIG_HAVE_ARCH_BITREVERSE=y CONFIG_GENERIC_STRNCPY_FROM_USER=y diff --git a/configs/kernel-6.12.0-aarch64.config b/configs/kernel-6.12.0-aarch64.config index 07b04e3efc..5b916be44d 100644 --- a/configs/kernel-6.12.0-aarch64.config +++ b/configs/kernel-6.12.0-aarch64.config @@ -8218,7 +8218,8 @@ CONFIG_BINARY_PRINTF=y CONFIG_RAID6_PQ=m # CONFIG_RAID6_PQ_BENCHMARK is not set CONFIG_LINEAR_RANGES=y -# CONFIG_PACKING is not set +CONFIG_PACKING=y +CONFIG_PACKING_KUNIT_TEST=m CONFIG_BITREVERSE=y CONFIG_HAVE_ARCH_BITREVERSE=y CONFIG_GENERIC_STRNCPY_FROM_USER=y diff --git a/configs/kernel-6.12.0-ppc64le-debug.config b/configs/kernel-6.12.0-ppc64le-debug.config index c5f36f1d3c..5c5fb9385b 100644 --- a/configs/kernel-6.12.0-ppc64le-debug.config +++ b/configs/kernel-6.12.0-ppc64le-debug.config @@ -6586,7 +6586,8 @@ CONFIG_BINARY_PRINTF=y CONFIG_RAID6_PQ=m # CONFIG_RAID6_PQ_BENCHMARK is not set CONFIG_LINEAR_RANGES=m -# CONFIG_PACKING is not set +CONFIG_PACKING=y +CONFIG_PACKING_KUNIT_TEST=m CONFIG_BITREVERSE=y CONFIG_GENERIC_STRNCPY_FROM_USER=y CONFIG_GENERIC_STRNLEN_USER=y diff --git a/configs/kernel-6.12.0-ppc64le.config b/configs/kernel-6.12.0-ppc64le.config index b2c676e309..5a3fb32f38 100644 --- a/configs/kernel-6.12.0-ppc64le.config +++ b/configs/kernel-6.12.0-ppc64le.config @@ -494,6 +494,7 @@ CONFIG_PPC_TRANSACTIONAL_MEM=y CONFIG_PPC_UV=y # CONFIG_LD_HEAD_STUB_CATCH is not set CONFIG_MPROFILE_KERNEL=y +CONFIG_ARCH_USING_PATCHABLE_FUNCTION_ENTRY=y CONFIG_HOTPLUG_CPU=y CONFIG_INTERRUPT_SANITIZE_REGISTERS=y CONFIG_PPC_QUEUED_SPINLOCKS=y @@ -6581,7 +6582,8 @@ CONFIG_BINARY_PRINTF=y CONFIG_RAID6_PQ=m # CONFIG_RAID6_PQ_BENCHMARK is not set CONFIG_LINEAR_RANGES=m -# CONFIG_PACKING is not set +CONFIG_PACKING=y +CONFIG_PACKING_KUNIT_TEST=m CONFIG_BITREVERSE=y CONFIG_GENERIC_STRNCPY_FROM_USER=y CONFIG_GENERIC_STRNLEN_USER=y @@ -7019,7 +7021,7 @@ CONFIG_BPF_EVENTS=y CONFIG_DYNAMIC_EVENTS=y CONFIG_PROBE_EVENTS=y CONFIG_FTRACE_MCOUNT_RECORD=y -CONFIG_FTRACE_MCOUNT_USE_CC=y +CONFIG_FTRACE_MCOUNT_USE_PATCHABLE_FUNCTION_ENTRY=y CONFIG_TRACING_MAP=y CONFIG_SYNTH_EVENTS=y # CONFIG_USER_EVENTS is not set diff --git a/configs/kernel-6.12.0-s390x-debug.config b/configs/kernel-6.12.0-s390x-debug.config index 7177f6fbda..c56eab0498 100644 --- a/configs/kernel-6.12.0-s390x-debug.config +++ b/configs/kernel-6.12.0-s390x-debug.config @@ -3833,7 +3833,8 @@ CONFIG_BINARY_PRINTF=y CONFIG_RAID6_PQ=m # CONFIG_RAID6_PQ_BENCHMARK is not set CONFIG_LINEAR_RANGES=m -# CONFIG_PACKING is not set +CONFIG_PACKING=y +CONFIG_PACKING_KUNIT_TEST=m CONFIG_BITREVERSE=y CONFIG_GENERIC_STRNCPY_FROM_USER=y CONFIG_GENERIC_STRNLEN_USER=y diff --git a/configs/kernel-6.12.0-s390x-zfcpdump.config b/configs/kernel-6.12.0-s390x-zfcpdump.config index bf999953a1..2e46b4ca37 100644 --- a/configs/kernel-6.12.0-s390x-zfcpdump.config +++ b/configs/kernel-6.12.0-s390x-zfcpdump.config @@ -1646,7 +1646,7 @@ CONFIG_S390_PRNG=y # # Library routines # -# CONFIG_PACKING is not set +CONFIG_PACKING=y CONFIG_BITREVERSE=y CONFIG_GENERIC_STRNCPY_FROM_USER=y CONFIG_GENERIC_STRNLEN_USER=y diff --git a/configs/kernel-6.12.0-s390x.config b/configs/kernel-6.12.0-s390x.config index 815e48faba..e29dd8a311 100644 --- a/configs/kernel-6.12.0-s390x.config +++ b/configs/kernel-6.12.0-s390x.config @@ -3856,7 +3856,8 @@ CONFIG_BINARY_PRINTF=y CONFIG_RAID6_PQ=m # CONFIG_RAID6_PQ_BENCHMARK is not set CONFIG_LINEAR_RANGES=m -# CONFIG_PACKING is not set +CONFIG_PACKING=y +CONFIG_PACKING_KUNIT_TEST=m CONFIG_BITREVERSE=y CONFIG_GENERIC_STRNCPY_FROM_USER=y CONFIG_GENERIC_STRNLEN_USER=y diff --git a/configs/kernel-6.12.0-x86_64-automotive-debug.config b/configs/kernel-6.12.0-x86_64-automotive-debug.config index 23f8513cec..c4a0ef6c5e 100644 --- a/configs/kernel-6.12.0-x86_64-automotive-debug.config +++ b/configs/kernel-6.12.0-x86_64-automotive-debug.config @@ -571,6 +571,7 @@ CONFIG_MITIGATION_SPECTRE_V1=y CONFIG_MITIGATION_SPECTRE_V2=y CONFIG_MITIGATION_SRBDS=y CONFIG_MITIGATION_SSB=y +CONFIG_MITIGATION_ITS=y CONFIG_ARCH_HAS_ADD_PAGES=y # @@ -896,6 +897,7 @@ CONFIG_ARCH_WANT_HUGE_PMD_SHARE=y CONFIG_HAVE_ARCH_SOFT_DIRTY=y CONFIG_HAVE_MOD_ARCH_SPECIFIC=y CONFIG_MODULES_USE_ELF_RELA=y +CONFIG_ARCH_HAS_EXECMEM_ROX=y CONFIG_HAVE_IRQ_EXIT_ON_IRQ_STACK=y CONFIG_HAVE_SOFTIRQ_ON_OWN_STACK=y CONFIG_ARCH_HAS_ELF_RANDOMIZE=y @@ -6735,7 +6737,8 @@ CONFIG_BINARY_PRINTF=y # Library routines # CONFIG_LINEAR_RANGES=y -# CONFIG_PACKING is not set +CONFIG_PACKING=y +CONFIG_PACKING_KUNIT_TEST=m CONFIG_BITREVERSE=y CONFIG_GENERIC_STRNCPY_FROM_USER=y CONFIG_GENERIC_STRNLEN_USER=y diff --git a/configs/kernel-6.12.0-x86_64-automotive.config b/configs/kernel-6.12.0-x86_64-automotive.config index 49b9d48c7c..7dce2ba96a 100644 --- a/configs/kernel-6.12.0-x86_64-automotive.config +++ b/configs/kernel-6.12.0-x86_64-automotive.config @@ -568,6 +568,7 @@ CONFIG_MITIGATION_SPECTRE_V1=y CONFIG_MITIGATION_SPECTRE_V2=y CONFIG_MITIGATION_SRBDS=y CONFIG_MITIGATION_SSB=y +CONFIG_MITIGATION_ITS=y CONFIG_ARCH_HAS_ADD_PAGES=y # @@ -892,6 +893,7 @@ CONFIG_ARCH_WANT_HUGE_PMD_SHARE=y CONFIG_HAVE_ARCH_SOFT_DIRTY=y CONFIG_HAVE_MOD_ARCH_SPECIFIC=y CONFIG_MODULES_USE_ELF_RELA=y +CONFIG_ARCH_HAS_EXECMEM_ROX=y CONFIG_HAVE_IRQ_EXIT_ON_IRQ_STACK=y CONFIG_HAVE_SOFTIRQ_ON_OWN_STACK=y CONFIG_ARCH_HAS_ELF_RANDOMIZE=y @@ -6716,7 +6718,8 @@ CONFIG_BINARY_PRINTF=y # Library routines # CONFIG_LINEAR_RANGES=y -# CONFIG_PACKING is not set +CONFIG_PACKING=y +CONFIG_PACKING_KUNIT_TEST=m CONFIG_BITREVERSE=y CONFIG_GENERIC_STRNCPY_FROM_USER=y CONFIG_GENERIC_STRNLEN_USER=y diff --git a/configs/kernel-6.12.0-x86_64-debug.config b/configs/kernel-6.12.0-x86_64-debug.config index 96a558651e..2641264a44 100644 --- a/configs/kernel-6.12.0-x86_64-debug.config +++ b/configs/kernel-6.12.0-x86_64-debug.config @@ -585,6 +585,7 @@ CONFIG_MITIGATION_SPECTRE_V1=y CONFIG_MITIGATION_SPECTRE_V2=y CONFIG_MITIGATION_SRBDS=y CONFIG_MITIGATION_SSB=y +CONFIG_MITIGATION_ITS=y CONFIG_ARCH_HAS_ADD_PAGES=y # @@ -920,6 +921,7 @@ CONFIG_ARCH_WANT_PMD_MKWRITE=y CONFIG_HAVE_ARCH_SOFT_DIRTY=y CONFIG_HAVE_MOD_ARCH_SPECIFIC=y CONFIG_MODULES_USE_ELF_RELA=y +CONFIG_ARCH_HAS_EXECMEM_ROX=y CONFIG_HAVE_IRQ_EXIT_ON_IRQ_STACK=y CONFIG_HAVE_SOFTIRQ_ON_OWN_STACK=y CONFIG_SOFTIRQ_ON_OWN_STACK=y @@ -9055,7 +9057,8 @@ CONFIG_BINARY_PRINTF=y CONFIG_RAID6_PQ=m # CONFIG_RAID6_PQ_BENCHMARK is not set CONFIG_LINEAR_RANGES=y -# CONFIG_PACKING is not set +CONFIG_PACKING=y +CONFIG_PACKING_KUNIT_TEST=m CONFIG_BITREVERSE=y CONFIG_GENERIC_STRNCPY_FROM_USER=y CONFIG_GENERIC_STRNLEN_USER=y diff --git a/configs/kernel-6.12.0-x86_64-rt-debug.config b/configs/kernel-6.12.0-x86_64-rt-debug.config index ef406994f3..033696714a 100644 --- a/configs/kernel-6.12.0-x86_64-rt-debug.config +++ b/configs/kernel-6.12.0-x86_64-rt-debug.config @@ -584,6 +584,7 @@ CONFIG_MITIGATION_SPECTRE_V1=y CONFIG_MITIGATION_SPECTRE_V2=y CONFIG_MITIGATION_SRBDS=y CONFIG_MITIGATION_SSB=y +CONFIG_MITIGATION_ITS=y CONFIG_ARCH_HAS_ADD_PAGES=y # @@ -918,6 +919,7 @@ CONFIG_ARCH_WANT_HUGE_PMD_SHARE=y CONFIG_HAVE_ARCH_SOFT_DIRTY=y CONFIG_HAVE_MOD_ARCH_SPECIFIC=y CONFIG_MODULES_USE_ELF_RELA=y +CONFIG_ARCH_HAS_EXECMEM_ROX=y CONFIG_HAVE_IRQ_EXIT_ON_IRQ_STACK=y CONFIG_HAVE_SOFTIRQ_ON_OWN_STACK=y CONFIG_ARCH_HAS_ELF_RANDOMIZE=y @@ -9036,7 +9038,8 @@ CONFIG_BINARY_PRINTF=y CONFIG_RAID6_PQ=m # CONFIG_RAID6_PQ_BENCHMARK is not set CONFIG_LINEAR_RANGES=y -# CONFIG_PACKING is not set +CONFIG_PACKING=y +CONFIG_PACKING_KUNIT_TEST=m CONFIG_BITREVERSE=y CONFIG_GENERIC_STRNCPY_FROM_USER=y CONFIG_GENERIC_STRNLEN_USER=y diff --git a/configs/kernel-6.12.0-x86_64-rt.config b/configs/kernel-6.12.0-x86_64-rt.config index ef3c7cd5e9..cfe7a1c2ba 100644 --- a/configs/kernel-6.12.0-x86_64-rt.config +++ b/configs/kernel-6.12.0-x86_64-rt.config @@ -581,6 +581,7 @@ CONFIG_MITIGATION_SPECTRE_V1=y CONFIG_MITIGATION_SPECTRE_V2=y CONFIG_MITIGATION_SRBDS=y CONFIG_MITIGATION_SSB=y +CONFIG_MITIGATION_ITS=y CONFIG_ARCH_HAS_ADD_PAGES=y # @@ -914,6 +915,7 @@ CONFIG_ARCH_WANT_HUGE_PMD_SHARE=y CONFIG_HAVE_ARCH_SOFT_DIRTY=y CONFIG_HAVE_MOD_ARCH_SPECIFIC=y CONFIG_MODULES_USE_ELF_RELA=y +CONFIG_ARCH_HAS_EXECMEM_ROX=y CONFIG_HAVE_IRQ_EXIT_ON_IRQ_STACK=y CONFIG_HAVE_SOFTIRQ_ON_OWN_STACK=y CONFIG_ARCH_HAS_ELF_RANDOMIZE=y @@ -9008,7 +9010,8 @@ CONFIG_BINARY_PRINTF=y CONFIG_RAID6_PQ=m # CONFIG_RAID6_PQ_BENCHMARK is not set CONFIG_LINEAR_RANGES=y -# CONFIG_PACKING is not set +CONFIG_PACKING=y +CONFIG_PACKING_KUNIT_TEST=m CONFIG_BITREVERSE=y CONFIG_GENERIC_STRNCPY_FROM_USER=y CONFIG_GENERIC_STRNLEN_USER=y diff --git a/configs/kernel-6.12.0-x86_64.config b/configs/kernel-6.12.0-x86_64.config index 629a8edb8a..a377d80792 100644 --- a/configs/kernel-6.12.0-x86_64.config +++ b/configs/kernel-6.12.0-x86_64.config @@ -582,6 +582,7 @@ CONFIG_MITIGATION_SPECTRE_V1=y CONFIG_MITIGATION_SPECTRE_V2=y CONFIG_MITIGATION_SRBDS=y CONFIG_MITIGATION_SSB=y +CONFIG_MITIGATION_ITS=y CONFIG_ARCH_HAS_ADD_PAGES=y # @@ -916,6 +917,7 @@ CONFIG_ARCH_WANT_PMD_MKWRITE=y CONFIG_HAVE_ARCH_SOFT_DIRTY=y CONFIG_HAVE_MOD_ARCH_SPECIFIC=y CONFIG_MODULES_USE_ELF_RELA=y +CONFIG_ARCH_HAS_EXECMEM_ROX=y CONFIG_HAVE_IRQ_EXIT_ON_IRQ_STACK=y CONFIG_HAVE_SOFTIRQ_ON_OWN_STACK=y CONFIG_SOFTIRQ_ON_OWN_STACK=y @@ -9027,7 +9029,8 @@ CONFIG_BINARY_PRINTF=y CONFIG_RAID6_PQ=m # CONFIG_RAID6_PQ_BENCHMARK is not set CONFIG_LINEAR_RANGES=y -# CONFIG_PACKING is not set +CONFIG_PACKING=y +CONFIG_PACKING_KUNIT_TEST=m CONFIG_BITREVERSE=y CONFIG_GENERIC_STRNCPY_FROM_USER=y CONFIG_GENERIC_STRNLEN_USER=y diff --git a/configs/kernel-6.12.0-x86_64_v2-debug.config b/configs/kernel-6.12.0-x86_64_v2-debug.config index 96a558651e..2641264a44 100644 --- a/configs/kernel-6.12.0-x86_64_v2-debug.config +++ b/configs/kernel-6.12.0-x86_64_v2-debug.config @@ -585,6 +585,7 @@ CONFIG_MITIGATION_SPECTRE_V1=y CONFIG_MITIGATION_SPECTRE_V2=y CONFIG_MITIGATION_SRBDS=y CONFIG_MITIGATION_SSB=y +CONFIG_MITIGATION_ITS=y CONFIG_ARCH_HAS_ADD_PAGES=y # @@ -920,6 +921,7 @@ CONFIG_ARCH_WANT_PMD_MKWRITE=y CONFIG_HAVE_ARCH_SOFT_DIRTY=y CONFIG_HAVE_MOD_ARCH_SPECIFIC=y CONFIG_MODULES_USE_ELF_RELA=y +CONFIG_ARCH_HAS_EXECMEM_ROX=y CONFIG_HAVE_IRQ_EXIT_ON_IRQ_STACK=y CONFIG_HAVE_SOFTIRQ_ON_OWN_STACK=y CONFIG_SOFTIRQ_ON_OWN_STACK=y @@ -9055,7 +9057,8 @@ CONFIG_BINARY_PRINTF=y CONFIG_RAID6_PQ=m # CONFIG_RAID6_PQ_BENCHMARK is not set CONFIG_LINEAR_RANGES=y -# CONFIG_PACKING is not set +CONFIG_PACKING=y +CONFIG_PACKING_KUNIT_TEST=m CONFIG_BITREVERSE=y CONFIG_GENERIC_STRNCPY_FROM_USER=y CONFIG_GENERIC_STRNLEN_USER=y diff --git a/configs/kernel-6.12.0-x86_64_v2-rt-debug.config b/configs/kernel-6.12.0-x86_64_v2-rt-debug.config index ef406994f3..033696714a 100644 --- a/configs/kernel-6.12.0-x86_64_v2-rt-debug.config +++ b/configs/kernel-6.12.0-x86_64_v2-rt-debug.config @@ -584,6 +584,7 @@ CONFIG_MITIGATION_SPECTRE_V1=y CONFIG_MITIGATION_SPECTRE_V2=y CONFIG_MITIGATION_SRBDS=y CONFIG_MITIGATION_SSB=y +CONFIG_MITIGATION_ITS=y CONFIG_ARCH_HAS_ADD_PAGES=y # @@ -918,6 +919,7 @@ CONFIG_ARCH_WANT_HUGE_PMD_SHARE=y CONFIG_HAVE_ARCH_SOFT_DIRTY=y CONFIG_HAVE_MOD_ARCH_SPECIFIC=y CONFIG_MODULES_USE_ELF_RELA=y +CONFIG_ARCH_HAS_EXECMEM_ROX=y CONFIG_HAVE_IRQ_EXIT_ON_IRQ_STACK=y CONFIG_HAVE_SOFTIRQ_ON_OWN_STACK=y CONFIG_ARCH_HAS_ELF_RANDOMIZE=y @@ -9036,7 +9038,8 @@ CONFIG_BINARY_PRINTF=y CONFIG_RAID6_PQ=m # CONFIG_RAID6_PQ_BENCHMARK is not set CONFIG_LINEAR_RANGES=y -# CONFIG_PACKING is not set +CONFIG_PACKING=y +CONFIG_PACKING_KUNIT_TEST=m CONFIG_BITREVERSE=y CONFIG_GENERIC_STRNCPY_FROM_USER=y CONFIG_GENERIC_STRNLEN_USER=y diff --git a/configs/kernel-6.12.0-x86_64_v2-rt.config b/configs/kernel-6.12.0-x86_64_v2-rt.config index ef3c7cd5e9..cfe7a1c2ba 100644 --- a/configs/kernel-6.12.0-x86_64_v2-rt.config +++ b/configs/kernel-6.12.0-x86_64_v2-rt.config @@ -581,6 +581,7 @@ CONFIG_MITIGATION_SPECTRE_V1=y CONFIG_MITIGATION_SPECTRE_V2=y CONFIG_MITIGATION_SRBDS=y CONFIG_MITIGATION_SSB=y +CONFIG_MITIGATION_ITS=y CONFIG_ARCH_HAS_ADD_PAGES=y # @@ -914,6 +915,7 @@ CONFIG_ARCH_WANT_HUGE_PMD_SHARE=y CONFIG_HAVE_ARCH_SOFT_DIRTY=y CONFIG_HAVE_MOD_ARCH_SPECIFIC=y CONFIG_MODULES_USE_ELF_RELA=y +CONFIG_ARCH_HAS_EXECMEM_ROX=y CONFIG_HAVE_IRQ_EXIT_ON_IRQ_STACK=y CONFIG_HAVE_SOFTIRQ_ON_OWN_STACK=y CONFIG_ARCH_HAS_ELF_RANDOMIZE=y @@ -9008,7 +9010,8 @@ CONFIG_BINARY_PRINTF=y CONFIG_RAID6_PQ=m # CONFIG_RAID6_PQ_BENCHMARK is not set CONFIG_LINEAR_RANGES=y -# CONFIG_PACKING is not set +CONFIG_PACKING=y +CONFIG_PACKING_KUNIT_TEST=m CONFIG_BITREVERSE=y CONFIG_GENERIC_STRNCPY_FROM_USER=y CONFIG_GENERIC_STRNLEN_USER=y diff --git a/configs/kernel-6.12.0-x86_64_v2.config b/configs/kernel-6.12.0-x86_64_v2.config index 629a8edb8a..a377d80792 100644 --- a/configs/kernel-6.12.0-x86_64_v2.config +++ b/configs/kernel-6.12.0-x86_64_v2.config @@ -582,6 +582,7 @@ CONFIG_MITIGATION_SPECTRE_V1=y CONFIG_MITIGATION_SPECTRE_V2=y CONFIG_MITIGATION_SRBDS=y CONFIG_MITIGATION_SSB=y +CONFIG_MITIGATION_ITS=y CONFIG_ARCH_HAS_ADD_PAGES=y # @@ -916,6 +917,7 @@ CONFIG_ARCH_WANT_PMD_MKWRITE=y CONFIG_HAVE_ARCH_SOFT_DIRTY=y CONFIG_HAVE_MOD_ARCH_SPECIFIC=y CONFIG_MODULES_USE_ELF_RELA=y +CONFIG_ARCH_HAS_EXECMEM_ROX=y CONFIG_HAVE_IRQ_EXIT_ON_IRQ_STACK=y CONFIG_HAVE_SOFTIRQ_ON_OWN_STACK=y CONFIG_SOFTIRQ_ON_OWN_STACK=y @@ -9027,7 +9029,8 @@ CONFIG_BINARY_PRINTF=y CONFIG_RAID6_PQ=m # CONFIG_RAID6_PQ_BENCHMARK is not set CONFIG_LINEAR_RANGES=y -# CONFIG_PACKING is not set +CONFIG_PACKING=y +CONFIG_PACKING_KUNIT_TEST=m CONFIG_BITREVERSE=y CONFIG_GENERIC_STRNCPY_FROM_USER=y CONFIG_GENERIC_STRNLEN_USER=y diff --git a/drivers/base/cpu.c b/drivers/base/cpu.c index fdaa24bb64..d88f721cf6 100644 --- a/drivers/base/cpu.c +++ b/drivers/base/cpu.c @@ -599,6 +599,7 @@ CPU_SHOW_VULN_FALLBACK(retbleed); CPU_SHOW_VULN_FALLBACK(spec_rstack_overflow); CPU_SHOW_VULN_FALLBACK(gds); CPU_SHOW_VULN_FALLBACK(reg_file_data_sampling); +CPU_SHOW_VULN_FALLBACK(indirect_target_selection); static DEVICE_ATTR(meltdown, 0444, cpu_show_meltdown, NULL); static DEVICE_ATTR(spectre_v1, 0444, cpu_show_spectre_v1, NULL); @@ -614,6 +615,7 @@ static DEVICE_ATTR(retbleed, 0444, cpu_show_retbleed, NULL); static DEVICE_ATTR(spec_rstack_overflow, 0444, cpu_show_spec_rstack_overflow, NULL); static DEVICE_ATTR(gather_data_sampling, 0444, cpu_show_gds, NULL); static DEVICE_ATTR(reg_file_data_sampling, 0444, cpu_show_reg_file_data_sampling, NULL); +static DEVICE_ATTR(indirect_target_selection, 0444, cpu_show_indirect_target_selection, NULL); static struct attribute *cpu_root_vulnerabilities_attrs[] = { &dev_attr_meltdown.attr, @@ -630,6 +632,7 @@ static struct attribute *cpu_root_vulnerabilities_attrs[] = { &dev_attr_spec_rstack_overflow.attr, &dev_attr_gather_data_sampling.attr, &dev_attr_reg_file_data_sampling.attr, + &dev_attr_indirect_target_selection.attr, NULL }; diff --git a/drivers/edac/i10nm_base.c b/drivers/edac/i10nm_base.c index e2a954de91..007dca7aa5 100644 --- a/drivers/edac/i10nm_base.c +++ b/drivers/edac/i10nm_base.c @@ -938,16 +938,17 @@ static struct res_config gnr_cfg = { }; static const struct x86_cpu_id i10nm_cpuids[] = { - X86_MATCH_VFM_STEPPINGS(INTEL_ATOM_TREMONT_D, X86_STEPPINGS(0x0, 0x3), &i10nm_cfg0), - X86_MATCH_VFM_STEPPINGS(INTEL_ATOM_TREMONT_D, X86_STEPPINGS(0x4, 0xf), &i10nm_cfg1), - X86_MATCH_VFM_STEPPINGS(INTEL_ICELAKE_X, X86_STEPPINGS(0x0, 0x3), &i10nm_cfg0), - X86_MATCH_VFM_STEPPINGS(INTEL_ICELAKE_X, X86_STEPPINGS(0x4, 0xf), &i10nm_cfg1), - X86_MATCH_VFM_STEPPINGS(INTEL_ICELAKE_D, X86_STEPPINGS(0x0, 0xf), &i10nm_cfg1), - X86_MATCH_VFM_STEPPINGS(INTEL_SAPPHIRERAPIDS_X, X86_STEPPINGS(0x0, 0xf), &spr_cfg), - X86_MATCH_VFM_STEPPINGS(INTEL_EMERALDRAPIDS_X, X86_STEPPINGS(0x0, 0xf), &spr_cfg), - X86_MATCH_VFM_STEPPINGS(INTEL_GRANITERAPIDS_X, X86_STEPPINGS(0x0, 0xf), &gnr_cfg), - X86_MATCH_VFM_STEPPINGS(INTEL_ATOM_CRESTMONT_X, X86_STEPPINGS(0x0, 0xf), &gnr_cfg), - X86_MATCH_VFM_STEPPINGS(INTEL_ATOM_CRESTMONT, X86_STEPPINGS(0x0, 0xf), &gnr_cfg), + X86_MATCH_VFM_STEPS(INTEL_ATOM_TREMONT_D, X86_STEP_MIN, 0x3, &i10nm_cfg0), + X86_MATCH_VFM_STEPS(INTEL_ATOM_TREMONT_D, 0x4, X86_STEP_MAX, &i10nm_cfg1), + X86_MATCH_VFM_STEPS(INTEL_ICELAKE_X, X86_STEP_MIN, 0x3, &i10nm_cfg0), + X86_MATCH_VFM_STEPS(INTEL_ICELAKE_X, 0x4, X86_STEP_MAX, &i10nm_cfg1), + X86_MATCH_VFM( INTEL_ICELAKE_D, &i10nm_cfg1), + + X86_MATCH_VFM(INTEL_SAPPHIRERAPIDS_X, &spr_cfg), + X86_MATCH_VFM(INTEL_EMERALDRAPIDS_X, &spr_cfg), + X86_MATCH_VFM(INTEL_GRANITERAPIDS_X, &gnr_cfg), + X86_MATCH_VFM(INTEL_ATOM_CRESTMONT_X, &gnr_cfg), + X86_MATCH_VFM(INTEL_ATOM_CRESTMONT, &gnr_cfg), {} }; MODULE_DEVICE_TABLE(x86cpu, i10nm_cpuids); diff --git a/drivers/edac/skx_base.c b/drivers/edac/skx_base.c index 14cfd394b4..fed5ecb4b0 100644 --- a/drivers/edac/skx_base.c +++ b/drivers/edac/skx_base.c @@ -164,7 +164,7 @@ static struct res_config skx_cfg = { }; static const struct x86_cpu_id skx_cpuids[] = { - X86_MATCH_VFM_STEPPINGS(INTEL_SKYLAKE_X, X86_STEPPINGS(0x0, 0xf), &skx_cfg), + X86_MATCH_VFM(INTEL_SKYLAKE_X, &skx_cfg), { } }; MODULE_DEVICE_TABLE(x86cpu, skx_cpuids); diff --git a/drivers/net/dsa/sja1105/sja1105_static_config.c b/drivers/net/dsa/sja1105/sja1105_static_config.c index baba204ad6..3d790f8c6f 100644 --- a/drivers/net/dsa/sja1105/sja1105_static_config.c +++ b/drivers/net/dsa/sja1105/sja1105_static_config.c @@ -26,12 +26,8 @@ void sja1105_pack(void *buf, const u64 *val, int start, int end, size_t len) pr_err("Start bit (%d) expected to be larger than end (%d)\n", start, end); } else if (rc == -ERANGE) { - if ((start - end + 1) > 64) - pr_err("Field %d-%d too large for 64 bits!\n", - start, end); - else - pr_err("Cannot store %llx inside bits %d-%d (would truncate)\n", - *val, start, end); + pr_err("Field %d-%d too large for 64 bits!\n", + start, end); } dump_stack(); } diff --git a/drivers/net/ethernet/chelsio/cxgb4/cxgb4_main.c b/drivers/net/ethernet/chelsio/cxgb4/cxgb4_main.c index fb3933fbb8..757c6484f5 100644 --- a/drivers/net/ethernet/chelsio/cxgb4/cxgb4_main.c +++ b/drivers/net/ethernet/chelsio/cxgb4/cxgb4_main.c @@ -1799,7 +1799,10 @@ void cxgb4_remove_tid(struct tid_info *t, unsigned int chan, unsigned int tid, struct adapter *adap = container_of(t, struct adapter, tids); struct sk_buff *skb; - WARN_ON(tid_out_of_range(&adap->tids, tid)); + if (tid_out_of_range(&adap->tids, tid)) { + dev_err(adap->pdev_dev, "tid %d out of range\n", tid); + return; + } if (t->tid_tab[tid - adap->tids.tid_base]) { t->tid_tab[tid - adap->tids.tid_base] = NULL; diff --git a/drivers/net/ethernet/intel/Kconfig b/drivers/net/ethernet/intel/Kconfig index 0375c7448a..a8bbd982b5 100644 --- a/drivers/net/ethernet/intel/Kconfig +++ b/drivers/net/ethernet/intel/Kconfig @@ -291,6 +291,7 @@ config ICE select DIMLIB select LIBIE select NET_DEVLINK + select PACKING select PLDMFW select DPLL help @@ -334,7 +335,7 @@ config ICE_SWITCHDEV config ICE_HWTS bool "Support HW cross-timestamp on platforms with PTM support" default y - depends on ICE && X86 + depends on ICE && X86 && PCIE_PTM help Say Y to enable hardware supported cross-timestamping on platforms with PCIe PTM support. The cross-timestamp is available through diff --git a/drivers/net/ethernet/intel/ice/Makefile b/drivers/net/ethernet/intel/ice/Makefile index 3307d551f4..d0f9c94923 100644 --- a/drivers/net/ethernet/intel/ice/Makefile +++ b/drivers/net/ethernet/intel/ice/Makefile @@ -32,7 +32,8 @@ ice-y := ice_main.o \ ice_parser_rt.o \ ice_idc.o \ devlink/devlink.o \ - devlink/devlink_port.o \ + devlink/health.o \ + devlink/port.o \ ice_sf_eth.o \ ice_sf_vsi_vlan_ops.o \ ice_ddp.o \ @@ -52,7 +53,7 @@ ice-$(CONFIG_PCI_IOV) += \ ice_vf_mbx.o \ ice_vf_vsi_vlan_ops.o \ ice_vf_lib.o -ice-$(CONFIG_PTP_1588_CLOCK) += ice_ptp.o ice_ptp_hw.o ice_dpll.o +ice-$(CONFIG_PTP_1588_CLOCK) += ice_ptp.o ice_ptp_hw.o ice_dpll.o ice_tspll.o ice-$(CONFIG_DCB) += ice_dcb.o ice_dcb_nl.o ice_dcb_lib.o ice-$(CONFIG_RFS_ACCEL) += ice_arfs.o ice-$(CONFIG_XDP_SOCKETS) += ice_xsk.o diff --git a/drivers/net/ethernet/intel/ice/devlink/devlink.c b/drivers/net/ethernet/intel/ice/devlink/devlink.c index 0f48c4f437..8726bba97f 100644 --- a/drivers/net/ethernet/intel/ice/devlink/devlink.c +++ b/drivers/net/ethernet/intel/ice/devlink/devlink.c @@ -6,7 +6,7 @@ #include "ice.h" #include "ice_lib.h" #include "devlink.h" -#include "devlink_port.h" +#include "port.h" #include "ice_eswitch.h" #include "ice_fw_update.h" #include "ice_dcb_lib.h" @@ -977,6 +977,9 @@ static int ice_devlink_rate_node_new(struct devlink_rate *rate_node, void **priv /* preallocate memory for ice_sched_node */ node = devm_kzalloc(ice_hw_to_dev(pi->hw), sizeof(*node), GFP_KERNEL); + if (!node) + return -ENOMEM; + *priv = node; return 0; diff --git a/drivers/net/ethernet/intel/ice/devlink/health.c b/drivers/net/ethernet/intel/ice/devlink/health.c new file mode 100644 index 0000000000..19c3d37aa7 --- /dev/null +++ b/drivers/net/ethernet/intel/ice/devlink/health.c @@ -0,0 +1,550 @@ +// SPDX-License-Identifier: GPL-2.0 +/* Copyright (c) 2024, Intel Corporation. */ + +#include "ice.h" +#include "ice_adminq_cmd.h" /* for enum ice_aqc_health_status_elem */ +#include "health.h" + +#define ICE_DEVLINK_FMSG_PUT_FIELD(fmsg, obj, name) \ + devlink_fmsg_put(fmsg, #name, (obj)->name) + +#define ICE_HEALTH_STATUS_DATA_SIZE 2 + +struct ice_health_status { + enum ice_aqc_health_status code; + const char *description; + const char *solution; + const char *data_label[ICE_HEALTH_STATUS_DATA_SIZE]; +}; + +/* + * In addition to the health status codes provided below, the firmware might + * generate Health Status Codes that are not pertinent to the end-user. + * For instance, Health Code 0x1002 is triggered when the command fails. + * Such codes should be disregarded by the end-user. + * The below lookup requires to be sorted by code. + */ + +static const char ice_common_port_solutions[] = + "Check your cable connection. Change or replace the module or cable. Manually set speed and duplex."; +static const char ice_port_number_label[] = "Port Number"; +static const char ice_update_nvm_solution[] = "Update to the latest NVM image."; + +static const struct ice_health_status ice_health_status_lookup[] = { + {ICE_AQC_HEALTH_STATUS_ERR_UNKNOWN_MOD_STRICT, "An unsupported module was detected.", + ice_common_port_solutions, {ice_port_number_label}}, + {ICE_AQC_HEALTH_STATUS_ERR_MOD_TYPE, "Module type is not supported.", + "Change or replace the module or cable.", {ice_port_number_label}}, + {ICE_AQC_HEALTH_STATUS_ERR_MOD_QUAL, "Module is not qualified.", + ice_common_port_solutions, {ice_port_number_label}}, + {ICE_AQC_HEALTH_STATUS_ERR_MOD_COMM, + "Device cannot communicate with the module.", + "Check your cable connection. Change or replace the module or cable. Manually set speed and duplex.", + {ice_port_number_label}}, + {ICE_AQC_HEALTH_STATUS_ERR_MOD_CONFLICT, "Unresolved module conflict.", + "Manually set speed/duplex or change the port option. If the problem persists, use a cable/module that is found in the supported modules and cables list for this device.", + {ice_port_number_label}}, + {ICE_AQC_HEALTH_STATUS_ERR_MOD_NOT_PRESENT, "Module is not present.", + "Check that the module is inserted correctly. If the problem persists, use a cable/module that is found in the supported modules and cables list for this device.", + {ice_port_number_label}}, + {ICE_AQC_HEALTH_STATUS_INFO_MOD_UNDERUTILIZED, "Underutilized module.", + "Change or replace the module or cable. Change the port option.", + {ice_port_number_label}}, + {ICE_AQC_HEALTH_STATUS_ERR_UNKNOWN_MOD_LENIENT, "An unsupported module was detected.", + ice_common_port_solutions, {ice_port_number_label}}, + {ICE_AQC_HEALTH_STATUS_ERR_INVALID_LINK_CFG, "Invalid link configuration.", + NULL, {ice_port_number_label}}, + {ICE_AQC_HEALTH_STATUS_ERR_PORT_ACCESS, "Port hardware access error.", + ice_update_nvm_solution, {ice_port_number_label}}, + {ICE_AQC_HEALTH_STATUS_ERR_PORT_UNREACHABLE, "A port is unreachable.", + "Change the port option. Update to the latest NVM image."}, + {ICE_AQC_HEALTH_STATUS_INFO_PORT_SPEED_MOD_LIMITED, "Port speed is limited due to module.", + "Change the module or configure the port option to match the current module speed. Change the port option.", + {ice_port_number_label}}, + {ICE_AQC_HEALTH_STATUS_ERR_PARALLEL_FAULT, + "All configured link modes were attempted but failed to establish link. The device will restart the process to establish link.", + "Check link partner connection and configuration.", + {ice_port_number_label}}, + {ICE_AQC_HEALTH_STATUS_INFO_PORT_SPEED_PHY_LIMITED, + "Port speed is limited by PHY capabilities.", + "Change the module to align to port option.", {ice_port_number_label}}, + {ICE_AQC_HEALTH_STATUS_ERR_NETLIST_TOPO, "LOM topology netlist is corrupted.", + ice_update_nvm_solution, {ice_port_number_label}}, + {ICE_AQC_HEALTH_STATUS_ERR_NETLIST, "Unrecoverable netlist error.", + ice_update_nvm_solution, {ice_port_number_label}}, + {ICE_AQC_HEALTH_STATUS_ERR_TOPO_CONFLICT, "Port topology conflict.", + "Change the port option. Update to the latest NVM image."}, + {ICE_AQC_HEALTH_STATUS_ERR_LINK_HW_ACCESS, "Unrecoverable hardware access error.", + ice_update_nvm_solution, {ice_port_number_label}}, + {ICE_AQC_HEALTH_STATUS_ERR_LINK_RUNTIME, "Unrecoverable runtime error.", + ice_update_nvm_solution, {ice_port_number_label}}, + {ICE_AQC_HEALTH_STATUS_ERR_DNL_INIT, "Link management engine failed to initialize.", + ice_update_nvm_solution, {ice_port_number_label}}, + {ICE_AQC_HEALTH_STATUS_ERR_PHY_FW_LOAD, + "Failed to load the firmware image in the external PHY.", + ice_update_nvm_solution, {ice_port_number_label}}, + {ICE_AQC_HEALTH_STATUS_INFO_RECOVERY, "The device is in firmware recovery mode.", + ice_update_nvm_solution, {"Extended Error"}}, + {ICE_AQC_HEALTH_STATUS_ERR_FLASH_ACCESS, "The flash chip cannot be accessed.", + "If issue persists, call customer support.", {"Access Type"}}, + {ICE_AQC_HEALTH_STATUS_ERR_NVM_AUTH, "NVM authentication failed.", + ice_update_nvm_solution}, + {ICE_AQC_HEALTH_STATUS_ERR_OROM_AUTH, "Option ROM authentication failed.", + ice_update_nvm_solution}, + {ICE_AQC_HEALTH_STATUS_ERR_DDP_AUTH, "DDP package authentication failed.", + "Update to latest base driver and DDP package."}, + {ICE_AQC_HEALTH_STATUS_ERR_NVM_COMPAT, "NVM image is incompatible.", + ice_update_nvm_solution}, + {ICE_AQC_HEALTH_STATUS_ERR_OROM_COMPAT, "Option ROM is incompatible.", + ice_update_nvm_solution, {"Expected PCI Device ID", "Expected Module ID"}}, + {ICE_AQC_HEALTH_STATUS_ERR_DCB_MIB, + "Supplied MIB file is invalid. DCB reverted to default configuration.", + "Disable FW-LLDP and check DCBx system configuration.", + {ice_port_number_label, "MIB ID"}}, +}; + +static int ice_health_status_lookup_compare(const void *a, const void *b) +{ + return ((struct ice_health_status *)a)->code - ((struct ice_health_status *)b)->code; +} + +static const struct ice_health_status *ice_get_health_status(u16 code) +{ + struct ice_health_status key = { .code = code }; + + return bsearch(&key, ice_health_status_lookup, ARRAY_SIZE(ice_health_status_lookup), + sizeof(struct ice_health_status), ice_health_status_lookup_compare); +} + +static void ice_describe_status_code(struct devlink_fmsg *fmsg, + struct ice_aqc_health_status_elem *hse) +{ + static const char *const aux_label[] = { "Aux Data 1", "Aux Data 2" }; + const struct ice_health_status *health_code; + u32 internal_data[2]; + u16 status_code; + + status_code = le16_to_cpu(hse->health_status_code); + + devlink_fmsg_put(fmsg, "Syndrome", status_code); + if (status_code) { + internal_data[0] = le32_to_cpu(hse->internal_data1); + internal_data[1] = le32_to_cpu(hse->internal_data2); + + health_code = ice_get_health_status(status_code); + if (!health_code) + return; + + devlink_fmsg_string_pair_put(fmsg, "Description", health_code->description); + if (health_code->solution) + devlink_fmsg_string_pair_put(fmsg, "Possible Solution", + health_code->solution); + + for (size_t i = 0; i < ICE_HEALTH_STATUS_DATA_SIZE; i++) { + if (internal_data[i] != ICE_AQC_HEALTH_STATUS_UNDEFINED_DATA) + devlink_fmsg_u32_pair_put(fmsg, + health_code->data_label[i] ? + health_code->data_label[i] : + aux_label[i], + internal_data[i]); + } + } +} + +static int +ice_port_reporter_diagnose(struct devlink_health_reporter *reporter, struct devlink_fmsg *fmsg, + struct netlink_ext_ack *extack) +{ + struct ice_pf *pf = devlink_health_reporter_priv(reporter); + + ice_describe_status_code(fmsg, &pf->health_reporters.port_status); + return 0; +} + +static int +ice_port_reporter_dump(struct devlink_health_reporter *reporter, struct devlink_fmsg *fmsg, + void *priv_ctx, struct netlink_ext_ack __always_unused *extack) +{ + struct ice_pf *pf = devlink_health_reporter_priv(reporter); + + ice_describe_status_code(fmsg, &pf->health_reporters.port_status); + return 0; +} + +static int +ice_fw_reporter_diagnose(struct devlink_health_reporter *reporter, struct devlink_fmsg *fmsg, + struct netlink_ext_ack *extack) +{ + struct ice_pf *pf = devlink_health_reporter_priv(reporter); + + ice_describe_status_code(fmsg, &pf->health_reporters.fw_status); + return 0; +} + +static int +ice_fw_reporter_dump(struct devlink_health_reporter *reporter, struct devlink_fmsg *fmsg, + void *priv_ctx, struct netlink_ext_ack *extack) +{ + struct ice_pf *pf = devlink_health_reporter_priv(reporter); + + ice_describe_status_code(fmsg, &pf->health_reporters.fw_status); + return 0; +} + +static void ice_config_health_events(struct ice_pf *pf, bool enable) +{ + u8 enable_bits = 0; + int ret; + + if (enable) + enable_bits = ICE_AQC_HEALTH_STATUS_SET_PF_SPECIFIC_MASK | + ICE_AQC_HEALTH_STATUS_SET_GLOBAL_MASK; + + ret = ice_aq_set_health_status_cfg(&pf->hw, enable_bits); + if (ret) + dev_err(ice_pf_to_dev(pf), "Failed to %s firmware health events, err %d aq_err %s\n", + str_enable_disable(enable), ret, + ice_aq_str(pf->hw.adminq.sq_last_status)); +} + +/** + * ice_process_health_status_event - Process the health status event from FW + * @pf: pointer to the PF structure + * @event: event structure containing the Health Status Event opcode + * + * Decode the Health Status Events and print the associated messages + */ +void ice_process_health_status_event(struct ice_pf *pf, struct ice_rq_event_info *event) +{ + const struct ice_aqc_health_status_elem *health_info; + u16 count; + + health_info = (struct ice_aqc_health_status_elem *)event->msg_buf; + count = le16_to_cpu(event->desc.params.get_health_status.health_status_count); + + if (count > (event->buf_len / sizeof(*health_info))) { + dev_err(ice_pf_to_dev(pf), "Received a health status event with invalid element count\n"); + return; + } + + for (size_t i = 0; i < count; i++) { + const struct ice_health_status *health_code; + u16 status_code; + + status_code = le16_to_cpu(health_info->health_status_code); + health_code = ice_get_health_status(status_code); + + if (health_code) { + switch (le16_to_cpu(health_info->event_source)) { + case ICE_AQC_HEALTH_STATUS_GLOBAL: + pf->health_reporters.fw_status = *health_info; + devlink_health_report(pf->health_reporters.fw, + "FW syndrome reported", NULL); + break; + case ICE_AQC_HEALTH_STATUS_PF: + case ICE_AQC_HEALTH_STATUS_PORT: + pf->health_reporters.port_status = *health_info; + devlink_health_report(pf->health_reporters.port, + "Port syndrome reported", NULL); + break; + default: + dev_err(ice_pf_to_dev(pf), "Health code with unknown source\n"); + } + } else { + u32 data1, data2; + u16 source; + + source = le16_to_cpu(health_info->event_source); + data1 = le32_to_cpu(health_info->internal_data1); + data2 = le32_to_cpu(health_info->internal_data2); + dev_dbg(ice_pf_to_dev(pf), + "Received internal health status code 0x%08x, source: 0x%08x, data1: 0x%08x, data2: 0x%08x", + status_code, source, data1, data2); + } + health_info++; + } +} + +/** + * ice_devlink_health_report - boilerplate to call given @reporter + * + * @reporter: devlink health reporter to call, do nothing on NULL + * @msg: message to pass up, "event name" is fine + * @priv_ctx: typically some event struct + */ +static void ice_devlink_health_report(struct devlink_health_reporter *reporter, + const char *msg, void *priv_ctx) +{ + if (!reporter) + return; + + /* We do not do auto recovering, so return value of the below function + * will always be 0, thus we do ignore it. + */ + devlink_health_report(reporter, msg, priv_ctx); +} + +struct ice_mdd_event { + enum ice_mdd_src src; + u16 vf_num; + u16 queue; + u8 pf_num; + u8 event; +}; + +static const char *ice_mdd_src_to_str(enum ice_mdd_src src) +{ + switch (src) { + case ICE_MDD_SRC_TX_PQM: + return "tx_pqm"; + case ICE_MDD_SRC_TX_TCLAN: + return "tx_tclan"; + case ICE_MDD_SRC_TX_TDPU: + return "tx_tdpu"; + case ICE_MDD_SRC_RX: + return "rx"; + default: + return "invalid"; + } +} + +static int +ice_mdd_reporter_dump(struct devlink_health_reporter *reporter, + struct devlink_fmsg *fmsg, void *priv_ctx, + struct netlink_ext_ack *extack) +{ + struct ice_mdd_event *mdd_event = priv_ctx; + const char *src; + + if (!mdd_event) + return 0; + + src = ice_mdd_src_to_str(mdd_event->src); + + devlink_fmsg_obj_nest_start(fmsg); + devlink_fmsg_put(fmsg, "src", src); + ICE_DEVLINK_FMSG_PUT_FIELD(fmsg, mdd_event, pf_num); + ICE_DEVLINK_FMSG_PUT_FIELD(fmsg, mdd_event, vf_num); + ICE_DEVLINK_FMSG_PUT_FIELD(fmsg, mdd_event, event); + ICE_DEVLINK_FMSG_PUT_FIELD(fmsg, mdd_event, queue); + devlink_fmsg_obj_nest_end(fmsg); + + return 0; +} + +/** + * ice_report_mdd_event - Report an MDD event through devlink health + * @pf: the PF device structure + * @src: the HW block that was the source of this MDD event + * @pf_num: the pf_num on which the MDD event occurred + * @vf_num: the vf_num on which the MDD event occurred + * @event: the event type of the MDD event + * @queue: the queue on which the MDD event occurred + * + * Report an MDD event that has occurred on this PF. + */ +void ice_report_mdd_event(struct ice_pf *pf, enum ice_mdd_src src, u8 pf_num, + u16 vf_num, u8 event, u16 queue) +{ + struct ice_mdd_event ev = { + .src = src, + .pf_num = pf_num, + .vf_num = vf_num, + .event = event, + .queue = queue, + }; + + ice_devlink_health_report(pf->health_reporters.mdd, "MDD event", &ev); +} + +/** + * ice_fmsg_put_ptr - put hex value of pointer into fmsg + * + * @fmsg: devlink fmsg under construction + * @name: name to pass + * @ptr: 64 bit value to print as hex and put into fmsg + */ +static void ice_fmsg_put_ptr(struct devlink_fmsg *fmsg, const char *name, + void *ptr) +{ + char buf[sizeof(ptr) * 3]; + + sprintf(buf, "%p", ptr); + devlink_fmsg_put(fmsg, name, buf); +} + +struct ice_tx_hang_event { + u32 head; + u32 intr; + u16 vsi_num; + u16 queue; + u16 next_to_clean; + u16 next_to_use; + struct ice_tx_ring *tx_ring; +}; + +static int ice_tx_hang_reporter_dump(struct devlink_health_reporter *reporter, + struct devlink_fmsg *fmsg, void *priv_ctx, + struct netlink_ext_ack *extack) +{ + struct ice_tx_hang_event *event = priv_ctx; + struct sk_buff *skb; + + if (!event) + return 0; + + skb = event->tx_ring->tx_buf->skb; + devlink_fmsg_obj_nest_start(fmsg); + ICE_DEVLINK_FMSG_PUT_FIELD(fmsg, event, head); + ICE_DEVLINK_FMSG_PUT_FIELD(fmsg, event, intr); + ICE_DEVLINK_FMSG_PUT_FIELD(fmsg, event, vsi_num); + ICE_DEVLINK_FMSG_PUT_FIELD(fmsg, event, queue); + ICE_DEVLINK_FMSG_PUT_FIELD(fmsg, event, next_to_clean); + ICE_DEVLINK_FMSG_PUT_FIELD(fmsg, event, next_to_use); + devlink_fmsg_put(fmsg, "irq-mapping", event->tx_ring->q_vector->name); + ice_fmsg_put_ptr(fmsg, "desc-ptr", event->tx_ring->desc); + ice_fmsg_put_ptr(fmsg, "dma-ptr", (void *)(long)event->tx_ring->dma); + ice_fmsg_put_ptr(fmsg, "skb-ptr", skb); + devlink_fmsg_binary_pair_put(fmsg, "desc", event->tx_ring->desc, + event->tx_ring->count * sizeof(struct ice_tx_desc)); + devlink_fmsg_dump_skb(fmsg, skb); + devlink_fmsg_obj_nest_end(fmsg); + + return 0; +} + +void ice_prep_tx_hang_report(struct ice_pf *pf, struct ice_tx_ring *tx_ring, + u16 vsi_num, u32 head, u32 intr) +{ + struct ice_health_tx_hang_buf *buf = &pf->health_reporters.tx_hang_buf; + + buf->tx_ring = tx_ring; + buf->vsi_num = vsi_num; + buf->head = head; + buf->intr = intr; +} + +void ice_report_tx_hang(struct ice_pf *pf) +{ + struct ice_health_tx_hang_buf *buf = &pf->health_reporters.tx_hang_buf; + struct ice_tx_ring *tx_ring = buf->tx_ring; + + struct ice_tx_hang_event ev = { + .head = buf->head, + .intr = buf->intr, + .vsi_num = buf->vsi_num, + .queue = tx_ring->q_index, + .next_to_clean = tx_ring->next_to_clean, + .next_to_use = tx_ring->next_to_use, + .tx_ring = tx_ring, + }; + + ice_devlink_health_report(pf->health_reporters.tx_hang, "Tx hang", &ev); +} + +static struct devlink_health_reporter * +ice_init_devlink_rep(struct ice_pf *pf, + const struct devlink_health_reporter_ops *ops) +{ + struct devlink *devlink = priv_to_devlink(pf); + struct devlink_health_reporter *rep; + const u64 graceful_period = 0; + + rep = devl_health_reporter_create(devlink, ops, graceful_period, pf); + if (IS_ERR(rep)) { + struct device *dev = ice_pf_to_dev(pf); + + dev_err(dev, "failed to create devlink %s health report er", + ops->name); + return NULL; + } + return rep; +} + +#define ICE_HEALTH_REPORTER_OPS_FIELD(_name, _field) \ + ._field = ice_##_name##_reporter_##_field, + +#define ICE_DEFINE_HEALTH_REPORTER_OPS_1(_name, _field1) \ + static const struct devlink_health_reporter_ops ice_##_name##_reporter_ops = { \ + .name = #_name, \ + ICE_HEALTH_REPORTER_OPS_FIELD(_name, _field1) \ + } + +#define ICE_DEFINE_HEALTH_REPORTER_OPS_2(_name, _field1, _field2) \ + static const struct devlink_health_reporter_ops ice_##_name##_reporter_ops = { \ + .name = #_name, \ + ICE_HEALTH_REPORTER_OPS_FIELD(_name, _field1) \ + ICE_HEALTH_REPORTER_OPS_FIELD(_name, _field2) \ + } + +ICE_DEFINE_HEALTH_REPORTER_OPS_1(mdd, dump); +ICE_DEFINE_HEALTH_REPORTER_OPS_1(tx_hang, dump); +ICE_DEFINE_HEALTH_REPORTER_OPS_2(fw, dump, diagnose); +ICE_DEFINE_HEALTH_REPORTER_OPS_2(port, dump, diagnose); + +/** + * ice_health_init - allocate and init all ice devlink health reporters and + * accompanied data + * + * @pf: PF struct + */ +void ice_health_init(struct ice_pf *pf) +{ + struct ice_health *reps = &pf->health_reporters; + + reps->mdd = ice_init_devlink_rep(pf, &ice_mdd_reporter_ops); + reps->tx_hang = ice_init_devlink_rep(pf, &ice_tx_hang_reporter_ops); + + if (ice_is_fw_health_report_supported(&pf->hw)) { + reps->fw = ice_init_devlink_rep(pf, &ice_fw_reporter_ops); + reps->port = ice_init_devlink_rep(pf, &ice_port_reporter_ops); + ice_config_health_events(pf, true); + } +} + +/** + * ice_deinit_devl_reporter - destroy given devlink health reporter + * @reporter: reporter to destroy + */ +static void ice_deinit_devl_reporter(struct devlink_health_reporter *reporter) +{ + if (reporter) + devl_health_reporter_destroy(reporter); +} + +/** + * ice_health_deinit - deallocate all ice devlink health reporters and + * accompanied data + * + * @pf: PF struct + */ +void ice_health_deinit(struct ice_pf *pf) +{ + ice_deinit_devl_reporter(pf->health_reporters.mdd); + ice_deinit_devl_reporter(pf->health_reporters.tx_hang); + if (ice_is_fw_health_report_supported(&pf->hw)) { + ice_deinit_devl_reporter(pf->health_reporters.fw); + ice_deinit_devl_reporter(pf->health_reporters.port); + ice_config_health_events(pf, false); + } +} + +static +void ice_health_assign_healthy_state(struct devlink_health_reporter *reporter) +{ + if (reporter) + devlink_health_reporter_state_update(reporter, + DEVLINK_HEALTH_REPORTER_STATE_HEALTHY); +} + +/** + * ice_health_clear - clear devlink health issues after a reset + * @pf: the PF device structure + * + * Mark the PF in healthy state again after a reset has completed. + */ +void ice_health_clear(struct ice_pf *pf) +{ + ice_health_assign_healthy_state(pf->health_reporters.mdd); + ice_health_assign_healthy_state(pf->health_reporters.tx_hang); +} diff --git a/drivers/net/ethernet/intel/ice/devlink/health.h b/drivers/net/ethernet/intel/ice/devlink/health.h new file mode 100644 index 0000000000..5edfc4d2ad --- /dev/null +++ b/drivers/net/ethernet/intel/ice/devlink/health.h @@ -0,0 +1,71 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +/* Copyright (c) 2024, Intel Corporation. */ + +#ifndef _HEALTH_H_ +#define _HEALTH_H_ + +#include + +/** + * DOC: health.h + * + * This header file stores everything that is needed for broadly understood + * devlink health mechanism for ice driver. + */ + +struct ice_aqc_health_status_elem; +struct ice_pf; +struct ice_tx_ring; +struct ice_rq_event_info; + +enum ice_mdd_src { + ICE_MDD_SRC_TX_PQM, + ICE_MDD_SRC_TX_TCLAN, + ICE_MDD_SRC_TX_TDPU, + ICE_MDD_SRC_RX, +}; + +/** + * struct ice_health - stores ice devlink health reporters and accompanied data + * @fw: devlink health reporter for FW Health Status events + * @mdd: devlink health reporter for MDD detection event + * @port: devlink health reporter for Port Health Status events + * @tx_hang: devlink health reporter for tx_hang event + * @tx_hang_buf: pre-allocated place to put info for Tx hang reporter from + * non-sleeping context + * @tx_ring: ring that the hang occurred on + * @head: descriptor head + * @intr: interrupt register value + * @vsi_num: VSI owning the queue that the hang occurred on + * @fw_status: buffer for last received FW Status event + * @port_status: buffer for last received Port Status event + */ +struct ice_health { + struct devlink_health_reporter *fw; + struct devlink_health_reporter *mdd; + struct devlink_health_reporter *port; + struct devlink_health_reporter *tx_hang; + struct_group_tagged(ice_health_tx_hang_buf, tx_hang_buf, + struct ice_tx_ring *tx_ring; + u32 head; + u32 intr; + u16 vsi_num; + ); + struct ice_aqc_health_status_elem fw_status; + struct ice_aqc_health_status_elem port_status; +}; + +void ice_process_health_status_event(struct ice_pf *pf, + struct ice_rq_event_info *event); + +void ice_health_init(struct ice_pf *pf); +void ice_health_deinit(struct ice_pf *pf); +void ice_health_clear(struct ice_pf *pf); + +void ice_prep_tx_hang_report(struct ice_pf *pf, struct ice_tx_ring *tx_ring, + u16 vsi_num, u32 head, u32 intr); +void ice_report_mdd_event(struct ice_pf *pf, enum ice_mdd_src src, u8 pf_num, + u16 vf_num, u8 event, u16 queue); +void ice_report_tx_hang(struct ice_pf *pf); + +#endif /* _HEALTH_H_ */ diff --git a/drivers/net/ethernet/intel/ice/devlink/devlink_port.c b/drivers/net/ethernet/intel/ice/devlink/port.c similarity index 99% rename from drivers/net/ethernet/intel/ice/devlink/devlink_port.c rename to drivers/net/ethernet/intel/ice/devlink/port.c index c6779d9dff..767419a67f 100644 --- a/drivers/net/ethernet/intel/ice/devlink/devlink_port.c +++ b/drivers/net/ethernet/intel/ice/devlink/port.c @@ -5,7 +5,7 @@ #include "ice.h" #include "devlink.h" -#include "devlink_port.h" +#include "port.h" #include "ice_lib.h" #include "ice_fltr.h" diff --git a/drivers/net/ethernet/intel/ice/devlink/devlink_port.h b/drivers/net/ethernet/intel/ice/devlink/port.h similarity index 100% rename from drivers/net/ethernet/intel/ice/devlink/devlink_port.h rename to drivers/net/ethernet/intel/ice/devlink/port.h diff --git a/drivers/net/ethernet/intel/ice/ice.h b/drivers/net/ethernet/intel/ice/ice.h index afa837e467..0bc64cf392 100644 --- a/drivers/net/ethernet/intel/ice/ice.h +++ b/drivers/net/ethernet/intel/ice/ice.h @@ -67,6 +67,7 @@ #include "ice_sriov.h" #include "ice_vf_mbx.h" #include "ice_ptp.h" +#include "ice_tspll.h" #include "ice_fdir.h" #include "ice_xsk.h" #include "ice_arfs.h" @@ -78,6 +79,7 @@ #include "ice_irq.h" #include "ice_dpll.h" #include "ice_adapter.h" +#include "devlink/health.h" #define ICE_BAR0 0 #define ICE_REQ_DESC_MULTIPLE 32 @@ -178,11 +180,9 @@ #define ice_for_each_chnl_tc(i) \ for ((i) = ICE_CHNL_START_TC; (i) < ICE_CHNL_MAX_TC; (i)++) -#define ICE_UCAST_PROMISC_BITS (ICE_PROMISC_UCAST_TX | ICE_PROMISC_UCAST_RX) +#define ICE_UCAST_PROMISC_BITS ICE_PROMISC_UCAST_RX -#define ICE_UCAST_VLAN_PROMISC_BITS (ICE_PROMISC_UCAST_TX | \ - ICE_PROMISC_UCAST_RX | \ - ICE_PROMISC_VLAN_TX | \ +#define ICE_UCAST_VLAN_PROMISC_BITS (ICE_PROMISC_UCAST_RX | \ ICE_PROMISC_VLAN_RX) #define ICE_MCAST_PROMISC_BITS (ICE_PROMISC_MCAST_TX | ICE_PROMISC_MCAST_RX) @@ -194,14 +194,13 @@ #define ice_pf_to_dev(pf) (&((pf)->pdev->dev)) -#define ice_pf_src_tmr_owned(pf) ((pf)->hw.func_caps.ts_func_info.src_tmr_owned) - enum ice_feature { ICE_F_DSCP, ICE_F_PHY_RCLK, ICE_F_SMA_CTRL, ICE_F_CGU, ICE_F_GNSS, + ICE_F_GCS, ICE_F_ROCE_LAG, ICE_F_SRIOV_LAG, ICE_F_MBX_LIMIT, @@ -369,9 +368,6 @@ struct ice_vsi { spinlock_t arfs_lock; /* protects aRFS hash table and filter state */ atomic_t *arfs_last_fltr_id; - u16 max_frame; - u16 rx_buf_len; - struct ice_aqc_vsi_props info; /* VSI properties */ struct ice_vsi_vlan_info vlan_info; /* vlan config to be restored */ @@ -669,6 +665,9 @@ struct ice_pf { struct ice_agg_node vf_agg_node[ICE_MAX_VF_AGG_NODES]; struct ice_dplls dplls; struct device *hwmon_dev; + struct ice_health health_reporters; + + u8 num_quanta_prof_used; }; extern struct workqueue_struct *ice_lag_wq; @@ -1047,10 +1046,63 @@ static inline void ice_clear_rdma_cap(struct ice_pf *pf) clear_bit(ICE_FLAG_RDMA_ENA, pf->flags); } -static inline enum ice_phy_model ice_get_phy_model(const struct ice_hw *hw) +extern const struct xdp_metadata_ops ice_xdp_md_ops; + +/** + * ice_is_dual - Check if given config is multi-NAC + * @hw: pointer to HW structure + * + * Return: true if the device is running in mutli-NAC (Network + * Acceleration Complex) configuration variant, false otherwise + * (always false for non-E825 devices). + */ +static inline bool ice_is_dual(struct ice_hw *hw) { - return hw->ptp.phy_model; + return hw->mac_type == ICE_MAC_GENERIC_3K_E825 && + (hw->dev_caps.nac_topo.mode & ICE_NAC_TOPO_DUAL_M); } -extern const struct xdp_metadata_ops ice_xdp_md_ops; +/** + * ice_is_primary - Check if given device belongs to the primary complex + * @hw: pointer to HW structure + * + * Check if given PF/HW is running on primary complex in multi-NAC + * configuration. + * + * Return: true if the device is dual, false otherwise (always true + * for non-E825 devices). + */ +static inline bool ice_is_primary(struct ice_hw *hw) +{ + return hw->mac_type != ICE_MAC_GENERIC_3K_E825 || + !ice_is_dual(hw) || + (hw->dev_caps.nac_topo.mode & ICE_NAC_TOPO_PRIMARY_M); +} + +/** + * ice_pf_src_tmr_owned - Check if a primary timer is owned by PF + * @pf: pointer to PF structure + * + * Return: true if PF owns primary timer, false otherwise. + */ +static inline bool ice_pf_src_tmr_owned(struct ice_pf *pf) +{ + return pf->hw.func_caps.ts_func_info.src_tmr_owned && + ice_is_primary(&pf->hw); +} + +/** + * ice_get_primary_hw - Get pointer to primary ice_hw structure + * @pf: pointer to PF structure + * + * Return: A pointer to ice_hw structure with access to timesync + * register space. + */ +static inline struct ice_hw *ice_get_primary_hw(struct ice_pf *pf) +{ + if (!pf->adapter->ctrl_pf) + return &pf->hw; + else + return &pf->adapter->ctrl_pf->hw; +} #endif /* _ICE_H_ */ diff --git a/drivers/net/ethernet/intel/ice/ice_adminq_cmd.h b/drivers/net/ethernet/intel/ice/ice_adminq_cmd.h index 1f01f3501d..53c85fcad0 100644 --- a/drivers/net/ethernet/intel/ice/ice_adminq_cmd.h +++ b/drivers/net/ethernet/intel/ice/ice_adminq_cmd.h @@ -12,6 +12,13 @@ #define ICE_AQC_TOPO_MAX_LEVEL_NUM 0x9 #define ICE_AQ_SET_MAC_FRAME_SIZE_MAX 9728 +#define ICE_RXQ_CTX_SIZE_DWORDS 8 +#define ICE_RXQ_CTX_SZ (ICE_RXQ_CTX_SIZE_DWORDS * sizeof(u32)) +#define ICE_TXQ_CTX_SZ 22 + +typedef struct __packed { u8 buf[ICE_RXQ_CTX_SZ]; } ice_rxq_ctx_buf_t; +typedef struct __packed { u8 buf[ICE_TXQ_CTX_SZ]; } ice_txq_ctx_buf_t; + struct ice_aqc_generic { __le32 param0; __le32 param1; @@ -1491,7 +1498,23 @@ struct ice_aqc_dnl_equa_param { #define ICE_AQC_RX_EQU_POST1 (0x12 << ICE_AQC_RX_EQU_SHIFT) #define ICE_AQC_RX_EQU_BFLF (0x13 << ICE_AQC_RX_EQU_SHIFT) #define ICE_AQC_RX_EQU_BFHF (0x14 << ICE_AQC_RX_EQU_SHIFT) -#define ICE_AQC_RX_EQU_DRATE (0x15 << ICE_AQC_RX_EQU_SHIFT) +#define ICE_AQC_RX_EQU_CTLE_GAINHF (0x20 << ICE_AQC_RX_EQU_SHIFT) +#define ICE_AQC_RX_EQU_CTLE_GAINLF (0x21 << ICE_AQC_RX_EQU_SHIFT) +#define ICE_AQC_RX_EQU_CTLE_GAINDC (0x22 << ICE_AQC_RX_EQU_SHIFT) +#define ICE_AQC_RX_EQU_CTLE_BW (0x23 << ICE_AQC_RX_EQU_SHIFT) +#define ICE_AQC_RX_EQU_DFE_GAIN (0x30 << ICE_AQC_RX_EQU_SHIFT) +#define ICE_AQC_RX_EQU_DFE_GAIN2 (0x31 << ICE_AQC_RX_EQU_SHIFT) +#define ICE_AQC_RX_EQU_DFE_2 (0x32 << ICE_AQC_RX_EQU_SHIFT) +#define ICE_AQC_RX_EQU_DFE_3 (0x33 << ICE_AQC_RX_EQU_SHIFT) +#define ICE_AQC_RX_EQU_DFE_4 (0x34 << ICE_AQC_RX_EQU_SHIFT) +#define ICE_AQC_RX_EQU_DFE_5 (0x35 << ICE_AQC_RX_EQU_SHIFT) +#define ICE_AQC_RX_EQU_DFE_6 (0x36 << ICE_AQC_RX_EQU_SHIFT) +#define ICE_AQC_RX_EQU_DFE_7 (0x37 << ICE_AQC_RX_EQU_SHIFT) +#define ICE_AQC_RX_EQU_DFE_8 (0x38 << ICE_AQC_RX_EQU_SHIFT) +#define ICE_AQC_RX_EQU_DFE_9 (0x39 << ICE_AQC_RX_EQU_SHIFT) +#define ICE_AQC_RX_EQU_DFE_10 (0x3A << ICE_AQC_RX_EQU_SHIFT) +#define ICE_AQC_RX_EQU_DFE_11 (0x3B << ICE_AQC_RX_EQU_SHIFT) +#define ICE_AQC_RX_EQU_DFE_12 (0x3C << ICE_AQC_RX_EQU_SHIFT) #define ICE_AQC_TX_EQU_PRE1 0x0 #define ICE_AQC_TX_EQU_PRE3 0x3 #define ICE_AQC_TX_EQU_ATTEN 0x4 @@ -1648,6 +1671,7 @@ struct ice_aqc_get_port_options_elem { #define ICE_AQC_PORT_OPT_MAX_LANE_25G 5 #define ICE_AQC_PORT_OPT_MAX_LANE_50G 6 #define ICE_AQC_PORT_OPT_MAX_LANE_100G 7 +#define ICE_AQC_PORT_OPT_MAX_LANE_200G 8 u8 global_scid[2]; u8 phy_scid[2]; @@ -2067,10 +2091,10 @@ struct ice_aqc_add_txqs_perq { __le16 txq_id; u8 rsvd[2]; __le32 q_teid; - u8 txq_ctx[22]; + ice_txq_ctx_buf_t txq_ctx; u8 rsvd2[2]; struct ice_aqc_txsched_elem info; -}; +} __packed; /* The format of the command buffer for Add Tx LAN Queues (0x0C30) * is an array of the following structs. Please note that the length of @@ -2247,6 +2271,8 @@ struct ice_aqc_get_pkg_info_resp { struct ice_aqc_get_pkg_info pkg_info[]; }; +#define ICE_AQC_GET_CGU_MAX_PHASE_ADJ GENMASK(30, 0) + /* Get CGU abilities command response data structure (indirect 0x0C61) */ struct ice_aqc_get_cgu_abilities { u8 num_inputs; @@ -2491,6 +2517,87 @@ enum ice_aqc_fw_logging_mod { ICE_AQC_FW_LOG_ID_MAX, }; +enum ice_aqc_health_status_mask { + ICE_AQC_HEALTH_STATUS_SET_PF_SPECIFIC_MASK = BIT(0), + ICE_AQC_HEALTH_STATUS_SET_ALL_PF_MASK = BIT(1), + ICE_AQC_HEALTH_STATUS_SET_GLOBAL_MASK = BIT(2), +}; + +/* Set Health Status (direct 0xFF20) */ +struct ice_aqc_set_health_status_cfg { + u8 event_source; + u8 reserved[15]; +}; + +enum ice_aqc_health_status { + ICE_AQC_HEALTH_STATUS_ERR_UNKNOWN_MOD_STRICT = 0x101, + ICE_AQC_HEALTH_STATUS_ERR_MOD_TYPE = 0x102, + ICE_AQC_HEALTH_STATUS_ERR_MOD_QUAL = 0x103, + ICE_AQC_HEALTH_STATUS_ERR_MOD_COMM = 0x104, + ICE_AQC_HEALTH_STATUS_ERR_MOD_CONFLICT = 0x105, + ICE_AQC_HEALTH_STATUS_ERR_MOD_NOT_PRESENT = 0x106, + ICE_AQC_HEALTH_STATUS_INFO_MOD_UNDERUTILIZED = 0x107, + ICE_AQC_HEALTH_STATUS_ERR_UNKNOWN_MOD_LENIENT = 0x108, + ICE_AQC_HEALTH_STATUS_ERR_MOD_DIAGNOSTIC_FEATURE = 0x109, + ICE_AQC_HEALTH_STATUS_ERR_INVALID_LINK_CFG = 0x10B, + ICE_AQC_HEALTH_STATUS_ERR_PORT_ACCESS = 0x10C, + ICE_AQC_HEALTH_STATUS_ERR_PORT_UNREACHABLE = 0x10D, + ICE_AQC_HEALTH_STATUS_INFO_PORT_SPEED_MOD_LIMITED = 0x10F, + ICE_AQC_HEALTH_STATUS_ERR_PARALLEL_FAULT = 0x110, + ICE_AQC_HEALTH_STATUS_INFO_PORT_SPEED_PHY_LIMITED = 0x111, + ICE_AQC_HEALTH_STATUS_ERR_NETLIST_TOPO = 0x112, + ICE_AQC_HEALTH_STATUS_ERR_NETLIST = 0x113, + ICE_AQC_HEALTH_STATUS_ERR_TOPO_CONFLICT = 0x114, + ICE_AQC_HEALTH_STATUS_ERR_LINK_HW_ACCESS = 0x115, + ICE_AQC_HEALTH_STATUS_ERR_LINK_RUNTIME = 0x116, + ICE_AQC_HEALTH_STATUS_ERR_DNL_INIT = 0x117, + ICE_AQC_HEALTH_STATUS_ERR_PHY_NVM_PROG = 0x120, + ICE_AQC_HEALTH_STATUS_ERR_PHY_FW_LOAD = 0x121, + ICE_AQC_HEALTH_STATUS_INFO_RECOVERY = 0x500, + ICE_AQC_HEALTH_STATUS_ERR_FLASH_ACCESS = 0x501, + ICE_AQC_HEALTH_STATUS_ERR_NVM_AUTH = 0x502, + ICE_AQC_HEALTH_STATUS_ERR_OROM_AUTH = 0x503, + ICE_AQC_HEALTH_STATUS_ERR_DDP_AUTH = 0x504, + ICE_AQC_HEALTH_STATUS_ERR_NVM_COMPAT = 0x505, + ICE_AQC_HEALTH_STATUS_ERR_OROM_COMPAT = 0x506, + ICE_AQC_HEALTH_STATUS_ERR_NVM_SEC_VIOLATION = 0x507, + ICE_AQC_HEALTH_STATUS_ERR_OROM_SEC_VIOLATION = 0x508, + ICE_AQC_HEALTH_STATUS_ERR_DCB_MIB = 0x509, + ICE_AQC_HEALTH_STATUS_ERR_MNG_TIMEOUT = 0x50A, + ICE_AQC_HEALTH_STATUS_ERR_BMC_RESET = 0x50B, + ICE_AQC_HEALTH_STATUS_ERR_LAST_MNG_FAIL = 0x50C, + ICE_AQC_HEALTH_STATUS_ERR_RESOURCE_ALLOC_FAIL = 0x50D, + ICE_AQC_HEALTH_STATUS_ERR_FW_LOOP = 0x1000, + ICE_AQC_HEALTH_STATUS_ERR_FW_PFR_FAIL = 0x1001, + ICE_AQC_HEALTH_STATUS_ERR_LAST_FAIL_AQ = 0x1002, +}; + +/* Get Health Status (indirect 0xFF22) */ +struct ice_aqc_get_health_status { + __le16 health_status_count; + u8 reserved[6]; + __le32 addr_high; + __le32 addr_low; +}; + +enum ice_aqc_health_status_scope { + ICE_AQC_HEALTH_STATUS_PF = 0x1, + ICE_AQC_HEALTH_STATUS_PORT = 0x2, + ICE_AQC_HEALTH_STATUS_GLOBAL = 0x3, +}; + +#define ICE_AQC_HEALTH_STATUS_UNDEFINED_DATA 0xDEADBEEF + +/* Get Health Status event buffer entry (0xFF22), + * repeated per reported health status. + */ +struct ice_aqc_health_status_elem { + __le16 health_status_code; + __le16 event_source; + __le32 internal_data1; + __le32 internal_data2; +}; + /* Set FW Logging configuration (indirect 0xFF30) * Register for FW Logging (indirect 0xFF31) * Query FW Logging (indirect 0xFF32) @@ -2631,6 +2738,8 @@ struct ice_aq_desc { struct ice_aqc_get_link_status get_link_status; struct ice_aqc_event_lan_overflow lan_overflow; struct ice_aqc_get_link_topo get_link_topo; + struct ice_aqc_set_health_status_cfg set_health_status_cfg; + struct ice_aqc_get_health_status get_health_status; struct ice_aqc_dnl_call_command dnl_call; struct ice_aqc_i2c read_write_i2c; struct ice_aqc_read_i2c_resp read_i2c_resp; @@ -2833,6 +2942,10 @@ enum ice_adminq_opc { /* Standalone Commands/Events */ ice_aqc_opc_event_lan_overflow = 0x1001, + /* System Diagnostic commands */ + ice_aqc_opc_set_health_status_cfg = 0xFF20, + ice_aqc_opc_get_health_status = 0xFF22, + /* FW Logging Commands */ ice_aqc_opc_fw_logs_config = 0xFF30, ice_aqc_opc_fw_logs_register = 0xFF31, diff --git a/drivers/net/ethernet/intel/ice/ice_arfs.c b/drivers/net/ethernet/intel/ice/ice_arfs.c index 7cee365cc7..405ddd17de 100644 --- a/drivers/net/ethernet/intel/ice/ice_arfs.c +++ b/drivers/net/ethernet/intel/ice/ice_arfs.c @@ -511,7 +511,7 @@ void ice_init_arfs(struct ice_vsi *vsi) struct hlist_head *arfs_fltr_list; unsigned int i; - if (!vsi || vsi->type != ICE_VSI_PF) + if (!vsi || vsi->type != ICE_VSI_PF || ice_is_arfs_active(vsi)) return; arfs_fltr_list = kcalloc(ICE_MAX_ARFS_LIST, sizeof(*arfs_fltr_list), diff --git a/drivers/net/ethernet/intel/ice/ice_base.c b/drivers/net/ethernet/intel/ice/ice_base.c index 4b52f1a94d..05cd68598d 100644 --- a/drivers/net/ethernet/intel/ice/ice_base.c +++ b/drivers/net/ethernet/intel/ice/ice_base.c @@ -347,6 +347,8 @@ ice_setup_tx_ctx(struct ice_tx_ring *ring, struct ice_tlan_ctx *tlan_ctx, u16 pf break; } + tlan_ctx->quanta_prof_idx = ring->quanta_prof_id; + tlan_ctx->tso_ena = ICE_TX_LEGACY; tlan_ctx->tso_qnum = pf_q; @@ -445,12 +447,15 @@ static int ice_setup_rx_ctx(struct ice_rx_ring *ring) /* Max packet size for this queue - must not be set to a larger value * than 5 x DBUF */ - rlan_ctx.rxmax = min_t(u32, vsi->max_frame, + rlan_ctx.rxmax = min_t(u32, ring->max_frame, ICE_MAX_CHAINED_RX_BUFS * ring->rx_buf_len); /* Rx queue threshold in units of 64 */ rlan_ctx.lrxqthresh = 1; + /* Enable descriptor prefetch */ + rlan_ctx.prefena = 1; + /* PF acts as uplink for switchdev; set flex descriptor with src_vsi * metadata and flags to allow redirecting to PR netdev */ @@ -467,9 +472,6 @@ static int ice_setup_rx_ctx(struct ice_rx_ring *ring) */ if (vsi->type != ICE_VSI_VF) ice_write_qrxflxp_cntxt(hw, pf_q, rxdid, 0x3, true); - else - ice_write_qrxflxp_cntxt(hw, pf_q, ICE_RXDID_LEGACY_1, 0x3, - false); /* Absolute queue number out of 2K needs to be passed */ err = ice_write_rxq_ctx(hw, &rlan_ctx, pf_q); @@ -541,8 +543,6 @@ static int ice_vsi_cfg_rxq(struct ice_rx_ring *ring) u32 num_bufs = ICE_RX_DESC_UNUSED(ring); int err; - ring->rx_buf_len = ring->vsi->rx_buf_len; - if (ring->vsi->type == ICE_VSI_PF || ring->vsi->type == ICE_VSI_SF) { if (!xdp_rxq_info_is_reg(&ring->xdp_rxq)) { err = __xdp_rxq_info_reg(&ring->xdp_rxq, ring->netdev, @@ -641,21 +641,25 @@ int ice_vsi_cfg_single_rxq(struct ice_vsi *vsi, u16 q_idx) /** * ice_vsi_cfg_frame_size - setup max frame size and Rx buffer length * @vsi: VSI + * @ring: Rx ring to configure + * + * Determine the maximum frame size and Rx buffer length to use for a PF VSI. + * Set these in the associated Rx ring structure. */ -static void ice_vsi_cfg_frame_size(struct ice_vsi *vsi) +static void ice_vsi_cfg_frame_size(struct ice_vsi *vsi, struct ice_rx_ring *ring) { if (!vsi->netdev || test_bit(ICE_FLAG_LEGACY_RX, vsi->back->flags)) { - vsi->max_frame = ICE_MAX_FRAME_LEGACY_RX; - vsi->rx_buf_len = ICE_RXBUF_1664; + ring->max_frame = ICE_MAX_FRAME_LEGACY_RX; + ring->rx_buf_len = ICE_RXBUF_1664; #if (PAGE_SIZE < 8192) } else if (!ICE_2K_TOO_SMALL_WITH_PADDING && (vsi->netdev->mtu <= ETH_DATA_LEN)) { - vsi->max_frame = ICE_RXBUF_1536 - NET_IP_ALIGN; - vsi->rx_buf_len = ICE_RXBUF_1536 - NET_IP_ALIGN; + ring->max_frame = ICE_RXBUF_1536 - NET_IP_ALIGN; + ring->rx_buf_len = ICE_RXBUF_1536 - NET_IP_ALIGN; #endif } else { - vsi->max_frame = ICE_AQ_SET_MAC_FRAME_SIZE_MAX; - vsi->rx_buf_len = ICE_RXBUF_3072; + ring->max_frame = ICE_AQ_SET_MAC_FRAME_SIZE_MAX; + ring->rx_buf_len = ICE_RXBUF_3072; } } @@ -670,15 +674,15 @@ int ice_vsi_cfg_rxqs(struct ice_vsi *vsi) { u16 i; - if (vsi->type == ICE_VSI_VF) - goto setup_rings; - - ice_vsi_cfg_frame_size(vsi); -setup_rings: /* set up individual rings */ ice_for_each_rxq(vsi, i) { - int err = ice_vsi_cfg_rxq(vsi->rx_rings[i]); + struct ice_rx_ring *ring = vsi->rx_rings[i]; + int err; + if (vsi->type != ICE_VSI_VF) + ice_vsi_cfg_frame_size(vsi, ring); + + err = ice_vsi_cfg_rxq(ring); if (err) return err; } @@ -903,8 +907,7 @@ ice_vsi_cfg_txq(struct ice_vsi *vsi, struct ice_tx_ring *ring, ice_setup_tx_ctx(ring, &tlan_ctx, pf_q); /* copy context contents into the qg_buf */ qg_buf->txqs[0].txq_id = cpu_to_le16(pf_q); - ice_set_ctx(hw, (u8 *)&tlan_ctx, qg_buf->txqs[0].txq_ctx, - ice_tlan_ctx_info); + ice_pack_txq_ctx(&tlan_ctx, &qg_buf->txqs[0].txq_ctx); /* init queue specific tail reg. It is referred as * transmit comm scheduler queue doorbell. diff --git a/drivers/net/ethernet/intel/ice/ice_cgu_regs.h b/drivers/net/ethernet/intel/ice/ice_cgu_regs.h deleted file mode 100644 index 10d9d74f35..0000000000 --- a/drivers/net/ethernet/intel/ice/ice_cgu_regs.h +++ /dev/null @@ -1,181 +0,0 @@ -/* SPDX-License-Identifier: GPL-2.0 */ -/* Copyright (C) 2018-2021, Intel Corporation. */ - -#ifndef _ICE_CGU_REGS_H_ -#define _ICE_CGU_REGS_H_ - -#define NAC_CGU_DWORD9 0x24 -union nac_cgu_dword9 { - struct { - u32 time_ref_freq_sel : 3; - u32 clk_eref1_en : 1; - u32 clk_eref0_en : 1; - u32 time_ref_en : 1; - u32 time_sync_en : 1; - u32 one_pps_out_en : 1; - u32 clk_ref_synce_en : 1; - u32 clk_synce1_en : 1; - u32 clk_synce0_en : 1; - u32 net_clk_ref1_en : 1; - u32 net_clk_ref0_en : 1; - u32 clk_synce1_amp : 2; - u32 misc6 : 1; - u32 clk_synce0_amp : 2; - u32 one_pps_out_amp : 2; - u32 misc24 : 12; - }; - u32 val; -}; - -#define NAC_CGU_DWORD16_E825C 0x40 -union nac_cgu_dword16_e825c { - struct { - u32 synce_remndr : 6; - u32 synce_phlmt_en : 1; - u32 misc13 : 17; - u32 tspll_ck_refclkfreq : 8; - }; - u32 val; -}; - -#define NAC_CGU_DWORD19 0x4c -union nac_cgu_dword19 { - struct { - u32 tspll_fbdiv_intgr : 8; - u32 fdpll_ulck_thr : 5; - u32 misc15 : 3; - u32 tspll_ndivratio : 4; - u32 tspll_iref_ndivratio : 3; - u32 misc19 : 1; - u32 japll_ndivratio : 4; - u32 japll_iref_ndivratio : 3; - u32 misc27 : 1; - }; - u32 val; -}; - -#define NAC_CGU_DWORD22 0x58 -union nac_cgu_dword22 { - struct { - u32 fdpll_frac_div_out_nc : 2; - u32 fdpll_lock_int_for : 1; - u32 synce_hdov_int_for : 1; - u32 synce_lock_int_for : 1; - u32 fdpll_phlead_slip_nc : 1; - u32 fdpll_acc1_ovfl_nc : 1; - u32 fdpll_acc2_ovfl_nc : 1; - u32 synce_status_nc : 6; - u32 fdpll_acc1f_ovfl : 1; - u32 misc18 : 1; - u32 fdpllclk_div : 4; - u32 time1588clk_div : 4; - u32 synceclk_div : 4; - u32 synceclk_sel_div2 : 1; - u32 fdpllclk_sel_div2 : 1; - u32 time1588clk_sel_div2 : 1; - u32 misc3 : 1; - }; - u32 val; -}; - -#define NAC_CGU_DWORD23_E825C 0x5C -union nac_cgu_dword23_e825c { - struct { - u32 cgupll_fbdiv_intgr : 10; - u32 ux56pll_fbdiv_intgr : 10; - u32 misc20 : 4; - u32 ts_pll_enable : 1; - u32 time_sync_tspll_align_sel : 1; - u32 ext_synce_sel : 1; - u32 ref1588_ck_div : 4; - u32 time_ref_sel : 1; - - }; - u32 val; -}; - -#define NAC_CGU_DWORD24 0x60 -union nac_cgu_dword24 { - struct { - u32 tspll_fbdiv_frac : 22; - u32 misc20 : 2; - u32 ts_pll_enable : 1; - u32 time_sync_tspll_align_sel : 1; - u32 ext_synce_sel : 1; - u32 ref1588_ck_div : 4; - u32 time_ref_sel : 1; - }; - u32 val; -}; - -#define TSPLL_CNTR_BIST_SETTINGS 0x344 -union tspll_cntr_bist_settings { - struct { - u32 i_irefgen_settling_time_cntr_7_0 : 8; - u32 i_irefgen_settling_time_ro_standby_1_0 : 2; - u32 reserved195 : 5; - u32 i_plllock_sel_0 : 1; - u32 i_plllock_sel_1 : 1; - u32 i_plllock_cnt_6_0 : 7; - u32 i_plllock_cnt_10_7 : 4; - u32 reserved200 : 4; - }; - u32 val; -}; - -#define TSPLL_RO_BWM_LF 0x370 -union tspll_ro_bwm_lf { - struct { - u32 bw_freqov_high_cri_7_0 : 8; - u32 bw_freqov_high_cri_9_8 : 2; - u32 biascaldone_cri : 1; - u32 plllock_gain_tran_cri : 1; - u32 plllock_true_lock_cri : 1; - u32 pllunlock_flag_cri : 1; - u32 afcerr_cri : 1; - u32 afcdone_cri : 1; - u32 feedfwrdgain_cal_cri_7_0 : 8; - u32 m2fbdivmod_cri_7_0 : 8; - }; - u32 val; -}; - -#define TSPLL_RO_LOCK_E825C 0x3f0 -union tspll_ro_lock_e825c { - struct { - u32 bw_freqov_high_cri_7_0 : 8; - u32 bw_freqov_high_cri_9_8 : 2; - u32 reserved455 : 1; - u32 plllock_gain_tran_cri : 1; - u32 plllock_true_lock_cri : 1; - u32 pllunlock_flag_cri : 1; - u32 afcerr_cri : 1; - u32 afcdone_cri : 1; - u32 feedfwrdgain_cal_cri_7_0 : 8; - u32 reserved462 : 8; - }; - u32 val; -}; - -#define TSPLL_BW_TDC_E825C 0x31c -union tspll_bw_tdc_e825c { - struct { - u32 i_tdc_offset_lock_1_0 : 2; - u32 i_bbthresh1_2_0 : 3; - u32 i_bbthresh2_2_0 : 3; - u32 i_tdcsel_1_0 : 2; - u32 i_tdcovccorr_en_h : 1; - u32 i_divretimeren : 1; - u32 i_bw_ampmeas_window : 1; - u32 i_bw_lowerbound_2_0 : 3; - u32 i_bw_upperbound_2_0 : 3; - u32 i_bw_mode_1_0 : 2; - u32 i_ft_mode_sel_2_0 : 3; - u32 i_bwphase_4_0 : 5; - u32 i_plllock_sel_1_0 : 2; - u32 i_afc_divratio : 1; - }; - u32 val; -}; - -#endif /* _ICE_CGU_REGS_H_ */ diff --git a/drivers/net/ethernet/intel/ice/ice_common.c b/drivers/net/ethernet/intel/ice/ice_common.c index 9a2e6b26d7..bdf6e661b2 100644 --- a/drivers/net/ethernet/intel/ice/ice_common.c +++ b/drivers/net/ethernet/intel/ice/ice_common.c @@ -6,6 +6,7 @@ #include "ice_adminq_cmd.h" #include "ice_flow.h" #include "ice_ptp_hw.h" +#include #define ICE_PF_RESET_WAIT_COUNT 300 #define ICE_MAX_NETLIST_SIZE 10 @@ -185,7 +186,7 @@ static int ice_set_mac_type(struct ice_hw *hw) * ice_is_generic_mac - check if device's mac_type is generic * @hw: pointer to the hardware structure * - * Return: true if mac_type is generic (with SBQ support), false if not + * Return: true if mac_type is ICE_MAC_GENERIC*, false otherwise. */ bool ice_is_generic_mac(struct ice_hw *hw) { @@ -193,120 +194,6 @@ bool ice_is_generic_mac(struct ice_hw *hw) hw->mac_type == ICE_MAC_GENERIC_3K_E825); } -/** - * ice_is_e810 - * @hw: pointer to the hardware structure - * - * returns true if the device is E810 based, false if not. - */ -bool ice_is_e810(struct ice_hw *hw) -{ - return hw->mac_type == ICE_MAC_E810; -} - -/** - * ice_is_e810t - * @hw: pointer to the hardware structure - * - * returns true if the device is E810T based, false if not. - */ -bool ice_is_e810t(struct ice_hw *hw) -{ - switch (hw->device_id) { - case ICE_DEV_ID_E810C_SFP: - switch (hw->subsystem_device_id) { - case ICE_SUBDEV_ID_E810T: - case ICE_SUBDEV_ID_E810T2: - case ICE_SUBDEV_ID_E810T3: - case ICE_SUBDEV_ID_E810T4: - case ICE_SUBDEV_ID_E810T6: - case ICE_SUBDEV_ID_E810T7: - return true; - } - break; - case ICE_DEV_ID_E810C_QSFP: - switch (hw->subsystem_device_id) { - case ICE_SUBDEV_ID_E810T2: - case ICE_SUBDEV_ID_E810T3: - case ICE_SUBDEV_ID_E810T5: - return true; - } - break; - default: - break; - } - - return false; -} - -/** - * ice_is_e822 - Check if a device is E822 family device - * @hw: pointer to the hardware structure - * - * Return: true if the device is E822 based, false if not. - */ -bool ice_is_e822(struct ice_hw *hw) -{ - switch (hw->device_id) { - case ICE_DEV_ID_E822C_BACKPLANE: - case ICE_DEV_ID_E822C_QSFP: - case ICE_DEV_ID_E822C_SFP: - case ICE_DEV_ID_E822C_10G_BASE_T: - case ICE_DEV_ID_E822C_SGMII: - case ICE_DEV_ID_E822L_BACKPLANE: - case ICE_DEV_ID_E822L_SFP: - case ICE_DEV_ID_E822L_10G_BASE_T: - case ICE_DEV_ID_E822L_SGMII: - return true; - default: - return false; - } -} - -/** - * ice_is_e823 - * @hw: pointer to the hardware structure - * - * returns true if the device is E823-L or E823-C based, false if not. - */ -bool ice_is_e823(struct ice_hw *hw) -{ - switch (hw->device_id) { - case ICE_DEV_ID_E823L_BACKPLANE: - case ICE_DEV_ID_E823L_SFP: - case ICE_DEV_ID_E823L_10G_BASE_T: - case ICE_DEV_ID_E823L_1GBE: - case ICE_DEV_ID_E823L_QSFP: - case ICE_DEV_ID_E823C_BACKPLANE: - case ICE_DEV_ID_E823C_QSFP: - case ICE_DEV_ID_E823C_SFP: - case ICE_DEV_ID_E823C_10G_BASE_T: - case ICE_DEV_ID_E823C_SGMII: - return true; - default: - return false; - } -} - -/** - * ice_is_e825c - Check if a device is E825C family device - * @hw: pointer to the hardware structure - * - * Return: true if the device is E825-C based, false if not. - */ -bool ice_is_e825c(struct ice_hw *hw) -{ - switch (hw->device_id) { - case ICE_DEV_ID_E825C_BACKPLANE: - case ICE_DEV_ID_E825C_QSFP: - case ICE_DEV_ID_E825C_SFP: - case ICE_DEV_ID_E825C_SGMII: - return true; - default: - return false; - } -} - /** * ice_is_pf_c827 - check if pf contains c827 phy * @hw: pointer to the hw struct @@ -1248,6 +1135,8 @@ int ice_init_hw(struct ice_hw *hw) } } + hw->lane_num = ice_get_phy_lane_number(hw); + return 0; err_unroll_fltr_mgmt_struct: ice_cleanup_fltr_mgmt_struct(hw); @@ -1434,39 +1323,31 @@ int ice_reset(struct ice_hw *hw, enum ice_reset_req req) } /** - * ice_copy_rxq_ctx_to_hw + * ice_copy_rxq_ctx_to_hw - Copy packed Rx queue context to HW registers * @hw: pointer to the hardware structure - * @ice_rxq_ctx: pointer to the rxq context + * @rxq_ctx: pointer to the packed Rx queue context * @rxq_index: the index of the Rx queue - * - * Copies rxq context from dense structure to HW register space */ -static int -ice_copy_rxq_ctx_to_hw(struct ice_hw *hw, u8 *ice_rxq_ctx, u32 rxq_index) +static void ice_copy_rxq_ctx_to_hw(struct ice_hw *hw, + const ice_rxq_ctx_buf_t *rxq_ctx, + u32 rxq_index) { - u8 i; - - if (!ice_rxq_ctx) - return -EINVAL; - - if (rxq_index > QRX_CTRL_MAX_INDEX) - return -EINVAL; - /* Copy each dword separately to HW */ - for (i = 0; i < ICE_RXQ_CTX_SIZE_DWORDS; i++) { - wr32(hw, QRX_CONTEXT(i, rxq_index), - *((u32 *)(ice_rxq_ctx + (i * sizeof(u32))))); + for (int i = 0; i < ICE_RXQ_CTX_SIZE_DWORDS; i++) { + u32 ctx = ((const u32 *)rxq_ctx)[i]; - ice_debug(hw, ICE_DBG_QCTX, "qrxdata[%d]: %08X\n", i, - *((u32 *)(ice_rxq_ctx + (i * sizeof(u32))))); + wr32(hw, QRX_CONTEXT(i, rxq_index), ctx); + + ice_debug(hw, ICE_DBG_QCTX, "qrxdata[%d]: %08X\n", i, ctx); } - - return 0; } +#define ICE_CTX_STORE(struct_name, struct_field, width, lsb) \ + PACKED_FIELD((lsb) + (width) - 1, (lsb), struct struct_name, struct_field) + /* LAN Rx Queue Context */ -static const struct ice_ctx_ele ice_rlan_ctx_info[] = { - /* Field Width LSB */ +static const struct packed_field_u8 ice_rlan_ctx_fields[] = { + /* Field Width LSB */ ICE_CTX_STORE(ice_rlan_ctx, head, 13, 0), ICE_CTX_STORE(ice_rlan_ctx, cpuid, 8, 13), ICE_CTX_STORE(ice_rlan_ctx, base, 57, 32), @@ -1487,35 +1368,50 @@ static const struct ice_ctx_ele ice_rlan_ctx_info[] = { ICE_CTX_STORE(ice_rlan_ctx, tphhead_ena, 1, 196), ICE_CTX_STORE(ice_rlan_ctx, lrxqthresh, 3, 198), ICE_CTX_STORE(ice_rlan_ctx, prefena, 1, 201), - { 0 } }; /** - * ice_write_rxq_ctx + * ice_pack_rxq_ctx - Pack Rx queue context into a HW buffer + * @ctx: the Rx queue context to pack + * @buf: the HW buffer to pack into + * + * Pack the Rx queue context from the CPU-friendly unpacked buffer into its + * bit-packed HW layout. + */ +static void ice_pack_rxq_ctx(const struct ice_rlan_ctx *ctx, + ice_rxq_ctx_buf_t *buf) +{ + pack_fields(buf, sizeof(*buf), ctx, ice_rlan_ctx_fields, + QUIRK_LITTLE_ENDIAN | QUIRK_LSW32_IS_FIRST); +} + +/** + * ice_write_rxq_ctx - Write Rx Queue context to hardware * @hw: pointer to the hardware structure - * @rlan_ctx: pointer to the rxq context + * @rlan_ctx: pointer to the unpacked Rx queue context * @rxq_index: the index of the Rx queue * - * Converts rxq context from sparse to dense structure and then writes - * it to HW register space and enables the hardware to prefetch descriptors - * instead of only fetching them on demand + * Pack the sparse Rx Queue context into dense hardware format and write it + * into the HW register space. + * + * Return: 0 on success, or -EINVAL if the Rx queue index is invalid. */ int ice_write_rxq_ctx(struct ice_hw *hw, struct ice_rlan_ctx *rlan_ctx, u32 rxq_index) { - u8 ctx_buf[ICE_RXQ_CTX_SZ] = { 0 }; + ice_rxq_ctx_buf_t buf = {}; - if (!rlan_ctx) + if (rxq_index > QRX_CTRL_MAX_INDEX) return -EINVAL; - rlan_ctx->prefena = 1; + ice_pack_rxq_ctx(rlan_ctx, &buf); + ice_copy_rxq_ctx_to_hw(hw, &buf, rxq_index); - ice_set_ctx(hw, (u8 *)rlan_ctx, ctx_buf, ice_rlan_ctx_info); - return ice_copy_rxq_ctx_to_hw(hw, ctx_buf, rxq_index); + return 0; } /* LAN Tx Queue Context */ -const struct ice_ctx_ele ice_tlan_ctx_info[] = { +static const struct packed_field_u8 ice_tlan_ctx_fields[] = { /* Field Width LSB */ ICE_CTX_STORE(ice_tlan_ctx, base, 57, 0), ICE_CTX_STORE(ice_tlan_ctx, port_num, 3, 57), @@ -1544,10 +1440,22 @@ const struct ice_ctx_ele ice_tlan_ctx_info[] = { ICE_CTX_STORE(ice_tlan_ctx, drop_ena, 1, 165), ICE_CTX_STORE(ice_tlan_ctx, cache_prof_idx, 2, 166), ICE_CTX_STORE(ice_tlan_ctx, pkt_shaper_prof_idx, 3, 168), - ICE_CTX_STORE(ice_tlan_ctx, int_q_state, 122, 171), - { 0 } }; +/** + * ice_pack_txq_ctx - Pack Tx queue context into a HW buffer + * @ctx: the Tx queue context to pack + * @buf: the HW buffer to pack into + * + * Pack the Tx queue context from the CPU-friendly unpacked buffer into its + * bit-packed HW layout. + */ +void ice_pack_txq_ctx(const struct ice_tlan_ctx *ctx, ice_txq_ctx_buf_t *buf) +{ + pack_fields(buf, sizeof(*buf), ctx, ice_tlan_ctx_fields, + QUIRK_LITTLE_ENDIAN | QUIRK_LSW32_IS_FIRST); +} + /* Sideband Queue command wrappers */ /** @@ -2251,7 +2159,8 @@ ice_parse_common_caps(struct ice_hw *hw, struct ice_hw_common_caps *caps, caps->nvm_unified_update); break; case ICE_AQC_CAPS_RDMA: - caps->rdma = (number == 1); + if (IS_ENABLED(CONFIG_INFINIBAND_IRDMA)) + caps->rdma = (number == 1); ice_debug(hw, ICE_DBG_INIT, "%s: rdma = %d\n", prefix, caps->rdma); break; case ICE_AQC_CAPS_MAX_MTU: @@ -2388,16 +2297,16 @@ ice_parse_1588_func_caps(struct ice_hw *hw, struct ice_hw_func_caps *func_p, info->tmr_index_owned = ((number & ICE_TS_TMR_IDX_OWND_M) != 0); info->tmr_index_assoc = ((number & ICE_TS_TMR_IDX_ASSOC_M) != 0); - if (!ice_is_e825c(hw)) { + if (hw->mac_type != ICE_MAC_GENERIC_3K_E825) { info->clk_freq = FIELD_GET(ICE_TS_CLK_FREQ_M, number); info->clk_src = ((number & ICE_TS_CLK_SRC_M) != 0); } else { - info->clk_freq = ICE_TIME_REF_FREQ_156_250; - info->clk_src = ICE_CLK_SRC_TCXO; + info->clk_freq = ICE_TSPLL_FREQ_156_250; + info->clk_src = ICE_CLK_SRC_TIME_REF; } - if (info->clk_freq < NUM_ICE_TIME_REF_FREQ) { - info->time_ref = (enum ice_time_ref_freq)info->clk_freq; + if (info->clk_freq < NUM_ICE_TSPLL_FREQ) { + info->time_ref = (enum ice_tspll_freq)info->clk_freq; } else { /* Unknown clock frequency, so assume a (probably incorrect) * default to avoid out-of-bounds look ups of frequency @@ -2405,7 +2314,7 @@ ice_parse_1588_func_caps(struct ice_hw *hw, struct ice_hw_func_caps *func_p, */ ice_debug(hw, ICE_DBG_INIT, "1588 func caps: unknown clock frequency %u\n", info->clk_freq); - info->time_ref = ICE_TIME_REF_FREQ_25_000; + info->time_ref = ICE_TSPLL_FREQ_25_000; } ice_debug(hw, ICE_DBG_INIT, "func caps: ieee_1588 = %u\n", @@ -2513,6 +2422,25 @@ ice_parse_func_caps(struct ice_hw *hw, struct ice_hw_func_caps *func_p, ice_recalc_port_limited_caps(hw, &func_p->common_cap); } +/** + * ice_func_id_to_logical_id - map from function id to logical pf id + * @active_function_bitmap: active function bitmap + * @pf_id: function number of device + * + * Return: logical PF ID. + */ +static int ice_func_id_to_logical_id(u32 active_function_bitmap, u8 pf_id) +{ + u8 logical_id = 0; + u8 i; + + for (i = 0; i < pf_id; i++) + if (active_function_bitmap & BIT(i)) + logical_id++; + + return logical_id; +} + /** * ice_parse_valid_functions_cap - Parse ICE_AQC_CAPS_VALID_FUNCTIONS caps * @hw: pointer to the HW struct @@ -2530,6 +2458,8 @@ ice_parse_valid_functions_cap(struct ice_hw *hw, struct ice_hw_dev_caps *dev_p, dev_p->num_funcs = hweight32(number); ice_debug(hw, ICE_DBG_INIT, "dev caps: num_funcs = %d\n", dev_p->num_funcs); + + hw->logical_pf_id = ice_func_id_to_logical_id(number, hw->pf_id); } /** @@ -3506,7 +3436,7 @@ int ice_aq_get_fec_stats(struct ice_hw *hw, u16 pcs_quad, u16 pcs_port, msg.msg_addr_low = lower_16_bits(reg_offset); msg.msg_addr_high = receiver_id; msg.opcode = ice_sbq_msg_rd; - msg.dest_dev = rmn_0; + msg.dest_dev = ice_sbq_dev_phy_0; err = ice_sbq_rw_reg(hw, &msg, flag); if (err) @@ -4117,6 +4047,59 @@ ice_aq_set_port_option(struct ice_hw *hw, u8 lport, u8 lport_valid, return ice_aq_send_cmd(hw, &desc, NULL, 0, NULL); } +/** + * ice_get_phy_lane_number - Get PHY lane number for current adapter + * @hw: pointer to the hw struct + * + * Return: PHY lane number on success, negative error code otherwise. + */ +int ice_get_phy_lane_number(struct ice_hw *hw) +{ + struct ice_aqc_get_port_options_elem *options; + unsigned int lport = 0; + unsigned int lane; + int err; + + options = kcalloc(ICE_AQC_PORT_OPT_MAX, sizeof(*options), GFP_KERNEL); + if (!options) + return -ENOMEM; + + for (lane = 0; lane < ICE_MAX_PORT_PER_PCI_DEV; lane++) { + u8 options_count = ICE_AQC_PORT_OPT_MAX; + u8 speed, active_idx, pending_idx; + bool active_valid, pending_valid; + + err = ice_aq_get_port_options(hw, options, &options_count, lane, + true, &active_idx, &active_valid, + &pending_idx, &pending_valid); + if (err) + goto err; + + if (!active_valid) + continue; + + speed = options[active_idx].max_lane_speed; + /* If we don't get speed for this lane, it's unoccupied */ + if (speed > ICE_AQC_PORT_OPT_MAX_LANE_200G) + continue; + + if (hw->pf_id == lport) { + if (hw->mac_type == ICE_MAC_GENERIC_3K_E825 && + ice_is_dual(hw) && !ice_is_primary(hw)) + lane += ICE_PORTS_PER_QUAD; + kfree(options); + return lane; + } + lport++; + } + + /* PHY lane not found */ + err = -ENXIO; +err: + kfree(options); + return err; +} + /** * ice_aq_sff_eeprom * @hw: pointer to the HW struct @@ -4579,205 +4562,6 @@ ice_aq_add_rdma_qsets(struct ice_hw *hw, u8 num_qset_grps, /* End of FW Admin Queue command wrappers */ -/** - * ice_pack_ctx_byte - write a byte to a packed context structure - * @src_ctx: unpacked source context structure - * @dest_ctx: packed destination context data - * @ce_info: context element description - */ -static void ice_pack_ctx_byte(u8 *src_ctx, u8 *dest_ctx, - const struct ice_ctx_ele *ce_info) -{ - u8 src_byte, dest_byte, mask; - u8 *from, *dest; - u16 shift_width; - - /* copy from the next struct field */ - from = src_ctx + ce_info->offset; - - /* prepare the bits and mask */ - shift_width = ce_info->lsb % 8; - mask = GENMASK(ce_info->width - 1 + shift_width, shift_width); - - src_byte = *from; - src_byte <<= shift_width; - src_byte &= mask; - - /* get the current bits from the target bit string */ - dest = dest_ctx + (ce_info->lsb / 8); - - memcpy(&dest_byte, dest, sizeof(dest_byte)); - - dest_byte &= ~mask; /* get the bits not changing */ - dest_byte |= src_byte; /* add in the new bits */ - - /* put it all back */ - memcpy(dest, &dest_byte, sizeof(dest_byte)); -} - -/** - * ice_pack_ctx_word - write a word to a packed context structure - * @src_ctx: unpacked source context structure - * @dest_ctx: packed destination context data - * @ce_info: context element description - */ -static void ice_pack_ctx_word(u8 *src_ctx, u8 *dest_ctx, - const struct ice_ctx_ele *ce_info) -{ - u16 src_word, mask; - __le16 dest_word; - u8 *from, *dest; - u16 shift_width; - - /* copy from the next struct field */ - from = src_ctx + ce_info->offset; - - /* prepare the bits and mask */ - shift_width = ce_info->lsb % 8; - mask = GENMASK(ce_info->width - 1 + shift_width, shift_width); - - /* don't swizzle the bits until after the mask because the mask bits - * will be in a different bit position on big endian machines - */ - src_word = *(u16 *)from; - src_word <<= shift_width; - src_word &= mask; - - /* get the current bits from the target bit string */ - dest = dest_ctx + (ce_info->lsb / 8); - - memcpy(&dest_word, dest, sizeof(dest_word)); - - dest_word &= ~(cpu_to_le16(mask)); /* get the bits not changing */ - dest_word |= cpu_to_le16(src_word); /* add in the new bits */ - - /* put it all back */ - memcpy(dest, &dest_word, sizeof(dest_word)); -} - -/** - * ice_pack_ctx_dword - write a dword to a packed context structure - * @src_ctx: unpacked source context structure - * @dest_ctx: packed destination context data - * @ce_info: context element description - */ -static void ice_pack_ctx_dword(u8 *src_ctx, u8 *dest_ctx, - const struct ice_ctx_ele *ce_info) -{ - u32 src_dword, mask; - __le32 dest_dword; - u8 *from, *dest; - u16 shift_width; - - /* copy from the next struct field */ - from = src_ctx + ce_info->offset; - - /* prepare the bits and mask */ - shift_width = ce_info->lsb % 8; - mask = GENMASK(ce_info->width - 1 + shift_width, shift_width); - - /* don't swizzle the bits until after the mask because the mask bits - * will be in a different bit position on big endian machines - */ - src_dword = *(u32 *)from; - src_dword <<= shift_width; - src_dword &= mask; - - /* get the current bits from the target bit string */ - dest = dest_ctx + (ce_info->lsb / 8); - - memcpy(&dest_dword, dest, sizeof(dest_dword)); - - dest_dword &= ~(cpu_to_le32(mask)); /* get the bits not changing */ - dest_dword |= cpu_to_le32(src_dword); /* add in the new bits */ - - /* put it all back */ - memcpy(dest, &dest_dword, sizeof(dest_dword)); -} - -/** - * ice_pack_ctx_qword - write a qword to a packed context structure - * @src_ctx: unpacked source context structure - * @dest_ctx: packed destination context data - * @ce_info: context element description - */ -static void ice_pack_ctx_qword(u8 *src_ctx, u8 *dest_ctx, - const struct ice_ctx_ele *ce_info) -{ - u64 src_qword, mask; - __le64 dest_qword; - u8 *from, *dest; - u16 shift_width; - - /* copy from the next struct field */ - from = src_ctx + ce_info->offset; - - /* prepare the bits and mask */ - shift_width = ce_info->lsb % 8; - mask = GENMASK_ULL(ce_info->width - 1 + shift_width, shift_width); - - /* don't swizzle the bits until after the mask because the mask bits - * will be in a different bit position on big endian machines - */ - src_qword = *(u64 *)from; - src_qword <<= shift_width; - src_qword &= mask; - - /* get the current bits from the target bit string */ - dest = dest_ctx + (ce_info->lsb / 8); - - memcpy(&dest_qword, dest, sizeof(dest_qword)); - - dest_qword &= ~(cpu_to_le64(mask)); /* get the bits not changing */ - dest_qword |= cpu_to_le64(src_qword); /* add in the new bits */ - - /* put it all back */ - memcpy(dest, &dest_qword, sizeof(dest_qword)); -} - -/** - * ice_set_ctx - set context bits in packed structure - * @hw: pointer to the hardware structure - * @src_ctx: pointer to a generic non-packed context structure - * @dest_ctx: pointer to memory for the packed structure - * @ce_info: List of Rx context elements - */ -int ice_set_ctx(struct ice_hw *hw, u8 *src_ctx, u8 *dest_ctx, - const struct ice_ctx_ele *ce_info) -{ - int f; - - for (f = 0; ce_info[f].width; f++) { - /* We have to deal with each element of the FW response - * using the correct size so that we are correct regardless - * of the endianness of the machine. - */ - if (ce_info[f].width > (ce_info[f].size_of * BITS_PER_BYTE)) { - ice_debug(hw, ICE_DBG_QCTX, "Field %d width of %d bits larger than size of %d byte(s) ... skipping write\n", - f, ce_info[f].width, ce_info[f].size_of); - continue; - } - switch (ce_info[f].size_of) { - case sizeof(u8): - ice_pack_ctx_byte(src_ctx, dest_ctx, &ce_info[f]); - break; - case sizeof(u16): - ice_pack_ctx_word(src_ctx, dest_ctx, &ce_info[f]); - break; - case sizeof(u32): - ice_pack_ctx_dword(src_ctx, dest_ctx, &ce_info[f]); - break; - case sizeof(u64): - ice_pack_ctx_qword(src_ctx, dest_ctx, &ce_info[f]); - break; - default: - return -EINVAL; - } - } - - return 0; -} - /** * ice_get_lan_q_ctx - get the LAN queue context for the given VSI and TC * @hw: pointer to the HW struct @@ -5871,6 +5655,96 @@ ice_aq_write_i2c(struct ice_hw *hw, struct ice_aqc_link_topo_addr topo_addr, return ice_aq_send_cmd(hw, &desc, NULL, 0, cd); } +/** + * ice_get_pca9575_handle - find and return the PCA9575 controller + * @hw: pointer to the hw struct + * @pca9575_handle: GPIO controller's handle + * + * Find and return the GPIO controller's handle in the netlist. + * When found - the value will be cached in the hw structure and following calls + * will return cached value. + * + * Return: 0 on success, -ENXIO when there's no PCA9575 present. + */ +int ice_get_pca9575_handle(struct ice_hw *hw, u16 *pca9575_handle) +{ + struct ice_aqc_get_link_topo *cmd; + struct ice_aq_desc desc; + int err; + u8 idx; + + /* If handle was read previously return cached value */ + if (hw->io_expander_handle) { + *pca9575_handle = hw->io_expander_handle; + return 0; + } + +#define SW_PCA9575_SFP_TOPO_IDX 2 +#define SW_PCA9575_QSFP_TOPO_IDX 1 + + /* Check if the SW IO expander controlling SMA exists in the netlist. */ + if (hw->device_id == ICE_DEV_ID_E810C_SFP) + idx = SW_PCA9575_SFP_TOPO_IDX; + else if (hw->device_id == ICE_DEV_ID_E810C_QSFP) + idx = SW_PCA9575_QSFP_TOPO_IDX; + else + return -ENXIO; + + /* If handle was not detected read it from the netlist */ + ice_fill_dflt_direct_cmd_desc(&desc, ice_aqc_opc_get_link_topo); + cmd = &desc.params.get_link_topo; + cmd->addr.topo_params.node_type_ctx = + ICE_AQC_LINK_TOPO_NODE_TYPE_GPIO_CTRL; + cmd->addr.topo_params.index = idx; + + err = ice_aq_send_cmd(hw, &desc, NULL, 0, NULL); + if (err) + return -ENXIO; + + /* Verify if we found the right IO expander type */ + if (desc.params.get_link_topo.node_part_num != + ICE_AQC_GET_LINK_TOPO_NODE_NR_PCA9575) + return -ENXIO; + + /* If present save the handle and return it */ + hw->io_expander_handle = + le16_to_cpu(desc.params.get_link_topo.addr.handle); + *pca9575_handle = hw->io_expander_handle; + + return 0; +} + +/** + * ice_read_pca9575_reg - read the register from the PCA9575 controller + * @hw: pointer to the hw struct + * @offset: GPIO controller register offset + * @data: pointer to data to be read from the GPIO controller + * + * Return: 0 on success, negative error code otherwise. + */ +int ice_read_pca9575_reg(struct ice_hw *hw, u8 offset, u8 *data) +{ + struct ice_aqc_link_topo_addr link_topo; + __le16 addr; + u16 handle; + int err; + + memset(&link_topo, 0, sizeof(link_topo)); + + err = ice_get_pca9575_handle(hw, &handle); + if (err) + return err; + + link_topo.handle = cpu_to_le16(handle); + link_topo.topo_params.node_type_ctx = + FIELD_PREP(ICE_AQC_LINK_TOPO_NODE_CTX_M, + ICE_AQC_LINK_TOPO_NODE_CTX_PROVIDED); + + addr = cpu_to_le16((u16)offset); + + return ice_aq_read_i2c(hw, link_topo, 0, addr, 1, data, NULL); +} + /** * ice_aq_set_gpio * @hw: pointer to the hw struct @@ -6053,6 +5927,44 @@ bool ice_is_phy_caps_an_enabled(struct ice_aqc_get_phy_caps_data *caps) return false; } +/** + * ice_is_fw_health_report_supported - checks if firmware supports health events + * @hw: pointer to the hardware structure + * + * Return: true if firmware supports health status reports, + * false otherwise + */ +bool ice_is_fw_health_report_supported(struct ice_hw *hw) +{ + return ice_is_fw_api_min_ver(hw, ICE_FW_API_HEALTH_REPORT_MAJ, + ICE_FW_API_HEALTH_REPORT_MIN, + ICE_FW_API_HEALTH_REPORT_PATCH); +} + +/** + * ice_aq_set_health_status_cfg - Configure FW health events + * @hw: pointer to the HW struct + * @event_source: type of diagnostic events to enable + * + * Configure the health status event types that the firmware will send to this + * PF. The supported event types are: PF-specific, all PFs, and global. + * + * Return: 0 on success, negative error code otherwise. + */ +int ice_aq_set_health_status_cfg(struct ice_hw *hw, u8 event_source) +{ + struct ice_aqc_set_health_status_cfg *cmd; + struct ice_aq_desc desc; + + cmd = &desc.params.set_health_status_cfg; + + ice_fill_dflt_direct_cmd_desc(&desc, ice_aqc_opc_set_health_status_cfg); + + cmd->event_source = event_source; + + return ice_aq_send_cmd(hw, &desc, NULL, 0, NULL); +} + /** * ice_aq_set_lldp_mib - Set the LLDP MIB * @hw: pointer to the HW struct @@ -6188,3 +6100,64 @@ u32 ice_get_link_speed(u16 index) return ice_aq_to_link_speed[index]; } + +/** + * ice_read_cgu_reg - Read a CGU register + * @hw: Pointer to the HW struct + * @addr: Register address to read + * @val: Storage for register value read + * + * Read the contents of a register of the Clock Generation Unit. Only + * applicable to E82X devices. + * + * Return: 0 on success, other error codes when failed to read from CGU. + */ +int ice_read_cgu_reg(struct ice_hw *hw, u32 addr, u32 *val) +{ + struct ice_sbq_msg_input cgu_msg = { + .opcode = ice_sbq_msg_rd, + .dest_dev = ice_sbq_dev_cgu, + .msg_addr_low = addr + }; + int err; + + err = ice_sbq_rw_reg(hw, &cgu_msg, ICE_AQ_FLAG_RD); + if (err) { + ice_debug(hw, ICE_DBG_PTP, "Failed to read CGU register 0x%04x, err %d\n", + addr, err); + return err; + } + + *val = cgu_msg.data; + + return 0; +} + +/** + * ice_write_cgu_reg - Write a CGU register + * @hw: Pointer to the HW struct + * @addr: Register address to write + * @val: Value to write into the register + * + * Write the specified value to a register of the Clock Generation Unit. Only + * applicable to E82X devices. + * + * Return: 0 on success, other error codes when failed to write to CGU. + */ +int ice_write_cgu_reg(struct ice_hw *hw, u32 addr, u32 val) +{ + struct ice_sbq_msg_input cgu_msg = { + .opcode = ice_sbq_msg_wr, + .dest_dev = ice_sbq_dev_cgu, + .msg_addr_low = addr, + .data = val + }; + int err; + + err = ice_sbq_rw_reg(hw, &cgu_msg, ICE_AQ_FLAG_RD); + if (err) + ice_debug(hw, ICE_DBG_PTP, "Failed to write CGU register 0x%04x, err %d\n", + addr, err); + + return err; +} diff --git a/drivers/net/ethernet/intel/ice/ice_common.h b/drivers/net/ethernet/intel/ice/ice_common.h index 3b739a7fe4..c695b0bd75 100644 --- a/drivers/net/ethernet/intel/ice/ice_common.h +++ b/drivers/net/ethernet/intel/ice/ice_common.h @@ -39,6 +39,47 @@ #define FEC_RECEIVER_ID_PCS0 (0x33 << FEC_RECV_ID_SHIFT) #define FEC_RECEIVER_ID_PCS1 (0x34 << FEC_RECV_ID_SHIFT) +#define ICE_CGU_R9 0x24 +#define ICE_CGU_R9_TIME_REF_FREQ_SEL GENMASK(2, 0) +#define ICE_CGU_R9_CLK_EREF0_EN BIT(4) +#define ICE_CGU_R9_TIME_REF_EN BIT(5) +#define ICE_CGU_R9_TIME_SYNC_EN BIT(6) +#define ICE_CGU_R9_ONE_PPS_OUT_EN BIT(7) +#define ICE_CGU_R9_ONE_PPS_OUT_AMP GENMASK(19, 18) + +#define ICE_CGU_R16 0x40 +#define ICE_CGU_R16_TSPLL_CK_REFCLKFREQ GENMASK(31, 24) + +#define ICE_CGU_R19 0x4C +#define ICE_CGU_R19_TSPLL_FBDIV_INTGR_E82X GENMASK(7, 0) +#define ICE_CGU_R19_TSPLL_FBDIV_INTGR_E825 GENMASK(9, 0) +#define ICE_CGU_R19_TSPLL_NDIVRATIO GENMASK(19, 16) + +#define ICE_CGU_R22 0x58 +#define ICE_CGU_R22_TIME1588CLK_DIV GENMASK(23, 20) +#define ICE_CGU_R22_TIME1588CLK_DIV2 BIT(30) + +#define ICE_CGU_R23 0x5C +#define ICE_CGU_R24 0x60 +#define ICE_CGU_R24_FBDIV_FRAC GENMASK(21, 0) +#define ICE_CGU_R23_R24_TSPLL_ENABLE BIT(24) +#define ICE_CGU_R23_R24_REF1588_CK_DIV GENMASK(30, 27) +#define ICE_CGU_R23_R24_TIME_REF_SEL BIT(31) + +#define ICE_CGU_BW_TDC 0x31C +#define ICE_CGU_BW_TDC_PLLLOCK_SEL GENMASK(30, 29) + +#define ICE_CGU_RO_LOCK 0x3F0 +#define ICE_CGU_RO_LOCK_TRUE_LOCK BIT(12) +#define ICE_CGU_RO_LOCK_UNLOCK BIT(13) + +#define ICE_CGU_CNTR_BIST 0x344 +#define ICE_CGU_CNTR_BIST_PLLLOCK_SEL_0 BIT(15) +#define ICE_CGU_CNTR_BIST_PLLLOCK_SEL_1 BIT(16) + +#define ICE_CGU_RO_BWM_LF 0x370 +#define ICE_CGU_RO_BWM_LF_TRUE_LOCK BIT(12) + int ice_init_hw(struct ice_hw *hw); void ice_deinit_hw(struct ice_hw *hw); int ice_check_reset(struct ice_hw *hw); @@ -92,9 +133,8 @@ ice_aq_set_rss_key(struct ice_hw *hw, u16 vsi_handle, bool ice_check_sq_alive(struct ice_hw *hw, struct ice_ctl_q_info *cq); int ice_aq_q_shutdown(struct ice_hw *hw, bool unloading); void ice_fill_dflt_direct_cmd_desc(struct ice_aq_desc *desc, u16 opcode); -extern const struct ice_ctx_ele ice_tlan_ctx_info[]; -int ice_set_ctx(struct ice_hw *hw, u8 *src_ctx, u8 *dest_ctx, - const struct ice_ctx_ele *ce_info); + +void ice_pack_txq_ctx(const struct ice_tlan_ctx *ctx, ice_txq_ctx_buf_t *buf); extern struct mutex ice_global_cfg_lock_sw; @@ -132,7 +172,6 @@ int ice_aq_manage_mac_write(struct ice_hw *hw, const u8 *mac_addr, u8 flags, struct ice_sq_cd *cd); bool ice_is_generic_mac(struct ice_hw *hw); -bool ice_is_e810(struct ice_hw *hw); int ice_clear_pf_cfg(struct ice_hw *hw); int ice_aq_set_phy_cfg(struct ice_hw *hw, struct ice_port_info *pi, @@ -142,6 +181,8 @@ int ice_get_link_default_override(struct ice_link_default_override_tlv *ldo, struct ice_port_info *pi); bool ice_is_phy_caps_an_enabled(struct ice_aqc_get_phy_caps_data *caps); +bool ice_is_fw_health_report_supported(struct ice_hw *hw); +int ice_aq_set_health_status_cfg(struct ice_hw *hw, u8 event_source); int ice_aq_get_phy_equalization(struct ice_hw *hw, u16 data_in, u16 op_code, u8 serdes_num, int *output); int @@ -192,6 +233,7 @@ ice_aq_get_port_options(struct ice_hw *hw, int ice_aq_set_port_option(struct ice_hw *hw, u8 lport, u8 lport_valid, u8 new_option); +int ice_get_phy_lane_number(struct ice_hw *hw); int ice_aq_sff_eeprom(struct ice_hw *hw, u16 lport, u8 bus_addr, u16 mem_addr, u8 page, u8 set_page, u8 *data, u8 length, @@ -274,10 +316,6 @@ ice_stat_update40(struct ice_hw *hw, u32 reg, bool prev_stat_loaded, void ice_stat_update32(struct ice_hw *hw, u32 reg, bool prev_stat_loaded, u64 *prev_stat, u64 *cur_stat); -bool ice_is_e810t(struct ice_hw *hw); -bool ice_is_e822(struct ice_hw *hw); -bool ice_is_e823(struct ice_hw *hw); -bool ice_is_e825c(struct ice_hw *hw); int ice_sched_query_elem(struct ice_hw *hw, u32 node_teid, struct ice_aqc_txsched_elem_data *buf); @@ -304,5 +342,9 @@ int ice_aq_write_i2c(struct ice_hw *hw, struct ice_aqc_link_topo_addr topo_addr, u16 bus_addr, __le16 addr, u8 params, const u8 *data, struct ice_sq_cd *cd); +int ice_get_pca9575_handle(struct ice_hw *hw, u16 *pca9575_handle); +int ice_read_pca9575_reg(struct ice_hw *hw, u8 offset, u8 *data); bool ice_fw_supports_report_dflt_cfg(struct ice_hw *hw); +int ice_read_cgu_reg(struct ice_hw *hw, u32 addr, u32 *val); +int ice_write_cgu_reg(struct ice_hw *hw, u32 addr, u32 val); #endif /* _ICE_COMMON_H_ */ diff --git a/drivers/net/ethernet/intel/ice/ice_ddp.c b/drivers/net/ethernet/intel/ice/ice_ddp.c index 272fd823a8..59323c0195 100644 --- a/drivers/net/ethernet/intel/ice/ice_ddp.c +++ b/drivers/net/ethernet/intel/ice/ice_ddp.c @@ -1210,6 +1210,131 @@ ice_aq_download_pkg(struct ice_hw *hw, struct ice_buf_hdr *pkg_buf, return status; } +/** + * ice_is_buffer_metadata - determine if package buffer is a metadata buffer + * @buf: pointer to buffer header + * Return: whether given @buf is a metadata one. + */ +static bool ice_is_buffer_metadata(struct ice_buf_hdr *buf) +{ + return le32_to_cpu(buf->section_entry[0].type) & ICE_METADATA_BUF; +} + +/** + * struct ice_ddp_send_ctx - sending context of current DDP segment + * @hw: pointer to the hardware struct + * + * Keeps current sending state (header, error) for the purpose of proper "last" + * bit setting in ice_aq_download_pkg(). Use via calls to ice_ddp_send_hunk(). + */ +struct ice_ddp_send_ctx { + struct ice_hw *hw; +/* private: only for ice_ddp_send_hunk() */ + struct ice_buf_hdr *hdr; + int err; +}; + +static void ice_ddp_send_ctx_set_err(struct ice_ddp_send_ctx *ctx, int err) +{ + ctx->err = err; +} + +/** + * ice_ddp_send_hunk - send one hunk of data to FW + * @ctx: current segment sending context + * @hunk: next hunk to send, size is always ICE_PKG_BUF_SIZE + * + * Send the next hunk of data to FW, retrying if needed. + * + * Notice: must be called once more with a NULL @hunk to finish up; such call + * will set up the "last" bit of an AQ request. After such call @ctx.hdr is + * cleared, @hw is still valid. + * + * Return: %ICE_DDP_PKG_SUCCESS if there were no problems; a sticky @err + * otherwise. + */ +static enum ice_ddp_state ice_ddp_send_hunk(struct ice_ddp_send_ctx *ctx, + struct ice_buf_hdr *hunk) +{ + struct ice_buf_hdr *prev_hunk = ctx->hdr; + struct ice_hw *hw = ctx->hw; + bool prev_was_last = !hunk; + enum ice_aq_err aq_err; + u32 offset, info; + int attempt, err; + + if (ctx->err) + return ctx->err; + + ctx->hdr = hunk; + if (!prev_hunk) + return ICE_DDP_PKG_SUCCESS; /* no problem so far */ + + for (attempt = 0; attempt < 5; attempt++) { + if (attempt) + msleep(20); + + err = ice_aq_download_pkg(hw, prev_hunk, ICE_PKG_BUF_SIZE, + prev_was_last, &offset, &info, NULL); + + aq_err = hw->adminq.sq_last_status; + if (aq_err != ICE_AQ_RC_ENOSEC && aq_err != ICE_AQ_RC_EBADSIG) + break; + } + + if (err) { + ice_debug(hw, ICE_DBG_PKG, "Pkg download failed: err %d off %d inf %d\n", + err, offset, info); + ctx->err = ice_map_aq_err_to_ddp_state(aq_err); + } else if (attempt) { + dev_dbg(ice_hw_to_dev(hw), + "ice_aq_download_pkg number of retries: %d\n", attempt); + } + + return ctx->err; +} + +/** + * ice_dwnld_cfg_bufs_no_lock + * @ctx: context of the current buffers section to send + * @bufs: pointer to an array of buffers + * @start: buffer index of first buffer to download + * @count: the number of buffers to download + * + * Downloads package configuration buffers to the firmware. Metadata buffers + * are skipped, and the first metadata buffer found indicates that the rest + * of the buffers are all metadata buffers. + */ +static enum ice_ddp_state +ice_dwnld_cfg_bufs_no_lock(struct ice_ddp_send_ctx *ctx, struct ice_buf *bufs, + u32 start, u32 count) +{ + struct ice_buf_hdr *bh; + enum ice_ddp_state err; + + if (!bufs || !count) { + ice_ddp_send_ctx_set_err(ctx, ICE_DDP_PKG_ERR); + return ICE_DDP_PKG_ERR; + } + + bufs += start; + + for (int i = 0; i < count; i++, bufs++) { + bh = (struct ice_buf_hdr *)bufs; + /* Metadata buffers should not be sent to FW, + * their presence means "we are done here". + */ + if (ice_is_buffer_metadata(bh)) + break; + + err = ice_ddp_send_hunk(ctx, bh); + if (err) + return err; + } + + return 0; +} + /** * ice_get_pkg_seg_by_idx * @pkg_hdr: pointer to the package header to be searched @@ -1269,137 +1394,21 @@ ice_is_signing_seg_type_at_idx(struct ice_pkg_hdr *pkg_hdr, u32 idx, return false; } -/** - * ice_is_buffer_metadata - determine if package buffer is a metadata buffer - * @buf: pointer to buffer header - */ -static bool ice_is_buffer_metadata(struct ice_buf_hdr *buf) -{ - if (le32_to_cpu(buf->section_entry[0].type) & ICE_METADATA_BUF) - return true; - - return false; -} - -/** - * ice_is_last_download_buffer - * @buf: pointer to current buffer header - * @idx: index of the buffer in the current sequence - * @count: the buffer count in the current sequence - * - * Note: this routine should only be called if the buffer is not the last buffer - */ -static bool -ice_is_last_download_buffer(struct ice_buf_hdr *buf, u32 idx, u32 count) -{ - struct ice_buf *next_buf; - - if ((idx + 1) == count) - return true; - - /* A set metadata flag in the next buffer will signal that the current - * buffer will be the last buffer downloaded - */ - next_buf = ((struct ice_buf *)buf) + 1; - - return ice_is_buffer_metadata((struct ice_buf_hdr *)next_buf); -} - -/** - * ice_dwnld_cfg_bufs_no_lock - * @hw: pointer to the hardware structure - * @bufs: pointer to an array of buffers - * @start: buffer index of first buffer to download - * @count: the number of buffers to download - * @indicate_last: if true, then set last buffer flag on last buffer download - * - * Downloads package configuration buffers to the firmware. Metadata buffers - * are skipped, and the first metadata buffer found indicates that the rest - * of the buffers are all metadata buffers. - */ -static enum ice_ddp_state -ice_dwnld_cfg_bufs_no_lock(struct ice_hw *hw, struct ice_buf *bufs, u32 start, - u32 count, bool indicate_last) -{ - enum ice_ddp_state state = ICE_DDP_PKG_SUCCESS; - struct ice_buf_hdr *bh; - enum ice_aq_err err; - u32 offset, info, i; - - if (!bufs || !count) - return ICE_DDP_PKG_ERR; - - /* If the first buffer's first section has its metadata bit set - * then there are no buffers to be downloaded, and the operation is - * considered a success. - */ - bh = (struct ice_buf_hdr *)(bufs + start); - if (le32_to_cpu(bh->section_entry[0].type) & ICE_METADATA_BUF) - return ICE_DDP_PKG_SUCCESS; - - for (i = 0; i < count; i++) { - bool last = false; - int try_cnt = 0; - int status; - - bh = (struct ice_buf_hdr *)(bufs + start + i); - - if (indicate_last) - last = ice_is_last_download_buffer(bh, i, count); - - while (1) { - status = ice_aq_download_pkg(hw, bh, ICE_PKG_BUF_SIZE, - last, &offset, &info, - NULL); - if (hw->adminq.sq_last_status != ICE_AQ_RC_ENOSEC && - hw->adminq.sq_last_status != ICE_AQ_RC_EBADSIG) - break; - - try_cnt++; - - if (try_cnt == 5) - break; - - msleep(20); - } - - if (try_cnt) - dev_dbg(ice_hw_to_dev(hw), - "ice_aq_download_pkg number of retries: %d\n", - try_cnt); - - /* Save AQ status from download package */ - if (status) { - ice_debug(hw, ICE_DBG_PKG, "Pkg download failed: err %d off %d inf %d\n", - status, offset, info); - err = hw->adminq.sq_last_status; - state = ice_map_aq_err_to_ddp_state(err); - break; - } - - if (last) - break; - } - - return state; -} - /** * ice_download_pkg_sig_seg - download a signature segment - * @hw: pointer to the hardware structure + * @ctx: context of the current buffers section to send * @seg: pointer to signature segment */ static enum ice_ddp_state -ice_download_pkg_sig_seg(struct ice_hw *hw, struct ice_sign_seg *seg) +ice_download_pkg_sig_seg(struct ice_ddp_send_ctx *ctx, struct ice_sign_seg *seg) { - return ice_dwnld_cfg_bufs_no_lock(hw, seg->buf_tbl.buf_array, 0, - le32_to_cpu(seg->buf_tbl.buf_count), - false); + return ice_dwnld_cfg_bufs_no_lock(ctx, seg->buf_tbl.buf_array, 0, + le32_to_cpu(seg->buf_tbl.buf_count)); } /** * ice_download_pkg_config_seg - download a config segment - * @hw: pointer to the hardware structure + * @ctx: context of the current buffers section to send * @pkg_hdr: pointer to package header * @idx: segment index * @start: starting buffer @@ -1408,8 +1417,9 @@ ice_download_pkg_sig_seg(struct ice_hw *hw, struct ice_sign_seg *seg) * Note: idx must reference a ICE segment */ static enum ice_ddp_state -ice_download_pkg_config_seg(struct ice_hw *hw, struct ice_pkg_hdr *pkg_hdr, - u32 idx, u32 start, u32 count) +ice_download_pkg_config_seg(struct ice_ddp_send_ctx *ctx, + struct ice_pkg_hdr *pkg_hdr, u32 idx, u32 start, + u32 count) { struct ice_buf_table *bufs; struct ice_seg *seg; @@ -1425,46 +1435,56 @@ ice_download_pkg_config_seg(struct ice_hw *hw, struct ice_pkg_hdr *pkg_hdr, if (start >= buf_count || start + count > buf_count) return ICE_DDP_PKG_ERR; - return ice_dwnld_cfg_bufs_no_lock(hw, bufs->buf_array, start, count, - true); + return ice_dwnld_cfg_bufs_no_lock(ctx, bufs->buf_array, start, count); +} + +static bool ice_is_last_sign_seg(u32 flags) +{ + return !(flags & ICE_SIGN_SEG_FLAGS_VALID) || /* behavior prior to valid */ + (flags & ICE_SIGN_SEG_FLAGS_LAST); } /** * ice_dwnld_sign_and_cfg_segs - download a signing segment and config segment - * @hw: pointer to the hardware structure + * @ctx: context of the current buffers section to send * @pkg_hdr: pointer to package header * @idx: segment index (must be a signature segment) * * Note: idx must reference a signature segment */ static enum ice_ddp_state -ice_dwnld_sign_and_cfg_segs(struct ice_hw *hw, struct ice_pkg_hdr *pkg_hdr, - u32 idx) +ice_dwnld_sign_and_cfg_segs(struct ice_ddp_send_ctx *ctx, + struct ice_pkg_hdr *pkg_hdr, u32 idx) { + u32 conf_idx, start, count, flags; enum ice_ddp_state state; struct ice_sign_seg *seg; - u32 conf_idx; - u32 start; - u32 count; seg = (struct ice_sign_seg *)ice_get_pkg_seg_by_idx(pkg_hdr, idx); if (!seg) { state = ICE_DDP_PKG_ERR; - goto exit; + ice_ddp_send_ctx_set_err(ctx, state); + return state; } count = le32_to_cpu(seg->signed_buf_count); - state = ice_download_pkg_sig_seg(hw, seg); + state = ice_download_pkg_sig_seg(ctx, seg); if (state || !count) - goto exit; + return state; conf_idx = le32_to_cpu(seg->signed_seg_idx); start = le32_to_cpu(seg->signed_buf_start); - state = ice_download_pkg_config_seg(hw, pkg_hdr, conf_idx, start, + state = ice_download_pkg_config_seg(ctx, pkg_hdr, conf_idx, start, count); -exit: + /* finish up by sending last hunk with "last" flag set if requested by + * DDP content + */ + flags = le32_to_cpu(seg->flags); + if (ice_is_last_sign_seg(flags)) + state = ice_ddp_send_hunk(ctx, NULL); + return state; } @@ -1519,6 +1539,7 @@ ice_download_pkg_with_sig_seg(struct ice_hw *hw, struct ice_pkg_hdr *pkg_hdr) { enum ice_aq_err aq_err = hw->adminq.sq_last_status; enum ice_ddp_state state = ICE_DDP_PKG_ERR; + struct ice_ddp_send_ctx ctx = { .hw = hw }; int status; u32 i; @@ -1539,7 +1560,7 @@ ice_download_pkg_with_sig_seg(struct ice_hw *hw, struct ice_pkg_hdr *pkg_hdr) hw->pkg_sign_type)) continue; - state = ice_dwnld_sign_and_cfg_segs(hw, pkg_hdr, i); + state = ice_dwnld_sign_and_cfg_segs(&ctx, pkg_hdr, i); if (state) break; } @@ -1564,6 +1585,7 @@ ice_download_pkg_with_sig_seg(struct ice_hw *hw, struct ice_pkg_hdr *pkg_hdr) static enum ice_ddp_state ice_dwnld_cfg_bufs(struct ice_hw *hw, struct ice_buf *bufs, u32 count) { + struct ice_ddp_send_ctx ctx = { .hw = hw }; enum ice_ddp_state state; struct ice_buf_hdr *bh; int status; @@ -1576,7 +1598,7 @@ ice_dwnld_cfg_bufs(struct ice_hw *hw, struct ice_buf *bufs, u32 count) * considered a success. */ bh = (struct ice_buf_hdr *)bufs; - if (le32_to_cpu(bh->section_entry[0].type) & ICE_METADATA_BUF) + if (ice_is_buffer_metadata(bh)) return ICE_DDP_PKG_SUCCESS; status = ice_acquire_global_cfg_lock(hw, ICE_RES_WRITE); @@ -1586,7 +1608,9 @@ ice_dwnld_cfg_bufs(struct ice_hw *hw, struct ice_buf *bufs, u32 count) return ice_map_aq_err_to_ddp_state(hw->adminq.sq_last_status); } - state = ice_dwnld_cfg_bufs_no_lock(hw, bufs, 0, count, true); + ice_dwnld_cfg_bufs_no_lock(&ctx, bufs, 0, count); + /* finish up by sending last hunk with "last" flag set */ + state = ice_ddp_send_hunk(&ctx, NULL); if (!state) state = ice_post_dwnld_pkg_actions(hw); @@ -2321,15 +2345,15 @@ ice_get_set_tx_topo(struct ice_hw *hw, u8 *buf, u16 buf_size, cmd->set_flags |= ICE_AQC_TX_TOPO_FLAGS_SRC_RAM | ICE_AQC_TX_TOPO_FLAGS_LOAD_NEW; - if (ice_is_e825c(hw)) - desc.flags |= cpu_to_le16(ICE_AQ_FLAG_RD); + desc.flags |= cpu_to_le16(ICE_AQ_FLAG_RD); } else { ice_fill_dflt_direct_cmd_desc(&desc, ice_aqc_opc_get_tx_topo); cmd->get_flags = ICE_AQC_TX_TOPO_GET_RAM; - } - if (!ice_is_e825c(hw)) - desc.flags |= cpu_to_le16(ICE_AQ_FLAG_RD); + if (hw->mac_type == ICE_MAC_E810 || + hw->mac_type == ICE_MAC_GENERIC) + desc.flags |= cpu_to_le16(ICE_AQ_FLAG_RD); + } status = ice_aq_send_cmd(hw, &desc, buf, buf_size, cd); if (status) diff --git a/drivers/net/ethernet/intel/ice/ice_ddp.h b/drivers/net/ethernet/intel/ice/ice_ddp.h index 79551da2a4..8a2d57fc5d 100644 --- a/drivers/net/ethernet/intel/ice/ice_ddp.h +++ b/drivers/net/ethernet/intel/ice/ice_ddp.h @@ -181,7 +181,10 @@ struct ice_sign_seg { __le32 signed_seg_idx; __le32 signed_buf_start; __le32 signed_buf_count; -#define ICE_SIGN_SEG_RESERVED_COUNT 44 +#define ICE_SIGN_SEG_FLAGS_VALID 0x80000000 +#define ICE_SIGN_SEG_FLAGS_LAST 0x00000001 + __le32 flags; +#define ICE_SIGN_SEG_RESERVED_COUNT 40 u8 reserved[ICE_SIGN_SEG_RESERVED_COUNT]; struct ice_buf_table buf_tbl; }; diff --git a/drivers/net/ethernet/intel/ice/ice_dpll.c b/drivers/net/ethernet/intel/ice/ice_dpll.c index d5ad6d8400..fcae6022ec 100644 --- a/drivers/net/ethernet/intel/ice/ice_dpll.c +++ b/drivers/net/ethernet/intel/ice/ice_dpll.c @@ -95,7 +95,7 @@ ice_dpll_pin_freq_set(struct ice_pf *pf, struct ice_dpll_pin *pin, } if (ret) { NL_SET_ERR_MSG_FMT(extack, - "err:%d %s failed to set pin freq:%u on pin:%u\n", + "err:%d %s failed to set pin freq:%u on pin:%u", ret, ice_aq_str(pf->hw.adminq.sq_last_status), freq, pin->idx); @@ -322,7 +322,7 @@ ice_dpll_pin_enable(struct ice_hw *hw, struct ice_dpll_pin *pin, } if (ret) NL_SET_ERR_MSG_FMT(extack, - "err:%d %s failed to enable %s pin:%u\n", + "err:%d %s failed to enable %s pin:%u", ret, ice_aq_str(hw->adminq.sq_last_status), pin_type_name[pin_type], pin->idx); @@ -367,7 +367,7 @@ ice_dpll_pin_disable(struct ice_hw *hw, struct ice_dpll_pin *pin, } if (ret) NL_SET_ERR_MSG_FMT(extack, - "err:%d %s failed to disable %s pin:%u\n", + "err:%d %s failed to disable %s pin:%u", ret, ice_aq_str(hw->adminq.sq_last_status), pin_type_name[pin_type], pin->idx); @@ -479,7 +479,7 @@ ice_dpll_pin_state_update(struct ice_pf *pf, struct ice_dpll_pin *pin, err: if (extack) NL_SET_ERR_MSG_FMT(extack, - "err:%d %s failed to update %s pin:%u\n", + "err:%d %s failed to update %s pin:%u", ret, ice_aq_str(pf->hw.adminq.sq_last_status), pin_type_name[pin_type], pin->idx); @@ -518,7 +518,7 @@ ice_dpll_hw_input_prio_set(struct ice_pf *pf, struct ice_dpll *dpll, (u8)prio); if (ret) NL_SET_ERR_MSG_FMT(extack, - "err:%d %s failed to set pin prio:%u on pin:%u\n", + "err:%d %s failed to set pin prio:%u on pin:%u", ret, ice_aq_str(pf->hw.adminq.sq_last_status), prio, pin->idx); @@ -1004,7 +1004,7 @@ ice_dpll_pin_phase_adjust_set(const struct dpll_pin *pin, void *pin_priv, mutex_unlock(&pf->dplls.lock); if (ret) NL_SET_ERR_MSG_FMT(extack, - "err:%d %s failed to set pin phase_adjust:%d for pin:%u on dpll:%u\n", + "err:%d %s failed to set pin phase_adjust:%d for pin:%u on dpll:%u", ret, ice_aq_str(pf->hw.adminq.sq_last_status), phase_adjust, p->idx, d->dpll_idx); @@ -1362,7 +1362,7 @@ ice_dpll_rclk_state_on_pin_set(const struct dpll_pin *pin, void *pin_priv, &p->freq); if (ret) NL_SET_ERR_MSG_FMT(extack, - "err:%d %s failed to set pin state:%u for pin:%u on parent:%u\n", + "err:%d %s failed to set pin state:%u for pin:%u on parent:%u", ret, ice_aq_str(pf->hw.adminq.sq_last_status), state, p->idx, parent->idx); @@ -2064,6 +2064,18 @@ static int ice_dpll_init_worker(struct ice_pf *pf) return 0; } +/** + * ice_dpll_phase_range_set - initialize phase adjust range helper + * @range: pointer to phase adjust range struct to be initialized + * @phase_adj: a value to be used as min(-)/max(+) boundary + */ +static void ice_dpll_phase_range_set(struct dpll_pin_phase_adjust_range *range, + u32 phase_adj) +{ + range->min = -phase_adj; + range->max = phase_adj; +} + /** * ice_dpll_init_info_pins_generic - initializes generic pins info * @pf: board private structure @@ -2105,8 +2117,8 @@ static int ice_dpll_init_info_pins_generic(struct ice_pf *pf, bool input) for (i = 0; i < pin_num; i++) { pins[i].idx = i; pins[i].prop.board_label = labels[i]; - pins[i].prop.phase_range.min = phase_adj_max; - pins[i].prop.phase_range.max = -phase_adj_max; + ice_dpll_phase_range_set(&pins[i].prop.phase_range, + phase_adj_max); pins[i].prop.capabilities = cap; pins[i].pf = pf; ret = ice_dpll_pin_state_update(pf, &pins[i], pin_type, NULL); @@ -2152,6 +2164,7 @@ ice_dpll_init_info_direct_pins(struct ice_pf *pf, struct ice_hw *hw = &pf->hw; struct ice_dpll_pin *pins; unsigned long caps; + u32 phase_adj_max; u8 freq_supp_num; bool input; @@ -2159,11 +2172,13 @@ ice_dpll_init_info_direct_pins(struct ice_pf *pf, case ICE_DPLL_PIN_TYPE_INPUT: pins = pf->dplls.inputs; num_pins = pf->dplls.num_inputs; + phase_adj_max = pf->dplls.input_phase_adj_max; input = true; break; case ICE_DPLL_PIN_TYPE_OUTPUT: pins = pf->dplls.outputs; num_pins = pf->dplls.num_outputs; + phase_adj_max = pf->dplls.output_phase_adj_max; input = false; break; default: @@ -2188,19 +2203,13 @@ ice_dpll_init_info_direct_pins(struct ice_pf *pf, return ret; caps |= (DPLL_PIN_CAPABILITIES_PRIORITY_CAN_CHANGE | DPLL_PIN_CAPABILITIES_STATE_CAN_CHANGE); - pins[i].prop.phase_range.min = - pf->dplls.input_phase_adj_max; - pins[i].prop.phase_range.max = - -pf->dplls.input_phase_adj_max; } else { - pins[i].prop.phase_range.min = - pf->dplls.output_phase_adj_max; - pins[i].prop.phase_range.max = - -pf->dplls.output_phase_adj_max; ret = ice_cgu_get_output_pin_state_caps(hw, i, &caps); if (ret) return ret; } + ice_dpll_phase_range_set(&pins[i].prop.phase_range, + phase_adj_max); pins[i].prop.capabilities = caps; ret = ice_dpll_pin_state_update(pf, &pins[i], pin_type, NULL); if (ret) @@ -2308,8 +2317,10 @@ static int ice_dpll_init_info(struct ice_pf *pf, bool cgu) dp->dpll_idx = abilities.pps_dpll_idx; d->num_inputs = abilities.num_inputs; d->num_outputs = abilities.num_outputs; - d->input_phase_adj_max = le32_to_cpu(abilities.max_in_phase_adj); - d->output_phase_adj_max = le32_to_cpu(abilities.max_out_phase_adj); + d->input_phase_adj_max = le32_to_cpu(abilities.max_in_phase_adj) & + ICE_AQC_GET_CGU_MAX_PHASE_ADJ; + d->output_phase_adj_max = le32_to_cpu(abilities.max_out_phase_adj) & + ICE_AQC_GET_CGU_MAX_PHASE_ADJ; alloc_size = sizeof(*d->inputs) * d->num_inputs; d->inputs = kzalloc(alloc_size, GFP_KERNEL); diff --git a/drivers/net/ethernet/intel/ice/ice_eswitch.c b/drivers/net/ethernet/intel/ice/ice_eswitch.c index 4c20188bf0..5b9a7ee278 100644 --- a/drivers/net/ethernet/intel/ice/ice_eswitch.c +++ b/drivers/net/ethernet/intel/ice/ice_eswitch.c @@ -49,9 +49,6 @@ static int ice_eswitch_setup_env(struct ice_pf *pf) if (vlan_ops->dis_rx_filtering(uplink_vsi)) goto err_vlan_filtering; - if (ice_vsi_update_security(uplink_vsi, ice_vsi_ctx_set_allow_override)) - goto err_override_uplink; - if (ice_vsi_update_local_lb(uplink_vsi, true)) goto err_override_local_lb; @@ -63,8 +60,6 @@ static int ice_eswitch_setup_env(struct ice_pf *pf) err_up: ice_vsi_update_local_lb(uplink_vsi, false); err_override_local_lb: - ice_vsi_update_security(uplink_vsi, ice_vsi_ctx_clear_allow_override); -err_override_uplink: vlan_ops->ena_rx_filtering(uplink_vsi); err_vlan_filtering: ice_cfg_dflt_vsi(uplink_vsi->port_info, uplink_vsi->idx, false, @@ -275,7 +270,6 @@ static void ice_eswitch_release_env(struct ice_pf *pf) vlan_ops = ice_get_compat_vsi_vlan_ops(uplink_vsi); ice_vsi_update_local_lb(uplink_vsi, false); - ice_vsi_update_security(uplink_vsi, ice_vsi_ctx_clear_allow_override); vlan_ops->ena_rx_filtering(uplink_vsi); ice_cfg_dflt_vsi(uplink_vsi->port_info, uplink_vsi->idx, false, ICE_FLTR_TX); diff --git a/drivers/net/ethernet/intel/ice/ice_eswitch.h b/drivers/net/ethernet/intel/ice/ice_eswitch.h index 20ce32dda6..5c7dcf21b2 100644 --- a/drivers/net/ethernet/intel/ice/ice_eswitch.h +++ b/drivers/net/ethernet/intel/ice/ice_eswitch.h @@ -5,7 +5,7 @@ #define _ICE_ESWITCH_H_ #include -#include "devlink/devlink_port.h" +#include "devlink/port.h" #ifdef CONFIG_ICE_SWITCHDEV void ice_eswitch_detach_vf(struct ice_pf *pf, struct ice_vf *vf); @@ -60,11 +60,6 @@ ice_eswitch_set_target_vsi(struct sk_buff *skb, static inline void ice_eswitch_update_repr(unsigned long *repr_id, struct ice_vsi *vsi) { } -static inline int ice_eswitch_configure(struct ice_pf *pf) -{ - return 0; -} - static inline int ice_eswitch_mode_get(struct devlink *devlink, u16 *mode) { return DEVLINK_ESWITCH_MODE_LEGACY; diff --git a/drivers/net/ethernet/intel/ice/ice_ethtool.c b/drivers/net/ethernet/intel/ice/ice_ethtool.c index 5c1c328042..b080570483 100644 --- a/drivers/net/ethernet/intel/ice/ice_ethtool.c +++ b/drivers/net/ethernet/intel/ice/ice_ethtool.c @@ -693,75 +693,52 @@ static int ice_get_port_topology(struct ice_hw *hw, u8 lport, static int ice_get_tx_rx_equa(struct ice_hw *hw, u8 serdes_num, struct ice_serdes_equalization_to_ethtool *ptr) { + static const int tx = ICE_AQC_OP_CODE_TX_EQU; + static const int rx = ICE_AQC_OP_CODE_RX_EQU; + struct { + int data_in; + int opcode; + int *out; + } aq_params[] = { + { ICE_AQC_TX_EQU_PRE1, tx, &ptr->tx_equ_pre1 }, + { ICE_AQC_TX_EQU_PRE3, tx, &ptr->tx_equ_pre3 }, + { ICE_AQC_TX_EQU_ATTEN, tx, &ptr->tx_equ_atten }, + { ICE_AQC_TX_EQU_POST1, tx, &ptr->tx_equ_post1 }, + { ICE_AQC_TX_EQU_PRE2, tx, &ptr->tx_equ_pre2 }, + { ICE_AQC_RX_EQU_PRE2, rx, &ptr->rx_equ_pre2 }, + { ICE_AQC_RX_EQU_PRE1, rx, &ptr->rx_equ_pre1 }, + { ICE_AQC_RX_EQU_POST1, rx, &ptr->rx_equ_post1 }, + { ICE_AQC_RX_EQU_BFLF, rx, &ptr->rx_equ_bflf }, + { ICE_AQC_RX_EQU_BFHF, rx, &ptr->rx_equ_bfhf }, + { ICE_AQC_RX_EQU_CTLE_GAINHF, rx, &ptr->rx_equ_ctle_gainhf }, + { ICE_AQC_RX_EQU_CTLE_GAINLF, rx, &ptr->rx_equ_ctle_gainlf }, + { ICE_AQC_RX_EQU_CTLE_GAINDC, rx, &ptr->rx_equ_ctle_gaindc }, + { ICE_AQC_RX_EQU_CTLE_BW, rx, &ptr->rx_equ_ctle_bw }, + { ICE_AQC_RX_EQU_DFE_GAIN, rx, &ptr->rx_equ_dfe_gain }, + { ICE_AQC_RX_EQU_DFE_GAIN2, rx, &ptr->rx_equ_dfe_gain_2 }, + { ICE_AQC_RX_EQU_DFE_2, rx, &ptr->rx_equ_dfe_2 }, + { ICE_AQC_RX_EQU_DFE_3, rx, &ptr->rx_equ_dfe_3 }, + { ICE_AQC_RX_EQU_DFE_4, rx, &ptr->rx_equ_dfe_4 }, + { ICE_AQC_RX_EQU_DFE_5, rx, &ptr->rx_equ_dfe_5 }, + { ICE_AQC_RX_EQU_DFE_6, rx, &ptr->rx_equ_dfe_6 }, + { ICE_AQC_RX_EQU_DFE_7, rx, &ptr->rx_equ_dfe_7 }, + { ICE_AQC_RX_EQU_DFE_8, rx, &ptr->rx_equ_dfe_8 }, + { ICE_AQC_RX_EQU_DFE_9, rx, &ptr->rx_equ_dfe_9 }, + { ICE_AQC_RX_EQU_DFE_10, rx, &ptr->rx_equ_dfe_10 }, + { ICE_AQC_RX_EQU_DFE_11, rx, &ptr->rx_equ_dfe_11 }, + { ICE_AQC_RX_EQU_DFE_12, rx, &ptr->rx_equ_dfe_12 }, + }; int err; - err = ice_aq_get_phy_equalization(hw, ICE_AQC_TX_EQU_PRE1, - ICE_AQC_OP_CODE_TX_EQU, serdes_num, - &ptr->tx_equalization_pre1); - if (err) - return err; + for (int i = 0; i < ARRAY_SIZE(aq_params); i++) { + err = ice_aq_get_phy_equalization(hw, aq_params[i].data_in, + aq_params[i].opcode, + serdes_num, aq_params[i].out); + if (err) + break; + } - err = ice_aq_get_phy_equalization(hw, ICE_AQC_TX_EQU_PRE3, - ICE_AQC_OP_CODE_TX_EQU, serdes_num, - &ptr->tx_equalization_pre3); - if (err) - return err; - - err = ice_aq_get_phy_equalization(hw, ICE_AQC_TX_EQU_ATTEN, - ICE_AQC_OP_CODE_TX_EQU, serdes_num, - &ptr->tx_equalization_atten); - if (err) - return err; - - err = ice_aq_get_phy_equalization(hw, ICE_AQC_TX_EQU_POST1, - ICE_AQC_OP_CODE_TX_EQU, serdes_num, - &ptr->tx_equalization_post1); - if (err) - return err; - - err = ice_aq_get_phy_equalization(hw, ICE_AQC_TX_EQU_PRE2, - ICE_AQC_OP_CODE_TX_EQU, serdes_num, - &ptr->tx_equalization_pre2); - if (err) - return err; - - err = ice_aq_get_phy_equalization(hw, ICE_AQC_RX_EQU_PRE2, - ICE_AQC_OP_CODE_RX_EQU, serdes_num, - &ptr->rx_equalization_pre2); - if (err) - return err; - - err = ice_aq_get_phy_equalization(hw, ICE_AQC_RX_EQU_PRE1, - ICE_AQC_OP_CODE_RX_EQU, serdes_num, - &ptr->rx_equalization_pre1); - if (err) - return err; - - err = ice_aq_get_phy_equalization(hw, ICE_AQC_RX_EQU_POST1, - ICE_AQC_OP_CODE_RX_EQU, serdes_num, - &ptr->rx_equalization_post1); - if (err) - return err; - - err = ice_aq_get_phy_equalization(hw, ICE_AQC_RX_EQU_BFLF, - ICE_AQC_OP_CODE_RX_EQU, serdes_num, - &ptr->rx_equalization_bflf); - if (err) - return err; - - err = ice_aq_get_phy_equalization(hw, ICE_AQC_RX_EQU_BFHF, - ICE_AQC_OP_CODE_RX_EQU, serdes_num, - &ptr->rx_equalization_bfhf); - if (err) - return err; - - err = ice_aq_get_phy_equalization(hw, ICE_AQC_RX_EQU_DRATE, - ICE_AQC_OP_CODE_RX_EQU, serdes_num, - &ptr->rx_equalization_drate); - if (err) - return err; - - return 0; + return err; } /** @@ -3838,8 +3815,7 @@ static u32 ice_get_combined_cnt(struct ice_vsi *vsi) ice_for_each_q_vector(vsi, q_idx) { struct ice_q_vector *q_vector = vsi->q_vectors[q_idx]; - if (q_vector->rx.rx_ring && q_vector->tx.tx_ring) - combined++; + combined += min(q_vector->num_ring_tx, q_vector->num_ring_rx); } return combined; @@ -4714,6 +4690,81 @@ static void ice_get_fec_stats(struct net_device *netdev, pi->lport, err); } +#define ICE_ETHTOOL_PFR (ETH_RESET_IRQ | ETH_RESET_DMA | \ + ETH_RESET_FILTER | ETH_RESET_OFFLOAD) + +#define ICE_ETHTOOL_CORER ((ICE_ETHTOOL_PFR | ETH_RESET_RAM) << \ + ETH_RESET_SHARED_SHIFT) + +#define ICE_ETHTOOL_GLOBR (ICE_ETHTOOL_CORER | \ + (ETH_RESET_MAC << ETH_RESET_SHARED_SHIFT) | \ + (ETH_RESET_PHY << ETH_RESET_SHARED_SHIFT)) + +#define ICE_ETHTOOL_VFR ICE_ETHTOOL_PFR + +/** + * ice_ethtool_reset - triggers a given type of reset + * @dev: network interface device structure + * @flags: set of reset flags + * + * Return: 0 on success, -EOPNOTSUPP when using unsupported set of flags. + */ +static int ice_ethtool_reset(struct net_device *dev, u32 *flags) +{ + struct ice_netdev_priv *np = netdev_priv(dev); + struct ice_pf *pf = np->vsi->back; + enum ice_reset_req reset; + + switch (*flags) { + case ICE_ETHTOOL_CORER: + reset = ICE_RESET_CORER; + break; + case ICE_ETHTOOL_GLOBR: + reset = ICE_RESET_GLOBR; + break; + case ICE_ETHTOOL_PFR: + reset = ICE_RESET_PFR; + break; + default: + netdev_info(dev, "Unsupported set of ethtool flags"); + return -EOPNOTSUPP; + } + + ice_schedule_reset(pf, reset); + + *flags = 0; + + return 0; +} + +/** + * ice_repr_ethtool_reset - triggers a VF reset + * @dev: network interface device structure + * @flags: set of reset flags + * + * Return: 0 on success, + * -EOPNOTSUPP when using unsupported set of flags + * -EBUSY when VF is not ready for reset. + */ +static int ice_repr_ethtool_reset(struct net_device *dev, u32 *flags) +{ + struct ice_repr *repr = ice_netdev_to_repr(dev); + struct ice_vf *vf; + + if (repr->type != ICE_REPR_TYPE_VF || + *flags != ICE_ETHTOOL_VFR) + return -EOPNOTSUPP; + + vf = repr->vf; + + if (ice_check_vf_ready_for_cfg(vf)) + return -EBUSY; + + *flags = 0; + + return ice_reset_vf(vf, ICE_VF_RESET_VFLR | ICE_VF_RESET_LOCK); +} + static const struct ethtool_ops ice_ethtool_ops = { .cap_rss_ctx_supported = true, .supported_coalesce_params = ETHTOOL_COALESCE_USECS | @@ -4750,6 +4801,7 @@ static const struct ethtool_ops ice_ethtool_ops = { .nway_reset = ice_nway_reset, .get_pauseparam = ice_get_pauseparam, .set_pauseparam = ice_set_pauseparam, + .reset = ice_ethtool_reset, .get_rxfh_key_size = ice_get_rxfh_key_size, .get_rxfh_indir_size = ice_get_rxfh_indir_size, .get_rxfh = ice_get_rxfh, @@ -4802,6 +4854,7 @@ static const struct ethtool_ops ice_ethtool_repr_ops = { .get_strings = ice_repr_get_strings, .get_ethtool_stats = ice_repr_get_ethtool_stats, .get_sset_count = ice_repr_get_sset_count, + .reset = ice_repr_ethtool_reset, }; /** diff --git a/drivers/net/ethernet/intel/ice/ice_ethtool.h b/drivers/net/ethernet/intel/ice/ice_ethtool.h index 9acccae386..23b2cfbc96 100644 --- a/drivers/net/ethernet/intel/ice/ice_ethtool.h +++ b/drivers/net/ethernet/intel/ice/ice_ethtool.h @@ -10,17 +10,33 @@ struct ice_phy_type_to_ethtool { }; struct ice_serdes_equalization_to_ethtool { - int rx_equalization_pre2; - int rx_equalization_pre1; - int rx_equalization_post1; - int rx_equalization_bflf; - int rx_equalization_bfhf; - int rx_equalization_drate; - int tx_equalization_pre1; - int tx_equalization_pre3; - int tx_equalization_atten; - int tx_equalization_post1; - int tx_equalization_pre2; + int rx_equ_pre2; + int rx_equ_pre1; + int rx_equ_post1; + int rx_equ_bflf; + int rx_equ_bfhf; + int rx_equ_ctle_gainhf; + int rx_equ_ctle_gainlf; + int rx_equ_ctle_gaindc; + int rx_equ_ctle_bw; + int rx_equ_dfe_gain; + int rx_equ_dfe_gain_2; + int rx_equ_dfe_2; + int rx_equ_dfe_3; + int rx_equ_dfe_4; + int rx_equ_dfe_5; + int rx_equ_dfe_6; + int rx_equ_dfe_7; + int rx_equ_dfe_8; + int rx_equ_dfe_9; + int rx_equ_dfe_10; + int rx_equ_dfe_11; + int rx_equ_dfe_12; + int tx_equ_pre1; + int tx_equ_pre3; + int tx_equ_atten; + int tx_equ_post1; + int tx_equ_pre2; }; struct ice_regdump_to_ethtool { diff --git a/drivers/net/ethernet/intel/ice/ice_ethtool_fdir.c b/drivers/net/ethernet/intel/ice/ice_ethtool_fdir.c index ee9862ddfe..1d118171de 100644 --- a/drivers/net/ethernet/intel/ice/ice_ethtool_fdir.c +++ b/drivers/net/ethernet/intel/ice/ice_ethtool_fdir.c @@ -1605,22 +1605,19 @@ void ice_fdir_replay_fltrs(struct ice_pf *pf) */ int ice_fdir_create_dflt_rules(struct ice_pf *pf) { + const enum ice_fltr_ptype dflt_rules[] = { + ICE_FLTR_PTYPE_NONF_IPV4_TCP, ICE_FLTR_PTYPE_NONF_IPV4_UDP, + ICE_FLTR_PTYPE_NONF_IPV6_TCP, ICE_FLTR_PTYPE_NONF_IPV6_UDP, + }; int err; /* Create perfect TCP and UDP rules in hardware. */ - err = ice_create_init_fdir_rule(pf, ICE_FLTR_PTYPE_NONF_IPV4_TCP); - if (err) - return err; + for (int i = 0; i < ARRAY_SIZE(dflt_rules); i++) { + err = ice_create_init_fdir_rule(pf, dflt_rules[i]); - err = ice_create_init_fdir_rule(pf, ICE_FLTR_PTYPE_NONF_IPV4_UDP); - if (err) - return err; - - err = ice_create_init_fdir_rule(pf, ICE_FLTR_PTYPE_NONF_IPV6_TCP); - if (err) - return err; - - err = ice_create_init_fdir_rule(pf, ICE_FLTR_PTYPE_NONF_IPV6_UDP); + if (err) + break; + } return err; } diff --git a/drivers/net/ethernet/intel/ice/ice_flex_pipe.h b/drivers/net/ethernet/intel/ice/ice_flex_pipe.h index 90b9b09931..28b0897adf 100644 --- a/drivers/net/ethernet/intel/ice/ice_flex_pipe.h +++ b/drivers/net/ethernet/intel/ice/ice_flex_pipe.h @@ -23,9 +23,6 @@ int ice_get_sw_fv_list(struct ice_hw *hw, struct ice_prot_lkup_ext *lkups, unsigned long *bm, struct list_head *fv_list); int -ice_pkg_buf_unreserve_section(struct ice_buf_build *bld, u16 count); -u16 ice_pkg_buf_get_free_space(struct ice_buf_build *bld); -int ice_aq_upload_section(struct ice_hw *hw, struct ice_buf_hdr *pkg_buf, u16 buf_size, struct ice_sq_cd *cd); bool diff --git a/drivers/net/ethernet/intel/ice/ice_gnss.c b/drivers/net/ethernet/intel/ice/ice_gnss.c index f02e8ca553..68b8d453d5 100644 --- a/drivers/net/ethernet/intel/ice/ice_gnss.c +++ b/drivers/net/ethernet/intel/ice/ice_gnss.c @@ -381,32 +381,23 @@ void ice_gnss_exit(struct ice_pf *pf) } /** - * ice_gnss_is_gps_present - Check if GPS HW is present + * ice_gnss_is_module_present - Check if GNSS HW is present * @hw: pointer to HW struct + * + * Return: true when GNSS is present, false otherwise. */ -bool ice_gnss_is_gps_present(struct ice_hw *hw) +bool ice_gnss_is_module_present(struct ice_hw *hw) { - if (!hw->func_caps.ts_func_info.src_tmr_owned) + int err; + u8 data; + + if (!hw->func_caps.ts_func_info.src_tmr_owned || + !ice_is_gps_in_netlist(hw)) return false; - if (!ice_is_gps_in_netlist(hw)) + err = ice_read_pca9575_reg(hw, ICE_PCA9575_P0_IN, &data); + if (err || !!(data & ICE_P0_GNSS_PRSNT_N)) return false; -#if IS_ENABLED(CONFIG_PTP_1588_CLOCK) - if (ice_is_e810t(hw)) { - int err; - u8 data; - - err = ice_read_pca9575_reg(hw, ICE_PCA9575_P0_IN, &data); - if (err || !!(data & ICE_P0_GNSS_PRSNT_N)) - return false; - } else { - return false; - } -#else - if (!ice_is_e810t(hw)) - return false; -#endif /* IS_ENABLED(CONFIG_PTP_1588_CLOCK) */ - return true; } diff --git a/drivers/net/ethernet/intel/ice/ice_gnss.h b/drivers/net/ethernet/intel/ice/ice_gnss.h index 75e567ad70..15daf603ed 100644 --- a/drivers/net/ethernet/intel/ice/ice_gnss.h +++ b/drivers/net/ethernet/intel/ice/ice_gnss.h @@ -37,11 +37,11 @@ struct gnss_serial { #if IS_ENABLED(CONFIG_GNSS) void ice_gnss_init(struct ice_pf *pf); void ice_gnss_exit(struct ice_pf *pf); -bool ice_gnss_is_gps_present(struct ice_hw *hw); +bool ice_gnss_is_module_present(struct ice_hw *hw); #else static inline void ice_gnss_init(struct ice_pf *pf) { } static inline void ice_gnss_exit(struct ice_pf *pf) { } -static inline bool ice_gnss_is_gps_present(struct ice_hw *hw) +static inline bool ice_gnss_is_module_present(struct ice_hw *hw) { return false; } diff --git a/drivers/net/ethernet/intel/ice/ice_hw_autogen.h b/drivers/net/ethernet/intel/ice/ice_hw_autogen.h index 8d31bfe28c..aa4bfbcf85 100644 --- a/drivers/net/ethernet/intel/ice/ice_hw_autogen.h +++ b/drivers/net/ethernet/intel/ice/ice_hw_autogen.h @@ -6,6 +6,14 @@ #ifndef _ICE_HW_AUTOGEN_H_ #define _ICE_HW_AUTOGEN_H_ +#define GLCOMM_QUANTA_PROF(_i) (0x002D2D68 + ((_i) * 4)) +#define GLCOMM_QUANTA_PROF_MAX_INDEX 15 +#define GLCOMM_QUANTA_PROF_QUANTA_SIZE_S 0 +#define GLCOMM_QUANTA_PROF_QUANTA_SIZE_M ICE_M(0x3FFF, 0) +#define GLCOMM_QUANTA_PROF_MAX_CMD_S 16 +#define GLCOMM_QUANTA_PROF_MAX_CMD_M ICE_M(0xFF, 16) +#define GLCOMM_QUANTA_PROF_MAX_DESC_S 24 +#define GLCOMM_QUANTA_PROF_MAX_DESC_M ICE_M(0x3F, 24) #define QTX_COMM_DBELL(_DBQM) (0x002C0000 + ((_DBQM) * 4)) #define QTX_COMM_HEAD(_DBQM) (0x000E0000 + ((_DBQM) * 4)) #define QTX_COMM_HEAD_HEAD_S 0 @@ -533,10 +541,22 @@ #define PFPM_WUS_MAG_M BIT(1) #define PFPM_WUS_MNG_M BIT(3) #define PFPM_WUS_FW_RST_WK_M BIT(31) +#define E830_PRTMAC_TS_TX_MEM_VALID_H 0x001E2020 +#define E830_PRTMAC_TS_TX_MEM_VALID_L 0x001E2000 #define E830_PRTMAC_CL01_PS_QNT 0x001E32A0 #define E830_PRTMAC_CL01_PS_QNT_CL0_M GENMASK(15, 0) #define E830_PRTMAC_CL01_QNT_THR 0x001E3320 #define E830_PRTMAC_CL01_QNT_THR_CL0_M GENMASK(15, 0) +#define E830_PRTTSYN_TXTIME_H(_i) (0x001E5800 + ((_i) * 32)) +#define E830_PRTTSYN_TXTIME_L(_i) (0x001E5000 + ((_i) * 32)) +#define E830_GLPTM_ART_CTL 0x00088B50 +#define E830_GLPTM_ART_CTL_ACTIVE_M BIT(0) +#define E830_GLPTM_ART_TIME_H 0x00088B54 +#define E830_GLPTM_ART_TIME_L 0x00088B58 +#define E830_GLTSYN_PTMTIME_H(_i) (0x00088B48 + ((_i) * 4)) +#define E830_GLTSYN_PTMTIME_L(_i) (0x00088B40 + ((_i) * 4)) +#define E830_PFPTM_SEM 0x00088B00 +#define E830_PFPTM_SEM_BUSY_M BIT(0) #define VFINT_DYN_CTLN(_i) (0x00003800 + ((_i) * 4)) #define VFINT_DYN_CTLN_CLEARPBA_M BIT(1) #define E830_MBX_PF_IN_FLIGHT_VF_MSGS_THRESH 0x00234000 diff --git a/drivers/net/ethernet/intel/ice/ice_lag.c b/drivers/net/ethernet/intel/ice/ice_lag.c index 1ccb572ce2..2410aee59f 100644 --- a/drivers/net/ethernet/intel/ice/ice_lag.c +++ b/drivers/net/ethernet/intel/ice/ice_lag.c @@ -1000,6 +1000,28 @@ static void ice_lag_link(struct ice_lag *lag) netdev_info(lag->netdev, "Shared SR-IOV resources in bond are active\n"); } +/** + * ice_lag_config_eswitch - configure eswitch to work with LAG + * @lag: lag info struct + * @netdev: active network interface device struct + * + * Updates all port representors in eswitch to use @netdev for Tx. + * + * Configures the netdev to keep dst metadata (also used in representor Tx). + * This is required for an uplink without switchdev mode configured. + */ +static void ice_lag_config_eswitch(struct ice_lag *lag, + struct net_device *netdev) +{ + struct ice_repr *repr; + unsigned long id; + + xa_for_each(&lag->pf->eswitch.reprs, id, repr) + repr->dst->u.port_info.lower_dev = netdev; + + netif_keep_dst(netdev); +} + /** * ice_lag_unlink - handle unlink event * @lag: LAG info struct @@ -1021,6 +1043,9 @@ static void ice_lag_unlink(struct ice_lag *lag) ice_lag_move_vf_nodes(lag, act_port, pri_port); lag->primary = false; lag->active_port = ICE_LAG_INVALID_PORT; + + /* Config primary's eswitch back to normal operation. */ + ice_lag_config_eswitch(lag, lag->netdev); } else { struct ice_lag *primary_lag; @@ -1296,12 +1321,18 @@ static void ice_lag_changeupper_event(struct ice_lag *lag, void *ptr) */ if (!primary_lag) { lag->primary = true; + if (!ice_is_switchdev_running(lag->pf)) + return; + /* Configure primary's SWID to be shared */ ice_lag_primary_swid(lag, true); primary_lag = lag; } else { u16 swid; + if (!ice_is_switchdev_running(primary_lag->pf)) + return; + swid = primary_lag->pf->hw.port_info->sw_id; ice_lag_set_swid(swid, lag, true); ice_lag_add_prune_list(primary_lag, lag->pf); @@ -1419,6 +1450,7 @@ static void ice_lag_monitor_active(struct ice_lag *lag, void *ptr) ice_lag_move_vf_nodes(lag, prim_port, event_port); lag->active_port = event_port; + ice_lag_config_eswitch(lag, event_netdev); return; } @@ -1428,6 +1460,7 @@ static void ice_lag_monitor_active(struct ice_lag *lag, void *ptr) /* new active port */ ice_lag_move_vf_nodes(lag, lag->active_port, event_port); lag->active_port = event_port; + ice_lag_config_eswitch(lag, event_netdev); } else { /* port not set as currently active (e.g. new active port * has already claimed the nodes and filters diff --git a/drivers/net/ethernet/intel/ice/ice_lan_tx_rx.h b/drivers/net/ethernet/intel/ice/ice_lan_tx_rx.h index 611577ebc2..77ba26538b 100644 --- a/drivers/net/ethernet/intel/ice/ice_lan_tx_rx.h +++ b/drivers/net/ethernet/intel/ice/ice_lan_tx_rx.h @@ -229,7 +229,7 @@ struct ice_32b_rx_flex_desc_nic { __le16 status_error1; u8 flexi_flags2; u8 ts_low; - __le16 l2tag2_1st; + __le16 raw_csum; __le16 l2tag2_2nd; /* Qword 3 */ @@ -371,29 +371,21 @@ enum ice_rx_flex_desc_status_error_1_bits { ICE_RX_FLEX_DESC_STATUS1_LAST /* this entry must be last!!! */ }; -#define ICE_RXQ_CTX_SIZE_DWORDS 8 -#define ICE_RXQ_CTX_SZ (ICE_RXQ_CTX_SIZE_DWORDS * sizeof(u32)) #define ICE_TX_CMPLTNQ_CTX_SIZE_DWORDS 22 #define ICE_TX_DRBELL_Q_CTX_SIZE_DWORDS 5 #define GLTCLAN_CQ_CNTX(i, CQ) (GLTCLAN_CQ_CNTX0(CQ) + ((i) * 0x0800)) -/* RLAN Rx queue context data - * - * The sizes of the variables may be larger than needed due to crossing byte - * boundaries. If we do not have the width of the variable set to the correct - * size then we could end up shifting bits off the top of the variable when the - * variable is at the top of a byte and crosses over into the next byte. - */ +/* RLAN Rx queue context data */ struct ice_rlan_ctx { u16 head; - u16 cpuid; /* bigger than needed, see above for reason */ + u8 cpuid; #define ICE_RLAN_BASE_S 7 u64 base; u16 qlen; #define ICE_RLAN_CTX_DBUF_S 7 - u16 dbuf; /* bigger than needed, see above for reason */ + u8 dbuf; #define ICE_RLAN_CTX_HBUF_S 6 - u16 hbuf; /* bigger than needed, see above for reason */ + u8 hbuf; u8 dtype; u8 dsize; u8 crcstrip; @@ -401,29 +393,15 @@ struct ice_rlan_ctx { u8 hsplit_0; u8 hsplit_1; u8 showiv; - u32 rxmax; /* bigger than needed, see above for reason */ + u16 rxmax; u8 tphrdesc_ena; u8 tphwdesc_ena; u8 tphdata_ena; u8 tphhead_ena; - u16 lrxqthresh; /* bigger than needed, see above for reason */ + u8 lrxqthresh; u8 prefena; /* NOTE: normally must be set to 1 at init */ }; -struct ice_ctx_ele { - u16 offset; - u16 size_of; - u16 width; - u16 lsb; -}; - -#define ICE_CTX_STORE(_struct, _ele, _width, _lsb) { \ - .offset = offsetof(struct _struct, _ele), \ - .size_of = sizeof_field(struct _struct, _ele), \ - .width = _width, \ - .lsb = _lsb, \ -} - /* for hsplit_0 field of Rx RLAN context */ enum ice_rlan_ctx_rx_hsplit_0 { ICE_RLAN_RX_HSPLIT_0_NO_SPLIT = 0, @@ -500,10 +478,15 @@ enum ice_tx_desc_len_fields { struct ice_tx_ctx_desc { __le32 tunneling_params; __le16 l2tag2; - __le16 rsvd; + __le16 gcs; __le64 qw1; }; +#define ICE_TX_GCS_DESC_START_M GENMASK(7, 0) +#define ICE_TX_GCS_DESC_OFFSET_M GENMASK(11, 8) +#define ICE_TX_GCS_DESC_TYPE_M GENMASK(14, 12) +#define ICE_TX_GCS_DESC_CSUM_PSH 1 + #define ICE_TXD_CTX_QW1_CMD_S 4 #define ICE_TXD_CTX_QW1_CMD_M (0x7FUL << ICE_TXD_CTX_QW1_CMD_S) @@ -551,18 +534,12 @@ enum ice_tx_ctx_desc_eipt_offload { #define ICE_LAN_TXQ_MAX_QGRPS 127 #define ICE_LAN_TXQ_MAX_QDIS 1023 -/* Tx queue context data - * - * The sizes of the variables may be larger than needed due to crossing byte - * boundaries. If we do not have the width of the variable set to the correct - * size then we could end up shifting bits off the top of the variable when the - * variable is at the top of a byte and crosses over into the next byte. - */ +/* Tx queue context data */ struct ice_tlan_ctx { #define ICE_TLAN_CTX_BASE_S 7 u64 base; /* base is defined in 128-byte units */ u8 port_num; - u16 cgd_num; /* bigger than needed, see above for reason */ + u8 cgd_num; u8 pf_num; u16 vmvf_num; u8 vmvf_type; @@ -573,7 +550,7 @@ struct ice_tlan_ctx { u8 tsyn_ena; u8 internal_usage_flag; u8 alt_vlan; - u16 cpuid; /* bigger than needed, see above for reason */ + u8 cpuid; u8 wb_mode; u8 tphrd_desc; u8 tphrd; @@ -582,7 +559,7 @@ struct ice_tlan_ctx { u16 qnum_in_func; u8 itr_notification_mode; u8 adjust_prof_id; - u32 qlen; /* bigger than needed, see above for reason */ + u16 qlen; u8 quanta_prof_idx; u8 tso_ena; u16 tso_qnum; @@ -590,7 +567,6 @@ struct ice_tlan_ctx { u8 drop_ena; u8 cache_prof_idx; u8 pkt_shaper_prof_idx; - u8 int_q_state; /* width not needed - internal - DO NOT WRITE!!! */ }; #endif /* _ICE_LAN_TX_RX_H_ */ diff --git a/drivers/net/ethernet/intel/ice/ice_lib.c b/drivers/net/ethernet/intel/ice/ice_lib.c index a19e1044b3..fcad806a3c 100644 --- a/drivers/net/ethernet/intel/ice/ice_lib.c +++ b/drivers/net/ethernet/intel/ice/ice_lib.c @@ -1431,6 +1431,10 @@ static int ice_vsi_alloc_rings(struct ice_vsi *vsi) ring->dev = dev; ring->count = vsi->num_rx_desc; ring->cached_phctime = pf->ptp.cached_phc_time; + + if (ice_is_feature_supported(pf, ICE_F_GCS)) + ring->flags |= ICE_RX_FLAGS_RING_GCS; + WRITE_ONCE(vsi->rx_rings[i], ring); } @@ -1769,9 +1773,8 @@ void ice_update_eth_stats(struct ice_vsi *vsi) * @prio: priority for the RXDID for this queue * @ena_ts: true to enable timestamp and false to disable timestamp */ -void -ice_write_qrxflxp_cntxt(struct ice_hw *hw, u16 pf_q, u32 rxdid, u32 prio, - bool ena_ts) +void ice_write_qrxflxp_cntxt(struct ice_hw *hw, u16 pf_q, u32 rxdid, u32 prio, + bool ena_ts) { int regval = rd32(hw, QRXFLXP_CNTXT(pf_q)); @@ -3885,15 +3888,17 @@ void ice_init_feature_support(struct ice_pf *pf) ice_set_feature_support(pf, ICE_F_CGU); if (ice_is_clock_mux_in_netlist(&pf->hw)) ice_set_feature_support(pf, ICE_F_SMA_CTRL); - if (ice_gnss_is_gps_present(&pf->hw)) + if (ice_gnss_is_module_present(&pf->hw)) ice_set_feature_support(pf, ICE_F_GNSS); break; default: break; } - if (pf->hw.mac_type == ICE_MAC_E830) + if (pf->hw.mac_type == ICE_MAC_E830) { ice_set_feature_support(pf, ICE_F_MBX_LIMIT); + ice_set_feature_support(pf, ICE_F_GCS); + } } /** @@ -3939,24 +3944,6 @@ void ice_vsi_ctx_clear_antispoof(struct ice_vsi_ctx *ctx) ICE_AQ_VSI_SEC_TX_PRUNE_ENA_S); } -/** - * ice_vsi_ctx_set_allow_override - allow destination override on VSI - * @ctx: pointer to VSI ctx structure - */ -void ice_vsi_ctx_set_allow_override(struct ice_vsi_ctx *ctx) -{ - ctx->info.sec_flags |= ICE_AQ_VSI_SEC_FLAG_ALLOW_DEST_OVRD; -} - -/** - * ice_vsi_ctx_clear_allow_override - turn off destination override on VSI - * @ctx: pointer to VSI ctx structure - */ -void ice_vsi_ctx_clear_allow_override(struct ice_vsi_ctx *ctx) -{ - ctx->info.sec_flags &= ~ICE_AQ_VSI_SEC_FLAG_ALLOW_DEST_OVRD; -} - /** * ice_vsi_update_local_lb - update sw block in VSI with local loopback bit * @vsi: pointer to VSI structure diff --git a/drivers/net/ethernet/intel/ice/ice_lib.h b/drivers/net/ethernet/intel/ice/ice_lib.h index 1a6cfc8693..6085039bac 100644 --- a/drivers/net/ethernet/intel/ice/ice_lib.h +++ b/drivers/net/ethernet/intel/ice/ice_lib.h @@ -88,8 +88,6 @@ void ice_write_intrl(struct ice_q_vector *q_vector, u8 intrl); void ice_write_itr(struct ice_ring_container *rc, u16 itr); void ice_set_q_vector_intrl(struct ice_q_vector *q_vector); -int ice_vsi_cfg_mac_fltr(struct ice_vsi *vsi, const u8 *macaddr, bool set); - bool ice_is_safe_mode(struct ice_pf *pf); bool ice_is_rdma_ena(struct ice_pf *pf); bool ice_is_dflt_vsi_in_use(struct ice_port_info *pi); @@ -106,10 +104,6 @@ ice_vsi_update_security(struct ice_vsi *vsi, void (*fill)(struct ice_vsi_ctx *)) void ice_vsi_ctx_set_antispoof(struct ice_vsi_ctx *ctx); void ice_vsi_ctx_clear_antispoof(struct ice_vsi_ctx *ctx); - -void ice_vsi_ctx_set_allow_override(struct ice_vsi_ctx *ctx); - -void ice_vsi_ctx_clear_allow_override(struct ice_vsi_ctx *ctx); int ice_vsi_update_local_lb(struct ice_vsi *vsi, bool set); int ice_vsi_add_vlan_zero(struct ice_vsi *vsi); int ice_vsi_del_vlan_zero(struct ice_vsi *vsi); diff --git a/drivers/net/ethernet/intel/ice/ice_main.c b/drivers/net/ethernet/intel/ice/ice_main.c index be5ef359e2..b1bc299040 100644 --- a/drivers/net/ethernet/intel/ice/ice_main.c +++ b/drivers/net/ethernet/intel/ice/ice_main.c @@ -14,7 +14,7 @@ #include "ice_dcb_lib.h" #include "ice_dcb_nl.h" #include "devlink/devlink.h" -#include "devlink/devlink_port.h" +#include "devlink/port.h" #include "ice_sf_eth.h" #include "ice_hwmon.h" /* Including ice_trace.h with CREATE_TRACE_POINTS defined will generate the @@ -1144,7 +1144,7 @@ ice_link_event(struct ice_pf *pf, struct ice_port_info *pi, bool link_up, if (link_up == old_link && link_speed == old_link_speed) return 0; - ice_ptp_link_change(pf, pf->hw.pf_id, link_up); + ice_ptp_link_change(pf, link_up); if (ice_is_dcb_active(pf)) { if (test_bit(ICE_FLAG_DCB_ENA, pf->flags)) @@ -1567,6 +1567,9 @@ static int __ice_clean_ctrlq(struct ice_pf *pf, enum ice_ctl_q q_type) case ice_aqc_opc_lldp_set_mib_change: ice_dcb_process_lldp_set_mib_change(pf, &event); break; + case ice_aqc_opc_get_health_status: + ice_process_health_status_event(pf, &event); + break; default: dev_dbg(dev, "%s Receive Queue unknown event 0x%04x ignored\n", qtype, opcode); @@ -1816,6 +1819,8 @@ static void ice_handle_mdd_event(struct ice_pf *pf) if (netif_msg_tx_err(pf)) dev_info(dev, "Malicious Driver Detection event %d on TX queue %d PF# %d VF# %d\n", event, queue, pf_num, vf_num); + ice_report_mdd_event(pf, ICE_MDD_SRC_TX_PQM, pf_num, vf_num, + event, queue); wr32(hw, GL_MDET_TX_PQM, 0xffffffff); } @@ -1829,6 +1834,8 @@ static void ice_handle_mdd_event(struct ice_pf *pf) if (netif_msg_tx_err(pf)) dev_info(dev, "Malicious Driver Detection event %d on TX queue %d PF# %d VF# %d\n", event, queue, pf_num, vf_num); + ice_report_mdd_event(pf, ICE_MDD_SRC_TX_TCLAN, pf_num, vf_num, + event, queue); wr32(hw, GL_MDET_TX_TCLAN_BY_MAC(hw), U32_MAX); } @@ -1842,6 +1849,8 @@ static void ice_handle_mdd_event(struct ice_pf *pf) if (netif_msg_rx_err(pf)) dev_info(dev, "Malicious Driver Detection event %d on RX queue %d PF# %d VF# %d\n", event, queue, pf_num, vf_num); + ice_report_mdd_event(pf, ICE_MDD_SRC_RX, pf_num, vf_num, event, + queue); wr32(hw, GL_MDET_RX, 0xffffffff); } @@ -2364,9 +2373,11 @@ static void ice_service_task(struct work_struct *work) struct ice_pf *pf = container_of(work, struct ice_pf, serv_task); unsigned long start_time = jiffies; - /* subtasks */ + if (pf->health_reporters.tx_hang_buf.tx_ring) { + ice_report_tx_hang(pf); + pf->health_reporters.tx_hang_buf.tx_ring = NULL; + } - /* process reset requests first */ ice_reset_subtask(pf); /* bail if a reset/recovery cycle is pending or rebuild failed */ @@ -2761,6 +2772,27 @@ void ice_map_xdp_rings(struct ice_vsi *vsi) } } +/** + * ice_unmap_xdp_rings - Unmap XDP rings from interrupt vectors + * @vsi: the VSI with XDP rings being unmapped + */ +static void ice_unmap_xdp_rings(struct ice_vsi *vsi) +{ + int v_idx; + + ice_for_each_q_vector(vsi, v_idx) { + struct ice_q_vector *q_vector = vsi->q_vectors[v_idx]; + struct ice_tx_ring *ring; + + ice_for_each_tx_ring(ring, q_vector->tx) + if (!ring->tx_buf || !ice_ring_is_xdp(ring)) + break; + + /* restore the value of last node prior to XDP setup */ + q_vector->tx.tx_ring = ring; + } +} + /** * ice_prepare_xdp_rings - Allocate, configure and setup Tx rings for XDP * @vsi: VSI to bring up Tx rings used by XDP @@ -2824,7 +2856,7 @@ int ice_prepare_xdp_rings(struct ice_vsi *vsi, struct bpf_prog *prog, if (status) { dev_err(dev, "Failed VSI LAN queue config for XDP, error: %d\n", status); - goto clear_xdp_rings; + goto unmap_xdp_rings; } /* assign the prog only when it's not already present on VSI; @@ -2840,6 +2872,8 @@ int ice_prepare_xdp_rings(struct ice_vsi *vsi, struct bpf_prog *prog, ice_vsi_assign_bpf_prog(vsi, prog); return 0; +unmap_xdp_rings: + ice_unmap_xdp_rings(vsi); clear_xdp_rings: ice_for_each_xdp_txq(vsi, i) if (vsi->xdp_rings[i]) { @@ -2856,6 +2890,8 @@ err_map_xdp: mutex_unlock(&pf->avail_q_mutex); devm_kfree(dev, vsi->xdp_rings); + vsi->xdp_rings = NULL; + return -ENOMEM; } @@ -2871,7 +2907,7 @@ int ice_destroy_xdp_rings(struct ice_vsi *vsi, enum ice_xdp_cfg cfg_type) { u16 max_txqs[ICE_MAX_TRAFFIC_CLASS] = { 0 }; struct ice_pf *pf = vsi->back; - int i, v_idx; + int i; /* q_vectors are freed in reset path so there's no point in detaching * rings @@ -2879,17 +2915,7 @@ int ice_destroy_xdp_rings(struct ice_vsi *vsi, enum ice_xdp_cfg cfg_type) if (cfg_type == ICE_XDP_CFG_PART) goto free_qmap; - ice_for_each_q_vector(vsi, v_idx) { - struct ice_q_vector *q_vector = vsi->q_vectors[v_idx]; - struct ice_tx_ring *ring; - - ice_for_each_tx_ring(ring, q_vector->tx) - if (!ring->tx_buf || !ice_ring_is_xdp(ring)) - break; - - /* restore the value of last node prior to XDP setup */ - q_vector->tx.tx_ring = ring; - } + ice_unmap_xdp_rings(vsi); free_qmap: mutex_lock(&pf->avail_q_mutex); @@ -3034,11 +3060,14 @@ ice_xdp_setup_prog(struct ice_vsi *vsi, struct bpf_prog *prog, xdp_ring_err = ice_vsi_determine_xdp_res(vsi); if (xdp_ring_err) { NL_SET_ERR_MSG_MOD(extack, "Not enough Tx resources for XDP"); + goto resume_if; } else { xdp_ring_err = ice_prepare_xdp_rings(vsi, prog, ICE_XDP_CFG_FULL); - if (xdp_ring_err) + if (xdp_ring_err) { NL_SET_ERR_MSG_MOD(extack, "Setting up XDP Tx resources failed"); + goto resume_if; + } } xdp_features_set_redirect_target(vsi->netdev, true); /* reallocate Rx queues that are used for zero-copy */ @@ -3056,6 +3085,7 @@ ice_xdp_setup_prog(struct ice_vsi *vsi, struct bpf_prog *prog, NL_SET_ERR_MSG_MOD(extack, "Freeing XDP Rx resources failed"); } +resume_if: if (if_running) ret = ice_up(vsi); @@ -3174,12 +3204,14 @@ static irqreturn_t ice_ll_ts_intr(int __always_unused irq, void *data) hw = &pf->hw; tx = &pf->ptp.port.tx; spin_lock_irqsave(&tx->lock, flags); - ice_ptp_complete_tx_single_tstamp(tx); + if (tx->init) { + ice_ptp_complete_tx_single_tstamp(tx); - idx = find_next_bit_wrap(tx->in_use, tx->len, - tx->last_ll_ts_idx_read + 1); - if (idx != tx->len) - ice_ptp_req_tx_single_tstamp(tx, idx); + idx = find_next_bit_wrap(tx->in_use, tx->len, + tx->last_ll_ts_idx_read + 1); + if (idx != tx->len) + ice_ptp_req_tx_single_tstamp(tx, idx); + } spin_unlock_irqrestore(&tx->lock, flags); val = GLINT_DYN_CTL_INTENA_M | GLINT_DYN_CTL_CLEARPBA_M | @@ -3281,22 +3313,8 @@ static irqreturn_t ice_misc_intr(int __always_unused irq, void *data) if (oicr & PFINT_OICR_TSYN_TX_M) { ena_mask &= ~PFINT_OICR_TSYN_TX_M; - if (ice_pf_state_is_nominal(pf) && - pf->hw.dev_caps.ts_dev_info.ts_ll_int_read) { - struct ice_ptp_tx *tx = &pf->ptp.port.tx; - unsigned long flags; - u8 idx; - spin_lock_irqsave(&tx->lock, flags); - idx = find_next_bit_wrap(tx->in_use, tx->len, - tx->last_ll_ts_idx_read + 1); - if (idx != tx->len) - ice_ptp_req_tx_single_tstamp(tx, idx); - spin_unlock_irqrestore(&tx->lock, flags); - } else if (ice_ptp_pf_handles_tx_interrupt(pf)) { - set_bit(ICE_MISC_THREAD_TX_TSTAMP, pf->misc_thread); - ret = IRQ_WAKE_THREAD; - } + ret = ice_ptp_ts_irq(pf); } if (oicr & PFINT_OICR_TSYN_EVNT_M) { @@ -3666,6 +3684,12 @@ void ice_set_netdev_features(struct net_device *netdev) */ netdev->hw_features |= NETIF_F_RXFCS; + /* Mutual exclusivity for TSO and GCS is enforced by the set features + * ndo callback. + */ + if (ice_is_feature_supported(pf, ICE_F_GCS)) + netdev->hw_features |= NETIF_F_HW_CSUM; + netif_set_tso_max_size(netdev, ICE_MAX_TSO_SIZE); } @@ -4043,8 +4067,7 @@ static void ice_set_pf_caps(struct ice_pf *pf) } clear_bit(ICE_FLAG_PTP_SUPPORTED, pf->flags); - if (func_caps->common_cap.ieee_1588 && - !(pf->hw.mac_type == ICE_MAC_E830)) + if (func_caps->common_cap.ieee_1588) set_bit(ICE_FLAG_PTP_SUPPORTED, pf->flags); pf->max_pf_txqs = func_caps->common_cap.num_txq; @@ -4554,6 +4577,34 @@ ice_init_tx_topology(struct ice_hw *hw, const struct firmware *firmware) return 0; } +/** + * ice_init_supported_rxdids - Initialize supported Rx descriptor IDs + * @hw: pointer to the hardware structure + * @pf: pointer to pf structure + * + * The pf->supported_rxdids bitmap is used to indicate to VFs which descriptor + * formats the PF hardware supports. The exact list of supported RXDIDs + * depends on the loaded DDP package. The IDs can be determined by reading the + * GLFLXP_RXDID_FLAGS register after the DDP package is loaded. + * + * Note that the legacy 32-byte RXDID 0 is always supported but is not listed + * in the DDP package. The 16-byte legacy descriptor is never supported by + * VFs. + */ +static void ice_init_supported_rxdids(struct ice_hw *hw, struct ice_pf *pf) +{ + pf->supported_rxdids = BIT(ICE_RXDID_LEGACY_1); + + for (int i = ICE_RXDID_FLEX_NIC; i < ICE_FLEX_DESC_RXDID_MAX_NUM; i++) { + u32 regval; + + regval = rd32(hw, GLFLXP_RXDID_FLAGS(i, 0)); + if ((regval >> GLFLXP_RXDID_FLAGS_FLEXIFLAG_4N_S) + & GLFLXP_RXDID_FLAGS_FLEXIFLAG_4N_M) + pf->supported_rxdids |= BIT(i); + } +} + /** * ice_init_ddp_config - DDP related configuration * @hw: pointer to the hardware structure @@ -4588,6 +4639,9 @@ static int ice_init_ddp_config(struct ice_hw *hw, struct ice_pf *pf) ice_load_pkg(firmware, pf); release_firmware(firmware); + /* Initialize the supported Rx descriptor IDs after loading DDP */ + ice_init_supported_rxdids(hw, pf); + return 0; } @@ -5012,12 +5066,14 @@ static int ice_init_devlink(struct ice_pf *pf) ice_devlink_init_regions(pf); ice_devlink_register(pf); + ice_health_init(pf); return 0; } static void ice_deinit_devlink(struct ice_pf *pf) { + ice_health_deinit(pf); ice_devlink_unregister(pf); ice_devlink_destroy_regions(pf); ice_devlink_unregister_params(pf); @@ -5031,6 +5087,12 @@ static int ice_init(struct ice_pf *pf) if (err) return err; + if (pf->hw.mac_type == ICE_MAC_E830) { + err = pci_enable_ptm(pf->pdev, NULL); + if (err) + dev_dbg(ice_pf_to_dev(pf), "PCIe PTM not supported by PCIe bus/controller\n"); + } + err = ice_alloc_vsis(pf); if (err) goto err_alloc_vsis; @@ -5866,7 +5928,7 @@ static int __init ice_module_init(void) ice_adv_lnk_speed_maps_init(); - ice_wq = alloc_workqueue("%s", 0, 0, KBUILD_MODNAME); + ice_wq = alloc_workqueue("%s", WQ_UNBOUND, 0, KBUILD_MODNAME); if (!ice_wq) { pr_err("Failed to create workqueue\n"); return status; @@ -6339,10 +6401,12 @@ ice_set_vlan_filtering_features(struct ice_vsi *vsi, netdev_features_t features) int err = 0; /* support Single VLAN Mode (SVM) and Double VLAN Mode (DVM) by checking - * if either bit is set + * if either bit is set. In switchdev mode Rx filtering should never be + * enabled. */ - if (features & - (NETIF_F_HW_VLAN_CTAG_FILTER | NETIF_F_HW_VLAN_STAG_FILTER)) + if ((features & + (NETIF_F_HW_VLAN_CTAG_FILTER | NETIF_F_HW_VLAN_STAG_FILTER)) && + !ice_is_eswitch_mode_switchdev(vsi->back)) err = vlan_ops->ena_rx_filtering(vsi); else err = vlan_ops->dis_rx_filtering(vsi); @@ -6490,13 +6554,24 @@ ice_set_features(struct net_device *netdev, netdev_features_t features) if (changed & NETIF_F_HW_TC) { bool ena = !!(features & NETIF_F_HW_TC); - ena ? set_bit(ICE_FLAG_CLS_FLOWER, pf->flags) : - clear_bit(ICE_FLAG_CLS_FLOWER, pf->flags); + assign_bit(ICE_FLAG_CLS_FLOWER, pf->flags, ena); } if (changed & NETIF_F_LOOPBACK) ret = ice_set_loopback(vsi, !!(features & NETIF_F_LOOPBACK)); + /* Due to E830 hardware limitations, TSO (NETIF_F_ALL_TSO) with GCS + * (NETIF_F_HW_CSUM) is not supported. + */ + if (ice_is_feature_supported(pf, ICE_F_GCS) && + ((features & NETIF_F_HW_CSUM) && (features & NETIF_F_ALL_TSO))) { + if (netdev->features & NETIF_F_HW_CSUM) + dev_err(ice_pf_to_dev(pf), "To enable TSO, you must first disable HW checksum.\n"); + else + dev_err(ice_pf_to_dev(pf), "To enable HW checksum, you must first disable TSO.\n"); + return -EIO; + } + return ret; } @@ -6720,7 +6795,7 @@ static int ice_up_complete(struct ice_vsi *vsi) ice_print_link_msg(vsi, true); netif_tx_start_all_queues(vsi->netdev); netif_carrier_on(vsi->netdev); - ice_ptp_link_change(pf, pf->hw.pf_id, true); + ice_ptp_link_change(pf, true); } /* Perform an initial read of the statistics registers now to @@ -7190,7 +7265,7 @@ int ice_down(struct ice_vsi *vsi) if (vsi->netdev) { vlan_err = ice_vsi_del_vlan_zero(vsi); - ice_ptp_link_change(vsi->back, vsi->back->hw.pf_id, false); + ice_ptp_link_change(vsi->back, false); netif_carrier_off(vsi->netdev); netif_tx_disable(vsi->netdev); } @@ -7723,6 +7798,8 @@ static void ice_rebuild(struct ice_pf *pf, enum ice_reset_req reset_type) /* if we get here, reset flow is successful */ clear_bit(ICE_RESET_FAILED, pf->state); + ice_health_clear(pf); + ice_plug_aux_dev(pf); if (ice_is_feature_supported(pf, ICE_F_SRIOV_LAG)) ice_lag_rebuild(pf); @@ -8213,16 +8290,18 @@ void ice_tx_timeout(struct net_device *netdev, unsigned int txqueue) if (tx_ring) { struct ice_hw *hw = &pf->hw; - u32 head, val = 0; + u32 head, intr = 0; head = FIELD_GET(QTX_COMM_HEAD_HEAD_M, rd32(hw, QTX_COMM_HEAD(vsi->txq_map[txqueue]))); /* Read interrupt register */ - val = rd32(hw, GLINT_DYN_CTL(tx_ring->q_vector->reg_idx)); + intr = rd32(hw, GLINT_DYN_CTL(tx_ring->q_vector->reg_idx)); netdev_info(netdev, "tx_timeout: VSI_num: %d, Q %u, NTC: 0x%x, HW_HEAD: 0x%x, NTU: 0x%x, INT: 0x%x\n", vsi->vsi_num, txqueue, tx_ring->next_to_clean, - head, tx_ring->next_to_use, val); + head, tx_ring->next_to_use, intr); + + ice_prep_tx_hang_report(pf, tx_ring, vsi->vsi_num, head, intr); } pf->tx_timeout_last_recovery = jiffies; diff --git a/drivers/net/ethernet/intel/ice/ice_parser.h b/drivers/net/ethernet/intel/ice/ice_parser.h index 6509d80762..4f56d53d56 100644 --- a/drivers/net/ethernet/intel/ice/ice_parser.h +++ b/drivers/net/ethernet/intel/ice/ice_parser.h @@ -257,7 +257,6 @@ ice_pg_nm_cam_match(struct ice_pg_nm_cam_item *table, int size, /*** ICE_SID_RXPARSER_BOOST_TCAM and ICE_SID_LBL_RXPARSER_TMEM sections ***/ #define ICE_BST_TCAM_TABLE_SIZE 256 #define ICE_BST_TCAM_KEY_SIZE 20 -#define ICE_BST_KEY_TCAM_SIZE 19 /* Boost TCAM item */ struct ice_bst_tcam_item { @@ -401,7 +400,6 @@ u16 ice_xlt_kb_flag_get(struct ice_xlt_kb *kb, u64 pkt_flag); #define ICE_PARSER_GPR_NUM 128 #define ICE_PARSER_FLG_NUM 64 #define ICE_PARSER_ERR_NUM 16 -#define ICE_BST_KEY_SIZE 10 #define ICE_MARKER_ID_SIZE 9 #define ICE_MARKER_MAX_SIZE \ (ICE_MARKER_ID_SIZE * BITS_PER_BYTE - 1) @@ -431,13 +429,13 @@ struct ice_parser_rt { u8 pkt_buf[ICE_PARSER_MAX_PKT_LEN + ICE_PARSER_PKT_REV]; u16 pkt_len; u16 po; - u8 bst_key[ICE_BST_KEY_SIZE]; + u8 bst_key[ICE_BST_TCAM_KEY_SIZE]; struct ice_pg_cam_key pg_key; + u8 pg_prio; struct ice_alu *alu0; struct ice_alu *alu1; struct ice_alu *alu2; struct ice_pg_cam_action *action; - u8 pg_prio; struct ice_gpr_pu pu; u8 markers[ICE_MARKER_ID_SIZE]; bool protocols[ICE_PO_PAIR_SIZE]; diff --git a/drivers/net/ethernet/intel/ice/ice_parser_rt.c b/drivers/net/ethernet/intel/ice/ice_parser_rt.c index dedf5e854e..3995d662e0 100644 --- a/drivers/net/ethernet/intel/ice/ice_parser_rt.c +++ b/drivers/net/ethernet/intel/ice/ice_parser_rt.c @@ -125,22 +125,20 @@ static void ice_bst_key_init(struct ice_parser_rt *rt, else key[idd] = imem->b_kb.prio; - idd = ICE_BST_KEY_TCAM_SIZE - 1; + idd = ICE_BST_TCAM_KEY_SIZE - 2; for (i = idd; i >= 0; i--) { int j; j = ho + idd - i; if (j < ICE_PARSER_MAX_PKT_LEN) - key[i] = rt->pkt_buf[ho + idd - i]; + key[i] = rt->pkt_buf[j]; else key[i] = 0; } - ice_debug(rt->psr->hw, ICE_DBG_PARSER, "Generated Boost TCAM Key:\n"); - ice_debug(rt->psr->hw, ICE_DBG_PARSER, "%02X %02X %02X %02X %02X %02X %02X %02X %02X %02X\n", - key[0], key[1], key[2], key[3], key[4], - key[5], key[6], key[7], key[8], key[9]); - ice_debug(rt->psr->hw, ICE_DBG_PARSER, "\n"); + ice_debug_array_w_prefix(rt->psr->hw, ICE_DBG_PARSER, + KBUILD_MODNAME ": Generated Boost TCAM Key", + key, ICE_BST_TCAM_KEY_SIZE); } static u16 ice_bit_rev_u16(u16 v, int len) diff --git a/drivers/net/ethernet/intel/ice/ice_ptp.c b/drivers/net/ethernet/intel/ice/ice_ptp.c index 8b21011045..0f94dd4dec 100644 --- a/drivers/net/ethernet/intel/ice/ice_ptp.c +++ b/drivers/net/ethernet/intel/ice/ice_ptp.c @@ -4,7 +4,6 @@ #include "ice.h" #include "ice_lib.h" #include "ice_trace.h" -#include "ice_cgu_regs.h" static const char ice_pin_names[][64] = { "SDP0", @@ -16,28 +15,28 @@ static const char ice_pin_names[][64] = { }; static const struct ice_ptp_pin_desc ice_pin_desc_e82x[] = { - /* name, gpio */ - { TIME_SYNC, { 4, -1 }}, - { ONE_PPS, { -1, 5 }}, + /* name, gpio, delay */ + { TIME_SYNC, { 4, -1 }, { 0, 0 }}, + { ONE_PPS, { -1, 5 }, { 0, 11 }}, }; static const struct ice_ptp_pin_desc ice_pin_desc_e825c[] = { - /* name, gpio */ - { SDP0, { 0, 0 }}, - { SDP1, { 1, 1 }}, - { SDP2, { 2, 2 }}, - { SDP3, { 3, 3 }}, - { TIME_SYNC, { 4, -1 }}, - { ONE_PPS, { -1, 5 }}, + /* name, gpio, delay */ + { SDP0, { 0, 0 }, { 15, 14 }}, + { SDP1, { 1, 1 }, { 15, 14 }}, + { SDP2, { 2, 2 }, { 15, 14 }}, + { SDP3, { 3, 3 }, { 15, 14 }}, + { TIME_SYNC, { 4, -1 }, { 11, 0 }}, + { ONE_PPS, { -1, 5 }, { 0, 9 }}, }; static const struct ice_ptp_pin_desc ice_pin_desc_e810[] = { - /* name, gpio */ - { SDP0, { 0, 0 }}, - { SDP1, { 1, 1 }}, - { SDP2, { 2, 2 }}, - { SDP3, { 3, 3 }}, - { ONE_PPS, { -1, 5 }}, + /* name, gpio, delay */ + { SDP0, { 0, 0 }, { 0, 1 }}, + { SDP1, { 1, 1 }, { 0, 1 }}, + { SDP2, { 2, 2 }, { 0, 1 }}, + { SDP3, { 3, 3 }, { 0, 1 }}, + { ONE_PPS, { -1, 5 }, { 0, 1 }}, }; static const char ice_pin_names_nvm[][64] = { @@ -49,12 +48,12 @@ static const char ice_pin_names_nvm[][64] = { }; static const struct ice_ptp_pin_desc ice_pin_desc_e810_sma[] = { - /* name, gpio */ - { GNSS, { 1, -1 }}, - { SMA1, { 1, 0 }}, - { UFL1, { -1, 0 }}, - { SMA2, { 3, 2 }}, - { UFL2, { 3, -1 }}, + /* name, gpio, delay */ + { GNSS, { 1, -1 }, { 0, 0 }}, + { SMA1, { 1, 0 }, { 0, 1 }}, + { UFL1, { -1, 0 }, { 0, 1 }}, + { SMA2, { 3, 2 }, { 0, 1 }}, + { UFL2, { 3, -1 }, { 0, 0 }}, }; static struct ice_pf *ice_get_ctrl_pf(struct ice_pf *pf) @@ -298,18 +297,30 @@ void ice_ptp_restore_timestamp_mode(struct ice_pf *pf) * @sts: Optional parameter for holding a pair of system timestamps from * the system clock. Will be ignored if NULL is given. */ -static u64 -ice_ptp_read_src_clk_reg(struct ice_pf *pf, struct ptp_system_timestamp *sts) +u64 ice_ptp_read_src_clk_reg(struct ice_pf *pf, + struct ptp_system_timestamp *sts) { struct ice_hw *hw = &pf->hw; u32 hi, lo, lo2; u8 tmr_idx; + if (!ice_is_primary(hw)) + hw = ice_get_primary_hw(pf); + tmr_idx = ice_get_ptp_src_clock_index(hw); guard(spinlock)(&pf->adapter->ptp_gltsyn_time_lock); /* Read the system timestamp pre PHC read */ ptp_read_system_prets(sts); + if (hw->mac_type == ICE_MAC_E830) { + u64 clk_time = rd64(hw, E830_GLTSYN_TIME_L(tmr_idx)); + + /* Read the system timestamp post PHC read */ + ptp_read_system_postts(sts); + + return clk_time; + } + lo = rd32(hw, GLTSYN_TIME_L(tmr_idx)); /* Read the system timestamp post PHC read */ @@ -971,28 +982,6 @@ ice_ptp_release_tx_tracker(struct ice_pf *pf, struct ice_ptp_tx *tx) tx->len = 0; } -/** - * ice_ptp_init_tx_eth56g - Initialize tracking for Tx timestamps - * @pf: Board private structure - * @tx: the Tx tracking structure to initialize - * @port: the port this structure tracks - * - * Initialize the Tx timestamp tracker for this port. ETH56G PHYs - * have independent memory blocks for all ports. - * - * Return: 0 for success, -ENOMEM when failed to allocate Tx tracker - */ -static int ice_ptp_init_tx_eth56g(struct ice_pf *pf, struct ice_ptp_tx *tx, - u8 port) -{ - tx->block = port; - tx->offset = 0; - tx->len = INDEX_PER_PORT_ETH56G; - tx->has_ready_bitmap = 1; - - return ice_ptp_alloc_tx_tracker(tx); -} - /** * ice_ptp_init_tx_e82x - Initialize tracking for Tx timestamps * @pf: Board private structure @@ -1003,9 +992,11 @@ static int ice_ptp_init_tx_eth56g(struct ice_pf *pf, struct ice_ptp_tx *tx, * the timestamp block is shared for all ports in the same quad. To avoid * ports using the same timestamp index, logically break the block of * registers into chunks based on the port number. + * + * Return: 0 on success, -ENOMEM when out of memory */ -static int -ice_ptp_init_tx_e82x(struct ice_pf *pf, struct ice_ptp_tx *tx, u8 port) +static int ice_ptp_init_tx_e82x(struct ice_pf *pf, struct ice_ptp_tx *tx, + u8 port) { tx->block = ICE_GET_QUAD_NUM(port); tx->offset = (port % ICE_PORTS_PER_QUAD) * INDEX_PER_PORT_E82X; @@ -1016,24 +1007,27 @@ ice_ptp_init_tx_e82x(struct ice_pf *pf, struct ice_ptp_tx *tx, u8 port) } /** - * ice_ptp_init_tx_e810 - Initialize tracking for Tx timestamps + * ice_ptp_init_tx - Initialize tracking for Tx timestamps * @pf: Board private structure * @tx: the Tx tracking structure to initialize + * @port: the port this structure tracks * - * Initialize the Tx timestamp tracker for this PF. For E810 devices, each - * port has its own block of timestamps, independent of the other ports. + * Initialize the Tx timestamp tracker for this PF. For all PHYs except E82X, + * each port has its own block of timestamps, independent of the other ports. + * + * Return: 0 on success, -ENOMEM when out of memory */ -static int -ice_ptp_init_tx_e810(struct ice_pf *pf, struct ice_ptp_tx *tx) +static int ice_ptp_init_tx(struct ice_pf *pf, struct ice_ptp_tx *tx, u8 port) { - tx->block = pf->hw.port_info->lport; + tx->block = port; tx->offset = 0; - tx->len = INDEX_PER_PORT_E810; + tx->len = INDEX_PER_PORT; + /* The E810 PHY does not provide a timestamp ready bitmap. Instead, * verify new timestamps against cached copy of the last read * timestamp. */ - tx->has_ready_bitmap = 0; + tx->has_ready_bitmap = pf->hw.mac_type != ICE_MAC_E810; return ice_ptp_alloc_tx_tracker(tx); } @@ -1318,20 +1312,21 @@ ice_ptp_port_phy_stop(struct ice_ptp_port *ptp_port) struct ice_hw *hw = &pf->hw; int err; - if (ice_is_e810(hw)) - return 0; - mutex_lock(&ptp_port->ps_lock); - switch (ice_get_phy_model(hw)) { - case ICE_PHY_ETH56G: - err = ice_stop_phy_timer_eth56g(hw, port, true); + switch (hw->mac_type) { + case ICE_MAC_E810: + case ICE_MAC_E830: + err = 0; break; - case ICE_PHY_E82X: + case ICE_MAC_GENERIC: kthread_cancel_delayed_work_sync(&ptp_port->ov_work); err = ice_stop_phy_timer_e82x(hw, port, true); break; + case ICE_MAC_GENERIC_3K_E825: + err = ice_stop_phy_timer_eth56g(hw, port, true); + break; default: err = -ENODEV; } @@ -1361,19 +1356,17 @@ ice_ptp_port_phy_restart(struct ice_ptp_port *ptp_port) unsigned long flags; int err; - if (ice_is_e810(hw)) - return 0; - if (!ptp_port->link_up) return ice_ptp_port_phy_stop(ptp_port); mutex_lock(&ptp_port->ps_lock); - switch (ice_get_phy_model(hw)) { - case ICE_PHY_ETH56G: - err = ice_start_phy_timer_eth56g(hw, port); + switch (hw->mac_type) { + case ICE_MAC_E810: + case ICE_MAC_E830: + err = 0; break; - case ICE_PHY_E82X: + case ICE_MAC_GENERIC: /* Start the PHY timer in Vernier mode */ kthread_cancel_delayed_work_sync(&ptp_port->ov_work); @@ -1398,6 +1391,9 @@ ice_ptp_port_phy_restart(struct ice_ptp_port *ptp_port) kthread_queue_delayed_work(pf->ptp.kworker, &ptp_port->ov_work, 0); break; + case ICE_MAC_GENERIC_3K_E825: + err = ice_start_phy_timer_eth56g(hw, port); + break; default: err = -ENODEV; } @@ -1414,10 +1410,9 @@ ice_ptp_port_phy_restart(struct ice_ptp_port *ptp_port) /** * ice_ptp_link_change - Reconfigure PTP after link status change * @pf: Board private structure - * @port: Port for which the PHY start is set * @linkup: Link is up or down */ -void ice_ptp_link_change(struct ice_pf *pf, u8 port, bool linkup) +void ice_ptp_link_change(struct ice_pf *pf, bool linkup) { struct ice_ptp_port *ptp_port; struct ice_hw *hw = &pf->hw; @@ -1425,14 +1420,7 @@ void ice_ptp_link_change(struct ice_pf *pf, u8 port, bool linkup) if (pf->ptp.state != ICE_PTP_READY) return; - if (WARN_ON_ONCE(port >= hw->ptp.num_lports)) - return; - ptp_port = &pf->ptp.port; - if (ice_is_e825c(hw) && hw->ptp.is_2x50g_muxed_topo) - port *= 2; - if (WARN_ON_ONCE(ptp_port->port_num != port)) - return; /* Update cached link status for this port immediately */ ptp_port->link_up = linkup; @@ -1440,12 +1428,14 @@ void ice_ptp_link_change(struct ice_pf *pf, u8 port, bool linkup) /* Skip HW writes if reset is in progress */ if (pf->hw.reset_ongoing) return; - switch (ice_get_phy_model(hw)) { - case ICE_PHY_E810: - /* Do not reconfigure E810 PHY */ + + switch (hw->mac_type) { + case ICE_MAC_E810: + case ICE_MAC_E830: + /* Do not reconfigure E810 or E830 PHY */ return; - case ICE_PHY_ETH56G: - case ICE_PHY_E82X: + case ICE_MAC_GENERIC: + case ICE_MAC_GENERIC_3K_E825: ice_ptp_port_phy_restart(ptp_port); return; default: @@ -1473,24 +1463,11 @@ static int ice_ptp_cfg_phy_interrupt(struct ice_pf *pf, bool ena, u32 threshold) ice_ptp_reset_ts_memory(hw); - switch (ice_get_phy_model(hw)) { - case ICE_PHY_ETH56G: { - int port; - - for (port = 0; port < hw->ptp.num_lports; port++) { - int err; - - err = ice_phy_cfg_intr_eth56g(hw, port, ena, threshold); - if (err) { - dev_err(dev, "Failed to configure PHY interrupt for port %d, err %d\n", - port, err); - return err; - } - } - + switch (hw->mac_type) { + case ICE_MAC_E810: + case ICE_MAC_E830: return 0; - } - case ICE_PHY_E82X: { + case ICE_MAC_GENERIC: { int quad; for (quad = 0; quad < ICE_GET_QUAD_NUM(hw->ptp.num_lports); @@ -1507,12 +1484,24 @@ static int ice_ptp_cfg_phy_interrupt(struct ice_pf *pf, bool ena, u32 threshold) return 0; } - case ICE_PHY_E810: + case ICE_MAC_GENERIC_3K_E825: { + int port; + + for (port = 0; port < hw->ptp.num_lports; port++) { + int err; + + err = ice_phy_cfg_intr_eth56g(hw, port, ena, threshold); + if (err) { + dev_err(dev, "Failed to configure PHY interrupt for port %d, err %d\n", + port, err); + return err; + } + } + return 0; - case ICE_PHY_UNSUP: + } + case ICE_MAC_UNKNOWN: default: - dev_warn(dev, "%s: Unexpected PHY model %d\n", __func__, - ice_get_phy_model(hw)); return -EOPNOTSUPP; } } @@ -1592,18 +1581,29 @@ void ice_ptp_extts_event(struct ice_pf *pf) * Event is defined in GLTSYN_EVNT_0 register */ for (chan = 0; chan < GLTSYN_EVNT_H_IDX_MAX; chan++) { - /* Check if channel is enabled */ - if (pf->ptp.ext_ts_irq & (1 << chan)) { - lo = rd32(hw, GLTSYN_EVNT_L(chan, tmr_idx)); - hi = rd32(hw, GLTSYN_EVNT_H(chan, tmr_idx)); - event.timestamp = (((u64)hi) << 32) | lo; - event.type = PTP_CLOCK_EXTTS; - event.index = chan; + int pin_desc_idx; - /* Fire event */ - ptp_clock_event(pf->ptp.clock, &event); - pf->ptp.ext_ts_irq &= ~(1 << chan); + /* Check if channel is enabled */ + if (!(pf->ptp.ext_ts_irq & (1 << chan))) + continue; + + lo = rd32(hw, GLTSYN_EVNT_L(chan, tmr_idx)); + hi = rd32(hw, GLTSYN_EVNT_H(chan, tmr_idx)); + event.timestamp = (u64)hi << 32 | lo; + + /* Add delay compensation */ + pin_desc_idx = ice_ptp_find_pin_idx(pf, PTP_PF_EXTTS, chan); + if (pin_desc_idx >= 0) { + const struct ice_ptp_pin_desc *desc; + + desc = &pf->ptp.ice_pin_desc[pin_desc_idx]; + event.timestamp -= desc->delay[0]; } + + event.type = PTP_CLOCK_EXTTS; + event.index = chan; + pf->ptp.ext_ts_irq &= ~(1 << chan); + ptp_clock_event(pf->ptp.clock, &event); } } @@ -1737,11 +1737,11 @@ static int ice_ptp_write_perout(struct ice_hw *hw, unsigned int chan, /* 0. Reset mode & out_en in AUX_OUT */ wr32(hw, GLTSYN_AUX_OUT(chan, tmr_idx), 0); - if (ice_is_e825c(hw)) { + if (hw->mac_type == ICE_MAC_GENERIC_3K_E825) { int err; /* Enable/disable CGU 1PPS output for E825C */ - err = ice_cgu_cfg_pps_out(hw, !!period); + err = ice_tspll_cfg_pps_out_e825c(hw, !!period); if (err) return err; } @@ -1799,9 +1799,9 @@ static int ice_ptp_write_perout(struct ice_hw *hw, unsigned int chan, static int ice_ptp_cfg_perout(struct ice_pf *pf, struct ptp_perout_request *rq, int on) { + unsigned int gpio_pin, prop_delay_ns; u64 clk, period, start, phase; struct ice_hw *hw = &pf->hw; - unsigned int gpio_pin; int pin_desc_idx; if (rq->flags & ~PTP_PEROUT_PHASE) @@ -1812,6 +1812,7 @@ static int ice_ptp_cfg_perout(struct ice_pf *pf, struct ptp_perout_request *rq, return -EIO; gpio_pin = pf->ptp.ice_pin_desc[pin_desc_idx].gpio[1]; + prop_delay_ns = pf->ptp.ice_pin_desc[pin_desc_idx].delay[1]; period = rq->period.sec * NSEC_PER_SEC + rq->period.nsec; /* If we're disabling the output or period is 0, clear out CLKO and TGT @@ -1821,7 +1822,7 @@ static int ice_ptp_cfg_perout(struct ice_pf *pf, struct ptp_perout_request *rq, return ice_ptp_write_perout(hw, rq->index, gpio_pin, 0, 0); if (strncmp(pf->ptp.pin_desc[pin_desc_idx].name, "1PPS", 64) == 0 && - period != NSEC_PER_SEC && hw->ptp.phy_model == ICE_PHY_E82X) { + period != NSEC_PER_SEC && hw->mac_type == ICE_MAC_GENERIC) { dev_err(ice_pf_to_dev(pf), "1PPS pin supports only 1 s period\n"); return -EOPNOTSUPP; } @@ -1844,11 +1845,11 @@ static int ice_ptp_cfg_perout(struct ice_pf *pf, struct ptp_perout_request *rq, * from now, so we have time to write it to HW. */ clk = ice_ptp_read_src_clk_reg(pf, NULL) + NSEC_PER_MSEC * 500; - if (rq->flags & PTP_PEROUT_PHASE || start <= clk - ice_prop_delay(hw)) + if (rq->flags & PTP_PEROUT_PHASE || start <= clk - prop_delay_ns) start = div64_u64(clk + period - 1, period) * period + phase; /* Compensate for propagation delay from the generator to the pin. */ - start -= ice_prop_delay(hw); + start -= prop_delay_ns; return ice_ptp_write_perout(hw, rq->index, gpio_pin, start, period); } @@ -2076,7 +2077,7 @@ ice_ptp_settime64(struct ptp_clock_info *info, const struct timespec64 *ts) /* For Vernier mode on E82X, we need to recalibrate after new settime. * Start with marking timestamps as invalid. */ - if (ice_get_phy_model(hw) == ICE_PHY_E82X) { + if (hw->mac_type == ICE_MAC_GENERIC) { err = ice_ptp_clear_phy_offset_ready_e82x(hw); if (err) dev_warn(ice_pf_to_dev(pf), "Failed to mark timestamps as invalid before settime\n"); @@ -2100,7 +2101,7 @@ ice_ptp_settime64(struct ptp_clock_info *info, const struct timespec64 *ts) ice_ptp_enable_all_perout(pf); /* Recalibrate and re-enable timestamp blocks for E822/E823 */ - if (ice_get_phy_model(hw) == ICE_PHY_E82X) + if (hw->mac_type == ICE_MAC_GENERIC) ice_ptp_restart_all_phy(pf); exit: if (err) { @@ -2178,93 +2179,158 @@ static int ice_ptp_adjtime(struct ptp_clock_info *info, s64 delta) return 0; } -#ifdef CONFIG_ICE_HWTS /** - * ice_ptp_get_syncdevicetime - Get the cross time stamp info + * struct ice_crosststamp_cfg - Device cross timestamp configuration + * @lock_reg: The hardware semaphore lock to use + * @lock_busy: Bit in the semaphore lock indicating the lock is busy + * @ctl_reg: The hardware register to request cross timestamp + * @ctl_active: Bit in the control register to request cross timestamp + * @art_time_l: Lower 32-bits of ART system time + * @art_time_h: Upper 32-bits of ART system time + * @dev_time_l: Lower 32-bits of device time (per timer index) + * @dev_time_h: Upper 32-bits of device time (per timer index) + */ +struct ice_crosststamp_cfg { + /* HW semaphore lock register */ + u32 lock_reg; + u32 lock_busy; + + /* Capture control register */ + u32 ctl_reg; + u32 ctl_active; + + /* Time storage */ + u32 art_time_l; + u32 art_time_h; + u32 dev_time_l[2]; + u32 dev_time_h[2]; +}; + +static const struct ice_crosststamp_cfg ice_crosststamp_cfg_e82x = { + .lock_reg = PFHH_SEM, + .lock_busy = PFHH_SEM_BUSY_M, + .ctl_reg = GLHH_ART_CTL, + .ctl_active = GLHH_ART_CTL_ACTIVE_M, + .art_time_l = GLHH_ART_TIME_L, + .art_time_h = GLHH_ART_TIME_H, + .dev_time_l[0] = GLTSYN_HHTIME_L(0), + .dev_time_h[0] = GLTSYN_HHTIME_H(0), + .dev_time_l[1] = GLTSYN_HHTIME_L(1), + .dev_time_h[1] = GLTSYN_HHTIME_H(1), +}; + +#ifdef CONFIG_ICE_HWTS +static const struct ice_crosststamp_cfg ice_crosststamp_cfg_e830 = { + .lock_reg = E830_PFPTM_SEM, + .lock_busy = E830_PFPTM_SEM_BUSY_M, + .ctl_reg = E830_GLPTM_ART_CTL, + .ctl_active = E830_GLPTM_ART_CTL_ACTIVE_M, + .art_time_l = E830_GLPTM_ART_TIME_L, + .art_time_h = E830_GLPTM_ART_TIME_H, + .dev_time_l[0] = E830_GLTSYN_PTMTIME_L(0), + .dev_time_h[0] = E830_GLTSYN_PTMTIME_H(0), + .dev_time_l[1] = E830_GLTSYN_PTMTIME_L(1), + .dev_time_h[1] = E830_GLTSYN_PTMTIME_H(1), +}; + +#endif /* CONFIG_ICE_HWTS */ +/** + * struct ice_crosststamp_ctx - Device cross timestamp context + * @snapshot: snapshot of system clocks for historic interpolation + * @pf: pointer to the PF private structure + * @cfg: pointer to hardware configuration for cross timestamp + */ +struct ice_crosststamp_ctx { + struct system_time_snapshot snapshot; + struct ice_pf *pf; + const struct ice_crosststamp_cfg *cfg; +}; + +/** + * ice_capture_crosststamp - Capture a device/system cross timestamp * @device: Current device time * @system: System counter value read synchronously with device time - * @ctx: Context provided by timekeeping code + * @__ctx: Context passed from ice_ptp_getcrosststamp * * Read device and system (ART) clock simultaneously and return the corrected * clock values in ns. + * + * Return: zero on success, or a negative error code on failure. */ -static int -ice_ptp_get_syncdevicetime(ktime_t *device, - struct system_counterval_t *system, - void *ctx) +static int ice_capture_crosststamp(ktime_t *device, + struct system_counterval_t *system, + void *__ctx) { - struct ice_pf *pf = (struct ice_pf *)ctx; - struct ice_hw *hw = &pf->hw; - u32 hh_lock, hh_art_ctl; - int i; + struct ice_crosststamp_ctx *ctx = __ctx; + const struct ice_crosststamp_cfg *cfg; + u32 lock, ctl, ts_lo, ts_hi, tmr_idx; + struct ice_pf *pf; + struct ice_hw *hw; + int err; + u64 ts; -#define MAX_HH_HW_LOCK_TRIES 5 -#define MAX_HH_CTL_LOCK_TRIES 100 + cfg = ctx->cfg; + pf = ctx->pf; + hw = &pf->hw; - for (i = 0; i < MAX_HH_HW_LOCK_TRIES; i++) { - /* Get the HW lock */ - hh_lock = rd32(hw, PFHH_SEM + (PFTSYN_SEM_BYTES * hw->pf_id)); - if (hh_lock & PFHH_SEM_BUSY_M) { - usleep_range(10000, 15000); - continue; - } - break; - } - if (hh_lock & PFHH_SEM_BUSY_M) { - dev_err(ice_pf_to_dev(pf), "PTP failed to get hh lock\n"); + tmr_idx = hw->func_caps.ts_func_info.tmr_index_assoc; + if (tmr_idx > 1) + return -EINVAL; + + /* Poll until we obtain the cross-timestamp hardware semaphore */ + err = rd32_poll_timeout(hw, cfg->lock_reg, lock, + !(lock & cfg->lock_busy), + 10 * USEC_PER_MSEC, 50 * USEC_PER_MSEC); + if (err) { + dev_err(ice_pf_to_dev(pf), "PTP failed to get cross timestamp lock\n"); return -EBUSY; } + /* Snapshot system time for historic interpolation */ + ktime_get_snapshot(&ctx->snapshot); + /* Program cmd to master timer */ ice_ptp_src_cmd(hw, ICE_PTP_READ_TIME); /* Start the ART and device clock sync sequence */ - hh_art_ctl = rd32(hw, GLHH_ART_CTL); - hh_art_ctl = hh_art_ctl | GLHH_ART_CTL_ACTIVE_M; - wr32(hw, GLHH_ART_CTL, hh_art_ctl); + ctl = rd32(hw, cfg->ctl_reg); + ctl |= cfg->ctl_active; + wr32(hw, cfg->ctl_reg, ctl); - for (i = 0; i < MAX_HH_CTL_LOCK_TRIES; i++) { - /* Wait for sync to complete */ - hh_art_ctl = rd32(hw, GLHH_ART_CTL); - if (hh_art_ctl & GLHH_ART_CTL_ACTIVE_M) { - udelay(1); - continue; - } else { - u32 hh_ts_lo, hh_ts_hi, tmr_idx; - u64 hh_ts; + /* Poll until hardware completes the capture */ + err = rd32_poll_timeout(hw, cfg->ctl_reg, ctl, !(ctl & cfg->ctl_active), + 5, 20 * USEC_PER_MSEC); + if (err) + goto err_timeout; - tmr_idx = hw->func_caps.ts_func_info.tmr_index_assoc; - /* Read ART time */ - hh_ts_lo = rd32(hw, GLHH_ART_TIME_L); - hh_ts_hi = rd32(hw, GLHH_ART_TIME_H); - hh_ts = ((u64)hh_ts_hi << 32) | hh_ts_lo; - system->cycles = hh_ts; - system->cs_id = CSID_X86_ART; - /* Read Device source clock time */ - hh_ts_lo = rd32(hw, GLTSYN_HHTIME_L(tmr_idx)); - hh_ts_hi = rd32(hw, GLTSYN_HHTIME_H(tmr_idx)); - hh_ts = ((u64)hh_ts_hi << 32) | hh_ts_lo; - *device = ns_to_ktime(hh_ts); - break; - } - } + /* Read ART system time */ + ts_lo = rd32(hw, cfg->art_time_l); + ts_hi = rd32(hw, cfg->art_time_h); + ts = ((u64)ts_hi << 32) | ts_lo; + system->cycles = ts; + system->cs_id = CSID_X86_ART; + system->use_nsecs = true; + /* Read Device source clock time */ + ts_lo = rd32(hw, cfg->dev_time_l[tmr_idx]); + ts_hi = rd32(hw, cfg->dev_time_h[tmr_idx]); + ts = ((u64)ts_hi << 32) | ts_lo; + *device = ns_to_ktime(ts); + +err_timeout: /* Clear the master timer */ ice_ptp_src_cmd(hw, ICE_PTP_NOP); /* Release HW lock */ - hh_lock = rd32(hw, PFHH_SEM + (PFTSYN_SEM_BYTES * hw->pf_id)); - hh_lock = hh_lock & ~PFHH_SEM_BUSY_M; - wr32(hw, PFHH_SEM + (PFTSYN_SEM_BYTES * hw->pf_id), hh_lock); + lock = rd32(hw, cfg->lock_reg); + lock &= ~cfg->lock_busy; + wr32(hw, cfg->lock_reg, lock); - if (i == MAX_HH_CTL_LOCK_TRIES) - return -ETIMEDOUT; - - return 0; + return err; } /** - * ice_ptp_getcrosststamp_e82x - Capture a device cross timestamp + * ice_ptp_getcrosststamp - Capture a device cross timestamp * @info: the driver's PTP info structure * @cts: The memory to fill the cross timestamp info * @@ -2272,22 +2338,36 @@ ice_ptp_get_syncdevicetime(ktime_t *device, * clock. Fill the cross timestamp information and report it back to the * caller. * - * This is only valid for E822 and E823 devices which have support for - * generating the cross timestamp via PCIe PTM. - * * In order to correctly correlate the ART timestamp back to the TSC time, the * CPU must have X86_FEATURE_TSC_KNOWN_FREQ. + * + * Return: zero on success, or a negative error code on failure. */ -static int -ice_ptp_getcrosststamp_e82x(struct ptp_clock_info *info, - struct system_device_crosststamp *cts) +static int ice_ptp_getcrosststamp(struct ptp_clock_info *info, + struct system_device_crosststamp *cts) { struct ice_pf *pf = ptp_info_to_pf(info); + struct ice_crosststamp_ctx ctx = { + .pf = pf, + }; - return get_device_system_crosststamp(ice_ptp_get_syncdevicetime, - pf, NULL, cts); -} + switch (pf->hw.mac_type) { + case ICE_MAC_GENERIC: + case ICE_MAC_GENERIC_3K_E825: + ctx.cfg = &ice_crosststamp_cfg_e82x; + break; +#ifdef CONFIG_ICE_HWTS + case ICE_MAC_E830: + ctx.cfg = &ice_crosststamp_cfg_e830; + break; #endif /* CONFIG_ICE_HWTS */ + default: + return -EOPNOTSUPP; + } + + return get_device_system_crosststamp(ice_capture_crosststamp, &ctx, + &ctx.snapshot, cts); +} /** * ice_ptp_get_ts_config - ioctl interface to read the timestamping config @@ -2548,13 +2628,9 @@ static int ice_ptp_parse_sdp_entries(struct ice_pf *pf, __le16 *entries, */ static void ice_ptp_set_funcs_e82x(struct ice_pf *pf) { -#ifdef CONFIG_ICE_HWTS - if (boot_cpu_has(X86_FEATURE_ART) && - boot_cpu_has(X86_FEATURE_TSC_KNOWN_FREQ)) - pf->ptp.info.getcrosststamp = ice_ptp_getcrosststamp_e82x; + pf->ptp.info.getcrosststamp = ice_ptp_getcrosststamp; -#endif /* CONFIG_ICE_HWTS */ - if (ice_is_e825c(&pf->hw)) { + if (pf->hw.mac_type == ICE_MAC_GENERIC_3K_E825) { pf->ptp.ice_pin_desc = ice_pin_desc_e825c; pf->ptp.info.n_pins = ICE_PIN_DESC_ARR_LEN(ice_pin_desc_e825c); } else { @@ -2620,6 +2696,28 @@ err: } } +/** + * ice_ptp_set_funcs_e830 - Set specialized functions for E830 support + * @pf: Board private structure + * + * Assign functions to the PTP capabiltiies structure for E830 devices. + * Functions which operate across all device families should be set directly + * in ice_ptp_set_caps. Only add functions here which are distinct for E830 + * devices. + */ +static void ice_ptp_set_funcs_e830(struct ice_pf *pf) +{ +#ifdef CONFIG_ICE_HWTS + if (pcie_ptm_enabled(pf->pdev) && boot_cpu_has(X86_FEATURE_ART)) + pf->ptp.info.getcrosststamp = ice_ptp_getcrosststamp; + +#endif /* CONFIG_ICE_HWTS */ + /* Rest of the config is the same as base E810 */ + pf->ptp.ice_pin_desc = ice_pin_desc_e810; + pf->ptp.info.n_pins = ICE_PIN_DESC_ARR_LEN(ice_pin_desc_e810); + ice_ptp_setup_pin_cfg(pf); +} + /** * ice_ptp_set_caps - Set PTP capabilities * @pf: Board private structure @@ -2642,10 +2740,20 @@ static void ice_ptp_set_caps(struct ice_pf *pf) info->enable = ice_ptp_gpio_enable; info->verify = ice_verify_pin; - if (ice_is_e810(&pf->hw)) + switch (pf->hw.mac_type) { + case ICE_MAC_E810: ice_ptp_set_funcs_e810(pf); - else + return; + case ICE_MAC_E830: + ice_ptp_set_funcs_e830(pf); + return; + case ICE_MAC_GENERIC: + case ICE_MAC_GENERIC_3K_E825: ice_ptp_set_funcs_e82x(pf); + return; + default: + return; + } } /** @@ -2755,6 +2863,68 @@ enum ice_tx_tstamp_work ice_ptp_process_ts(struct ice_pf *pf) } } +/** + * ice_ptp_ts_irq - Process the PTP Tx timestamps in IRQ context + * @pf: Board private structure + * + * Return: IRQ_WAKE_THREAD if Tx timestamp read has to be handled in the bottom + * half of the interrupt and IRQ_HANDLED otherwise. + */ +irqreturn_t ice_ptp_ts_irq(struct ice_pf *pf) +{ + struct ice_hw *hw = &pf->hw; + + switch (hw->mac_type) { + case ICE_MAC_E810: + /* E810 capable of low latency timestamping with interrupt can + * request a single timestamp in the top half and wait for + * a second LL TS interrupt from the FW when it's ready. + */ + if (hw->dev_caps.ts_dev_info.ts_ll_int_read) { + struct ice_ptp_tx *tx = &pf->ptp.port.tx; + u8 idx, last; + + if (!ice_pf_state_is_nominal(pf)) + return IRQ_HANDLED; + + spin_lock(&tx->lock); + if (tx->init) { + last = tx->last_ll_ts_idx_read + 1; + idx = find_next_bit_wrap(tx->in_use, tx->len, + last); + if (idx != tx->len) + ice_ptp_req_tx_single_tstamp(tx, idx); + } + spin_unlock(&tx->lock); + + return IRQ_HANDLED; + } + fallthrough; /* non-LL_TS E810 */ + case ICE_MAC_GENERIC: + case ICE_MAC_GENERIC_3K_E825: + /* All other devices process timestamps in the bottom half due + * to sleeping or polling. + */ + if (!ice_ptp_pf_handles_tx_interrupt(pf)) + return IRQ_HANDLED; + + set_bit(ICE_MISC_THREAD_TX_TSTAMP, pf->misc_thread); + return IRQ_WAKE_THREAD; + case ICE_MAC_E830: + /* E830 can read timestamps in the top half using rd32() */ + if (ice_ptp_process_ts(pf) == ICE_TX_TSTAMP_WORK_PENDING) { + /* Process outstanding Tx timestamps. If there + * is more work, re-arm the interrupt to trigger again. + */ + wr32(hw, PFINT_OICR, PFINT_OICR_TSYN_TX_M); + ice_flush(hw); + } + return IRQ_HANDLED; + default: + return IRQ_HANDLED; + } +} + /** * ice_ptp_maybe_trigger_tx_interrupt - Trigger Tx timstamp interrupt * @pf: Board private structure @@ -2775,7 +2945,7 @@ static void ice_ptp_maybe_trigger_tx_interrupt(struct ice_pf *pf) bool trigger_oicr = false; unsigned int i; - if (ice_is_e810(hw)) + if (!pf->ptp.port.tx.has_ready_bitmap) return; if (!ice_pf_src_tmr_owned(pf)) @@ -2821,6 +2991,32 @@ static void ice_ptp_periodic_work(struct kthread_work *work) msecs_to_jiffies(err ? 10 : 500)); } +/** + * ice_ptp_prepare_rebuild_sec - Prepare second NAC for PTP reset or rebuild + * @pf: Board private structure + * @rebuild: rebuild if true, prepare if false + * @reset_type: the reset type being performed + */ +static void ice_ptp_prepare_rebuild_sec(struct ice_pf *pf, bool rebuild, + enum ice_reset_req reset_type) +{ + struct list_head *entry; + + list_for_each(entry, &pf->adapter->ports.ports) { + struct ice_ptp_port *port = list_entry(entry, + struct ice_ptp_port, + list_node); + struct ice_pf *peer_pf = ptp_port_to_pf(port); + + if (!ice_is_primary(&peer_pf->hw)) { + if (rebuild) + ice_ptp_rebuild(peer_pf, reset_type); + else + ice_ptp_prepare_for_reset(peer_pf, reset_type); + } + } +} + /** * ice_ptp_prepare_for_reset - Prepare PTP for reset * @pf: Board private structure @@ -2829,6 +3025,7 @@ static void ice_ptp_periodic_work(struct kthread_work *work) void ice_ptp_prepare_for_reset(struct ice_pf *pf, enum ice_reset_req reset_type) { struct ice_ptp *ptp = &pf->ptp; + struct ice_hw *hw = &pf->hw; u8 src_tmr; if (ptp->state != ICE_PTP_READY) @@ -2844,6 +3041,9 @@ void ice_ptp_prepare_for_reset(struct ice_pf *pf, enum ice_reset_req reset_type) if (reset_type == ICE_RESET_PFR) return; + if (ice_pf_src_tmr_owned(pf) && hw->mac_type == ICE_MAC_GENERIC_3K_E825) + ice_ptp_prepare_rebuild_sec(pf, false, reset_type); + ice_ptp_release_tx_tracker(pf, &pf->ptp.port.tx); /* Disable periodic outputs */ @@ -2877,6 +3077,10 @@ static int ice_ptp_rebuild_owner(struct ice_pf *pf) if (err) return err; + err = ice_tspll_init(hw); + if (err) + return err; + /* Acquire the global hardware lock */ if (!ice_ptp_lock(hw)) { err = -EBUSY; @@ -2885,10 +3089,8 @@ static int ice_ptp_rebuild_owner(struct ice_pf *pf) /* Write the increment time value to PHY and LAN */ err = ice_ptp_write_incval(hw, ice_base_incval(pf)); - if (err) { - ice_ptp_unlock(hw); - return err; - } + if (err) + goto err_unlock; /* Write the initial Time value to PHY and LAN using the cached PHC * time before the reset and time difference between stopping and @@ -2901,10 +3103,8 @@ static int ice_ptp_rebuild_owner(struct ice_pf *pf) ts = ktime_to_timespec64(ktime_get_real()); } err = ice_ptp_write_init(pf, &ts); - if (err) { - ice_ptp_unlock(hw); - return err; - } + if (err) + goto err_unlock; /* Release the global hardware lock */ ice_ptp_unlock(hw); @@ -2914,20 +3114,22 @@ static int ice_ptp_rebuild_owner(struct ice_pf *pf) */ ice_ptp_flush_all_tx_tracker(pf); - if (!ice_is_e810(hw)) { - /* Enable quad interrupts */ - err = ice_ptp_cfg_phy_interrupt(pf, true, 1); - if (err) - return err; + /* Enable quad interrupts */ + err = ice_ptp_cfg_phy_interrupt(pf, true, 1); + if (err) + return err; - ice_ptp_restart_all_phy(pf); - } + ice_ptp_restart_all_phy(pf); /* Re-enable all periodic outputs and external timestamp events */ ice_ptp_enable_all_perout(pf); ice_ptp_enable_all_extts(pf); return 0; + +err_unlock: + ice_ptp_unlock(hw); + return err; } /** @@ -2967,12 +3169,6 @@ err: dev_err(ice_pf_to_dev(pf), "PTP reset failed %d\n", err); } -static bool ice_is_primary(struct ice_hw *hw) -{ - return ice_is_e825c(hw) && ice_is_dual(hw) ? - !!(hw->dev_caps.nac_topo.mode & ICE_NAC_TOPO_PRIMARY_M) : true; -} - static int ice_ptp_setup_adapter(struct ice_pf *pf) { if (!ice_pf_src_tmr_owned(pf) || !ice_is_primary(&pf->hw)) @@ -2988,7 +3184,7 @@ static int ice_ptp_setup_pf(struct ice_pf *pf) struct ice_ptp *ctrl_ptp = ice_get_ctrl_ptp(pf); struct ice_ptp *ptp = &pf->ptp; - if (WARN_ON(!ctrl_ptp) || ice_get_phy_model(&pf->hw) == ICE_PHY_UNSUP) + if (WARN_ON(!ctrl_ptp) || pf->hw.mac_type == ICE_MAC_UNKNOWN) return -ENODEV; INIT_LIST_HEAD(&ptp->port.list_node); @@ -3005,7 +3201,7 @@ static void ice_ptp_cleanup_pf(struct ice_pf *pf) { struct ice_ptp *ptp = &pf->ptp; - if (ice_get_phy_model(&pf->hw) != ICE_PHY_UNSUP) { + if (pf->hw.mac_type != ICE_MAC_UNKNOWN) { mutex_lock(&pf->adapter->ports.lock); list_del(&ptp->port.list_node); mutex_unlock(&pf->adapter->ports.lock); @@ -3052,6 +3248,13 @@ static int ice_ptp_init_owner(struct ice_pf *pf) return err; } + err = ice_tspll_init(hw); + if (err) { + dev_err(ice_pf_to_dev(pf), "Failed to initialize CGU, status %d\n", + err); + return err; + } + /* Acquire the global hardware lock */ if (!ice_ptp_lock(hw)) { err = -EBUSY; @@ -3060,18 +3263,14 @@ static int ice_ptp_init_owner(struct ice_pf *pf) /* Write the increment time value to PHY and LAN */ err = ice_ptp_write_incval(hw, ice_base_incval(pf)); - if (err) { - ice_ptp_unlock(hw); - goto err_exit; - } + if (err) + goto err_unlock; ts = ktime_to_timespec64(ktime_get_real()); /* Write the initial Time value to PHY and LAN */ err = ice_ptp_write_init(pf, &ts); - if (err) { - ice_ptp_unlock(hw); - goto err_exit; - } + if (err) + goto err_unlock; /* Release the global hardware lock */ ice_ptp_unlock(hw); @@ -3091,6 +3290,10 @@ err_clk: pf->ptp.clock = NULL; err_exit: return err; + +err_unlock: + ice_ptp_unlock(hw); + return err; } /** @@ -3125,6 +3328,8 @@ static int ice_ptp_init_work(struct ice_pf *pf, struct ice_ptp *ptp) * ice_ptp_init_port - Initialize PTP port structure * @pf: Board private structure * @ptp_port: PTP port structure + * + * Return: 0 on success, -ENODEV on invalid MAC type, -ENOMEM on failed alloc. */ static int ice_ptp_init_port(struct ice_pf *pf, struct ice_ptp_port *ptp_port) { @@ -3132,16 +3337,14 @@ static int ice_ptp_init_port(struct ice_pf *pf, struct ice_ptp_port *ptp_port) mutex_init(&ptp_port->ps_lock); - switch (ice_get_phy_model(hw)) { - case ICE_PHY_ETH56G: - return ice_ptp_init_tx_eth56g(pf, &ptp_port->tx, - ptp_port->port_num); - case ICE_PHY_E810: - return ice_ptp_init_tx_e810(pf, &ptp_port->tx); - case ICE_PHY_E82X: + switch (hw->mac_type) { + case ICE_MAC_E810: + case ICE_MAC_E830: + case ICE_MAC_GENERIC_3K_E825: + return ice_ptp_init_tx(pf, &ptp_port->tx, ptp_port->port_num); + case ICE_MAC_GENERIC: kthread_init_delayed_work(&ptp_port->ov_work, ice_ptp_wait_for_offsets); - return ice_ptp_init_tx_e82x(pf, &ptp_port->tx, ptp_port->port_num); default: @@ -3160,8 +3363,8 @@ static int ice_ptp_init_port(struct ice_pf *pf, struct ice_ptp_port *ptp_port) */ static void ice_ptp_init_tx_interrupt_mode(struct ice_pf *pf) { - switch (ice_get_phy_model(&pf->hw)) { - case ICE_PHY_E82X: + switch (pf->hw.mac_type) { + case ICE_MAC_GENERIC: /* E822 based PHY has the clock owner process the interrupt * for all ports. */ @@ -3196,6 +3399,12 @@ void ice_ptp_init(struct ice_pf *pf) ptp->state = ICE_PTP_INITIALIZING; + if (hw->lane_num < 0) { + err = hw->lane_num; + goto err_exit; + } + ptp->port.port_num = hw->lane_num; + ice_ptp_init_hw(hw); ice_ptp_init_tx_interrupt_mode(pf); @@ -3216,10 +3425,6 @@ void ice_ptp_init(struct ice_pf *pf) if (err) goto err_exit; - ptp->port.port_num = hw->pf_id; - if (ice_is_e825c(hw) && hw->ptp.is_2x50g_muxed_topo) - ptp->port.port_num = hw->pf_id * 2; - err = ice_ptp_init_port(pf, &ptp->port); if (err) goto err_exit; diff --git a/drivers/net/ethernet/intel/ice/ice_ptp.h b/drivers/net/ethernet/intel/ice/ice_ptp.h index 824e73b677..3b769a0cad 100644 --- a/drivers/net/ethernet/intel/ice/ice_ptp.h +++ b/drivers/net/ethernet/intel/ice/ice_ptp.h @@ -128,8 +128,7 @@ struct ice_ptp_tx { /* Quad and port information for initializing timestamp blocks */ #define INDEX_PER_QUAD 64 #define INDEX_PER_PORT_E82X 16 -#define INDEX_PER_PORT_E810 64 -#define INDEX_PER_PORT_ETH56G 64 +#define INDEX_PER_PORT 64 /** * struct ice_ptp_port - data used to initialize an external port for PTP @@ -211,6 +210,7 @@ enum ice_ptp_pin_nvm { * struct ice_ptp_pin_desc - hardware pin description data * @name_idx: index of the name of pin in ice_pin_names * @gpio: the associated GPIO input and output pins + * @delay: input and output signal delays in nanoseconds * * Structure describing a PTP-capable GPIO pin that extends ptp_pin_desc array * for the device. Device families have separate sets of available pins with @@ -219,6 +219,7 @@ enum ice_ptp_pin_nvm { struct ice_ptp_pin_desc { int name_idx; int gpio[2]; + unsigned int delay[2]; }; /** @@ -302,6 +303,9 @@ s8 ice_ptp_request_ts(struct ice_ptp_tx *tx, struct sk_buff *skb); void ice_ptp_req_tx_single_tstamp(struct ice_ptp_tx *tx, u8 idx); void ice_ptp_complete_tx_single_tstamp(struct ice_ptp_tx *tx); enum ice_tx_tstamp_work ice_ptp_process_ts(struct ice_pf *pf); +irqreturn_t ice_ptp_ts_irq(struct ice_pf *pf); +u64 ice_ptp_read_src_clk_reg(struct ice_pf *pf, + struct ptp_system_timestamp *sts); u64 ice_ptp_get_rx_hwts(const union ice_32b_rx_flex_desc *rx_desc, const struct ice_pkt_ctx *pkt_ctx); @@ -310,7 +314,7 @@ void ice_ptp_prepare_for_reset(struct ice_pf *pf, enum ice_reset_req reset_type); void ice_ptp_init(struct ice_pf *pf); void ice_ptp_release(struct ice_pf *pf); -void ice_ptp_link_change(struct ice_pf *pf, u8 port, bool linkup); +void ice_ptp_link_change(struct ice_pf *pf, bool linkup); #else /* IS_ENABLED(CONFIG_PTP_1588_CLOCK) */ static inline int ice_ptp_set_ts_config(struct ice_pf *pf, struct ifreq *ifr) { @@ -340,6 +344,17 @@ static inline bool ice_ptp_process_ts(struct ice_pf *pf) return true; } +static inline irqreturn_t ice_ptp_ts_irq(struct ice_pf *pf) +{ + return IRQ_HANDLED; +} + +static inline u64 ice_ptp_read_src_clk_reg(struct ice_pf *pf, + struct ptp_system_timestamp *sts) +{ + return 0; +} + static inline u64 ice_ptp_get_rx_hwts(const union ice_32b_rx_flex_desc *rx_desc, const struct ice_pkt_ctx *pkt_ctx) @@ -358,7 +373,7 @@ static inline void ice_ptp_prepare_for_reset(struct ice_pf *pf, } static inline void ice_ptp_init(struct ice_pf *pf) { } static inline void ice_ptp_release(struct ice_pf *pf) { } -static inline void ice_ptp_link_change(struct ice_pf *pf, u8 port, bool linkup) +static inline void ice_ptp_link_change(struct ice_pf *pf, bool linkup) { } diff --git a/drivers/net/ethernet/intel/ice/ice_ptp_consts.h b/drivers/net/ethernet/intel/ice/ice_ptp_consts.h index 585ce200c6..19dddd9b53 100644 --- a/drivers/net/ethernet/intel/ice/ice_ptp_consts.h +++ b/drivers/net/ethernet/intel/ice/ice_ptp_consts.h @@ -10,70 +10,25 @@ /* Constants defined for the PTP 1588 clock hardware. */ const struct ice_phy_reg_info_eth56g eth56g_phy_res[NUM_ETH56G_PHY_RES] = { - /* ETH56G_PHY_REG_PTP */ - { - /* base_addr */ - { - 0x092000, - 0x126000, - 0x1BA000, - 0x24E000, - 0x2E2000, - }, - /* step */ - 0x98, + [ETH56G_PHY_REG_PTP] = { + .base_addr = 0x092000, + .step = 0x98, }, - /* ETH56G_PHY_MEM_PTP */ - { - /* base_addr */ - { - 0x093000, - 0x127000, - 0x1BB000, - 0x24F000, - 0x2E3000, - }, - /* step */ - 0x200, + [ETH56G_PHY_MEM_PTP] = { + .base_addr = 0x093000, + .step = 0x200, }, - /* ETH56G_PHY_REG_XPCS */ - { - /* base_addr */ - { - 0x000000, - 0x009400, - 0x128000, - 0x1BC000, - 0x250000, - }, - /* step */ - 0x21000, + [ETH56G_PHY_REG_XPCS] = { + .base_addr = 0x000000, + .step = 0x21000, }, - /* ETH56G_PHY_REG_MAC */ - { - /* base_addr */ - { - 0x085000, - 0x119000, - 0x1AD000, - 0x241000, - 0x2D5000, - }, - /* step */ - 0x1000, + [ETH56G_PHY_REG_MAC] = { + .base_addr = 0x085000, + .step = 0x1000, }, - /* ETH56G_PHY_REG_GPCS */ - { - /* base_addr */ - { - 0x084000, - 0x118000, - 0x1AC000, - 0x240000, - 0x2D4000, - }, - /* step */ - 0x400, + [ETH56G_PHY_REG_GPCS] = { + .base_addr = 0x084000, + .step = 0x400, }, }; @@ -131,7 +86,7 @@ struct ice_eth56g_mac_reg_cfg eth56g_mac_cfg[NUM_ICE_ETH56G_LNK_SPD] = { .rx_offset = { .serdes = 0xffffeb27, /* -10.42424 */ .no_fec = 0xffffcccd, /* -25.6 */ - .fc = 0xfffe0014, /* -255.96 */ + .fc = 0xfffc557b, /* -469.26 */ .sfd = 0x4a4, /* 2.32 */ .bs_ds = 0x32 /* 0.0969697 */ } @@ -326,7 +281,7 @@ struct ice_eth56g_mac_reg_cfg eth56g_mac_cfg[NUM_ICE_ETH56G_LNK_SPD] = { /* struct ice_time_ref_info_e82x * - * E822 hardware can use different sources as the reference for the PTP + * E82X hardware can use different sources as the reference for the PTP * hardware clock. Each clock has different characteristics such as a slightly * different frequency, etc. * @@ -334,226 +289,53 @@ struct ice_eth56g_mac_reg_cfg eth56g_mac_cfg[NUM_ICE_ETH56G_LNK_SPD] = { * reference. See the struct ice_time_ref_info_e82x for information about the * meaning of each constant. */ -const struct ice_time_ref_info_e82x e82x_time_ref[NUM_ICE_TIME_REF_FREQ] = { - /* ICE_TIME_REF_FREQ_25_000 -> 25 MHz */ +const struct ice_time_ref_info_e82x e82x_time_ref[NUM_ICE_TSPLL_FREQ] = { + /* ICE_TSPLL_FREQ_25_000 -> 25 MHz */ { /* pll_freq */ 823437500, /* 823.4375 MHz PLL */ /* nominal_incval */ 0x136e44fabULL, - /* pps_delay */ - 11, }, - /* ICE_TIME_REF_FREQ_122_880 -> 122.88 MHz */ + /* ICE_TSPLL_FREQ_122_880 -> 122.88 MHz */ { /* pll_freq */ 783360000, /* 783.36 MHz */ /* nominal_incval */ 0x146cc2177ULL, - /* pps_delay */ - 12, }, - /* ICE_TIME_REF_FREQ_125_000 -> 125 MHz */ + /* ICE_TSPLL_FREQ_125_000 -> 125 MHz */ { /* pll_freq */ 796875000, /* 796.875 MHz */ /* nominal_incval */ 0x141414141ULL, - /* pps_delay */ - 12, }, - /* ICE_TIME_REF_FREQ_153_600 -> 153.6 MHz */ + /* ICE_TSPLL_FREQ_153_600 -> 153.6 MHz */ { /* pll_freq */ 816000000, /* 816 MHz */ /* nominal_incval */ 0x139b9b9baULL, - /* pps_delay */ - 12, }, - /* ICE_TIME_REF_FREQ_156_250 -> 156.25 MHz */ + /* ICE_TSPLL_FREQ_156_250 -> 156.25 MHz */ { /* pll_freq */ 830078125, /* 830.78125 MHz */ /* nominal_incval */ 0x134679aceULL, - /* pps_delay */ - 11, }, - /* ICE_TIME_REF_FREQ_245_760 -> 245.76 MHz */ + /* ICE_TSPLL_FREQ_245_760 -> 245.76 MHz */ { /* pll_freq */ 783360000, /* 783.36 MHz */ /* nominal_incval */ 0x146cc2177ULL, - /* pps_delay */ - 12, - }, -}; - -const struct ice_cgu_pll_params_e82x e822_cgu_params[NUM_ICE_TIME_REF_FREQ] = { - /* ICE_TIME_REF_FREQ_25_000 -> 25 MHz */ - { - /* refclk_pre_div */ - 1, - /* feedback_div */ - 197, - /* frac_n_div */ - 2621440, - /* post_pll_div */ - 6, - }, - - /* ICE_TIME_REF_FREQ_122_880 -> 122.88 MHz */ - { - /* refclk_pre_div */ - 5, - /* feedback_div */ - 223, - /* frac_n_div */ - 524288, - /* post_pll_div */ - 7, - }, - - /* ICE_TIME_REF_FREQ_125_000 -> 125 MHz */ - { - /* refclk_pre_div */ - 5, - /* feedback_div */ - 223, - /* frac_n_div */ - 524288, - /* post_pll_div */ - 7, - }, - - /* ICE_TIME_REF_FREQ_153_600 -> 153.6 MHz */ - { - /* refclk_pre_div */ - 5, - /* feedback_div */ - 159, - /* frac_n_div */ - 1572864, - /* post_pll_div */ - 6, - }, - - /* ICE_TIME_REF_FREQ_156_250 -> 156.25 MHz */ - { - /* refclk_pre_div */ - 5, - /* feedback_div */ - 159, - /* frac_n_div */ - 1572864, - /* post_pll_div */ - 6, - }, - - /* ICE_TIME_REF_FREQ_245_760 -> 245.76 MHz */ - { - /* refclk_pre_div */ - 10, - /* feedback_div */ - 223, - /* frac_n_div */ - 524288, - /* post_pll_div */ - 7, - }, -}; - -const -struct ice_cgu_pll_params_e825c e825c_cgu_params[NUM_ICE_TIME_REF_FREQ] = { - /* ICE_TIME_REF_FREQ_25_000 -> 25 MHz */ - { - /* tspll_ck_refclkfreq */ - 0x19, - /* tspll_ndivratio */ - 1, - /* tspll_fbdiv_intgr */ - 320, - /* tspll_fbdiv_frac */ - 0, - /* ref1588_ck_div */ - 0, - }, - - /* ICE_TIME_REF_FREQ_122_880 -> 122.88 MHz */ - { - /* tspll_ck_refclkfreq */ - 0x29, - /* tspll_ndivratio */ - 3, - /* tspll_fbdiv_intgr */ - 195, - /* tspll_fbdiv_frac */ - 1342177280UL, - /* ref1588_ck_div */ - 0, - }, - - /* ICE_TIME_REF_FREQ_125_000 -> 125 MHz */ - { - /* tspll_ck_refclkfreq */ - 0x3E, - /* tspll_ndivratio */ - 2, - /* tspll_fbdiv_intgr */ - 128, - /* tspll_fbdiv_frac */ - 0, - /* ref1588_ck_div */ - 0, - }, - - /* ICE_TIME_REF_FREQ_153_600 -> 153.6 MHz */ - { - /* tspll_ck_refclkfreq */ - 0x33, - /* tspll_ndivratio */ - 3, - /* tspll_fbdiv_intgr */ - 156, - /* tspll_fbdiv_frac */ - 1073741824UL, - /* ref1588_ck_div */ - 0, - }, - - /* ICE_TIME_REF_FREQ_156_250 -> 156.25 MHz */ - { - /* tspll_ck_refclkfreq */ - 0x1F, - /* tspll_ndivratio */ - 5, - /* tspll_fbdiv_intgr */ - 256, - /* tspll_fbdiv_frac */ - 0, - /* ref1588_ck_div */ - 0, - }, - - /* ICE_TIME_REF_FREQ_245_760 -> 245.76 MHz */ - { - /* tspll_ck_refclkfreq */ - 0x52, - /* tspll_ndivratio */ - 3, - /* tspll_fbdiv_intgr */ - 97, - /* tspll_fbdiv_frac */ - 2818572288UL, - /* ref1588_ck_div */ - 0, }, }; @@ -761,9 +543,9 @@ const struct ice_vernier_info_e82x e822_vernier[NUM_ICE_PTP_LNK_SPD] = { /* rx_desk_rsgb_par */ 644531250, /* 644.53125 MHz Reed Solomon gearbox */ /* tx_desk_rsgb_pcs */ - 644531250, /* 644.53125 MHz Reed Solomon gearbox */ + 390625000, /* 390.625 MHz Reed Solomon gearbox */ /* rx_desk_rsgb_pcs */ - 644531250, /* 644.53125 MHz Reed Solomon gearbox */ + 390625000, /* 390.625 MHz Reed Solomon gearbox */ /* tx_fixed_delay */ 1620, /* pmd_adj_divisor */ diff --git a/drivers/net/ethernet/intel/ice/ice_ptp_hw.c b/drivers/net/ethernet/intel/ice/ice_ptp_hw.c index ce9ee05c0e..e8e439fd64 100644 --- a/drivers/net/ethernet/intel/ice/ice_ptp_hw.c +++ b/drivers/net/ethernet/intel/ice/ice_ptp_hw.c @@ -6,7 +6,6 @@ #include "ice_common.h" #include "ice_ptp_hw.h" #include "ice_ptp_consts.h" -#include "ice_cgu_regs.h" static struct dpll_pin_frequency ice_cgu_pin_freq_common[] = { DPLL_PIN_FREQUENCY_1PPS, @@ -225,547 +224,6 @@ static u64 ice_ptp_read_src_incval(struct ice_hw *hw) return ((u64)(hi & INCVAL_HIGH_M) << 32) | lo; } -/** - * ice_read_cgu_reg_e82x - Read a CGU register - * @hw: pointer to the HW struct - * @addr: Register address to read - * @val: storage for register value read - * - * Read the contents of a register of the Clock Generation Unit. Only - * applicable to E822 devices. - * - * Return: 0 on success, other error codes when failed to read from CGU - */ -static int ice_read_cgu_reg_e82x(struct ice_hw *hw, u32 addr, u32 *val) -{ - struct ice_sbq_msg_input cgu_msg = { - .opcode = ice_sbq_msg_rd, - .dest_dev = cgu, - .msg_addr_low = addr - }; - int err; - - err = ice_sbq_rw_reg(hw, &cgu_msg, ICE_AQ_FLAG_RD); - if (err) { - ice_debug(hw, ICE_DBG_PTP, "Failed to read CGU register 0x%04x, err %d\n", - addr, err); - return err; - } - - *val = cgu_msg.data; - - return 0; -} - -/** - * ice_write_cgu_reg_e82x - Write a CGU register - * @hw: pointer to the HW struct - * @addr: Register address to write - * @val: value to write into the register - * - * Write the specified value to a register of the Clock Generation Unit. Only - * applicable to E822 devices. - * - * Return: 0 on success, other error codes when failed to write to CGU - */ -static int ice_write_cgu_reg_e82x(struct ice_hw *hw, u32 addr, u32 val) -{ - struct ice_sbq_msg_input cgu_msg = { - .opcode = ice_sbq_msg_wr, - .dest_dev = cgu, - .msg_addr_low = addr, - .data = val - }; - int err; - - err = ice_sbq_rw_reg(hw, &cgu_msg, ICE_AQ_FLAG_RD); - if (err) { - ice_debug(hw, ICE_DBG_PTP, "Failed to write CGU register 0x%04x, err %d\n", - addr, err); - return err; - } - - return err; -} - -/** - * ice_clk_freq_str - Convert time_ref_freq to string - * @clk_freq: Clock frequency - * - * Return: specified TIME_REF clock frequency converted to a string - */ -static const char *ice_clk_freq_str(enum ice_time_ref_freq clk_freq) -{ - switch (clk_freq) { - case ICE_TIME_REF_FREQ_25_000: - return "25 MHz"; - case ICE_TIME_REF_FREQ_122_880: - return "122.88 MHz"; - case ICE_TIME_REF_FREQ_125_000: - return "125 MHz"; - case ICE_TIME_REF_FREQ_153_600: - return "153.6 MHz"; - case ICE_TIME_REF_FREQ_156_250: - return "156.25 MHz"; - case ICE_TIME_REF_FREQ_245_760: - return "245.76 MHz"; - default: - return "Unknown"; - } -} - -/** - * ice_clk_src_str - Convert time_ref_src to string - * @clk_src: Clock source - * - * Return: specified clock source converted to its string name - */ -static const char *ice_clk_src_str(enum ice_clk_src clk_src) -{ - switch (clk_src) { - case ICE_CLK_SRC_TCXO: - return "TCXO"; - case ICE_CLK_SRC_TIME_REF: - return "TIME_REF"; - default: - return "Unknown"; - } -} - -/** - * ice_cfg_cgu_pll_e82x - Configure the Clock Generation Unit - * @hw: pointer to the HW struct - * @clk_freq: Clock frequency to program - * @clk_src: Clock source to select (TIME_REF, or TCXO) - * - * Configure the Clock Generation Unit with the desired clock frequency and - * time reference, enabling the PLL which drives the PTP hardware clock. - * - * Return: - * * %0 - success - * * %-EINVAL - input parameters are incorrect - * * %-EBUSY - failed to lock TS PLL - * * %other - CGU read/write failure - */ -static int ice_cfg_cgu_pll_e82x(struct ice_hw *hw, - enum ice_time_ref_freq clk_freq, - enum ice_clk_src clk_src) -{ - union tspll_ro_bwm_lf bwm_lf; - union nac_cgu_dword19 dw19; - union nac_cgu_dword22 dw22; - union nac_cgu_dword24 dw24; - union nac_cgu_dword9 dw9; - int err; - - if (clk_freq >= NUM_ICE_TIME_REF_FREQ) { - dev_warn(ice_hw_to_dev(hw), "Invalid TIME_REF frequency %u\n", - clk_freq); - return -EINVAL; - } - - if (clk_src >= NUM_ICE_CLK_SRC) { - dev_warn(ice_hw_to_dev(hw), "Invalid clock source %u\n", - clk_src); - return -EINVAL; - } - - if (clk_src == ICE_CLK_SRC_TCXO && - clk_freq != ICE_TIME_REF_FREQ_25_000) { - dev_warn(ice_hw_to_dev(hw), - "TCXO only supports 25 MHz frequency\n"); - return -EINVAL; - } - - err = ice_read_cgu_reg_e82x(hw, NAC_CGU_DWORD9, &dw9.val); - if (err) - return err; - - err = ice_read_cgu_reg_e82x(hw, NAC_CGU_DWORD24, &dw24.val); - if (err) - return err; - - err = ice_read_cgu_reg_e82x(hw, TSPLL_RO_BWM_LF, &bwm_lf.val); - if (err) - return err; - - /* Log the current clock configuration */ - ice_debug(hw, ICE_DBG_PTP, "Current CGU configuration -- %s, clk_src %s, clk_freq %s, PLL %s\n", - dw24.ts_pll_enable ? "enabled" : "disabled", - ice_clk_src_str(dw24.time_ref_sel), - ice_clk_freq_str(dw9.time_ref_freq_sel), - bwm_lf.plllock_true_lock_cri ? "locked" : "unlocked"); - - /* Disable the PLL before changing the clock source or frequency */ - if (dw24.ts_pll_enable) { - dw24.ts_pll_enable = 0; - - err = ice_write_cgu_reg_e82x(hw, NAC_CGU_DWORD24, dw24.val); - if (err) - return err; - } - - /* Set the frequency */ - dw9.time_ref_freq_sel = clk_freq; - err = ice_write_cgu_reg_e82x(hw, NAC_CGU_DWORD9, dw9.val); - if (err) - return err; - - /* Configure the TS PLL feedback divisor */ - err = ice_read_cgu_reg_e82x(hw, NAC_CGU_DWORD19, &dw19.val); - if (err) - return err; - - dw19.tspll_fbdiv_intgr = e822_cgu_params[clk_freq].feedback_div; - dw19.tspll_ndivratio = 1; - - err = ice_write_cgu_reg_e82x(hw, NAC_CGU_DWORD19, dw19.val); - if (err) - return err; - - /* Configure the TS PLL post divisor */ - err = ice_read_cgu_reg_e82x(hw, NAC_CGU_DWORD22, &dw22.val); - if (err) - return err; - - dw22.time1588clk_div = e822_cgu_params[clk_freq].post_pll_div; - dw22.time1588clk_sel_div2 = 0; - - err = ice_write_cgu_reg_e82x(hw, NAC_CGU_DWORD22, dw22.val); - if (err) - return err; - - /* Configure the TS PLL pre divisor and clock source */ - err = ice_read_cgu_reg_e82x(hw, NAC_CGU_DWORD24, &dw24.val); - if (err) - return err; - - dw24.ref1588_ck_div = e822_cgu_params[clk_freq].refclk_pre_div; - dw24.tspll_fbdiv_frac = e822_cgu_params[clk_freq].frac_n_div; - dw24.time_ref_sel = clk_src; - - err = ice_write_cgu_reg_e82x(hw, NAC_CGU_DWORD24, dw24.val); - if (err) - return err; - - /* Finally, enable the PLL */ - dw24.ts_pll_enable = 1; - - err = ice_write_cgu_reg_e82x(hw, NAC_CGU_DWORD24, dw24.val); - if (err) - return err; - - /* Wait to verify if the PLL locks */ - usleep_range(1000, 5000); - - err = ice_read_cgu_reg_e82x(hw, TSPLL_RO_BWM_LF, &bwm_lf.val); - if (err) - return err; - - if (!bwm_lf.plllock_true_lock_cri) { - dev_warn(ice_hw_to_dev(hw), "CGU PLL failed to lock\n"); - return -EBUSY; - } - - /* Log the current clock configuration */ - ice_debug(hw, ICE_DBG_PTP, "New CGU configuration -- %s, clk_src %s, clk_freq %s, PLL %s\n", - dw24.ts_pll_enable ? "enabled" : "disabled", - ice_clk_src_str(dw24.time_ref_sel), - ice_clk_freq_str(dw9.time_ref_freq_sel), - bwm_lf.plllock_true_lock_cri ? "locked" : "unlocked"); - - return 0; -} - -/** - * ice_cfg_cgu_pll_e825c - Configure the Clock Generation Unit for E825-C - * @hw: pointer to the HW struct - * @clk_freq: Clock frequency to program - * @clk_src: Clock source to select (TIME_REF, or TCXO) - * - * Configure the Clock Generation Unit with the desired clock frequency and - * time reference, enabling the PLL which drives the PTP hardware clock. - * - * Return: - * * %0 - success - * * %-EINVAL - input parameters are incorrect - * * %-EBUSY - failed to lock TS PLL - * * %other - CGU read/write failure - */ -static int ice_cfg_cgu_pll_e825c(struct ice_hw *hw, - enum ice_time_ref_freq clk_freq, - enum ice_clk_src clk_src) -{ - union tspll_ro_lock_e825c ro_lock; - union nac_cgu_dword16_e825c dw16; - union nac_cgu_dword23_e825c dw23; - union nac_cgu_dword19 dw19; - union nac_cgu_dword22 dw22; - union nac_cgu_dword24 dw24; - union nac_cgu_dword9 dw9; - int err; - - if (clk_freq >= NUM_ICE_TIME_REF_FREQ) { - dev_warn(ice_hw_to_dev(hw), "Invalid TIME_REF frequency %u\n", - clk_freq); - return -EINVAL; - } - - if (clk_src >= NUM_ICE_CLK_SRC) { - dev_warn(ice_hw_to_dev(hw), "Invalid clock source %u\n", - clk_src); - return -EINVAL; - } - - if (clk_src == ICE_CLK_SRC_TCXO && - clk_freq != ICE_TIME_REF_FREQ_156_250) { - dev_warn(ice_hw_to_dev(hw), - "TCXO only supports 156.25 MHz frequency\n"); - return -EINVAL; - } - - err = ice_read_cgu_reg_e82x(hw, NAC_CGU_DWORD9, &dw9.val); - if (err) - return err; - - err = ice_read_cgu_reg_e82x(hw, NAC_CGU_DWORD24, &dw24.val); - if (err) - return err; - - err = ice_read_cgu_reg_e82x(hw, NAC_CGU_DWORD16_E825C, &dw16.val); - if (err) - return err; - - err = ice_read_cgu_reg_e82x(hw, NAC_CGU_DWORD23_E825C, &dw23.val); - if (err) - return err; - - err = ice_read_cgu_reg_e82x(hw, TSPLL_RO_LOCK_E825C, &ro_lock.val); - if (err) - return err; - - /* Log the current clock configuration */ - ice_debug(hw, ICE_DBG_PTP, "Current CGU configuration -- %s, clk_src %s, clk_freq %s, PLL %s\n", - dw24.ts_pll_enable ? "enabled" : "disabled", - ice_clk_src_str(dw23.time_ref_sel), - ice_clk_freq_str(dw9.time_ref_freq_sel), - ro_lock.plllock_true_lock_cri ? "locked" : "unlocked"); - - /* Disable the PLL before changing the clock source or frequency */ - if (dw23.ts_pll_enable) { - dw23.ts_pll_enable = 0; - - err = ice_write_cgu_reg_e82x(hw, NAC_CGU_DWORD23_E825C, - dw23.val); - if (err) - return err; - } - - /* Set the frequency */ - dw9.time_ref_freq_sel = clk_freq; - - /* Enable the correct receiver */ - if (clk_src == ICE_CLK_SRC_TCXO) { - dw9.time_ref_en = 0; - dw9.clk_eref0_en = 1; - } else { - dw9.time_ref_en = 1; - dw9.clk_eref0_en = 0; - } - err = ice_write_cgu_reg_e82x(hw, NAC_CGU_DWORD9, dw9.val); - if (err) - return err; - - /* Choose the referenced frequency */ - dw16.tspll_ck_refclkfreq = - e825c_cgu_params[clk_freq].tspll_ck_refclkfreq; - err = ice_write_cgu_reg_e82x(hw, NAC_CGU_DWORD16_E825C, dw16.val); - if (err) - return err; - - /* Configure the TS PLL feedback divisor */ - err = ice_read_cgu_reg_e82x(hw, NAC_CGU_DWORD19, &dw19.val); - if (err) - return err; - - dw19.tspll_fbdiv_intgr = - e825c_cgu_params[clk_freq].tspll_fbdiv_intgr; - dw19.tspll_ndivratio = - e825c_cgu_params[clk_freq].tspll_ndivratio; - - err = ice_write_cgu_reg_e82x(hw, NAC_CGU_DWORD19, dw19.val); - if (err) - return err; - - /* Configure the TS PLL post divisor */ - err = ice_read_cgu_reg_e82x(hw, NAC_CGU_DWORD22, &dw22.val); - if (err) - return err; - - /* These two are constant for E825C */ - dw22.time1588clk_div = 5; - dw22.time1588clk_sel_div2 = 0; - - err = ice_write_cgu_reg_e82x(hw, NAC_CGU_DWORD22, dw22.val); - if (err) - return err; - - /* Configure the TS PLL pre divisor and clock source */ - err = ice_read_cgu_reg_e82x(hw, NAC_CGU_DWORD23_E825C, &dw23.val); - if (err) - return err; - - dw23.ref1588_ck_div = - e825c_cgu_params[clk_freq].ref1588_ck_div; - dw23.time_ref_sel = clk_src; - - err = ice_write_cgu_reg_e82x(hw, NAC_CGU_DWORD23_E825C, dw23.val); - if (err) - return err; - - dw24.tspll_fbdiv_frac = - e825c_cgu_params[clk_freq].tspll_fbdiv_frac; - - err = ice_write_cgu_reg_e82x(hw, NAC_CGU_DWORD24, dw24.val); - if (err) - return err; - - /* Finally, enable the PLL */ - dw23.ts_pll_enable = 1; - - err = ice_write_cgu_reg_e82x(hw, NAC_CGU_DWORD23_E825C, dw23.val); - if (err) - return err; - - /* Wait to verify if the PLL locks */ - usleep_range(1000, 5000); - - err = ice_read_cgu_reg_e82x(hw, TSPLL_RO_LOCK_E825C, &ro_lock.val); - if (err) - return err; - - if (!ro_lock.plllock_true_lock_cri) { - dev_warn(ice_hw_to_dev(hw), "CGU PLL failed to lock\n"); - return -EBUSY; - } - - /* Log the current clock configuration */ - ice_debug(hw, ICE_DBG_PTP, "New CGU configuration -- %s, clk_src %s, clk_freq %s, PLL %s\n", - dw24.ts_pll_enable ? "enabled" : "disabled", - ice_clk_src_str(dw23.time_ref_sel), - ice_clk_freq_str(dw9.time_ref_freq_sel), - ro_lock.plllock_true_lock_cri ? "locked" : "unlocked"); - - return 0; -} - -#define ICE_ONE_PPS_OUT_AMP_MAX 3 - -/** - * ice_cgu_cfg_pps_out - Configure 1PPS output from CGU - * @hw: pointer to the HW struct - * @enable: true to enable 1PPS output, false to disable it - * - * Return: 0 on success, other negative error code when CGU read/write failed - */ -int ice_cgu_cfg_pps_out(struct ice_hw *hw, bool enable) -{ - union nac_cgu_dword9 dw9; - int err; - - err = ice_read_cgu_reg_e82x(hw, NAC_CGU_DWORD9, &dw9.val); - if (err) - return err; - - dw9.one_pps_out_en = enable; - dw9.one_pps_out_amp = enable * ICE_ONE_PPS_OUT_AMP_MAX; - return ice_write_cgu_reg_e82x(hw, NAC_CGU_DWORD9, dw9.val); -} - -/** - * ice_cfg_cgu_pll_dis_sticky_bits_e82x - disable TS PLL sticky bits - * @hw: pointer to the HW struct - * - * Configure the Clock Generation Unit TS PLL sticky bits so they don't latch on - * losing TS PLL lock, but always show current state. - * - * Return: 0 on success, other error codes when failed to read/write CGU - */ -static int ice_cfg_cgu_pll_dis_sticky_bits_e82x(struct ice_hw *hw) -{ - union tspll_cntr_bist_settings cntr_bist; - int err; - - err = ice_read_cgu_reg_e82x(hw, TSPLL_CNTR_BIST_SETTINGS, - &cntr_bist.val); - if (err) - return err; - - /* Disable sticky lock detection so lock err reported is accurate */ - cntr_bist.i_plllock_sel_0 = 0; - cntr_bist.i_plllock_sel_1 = 0; - - return ice_write_cgu_reg_e82x(hw, TSPLL_CNTR_BIST_SETTINGS, - cntr_bist.val); -} - -/** - * ice_cfg_cgu_pll_dis_sticky_bits_e825c - disable TS PLL sticky bits for E825-C - * @hw: pointer to the HW struct - * - * Configure the Clock Generation Unit TS PLL sticky bits so they don't latch on - * losing TS PLL lock, but always show current state. - * - * Return: 0 on success, other error codes when failed to read/write CGU - */ -static int ice_cfg_cgu_pll_dis_sticky_bits_e825c(struct ice_hw *hw) -{ - union tspll_bw_tdc_e825c bw_tdc; - int err; - - err = ice_read_cgu_reg_e82x(hw, TSPLL_BW_TDC_E825C, &bw_tdc.val); - if (err) - return err; - - bw_tdc.i_plllock_sel_1_0 = 0; - - return ice_write_cgu_reg_e82x(hw, TSPLL_BW_TDC_E825C, bw_tdc.val); -} - -/** - * ice_init_cgu_e82x - Initialize CGU with settings from firmware - * @hw: pointer to the HW structure - * - * Initialize the Clock Generation Unit of the E822 device. - * - * Return: 0 on success, other error codes when failed to read/write/cfg CGU - */ -static int ice_init_cgu_e82x(struct ice_hw *hw) -{ - struct ice_ts_func_info *ts_info = &hw->func_caps.ts_func_info; - int err; - - /* Disable sticky lock detection so lock err reported is accurate */ - if (ice_is_e825c(hw)) - err = ice_cfg_cgu_pll_dis_sticky_bits_e825c(hw); - else - err = ice_cfg_cgu_pll_dis_sticky_bits_e82x(hw); - if (err) - return err; - - /* Configure the CGU PLL using the parameters from the function - * capabilities. - */ - if (ice_is_e825c(hw)) - err = ice_cfg_cgu_pll_e825c(hw, ts_info->time_ref, - (enum ice_clk_src)ts_info->clk_src); - else - err = ice_cfg_cgu_pll_e82x(hw, ts_info->time_ref, - (enum ice_clk_src)ts_info->clk_src); - - return err; -} - /** * ice_ptp_tmr_cmd_to_src_reg - Convert to source timer command value * @hw: pointer to HW struct @@ -827,8 +285,9 @@ static u32 ice_ptp_tmr_cmd_to_port_reg(struct ice_hw *hw, /* Certain hardware families share the same register values for the * port register and source timer register. */ - switch (ice_get_phy_model(hw)) { - case ICE_PHY_E810: + switch (hw->mac_type) { + case ICE_MAC_E810: + case ICE_MAC_E830: return ice_ptp_tmr_cmd_to_src_reg(hw, cmd) & TS_CMD_MASK_E810; default: break; @@ -873,8 +332,12 @@ static u32 ice_ptp_tmr_cmd_to_port_reg(struct ice_hw *hw, */ void ice_ptp_src_cmd(struct ice_hw *hw, enum ice_ptp_tmr_cmd cmd) { + struct ice_pf *pf = container_of(hw, struct ice_pf, hw); u32 cmd_val = ice_ptp_tmr_cmd_to_src_reg(hw, cmd); + if (!ice_is_primary(hw)) + hw = ice_get_primary_hw(pf); + wr32(hw, GLTSYN_CMD, cmd_val); } @@ -890,41 +353,78 @@ static void ice_ptp_exec_tmr_cmd(struct ice_hw *hw) { struct ice_pf *pf = container_of(hw, struct ice_pf, hw); + if (!ice_is_primary(hw)) + hw = ice_get_primary_hw(pf); + guard(spinlock)(&pf->adapter->ptp_gltsyn_time_lock); wr32(hw, GLTSYN_CMD_SYNC, SYNC_EXEC_CMD); ice_flush(hw); } +/** + * ice_ptp_cfg_sync_delay - Configure PHC to PHY synchronization delay + * @hw: pointer to HW struct + * @delay: delay between PHC and PHY SYNC command execution in nanoseconds + */ +static void ice_ptp_cfg_sync_delay(const struct ice_hw *hw, u32 delay) +{ + wr32(hw, GLTSYN_SYNC_DLAY, delay); + ice_flush(hw); +} + /* 56G PHY device functions * * The following functions operate on devices with the ETH 56G PHY. */ +/** + * ice_ptp_get_dest_dev_e825 - get destination PHY for given port number + * @hw: pointer to the HW struct + * @port: destination port + * + * Return: destination sideband queue PHY device. + */ +static enum ice_sbq_dev_id ice_ptp_get_dest_dev_e825(struct ice_hw *hw, + u8 port) +{ + u8 curr_phy, tgt_phy; + + tgt_phy = port >= hw->ptp.ports_per_phy; + curr_phy = hw->lane_num >= hw->ptp.ports_per_phy; + /* In the driver, lanes 4..7 are in fact 0..3 on a second PHY. + * On a single complex E825C, PHY 0 is always destination device phy_0 + * and PHY 1 is phy_0_peer. + * On dual complex E825C, device phy_0 points to PHY on a current + * complex and phy_0_peer to PHY on a different complex. + */ + if ((!ice_is_dual(hw) && tgt_phy == 1) || + (ice_is_dual(hw) && tgt_phy != curr_phy)) + return ice_sbq_dev_phy_0_peer; + else + return ice_sbq_dev_phy_0; +} + /** * ice_write_phy_eth56g - Write a PHY port register * @hw: pointer to the HW struct - * @phy_idx: PHY index + * @port: destination port * @addr: PHY register address * @val: Value to write * * Return: 0 on success, other error codes when failed to write to PHY */ -static int ice_write_phy_eth56g(struct ice_hw *hw, u8 phy_idx, u32 addr, - u32 val) +static int ice_write_phy_eth56g(struct ice_hw *hw, u8 port, u32 addr, u32 val) { - struct ice_sbq_msg_input phy_msg; + struct ice_sbq_msg_input msg = { + .dest_dev = ice_ptp_get_dest_dev_e825(hw, port), + .opcode = ice_sbq_msg_wr, + .msg_addr_low = lower_16_bits(addr), + .msg_addr_high = upper_16_bits(addr), + .data = val + }; int err; - phy_msg.opcode = ice_sbq_msg_wr; - - phy_msg.msg_addr_low = lower_16_bits(addr); - phy_msg.msg_addr_high = upper_16_bits(addr); - - phy_msg.data = val; - phy_msg.dest_dev = hw->ptp.phy.eth56g.phy_addr[phy_idx]; - - err = ice_sbq_rw_reg(hw, &phy_msg, ICE_AQ_FLAG_RD); - + err = ice_sbq_rw_reg(hw, &msg, ICE_AQ_FLAG_RD); if (err) ice_debug(hw, ICE_DBG_PTP, "PTP failed to send msg to phy %d\n", err); @@ -935,41 +435,36 @@ static int ice_write_phy_eth56g(struct ice_hw *hw, u8 phy_idx, u32 addr, /** * ice_read_phy_eth56g - Read a PHY port register * @hw: pointer to the HW struct - * @phy_idx: PHY index + * @port: destination port * @addr: PHY register address * @val: Value to write * * Return: 0 on success, other error codes when failed to read from PHY */ -static int ice_read_phy_eth56g(struct ice_hw *hw, u8 phy_idx, u32 addr, - u32 *val) +static int ice_read_phy_eth56g(struct ice_hw *hw, u8 port, u32 addr, u32 *val) { - struct ice_sbq_msg_input phy_msg; + struct ice_sbq_msg_input msg = { + .dest_dev = ice_ptp_get_dest_dev_e825(hw, port), + .opcode = ice_sbq_msg_rd, + .msg_addr_low = lower_16_bits(addr), + .msg_addr_high = upper_16_bits(addr) + }; int err; - phy_msg.opcode = ice_sbq_msg_rd; - - phy_msg.msg_addr_low = lower_16_bits(addr); - phy_msg.msg_addr_high = upper_16_bits(addr); - - phy_msg.data = 0; - phy_msg.dest_dev = hw->ptp.phy.eth56g.phy_addr[phy_idx]; - - err = ice_sbq_rw_reg(hw, &phy_msg, ICE_AQ_FLAG_RD); - if (err) { + err = ice_sbq_rw_reg(hw, &msg, ICE_AQ_FLAG_RD); + if (err) ice_debug(hw, ICE_DBG_PTP, "PTP failed to send msg to phy %d\n", err); - return err; - } + else + *val = msg.data; - *val = phy_msg.data; - - return 0; + return err; } /** * ice_phy_res_address_eth56g - Calculate a PHY port register address - * @port: Port number to be written + * @hw: pointer to the HW struct + * @lane: Lane number to be written * @res_type: resource type (register/memory) * @offset: Offset from PHY port register base * @addr: The result address @@ -978,17 +473,19 @@ static int ice_read_phy_eth56g(struct ice_hw *hw, u8 phy_idx, u32 addr, * * %0 - success * * %EINVAL - invalid port number or resource type */ -static int ice_phy_res_address_eth56g(u8 port, enum eth56g_res_type res_type, - u32 offset, u32 *addr) +static int ice_phy_res_address_eth56g(struct ice_hw *hw, u8 lane, + enum eth56g_res_type res_type, + u32 offset, + u32 *addr) { - u8 lane = port % ICE_PORTS_PER_QUAD; - u8 phy = ICE_GET_QUAD_NUM(port); - if (res_type >= NUM_ETH56G_PHY_RES) return -EINVAL; - *addr = eth56g_phy_res[res_type].base[phy] + + /* Lanes 4..7 are in fact 0..3 on a second PHY */ + lane %= hw->ptp.ports_per_phy; + *addr = eth56g_phy_res[res_type].base_addr + lane * eth56g_phy_res[res_type].step + offset; + return 0; } @@ -1008,19 +505,17 @@ static int ice_phy_res_address_eth56g(u8 port, enum eth56g_res_type res_type, static int ice_write_port_eth56g(struct ice_hw *hw, u8 port, u32 offset, u32 val, enum eth56g_res_type res_type) { - u8 phy_port = port % hw->ptp.ports_per_phy; - u8 phy_idx = port / hw->ptp.ports_per_phy; u32 addr; int err; if (port >= hw->ptp.num_lports) return -EINVAL; - err = ice_phy_res_address_eth56g(phy_port, res_type, offset, &addr); + err = ice_phy_res_address_eth56g(hw, port, res_type, offset, &addr); if (err) return err; - return ice_write_phy_eth56g(hw, phy_idx, addr, val); + return ice_write_phy_eth56g(hw, port, addr, val); } /** @@ -1039,19 +534,17 @@ static int ice_write_port_eth56g(struct ice_hw *hw, u8 port, u32 offset, static int ice_read_port_eth56g(struct ice_hw *hw, u8 port, u32 offset, u32 *val, enum eth56g_res_type res_type) { - u8 phy_port = port % hw->ptp.ports_per_phy; - u8 phy_idx = port / hw->ptp.ports_per_phy; u32 addr; int err; if (port >= hw->ptp.num_lports) return -EINVAL; - err = ice_phy_res_address_eth56g(phy_port, res_type, offset, &addr); + err = ice_phy_res_address_eth56g(hw, port, res_type, offset, &addr); if (err) return err; - return ice_read_phy_eth56g(hw, phy_idx, addr, val); + return ice_read_phy_eth56g(hw, port, addr, val); } /** @@ -1200,6 +693,56 @@ static int ice_write_port_mem_eth56g(struct ice_hw *hw, u8 port, u16 offset, return ice_write_port_eth56g(hw, port, offset, val, ETH56G_PHY_MEM_PTP); } +/** + * ice_write_quad_ptp_reg_eth56g - Write a PHY quad register + * @hw: pointer to the HW struct + * @offset: PHY register offset + * @port: Port number + * @val: Value to write + * + * Return: + * * %0 - success + * * %EIO - invalid port number or resource type + * * %other - failed to write to PHY + */ +static int ice_write_quad_ptp_reg_eth56g(struct ice_hw *hw, u8 port, + u32 offset, u32 val) +{ + u32 addr; + + if (port >= hw->ptp.num_lports) + return -EIO; + + addr = eth56g_phy_res[ETH56G_PHY_REG_PTP].base_addr + offset; + + return ice_write_phy_eth56g(hw, port, addr, val); +} + +/** + * ice_read_quad_ptp_reg_eth56g - Read a PHY quad register + * @hw: pointer to the HW struct + * @offset: PHY register offset + * @port: Port number + * @val: Value to read + * + * Return: + * * %0 - success + * * %EIO - invalid port number or resource type + * * %other - failed to read from PHY + */ +static int ice_read_quad_ptp_reg_eth56g(struct ice_hw *hw, u8 port, + u32 offset, u32 *val) +{ + u32 addr; + + if (port >= hw->ptp.num_lports) + return -EIO; + + addr = eth56g_phy_res[ETH56G_PHY_REG_PTP].base_addr + offset; + + return ice_read_phy_eth56g(hw, port, addr, val); +} + /** * ice_is_64b_phy_reg_eth56g - Check if this is a 64bit PHY register * @low_addr: the low address to check @@ -1518,8 +1061,8 @@ static int ice_read_ptp_tstamp_eth56g(struct ice_hw *hw, u8 port, u8 idx, * lower 8 bits in the low register, and the upper 32 bits in the high * register. */ - *tstamp = ((u64)hi) << TS_PHY_HIGH_S | ((u64)lo & TS_PHY_LOW_M); - + *tstamp = FIELD_PREP(PHY_40B_HIGH_M, hi) | + FIELD_PREP(PHY_40B_LOW_M, lo); return 0; } @@ -1918,7 +1461,6 @@ ice_phy_get_speed_eth56g(struct ice_link_status *li) */ static int ice_phy_cfg_parpcs_eth56g(struct ice_hw *hw, u8 port) { - u8 port_blk = port & ~(ICE_PORTS_PER_QUAD - 1); u32 val; int err; @@ -1933,8 +1475,8 @@ static int ice_phy_cfg_parpcs_eth56g(struct ice_hw *hw, u8 port) switch (ice_phy_get_speed_eth56g(&hw->port_info->phy.link_info)) { case ICE_ETH56G_LNK_SPD_1G: case ICE_ETH56G_LNK_SPD_2_5G: - err = ice_read_ptp_reg_eth56g(hw, port_blk, - PHY_GPCS_CONFIG_REG0, &val); + err = ice_read_quad_ptp_reg_eth56g(hw, port, + PHY_GPCS_CONFIG_REG0, &val); if (err) { ice_debug(hw, ICE_DBG_PTP, "Failed to read PHY_GPCS_CONFIG_REG0, status: %d", err); @@ -1945,8 +1487,8 @@ static int ice_phy_cfg_parpcs_eth56g(struct ice_hw *hw, u8 port) val |= FIELD_PREP(PHY_GPCS_CONFIG_REG0_TX_THR_M, ICE_ETH56G_NOMINAL_TX_THRESH); - err = ice_write_ptp_reg_eth56g(hw, port_blk, - PHY_GPCS_CONFIG_REG0, val); + err = ice_write_quad_ptp_reg_eth56g(hw, port, + PHY_GPCS_CONFIG_REG0, val); if (err) { ice_debug(hw, ICE_DBG_PTP, "Failed to write PHY_GPCS_CONFIG_REG0, status: %d", err); @@ -1987,50 +1529,47 @@ static int ice_phy_cfg_parpcs_eth56g(struct ice_hw *hw, u8 port) */ int ice_phy_cfg_ptp_1step_eth56g(struct ice_hw *hw, u8 port) { - u8 port_blk = port & ~(ICE_PORTS_PER_QUAD - 1); - u8 blk_port = port & (ICE_PORTS_PER_QUAD - 1); + u8 quad_lane = port % ICE_PORTS_PER_QUAD; + u32 addr, val, peer_delay; bool enable, sfd_ena; - u32 val, peer_delay; int err; enable = hw->ptp.phy.eth56g.onestep_ena; peer_delay = hw->ptp.phy.eth56g.peer_delay; sfd_ena = hw->ptp.phy.eth56g.sfd_ena; - /* PHY_PTP_1STEP_CONFIG */ - err = ice_read_ptp_reg_eth56g(hw, port_blk, PHY_PTP_1STEP_CONFIG, &val); + addr = PHY_PTP_1STEP_CONFIG; + err = ice_read_quad_ptp_reg_eth56g(hw, port, addr, &val); if (err) return err; if (enable) - val |= blk_port; + val |= BIT(quad_lane); else - val &= ~blk_port; + val &= ~BIT(quad_lane); val &= ~(PHY_PTP_1STEP_T1S_UP64_M | PHY_PTP_1STEP_T1S_DELTA_M); - err = ice_write_ptp_reg_eth56g(hw, port_blk, PHY_PTP_1STEP_CONFIG, val); + err = ice_write_quad_ptp_reg_eth56g(hw, port, addr, val); if (err) return err; - /* PHY_PTP_1STEP_PEER_DELAY */ + addr = PHY_PTP_1STEP_PEER_DELAY(quad_lane); val = FIELD_PREP(PHY_PTP_1STEP_PD_DELAY_M, peer_delay); if (peer_delay) val |= PHY_PTP_1STEP_PD_ADD_PD_M; val |= PHY_PTP_1STEP_PD_DLY_V_M; - err = ice_write_ptp_reg_eth56g(hw, port_blk, - PHY_PTP_1STEP_PEER_DELAY(blk_port), val); + err = ice_write_quad_ptp_reg_eth56g(hw, port, addr, val); if (err) return err; val &= ~PHY_PTP_1STEP_PD_DLY_V_M; - err = ice_write_ptp_reg_eth56g(hw, port_blk, - PHY_PTP_1STEP_PEER_DELAY(blk_port), val); + err = ice_write_quad_ptp_reg_eth56g(hw, port, addr, val); if (err) return err; - /* PHY_MAC_XIF_MODE */ - err = ice_read_mac_reg_eth56g(hw, port, PHY_MAC_XIF_MODE, &val); + addr = PHY_MAC_XIF_MODE; + err = ice_read_mac_reg_eth56g(hw, port, addr, &val); if (err) return err; @@ -2050,7 +1589,7 @@ int ice_phy_cfg_ptp_1step_eth56g(struct ice_hw *hw, u8 port) FIELD_PREP(PHY_MAC_XIF_TS_BIN_MODE_M, enable) | FIELD_PREP(PHY_MAC_XIF_TS_SFD_ENA_M, sfd_ena); - return ice_write_mac_reg_eth56g(hw, port, PHY_MAC_XIF_MODE, val); + return ice_write_mac_reg_eth56g(hw, port, addr, val); } /** @@ -2092,21 +1631,22 @@ static u32 ice_ptp_calc_bitslip_eth56g(struct ice_hw *hw, u8 port, u32 bs, bool fc, bool rs, enum ice_eth56g_link_spd spd) { - u8 port_offset = port & (ICE_PORTS_PER_QUAD - 1); - u8 port_blk = port & ~(ICE_PORTS_PER_QUAD - 1); u32 bitslip; int err; if (!bs || rs) return 0; - if (spd == ICE_ETH56G_LNK_SPD_1G || spd == ICE_ETH56G_LNK_SPD_2_5G) + if (spd == ICE_ETH56G_LNK_SPD_1G || spd == ICE_ETH56G_LNK_SPD_2_5G) { err = ice_read_gpcs_reg_eth56g(hw, port, PHY_GPCS_BITSLIP, &bitslip); - else - err = ice_read_ptp_reg_eth56g(hw, port_blk, - PHY_REG_SD_BIT_SLIP(port_offset), - &bitslip); + } else { + u8 quad_lane = port % ICE_PORTS_PER_QUAD; + u32 addr; + + addr = PHY_REG_SD_BIT_SLIP(quad_lane); + err = ice_read_quad_ptp_reg_eth56g(hw, port, addr, &bitslip); + } if (err) return 0; @@ -2350,6 +1890,7 @@ int ice_phy_cfg_intr_eth56g(struct ice_hw *hw, u8 port, bool ena, u8 threshold) static int ice_read_phy_and_phc_time_eth56g(struct ice_hw *hw, u8 port, u64 *phy_time, u64 *phc_time) { + struct ice_pf *pf = container_of(hw, struct ice_pf, hw); u64 tx_time, rx_time; u32 zo, lo; u8 tmr_idx; @@ -2369,8 +1910,13 @@ static int ice_read_phy_and_phc_time_eth56g(struct ice_hw *hw, u8 port, ice_ptp_exec_tmr_cmd(hw); /* Read the captured PHC time from the shadow time registers */ - zo = rd32(hw, GLTSYN_SHTIME_0(tmr_idx)); - lo = rd32(hw, GLTSYN_SHTIME_L(tmr_idx)); + if (ice_is_primary(hw)) { + zo = rd32(hw, GLTSYN_SHTIME_0(tmr_idx)); + lo = rd32(hw, GLTSYN_SHTIME_L(tmr_idx)); + } else { + zo = rd32(ice_get_primary_hw(pf), GLTSYN_SHTIME_0(tmr_idx)); + lo = rd32(ice_get_primary_hw(pf), GLTSYN_SHTIME_L(tmr_idx)); + } *phc_time = (u64)lo << 32 | zo; /* Read the captured PHY time from the PHY shadow registers */ @@ -2507,6 +2053,7 @@ int ice_stop_phy_timer_eth56g(struct ice_hw *hw, u8 port, bool soft_reset) */ int ice_start_phy_timer_eth56g(struct ice_hw *hw, u8 port) { + struct ice_pf *pf = container_of(hw, struct ice_pf, hw); u32 lo, hi; u64 incval; u8 tmr_idx; @@ -2532,8 +2079,13 @@ int ice_start_phy_timer_eth56g(struct ice_hw *hw, u8 port) if (err) return err; - lo = rd32(hw, GLTSYN_INCVAL_L(tmr_idx)); - hi = rd32(hw, GLTSYN_INCVAL_H(tmr_idx)); + if (ice_is_primary(hw)) { + lo = rd32(hw, GLTSYN_INCVAL_L(tmr_idx)); + hi = rd32(hw, GLTSYN_INCVAL_H(tmr_idx)); + } else { + lo = rd32(ice_get_primary_hw(pf), GLTSYN_INCVAL_L(tmr_idx)); + hi = rd32(ice_get_primary_hw(pf), GLTSYN_INCVAL_H(tmr_idx)); + } incval = (u64)hi << 32 | lo; err = ice_write_40b_ptp_reg_eth56g(hw, port, PHY_REG_TIMETUS_L, incval); @@ -2563,42 +2115,6 @@ int ice_start_phy_timer_eth56g(struct ice_hw *hw, u8 port) return 0; } -/** - * ice_sb_access_ena_eth56g - Enable SB devices (PHY and others) access - * @hw: pointer to HW struct - * @enable: Enable or disable access - * - * Enable sideband devices (PHY and others) access. - */ -static void ice_sb_access_ena_eth56g(struct ice_hw *hw, bool enable) -{ - u32 val = rd32(hw, PF_SB_REM_DEV_CTL); - - if (enable) - val |= BIT(eth56g_phy_0) | BIT(cgu) | BIT(eth56g_phy_1); - else - val &= ~(BIT(eth56g_phy_0) | BIT(cgu) | BIT(eth56g_phy_1)); - - wr32(hw, PF_SB_REM_DEV_CTL, val); -} - -/** - * ice_ptp_init_phc_eth56g - Perform E82X specific PHC initialization - * @hw: pointer to HW struct - * - * Perform PHC initialization steps specific to E82X devices. - * - * Return: - * * %0 - success - * * %other - failed to initialize CGU - */ -static int ice_ptp_init_phc_eth56g(struct ice_hw *hw) -{ - ice_sb_access_ena_eth56g(hw, true); - /* Initialize the Clock Generation Unit */ - return ice_init_cgu_e82x(hw); -} - /** * ice_ptp_read_tx_hwtstamp_status_eth56g - Get TX timestamp status * @hw: pointer to the HW struct @@ -2666,59 +2182,21 @@ static int ice_get_phy_tx_tstamp_ready_eth56g(struct ice_hw *hw, u8 port, } /** - * ice_is_muxed_topo - detect breakout 2x50G topology for E825C - * @hw: pointer to the HW struct - * - * Return: true if it's 2x50 breakout topology, false otherwise - */ -static bool ice_is_muxed_topo(struct ice_hw *hw) -{ - u8 link_topo; - bool mux; - u32 val; - - val = rd32(hw, GLGEN_SWITCH_MODE_CONFIG); - mux = FIELD_GET(GLGEN_SWITCH_MODE_CONFIG_25X4_QUAD_M, val); - val = rd32(hw, GLGEN_MAC_LINK_TOPO); - link_topo = FIELD_GET(GLGEN_MAC_LINK_TOPO_LINK_TOPO_M, val); - - return (mux && link_topo == ICE_LINK_TOPO_UP_TO_2_LINKS); -} - -/** - * ice_ptp_init_phy_e825c - initialize PHY parameters + * ice_ptp_init_phy_e825 - initialize PHY parameters * @hw: pointer to the HW struct */ -static void ice_ptp_init_phy_e825c(struct ice_hw *hw) +static void ice_ptp_init_phy_e825(struct ice_hw *hw) { struct ice_ptp_hw *ptp = &hw->ptp; struct ice_eth56g_params *params; - u8 phy; - ptp->phy_model = ICE_PHY_ETH56G; params = &ptp->phy.eth56g; params->onestep_ena = false; params->peer_delay = 0; params->sfd_ena = false; - params->phy_addr[0] = eth56g_phy_0; - params->phy_addr[1] = eth56g_phy_1; params->num_phys = 2; ptp->ports_per_phy = 4; ptp->num_lports = params->num_phys * ptp->ports_per_phy; - - ice_sb_access_ena_eth56g(hw, true); - for (phy = 0; phy < params->num_phys; phy++) { - u32 phy_rev; - int err; - - err = ice_read_phy_eth56g(hw, phy, PHY_REG_REVISION, &phy_rev); - if (err || phy_rev != PHY_REVISION_ETH56G) { - ptp->phy_model = ICE_PHY_UNSUP; - return; - } - } - - ptp->is_2x50g_muxed_topo = ice_is_muxed_topo(hw); } /* E822 family functions @@ -2737,10 +2215,9 @@ static void ice_fill_phy_msg_e82x(struct ice_hw *hw, struct ice_sbq_msg_input *msg, u8 port, u16 offset) { - int phy_port, phy, quadtype; + int phy_port, quadtype; phy_port = port % hw->ptp.ports_per_phy; - phy = port / hw->ptp.ports_per_phy; quadtype = ICE_GET_QUAD_NUM(port) % ICE_GET_QUAD_NUM(hw->ptp.ports_per_phy); @@ -2752,12 +2229,7 @@ static void ice_fill_phy_msg_e82x(struct ice_hw *hw, msg->msg_addr_high = P_Q1_H(P_4_BASE + offset, phy_port); } - if (phy == 0) - msg->dest_dev = rmn_0; - else if (phy == 1) - msg->dest_dev = rmn_1; - else - msg->dest_dev = rmn_2; + msg->dest_dev = ice_sbq_dev_phy_0; } /** @@ -3080,7 +2552,7 @@ static int ice_fill_quad_msg_e82x(struct ice_hw *hw, if (quad >= ICE_GET_QUAD_NUM(hw->ptp.num_lports)) return -EINVAL; - msg->dest_dev = rmn_0; + msg->dest_dev = ice_sbq_dev_phy_0; if (!(quad % ICE_GET_QUAD_NUM(hw->ptp.ports_per_phy))) addr = Q_0_BASE + offset; @@ -3199,7 +2671,8 @@ ice_read_phy_tstamp_e82x(struct ice_hw *hw, u8 quad, u8 idx, u64 *tstamp) * lower 8 bits in the low register, and the upper 32 bits in the high * register. */ - *tstamp = FIELD_PREP(TS_PHY_HIGH_M, hi) | FIELD_PREP(TS_PHY_LOW_M, lo); + *tstamp = FIELD_PREP(PHY_40B_HIGH_M, hi) | + FIELD_PREP(PHY_40B_LOW_M, lo); return 0; } @@ -3301,7 +2774,6 @@ static int ice_ptp_set_vernier_wl(struct ice_hw *hw) */ static int ice_ptp_init_phc_e82x(struct ice_hw *hw) { - int err; u32 val; /* Enable reading switch and PHY registers over the sideband queue */ @@ -3311,11 +2783,6 @@ static int ice_ptp_init_phc_e82x(struct ice_hw *hw) val |= (PF_SB_REM_DEV_CTL_SWITCH_READ | PF_SB_REM_DEV_CTL_PHY0); wr32(hw, PF_SB_REM_DEV_CTL, val); - /* Initialize the Clock Generation Unit */ - err = ice_init_cgu_e82x(hw); - if (err) - return err; - /* Set window length for all the ports */ return ice_ptp_set_vernier_wl(hw); } @@ -4772,7 +4239,6 @@ int ice_phy_cfg_intr_e82x(struct ice_hw *hw, u8 quad, bool ena, u8 threshold) */ static void ice_ptp_init_phy_e82x(struct ice_ptp_hw *ptp) { - ptp->phy_model = ICE_PHY_E82X; ptp->num_lports = 8; ptp->ports_per_phy = 8; } @@ -4799,7 +4265,7 @@ static int ice_read_phy_reg_e810(struct ice_hw *hw, u32 addr, u32 *val) msg.msg_addr_low = lower_16_bits(addr); msg.msg_addr_high = upper_16_bits(addr); msg.opcode = ice_sbq_msg_rd; - msg.dest_dev = rmn_0; + msg.dest_dev = ice_sbq_dev_phy_0; err = ice_sbq_rw_reg(hw, &msg, ICE_AQ_FLAG_RD); if (err) { @@ -4829,7 +4295,7 @@ static int ice_write_phy_reg_e810(struct ice_hw *hw, u32 addr, u32 val) msg.msg_addr_low = lower_16_bits(addr); msg.msg_addr_high = upper_16_bits(addr); msg.opcode = ice_sbq_msg_wr; - msg.dest_dev = rmn_0; + msg.dest_dev = ice_sbq_dev_phy_0; msg.data = val; err = ice_sbq_rw_reg(hw, &msg, ICE_AQ_FLAG_RD); @@ -4966,7 +4432,8 @@ ice_read_phy_tstamp_e810(struct ice_hw *hw, u8 lport, u8 idx, u64 *tstamp) /* For E810 devices, the timestamp is reported with the lower 32 bits * in the low register, and the upper 8 bits in the high register. */ - *tstamp = ((u64)hi) << TS_HIGH_S | ((u64)lo & TS_LOW_M); + *tstamp = FIELD_PREP(PHY_EXT_40B_HIGH_M, hi) | + FIELD_PREP(PHY_EXT_40B_LOW_M, lo); return 0; } @@ -5029,8 +4496,7 @@ static int ice_ptp_init_phc_e810(struct ice_hw *hw) u8 tmr_idx; int err; - /* Ensure synchronization delay is zero */ - wr32(hw, GLTSYN_SYNC_DLAY, 0); + ice_ptp_cfg_sync_delay(hw, ICE_E810_E830_SYNC_DELAY); tmr_idx = hw->func_caps.ts_func_info.tmr_index_owned; err = ice_write_phy_reg_e810(hw, ETH_GLTSYN_ENA(tmr_idx), @@ -5295,68 +4761,6 @@ ice_get_phy_tx_tstamp_ready_e810(struct ice_hw *hw, u8 port, u64 *tstamp_ready) * to access the extended GPIOs available. */ -/** - * ice_get_pca9575_handle - * @hw: pointer to the hw struct - * @pca9575_handle: GPIO controller's handle - * - * Find and return the GPIO controller's handle in the netlist. - * When found - the value will be cached in the hw structure and following calls - * will return cached value - */ -static int -ice_get_pca9575_handle(struct ice_hw *hw, u16 *pca9575_handle) -{ - struct ice_aqc_get_link_topo *cmd; - struct ice_aq_desc desc; - int status; - u8 idx; - - /* If handle was read previously return cached value */ - if (hw->io_expander_handle) { - *pca9575_handle = hw->io_expander_handle; - return 0; - } - - /* If handle was not detected read it from the netlist */ - cmd = &desc.params.get_link_topo; - ice_fill_dflt_direct_cmd_desc(&desc, ice_aqc_opc_get_link_topo); - - /* Set node type to GPIO controller */ - cmd->addr.topo_params.node_type_ctx = - (ICE_AQC_LINK_TOPO_NODE_TYPE_M & - ICE_AQC_LINK_TOPO_NODE_TYPE_GPIO_CTRL); - -#define SW_PCA9575_SFP_TOPO_IDX 2 -#define SW_PCA9575_QSFP_TOPO_IDX 1 - - /* Check if the SW IO expander controlling SMA exists in the netlist. */ - if (hw->device_id == ICE_DEV_ID_E810C_SFP) - idx = SW_PCA9575_SFP_TOPO_IDX; - else if (hw->device_id == ICE_DEV_ID_E810C_QSFP) - idx = SW_PCA9575_QSFP_TOPO_IDX; - else - return -EOPNOTSUPP; - - cmd->addr.topo_params.index = idx; - - status = ice_aq_send_cmd(hw, &desc, NULL, 0, NULL); - if (status) - return -EOPNOTSUPP; - - /* Verify if we found the right IO expander type */ - if (desc.params.get_link_topo.node_part_num != - ICE_AQC_GET_LINK_TOPO_NODE_NR_PCA9575) - return -EOPNOTSUPP; - - /* If present save the handle and return it */ - hw->io_expander_handle = - le16_to_cpu(desc.params.get_link_topo.addr.handle); - *pca9575_handle = hw->io_expander_handle; - - return 0; -} - /** * ice_read_sma_ctrl * @hw: pointer to the hw struct @@ -5421,37 +4825,6 @@ int ice_write_sma_ctrl(struct ice_hw *hw, u8 data) return status; } -/** - * ice_read_pca9575_reg - * @hw: pointer to the hw struct - * @offset: GPIO controller register offset - * @data: pointer to data to be read from the GPIO controller - * - * Read the register from the GPIO controller - */ -int ice_read_pca9575_reg(struct ice_hw *hw, u8 offset, u8 *data) -{ - struct ice_aqc_link_topo_addr link_topo; - __le16 addr; - u16 handle; - int err; - - memset(&link_topo, 0, sizeof(link_topo)); - - err = ice_get_pca9575_handle(hw, &handle); - if (err) - return err; - - link_topo.handle = cpu_to_le16(handle); - link_topo.topo_params.node_type_ctx = - FIELD_PREP(ICE_AQC_LINK_TOPO_NODE_CTX_M, - ICE_AQC_LINK_TOPO_NODE_CTX_PROVIDED); - - addr = cpu_to_le16((u16)offset); - - return ice_aq_read_i2c(hw, link_topo, 0, addr, 1, data, NULL); -} - /** * ice_ptp_read_sdp_ac - read SDP available connections section from NVM * @hw: pointer to the HW struct @@ -5518,18 +4891,138 @@ exit: */ static void ice_ptp_init_phy_e810(struct ice_ptp_hw *ptp) { - ptp->phy_model = ICE_PHY_E810; ptp->num_lports = 8; ptp->ports_per_phy = 4; init_waitqueue_head(&ptp->phy.e810.atqbal_wq); } +/* E830 functions + * + * The following functions operate on the E830 series devices. + * + */ + +/** + * ice_ptp_init_phc_e830 - Perform E830 specific PHC initialization + * @hw: pointer to HW struct + * + * Perform E830-specific PTP hardware clock initialization steps. + */ +static void ice_ptp_init_phc_e830(const struct ice_hw *hw) +{ + ice_ptp_cfg_sync_delay(hw, ICE_E810_E830_SYNC_DELAY); +} + +/** + * ice_ptp_write_direct_incval_e830 - Prep PHY port increment value change + * @hw: pointer to HW struct + * @incval: The new 40bit increment value to prepare + * + * Prepare the PHY port for a new increment value by programming the PHC + * GLTSYN_INCVAL_L and GLTSYN_INCVAL_H registers. The actual change is + * completed by FW automatically. + */ +static void ice_ptp_write_direct_incval_e830(const struct ice_hw *hw, + u64 incval) +{ + u8 tmr_idx = hw->func_caps.ts_func_info.tmr_index_owned; + + wr32(hw, GLTSYN_INCVAL_L(tmr_idx), lower_32_bits(incval)); + wr32(hw, GLTSYN_INCVAL_H(tmr_idx), upper_32_bits(incval)); +} + +/** + * ice_ptp_write_direct_phc_time_e830 - Prepare PHY port with initial time + * @hw: Board private structure + * @time: Time to initialize the PHY port clock to + * + * Program the PHY port ETH_GLTSYN_SHTIME registers in preparation setting the + * initial clock time. The time will not actually be programmed until the + * driver issues an ICE_PTP_INIT_TIME command. + * + * The time value is the upper 32 bits of the PHY timer, usually in units of + * nominal nanoseconds. + */ +static void ice_ptp_write_direct_phc_time_e830(const struct ice_hw *hw, + u64 time) +{ + u8 tmr_idx = hw->func_caps.ts_func_info.tmr_index_owned; + + wr32(hw, GLTSYN_TIME_0(tmr_idx), 0); + wr32(hw, GLTSYN_TIME_L(tmr_idx), lower_32_bits(time)); + wr32(hw, GLTSYN_TIME_H(tmr_idx), upper_32_bits(time)); +} + +/** + * ice_ptp_port_cmd_e830 - Prepare all external PHYs for a timer command + * @hw: pointer to HW struct + * @cmd: Command to be sent to the port + * + * Prepare the external PHYs connected to this device for a timer sync + * command. + * + * Return: 0 on success, negative error code when PHY write failed + */ +static int ice_ptp_port_cmd_e830(struct ice_hw *hw, enum ice_ptp_tmr_cmd cmd) +{ + u32 val = ice_ptp_tmr_cmd_to_port_reg(hw, cmd); + + return ice_write_phy_reg_e810(hw, E830_ETH_GLTSYN_CMD, val); +} + +/** + * ice_read_phy_tstamp_e830 - Read a PHY timestamp out of the external PHY + * @hw: pointer to the HW struct + * @idx: the timestamp index to read + * @tstamp: on return, the 40bit timestamp value + * + * Read a 40bit timestamp value out of the timestamp block of the external PHY + * on the E830 device. + */ +static void ice_read_phy_tstamp_e830(const struct ice_hw *hw, u8 idx, + u64 *tstamp) +{ + u32 hi, lo; + + hi = rd32(hw, E830_PRTTSYN_TXTIME_H(idx)); + lo = rd32(hw, E830_PRTTSYN_TXTIME_L(idx)); + + /* For E830 devices, the timestamp is reported with the lower 32 bits + * in the low register, and the upper 8 bits in the high register. + */ + *tstamp = FIELD_PREP(PHY_EXT_40B_HIGH_M, hi) | + FIELD_PREP(PHY_EXT_40B_LOW_M, lo); +} + +/** + * ice_get_phy_tx_tstamp_ready_e830 - Read Tx memory status register + * @hw: pointer to the HW struct + * @port: the PHY port to read + * @tstamp_ready: contents of the Tx memory status register + */ +static void ice_get_phy_tx_tstamp_ready_e830(const struct ice_hw *hw, u8 port, + u64 *tstamp_ready) +{ + *tstamp_ready = rd32(hw, E830_PRTMAC_TS_TX_MEM_VALID_H); + *tstamp_ready <<= 32; + *tstamp_ready |= rd32(hw, E830_PRTMAC_TS_TX_MEM_VALID_L); +} + +/** + * ice_ptp_init_phy_e830 - initialize PHY parameters + * @ptp: pointer to the PTP HW struct + */ +static void ice_ptp_init_phy_e830(struct ice_ptp_hw *ptp) +{ + ptp->num_lports = 8; + ptp->ports_per_phy = 4; +} + /* Device agnostic functions * - * The following functions implement shared behavior common to both E822 and - * E810 devices, possibly calling a device specific implementation where - * necessary. + * The following functions implement shared behavior common to all devices, + * possibly calling a device specific implementation where necessary. */ /** @@ -5592,14 +5085,22 @@ void ice_ptp_init_hw(struct ice_hw *hw) { struct ice_ptp_hw *ptp = &hw->ptp; - if (ice_is_e822(hw) || ice_is_e823(hw)) - ice_ptp_init_phy_e82x(ptp); - else if (ice_is_e810(hw)) + switch (hw->mac_type) { + case ICE_MAC_E810: ice_ptp_init_phy_e810(ptp); - else if (ice_is_e825c(hw)) - ice_ptp_init_phy_e825c(hw); - else - ptp->phy_model = ICE_PHY_UNSUP; + break; + case ICE_MAC_E830: + ice_ptp_init_phy_e830(ptp); + break; + case ICE_MAC_GENERIC: + ice_ptp_init_phy_e82x(ptp); + break; + case ICE_MAC_GENERIC_3K_E825: + ice_ptp_init_phy_e825(hw); + break; + default: + return; + } } /** @@ -5620,11 +5121,11 @@ void ice_ptp_init_hw(struct ice_hw *hw) static int ice_ptp_write_port_cmd(struct ice_hw *hw, u8 port, enum ice_ptp_tmr_cmd cmd) { - switch (ice_get_phy_model(hw)) { - case ICE_PHY_ETH56G: - return ice_ptp_write_port_cmd_eth56g(hw, port, cmd); - case ICE_PHY_E82X: + switch (hw->mac_type) { + case ICE_MAC_GENERIC: return ice_ptp_write_port_cmd_e82x(hw, port, cmd); + case ICE_MAC_GENERIC_3K_E825: + return ice_ptp_write_port_cmd_eth56g(hw, port, cmd); default: return -EOPNOTSUPP; } @@ -5685,9 +5186,11 @@ static int ice_ptp_port_cmd(struct ice_hw *hw, enum ice_ptp_tmr_cmd cmd) u32 port; /* PHY models which can program all ports simultaneously */ - switch (ice_get_phy_model(hw)) { - case ICE_PHY_E810: + switch (hw->mac_type) { + case ICE_MAC_E810: return ice_ptp_port_cmd_e810(hw, cmd); + case ICE_MAC_E830: + return ice_ptp_port_cmd_e830(hw, cmd); default: break; } @@ -5758,23 +5261,29 @@ int ice_ptp_init_time(struct ice_hw *hw, u64 time) tmr_idx = hw->func_caps.ts_func_info.tmr_index_owned; /* Source timers */ + /* For E830 we don't need to use shadow registers, its automatic */ + if (hw->mac_type == ICE_MAC_E830) { + ice_ptp_write_direct_phc_time_e830(hw, time); + return 0; + } + wr32(hw, GLTSYN_SHTIME_L(tmr_idx), lower_32_bits(time)); wr32(hw, GLTSYN_SHTIME_H(tmr_idx), upper_32_bits(time)); wr32(hw, GLTSYN_SHTIME_0(tmr_idx), 0); /* PHY timers */ /* Fill Rx and Tx ports and send msg to PHY */ - switch (ice_get_phy_model(hw)) { - case ICE_PHY_ETH56G: - err = ice_ptp_prep_phy_time_eth56g(hw, - (u32)(time & 0xFFFFFFFF)); - break; - case ICE_PHY_E810: + switch (hw->mac_type) { + case ICE_MAC_E810: err = ice_ptp_prep_phy_time_e810(hw, time & 0xFFFFFFFF); break; - case ICE_PHY_E82X: + case ICE_MAC_GENERIC: err = ice_ptp_prep_phy_time_e82x(hw, time & 0xFFFFFFFF); break; + case ICE_MAC_GENERIC_3K_E825: + err = ice_ptp_prep_phy_time_eth56g(hw, + (u32)(time & 0xFFFFFFFF)); + break; default: err = -EOPNOTSUPP; } @@ -5806,20 +5315,26 @@ int ice_ptp_write_incval(struct ice_hw *hw, u64 incval) tmr_idx = hw->func_caps.ts_func_info.tmr_index_owned; + /* For E830 we don't need to use shadow registers, its automatic */ + if (hw->mac_type == ICE_MAC_E830) { + ice_ptp_write_direct_incval_e830(hw, incval); + return 0; + } + /* Shadow Adjust */ wr32(hw, GLTSYN_SHADJ_L(tmr_idx), lower_32_bits(incval)); wr32(hw, GLTSYN_SHADJ_H(tmr_idx), upper_32_bits(incval)); - switch (ice_get_phy_model(hw)) { - case ICE_PHY_ETH56G: - err = ice_ptp_prep_phy_incval_eth56g(hw, incval); - break; - case ICE_PHY_E810: + switch (hw->mac_type) { + case ICE_MAC_E810: err = ice_ptp_prep_phy_incval_e810(hw, incval); break; - case ICE_PHY_E82X: + case ICE_MAC_GENERIC: err = ice_ptp_prep_phy_incval_e82x(hw, incval); break; + case ICE_MAC_GENERIC_3K_E825: + err = ice_ptp_prep_phy_incval_eth56g(hw, incval); + break; default: err = -EOPNOTSUPP; } @@ -5879,16 +5394,19 @@ int ice_ptp_adj_clock(struct ice_hw *hw, s32 adj) wr32(hw, GLTSYN_SHADJ_L(tmr_idx), 0); wr32(hw, GLTSYN_SHADJ_H(tmr_idx), adj); - switch (ice_get_phy_model(hw)) { - case ICE_PHY_ETH56G: - err = ice_ptp_prep_phy_adj_eth56g(hw, adj); - break; - case ICE_PHY_E810: + switch (hw->mac_type) { + case ICE_MAC_E810: err = ice_ptp_prep_phy_adj_e810(hw, adj); break; - case ICE_PHY_E82X: + case ICE_MAC_E830: + /* E830 sync PHYs automatically after setting GLTSYN_SHADJ */ + return 0; + case ICE_MAC_GENERIC: err = ice_ptp_prep_phy_adj_e82x(hw, adj); break; + case ICE_MAC_GENERIC_3K_E825: + err = ice_ptp_prep_phy_adj_eth56g(hw, adj); + break; default: err = -EOPNOTSUPP; } @@ -5912,13 +5430,16 @@ int ice_ptp_adj_clock(struct ice_hw *hw, s32 adj) */ int ice_read_phy_tstamp(struct ice_hw *hw, u8 block, u8 idx, u64 *tstamp) { - switch (ice_get_phy_model(hw)) { - case ICE_PHY_ETH56G: - return ice_read_ptp_tstamp_eth56g(hw, block, idx, tstamp); - case ICE_PHY_E810: + switch (hw->mac_type) { + case ICE_MAC_E810: return ice_read_phy_tstamp_e810(hw, block, idx, tstamp); - case ICE_PHY_E82X: + case ICE_MAC_E830: + ice_read_phy_tstamp_e830(hw, idx, tstamp); + return 0; + case ICE_MAC_GENERIC: return ice_read_phy_tstamp_e82x(hw, block, idx, tstamp); + case ICE_MAC_GENERIC_3K_E825: + return ice_read_ptp_tstamp_eth56g(hw, block, idx, tstamp); default: return -EOPNOTSUPP; } @@ -5942,13 +5463,13 @@ int ice_read_phy_tstamp(struct ice_hw *hw, u8 block, u8 idx, u64 *tstamp) */ int ice_clear_phy_tstamp(struct ice_hw *hw, u8 block, u8 idx) { - switch (ice_get_phy_model(hw)) { - case ICE_PHY_ETH56G: - return ice_clear_ptp_tstamp_eth56g(hw, block, idx); - case ICE_PHY_E810: + switch (hw->mac_type) { + case ICE_MAC_E810: return ice_clear_phy_tstamp_e810(hw, block, idx); - case ICE_PHY_E82X: + case ICE_MAC_GENERIC: return ice_clear_phy_tstamp_e82x(hw, block, idx); + case ICE_MAC_GENERIC_3K_E825: + return ice_clear_ptp_tstamp_eth56g(hw, block, idx); default: return -EOPNOTSUPP; } @@ -6005,14 +5526,14 @@ static int ice_get_pf_c827_idx(struct ice_hw *hw, u8 *idx) */ void ice_ptp_reset_ts_memory(struct ice_hw *hw) { - switch (ice_get_phy_model(hw)) { - case ICE_PHY_ETH56G: - ice_ptp_reset_ts_memory_eth56g(hw); - break; - case ICE_PHY_E82X: + switch (hw->mac_type) { + case ICE_MAC_GENERIC: ice_ptp_reset_ts_memory_e82x(hw); break; - case ICE_PHY_E810: + case ICE_MAC_GENERIC_3K_E825: + ice_ptp_reset_ts_memory_eth56g(hw); + break; + case ICE_MAC_E810: default: return; } @@ -6034,13 +5555,16 @@ int ice_ptp_init_phc(struct ice_hw *hw) /* Clear event err indications for auxiliary pins */ (void)rd32(hw, GLTSYN_STAT(src_idx)); - switch (ice_get_phy_model(hw)) { - case ICE_PHY_ETH56G: - return ice_ptp_init_phc_eth56g(hw); - case ICE_PHY_E810: + switch (hw->mac_type) { + case ICE_MAC_E810: return ice_ptp_init_phc_e810(hw); - case ICE_PHY_E82X: + case ICE_MAC_E830: + ice_ptp_init_phc_e830(hw); + return 0; + case ICE_MAC_GENERIC: return ice_ptp_init_phc_e82x(hw); + case ICE_MAC_GENERIC_3K_E825: + return 0; default: return -EOPNOTSUPP; } @@ -6059,17 +5583,19 @@ int ice_ptp_init_phc(struct ice_hw *hw) */ int ice_get_phy_tx_tstamp_ready(struct ice_hw *hw, u8 block, u64 *tstamp_ready) { - switch (ice_get_phy_model(hw)) { - case ICE_PHY_ETH56G: - return ice_get_phy_tx_tstamp_ready_eth56g(hw, block, - tstamp_ready); - case ICE_PHY_E810: + switch (hw->mac_type) { + case ICE_MAC_E810: return ice_get_phy_tx_tstamp_ready_e810(hw, block, tstamp_ready); - case ICE_PHY_E82X: + case ICE_MAC_E830: + ice_get_phy_tx_tstamp_ready_e830(hw, block, tstamp_ready); + return 0; + case ICE_MAC_GENERIC: return ice_get_phy_tx_tstamp_ready_e82x(hw, block, tstamp_ready); - break; + case ICE_MAC_GENERIC_3K_E825: + return ice_get_phy_tx_tstamp_ready_eth56g(hw, block, + tstamp_ready); default: return -EOPNOTSUPP; } diff --git a/drivers/net/ethernet/intel/ice/ice_ptp_hw.h b/drivers/net/ethernet/intel/ice/ice_ptp_hw.h index 54bc67d628..da4ccb44ba 100644 --- a/drivers/net/ethernet/intel/ice/ice_ptp_hw.h +++ b/drivers/net/ethernet/intel/ice/ice_ptp_hw.h @@ -65,14 +65,14 @@ enum ice_eth56g_link_spd { /** * struct ice_phy_reg_info_eth56g - ETH56G PHY register parameters - * @base: base address for each PHY block + * @base_addr: base address for each PHY block * @step: step between PHY lanes * * Characteristic information for the various PHY register parameters in the * ETH56G devices */ struct ice_phy_reg_info_eth56g { - u32 base[NUM_ETH56G_PHY_RES]; + u32 base_addr; u32 step; }; @@ -80,7 +80,6 @@ struct ice_phy_reg_info_eth56g { * struct ice_time_ref_info_e82x * @pll_freq: Frequency of PLL that drives timer ticks in Hz * @nominal_incval: increment to generate nanoseconds in GLTSYN_TIME_L - * @pps_delay: propagation delay of the PPS output signal * * Characteristic information for the various TIME_REF sources possible in the * E822 devices @@ -88,7 +87,6 @@ struct ice_phy_reg_info_eth56g { struct ice_time_ref_info_e82x { u64 pll_freq; u64 nominal_incval; - u8 pps_delay; }; /** @@ -196,23 +194,6 @@ struct ice_eth56g_mac_reg_cfg { extern const struct ice_eth56g_mac_reg_cfg eth56g_mac_cfg[NUM_ICE_ETH56G_LNK_SPD]; -/** - * struct ice_cgu_pll_params_e82x - E82X CGU parameters - * @refclk_pre_div: Reference clock pre-divisor - * @feedback_div: Feedback divisor - * @frac_n_div: Fractional divisor - * @post_pll_div: Post PLL divisor - * - * Clock Generation Unit parameters used to program the PLL based on the - * selected TIME_REF frequency. - */ -struct ice_cgu_pll_params_e82x { - u32 refclk_pre_div; - u32 feedback_div; - u32 frac_n_div; - u32 post_pll_div; -}; - #define E810C_QSFP_C827_0_HANDLE 2 #define E810C_QSFP_C827_1_HANDLE 3 enum ice_e810_c827_idx { @@ -284,31 +265,6 @@ struct ice_cgu_pin_desc { struct dpll_pin_frequency *freq_supp; }; -extern const struct -ice_cgu_pll_params_e82x e822_cgu_params[NUM_ICE_TIME_REF_FREQ]; - -/** - * struct ice_cgu_pll_params_e825c - E825C CGU parameters - * @tspll_ck_refclkfreq: tspll_ck_refclkfreq selection - * @tspll_ndivratio: ndiv ratio that goes directly to the pll - * @tspll_fbdiv_intgr: TS PLL integer feedback divide - * @tspll_fbdiv_frac: TS PLL fractional feedback divide - * @ref1588_ck_div: clock divider for tspll ref - * - * Clock Generation Unit parameters used to program the PLL based on the - * selected TIME_REF/TCXO frequency. - */ -struct ice_cgu_pll_params_e825c { - u32 tspll_ck_refclkfreq; - u32 tspll_ndivratio; - u32 tspll_fbdiv_intgr; - u32 tspll_fbdiv_frac; - u32 ref1588_ck_div; -}; - -extern const struct -ice_cgu_pll_params_e825c e825c_cgu_params[NUM_ICE_TIME_REF_FREQ]; - #define E810C_QSFP_C827_0_HANDLE 2 #define E810C_QSFP_C827_1_HANDLE 3 @@ -316,7 +272,7 @@ ice_cgu_pll_params_e825c e825c_cgu_params[NUM_ICE_TIME_REF_FREQ]; extern const struct ice_phy_reg_info_eth56g eth56g_phy_res[NUM_ETH56G_PHY_RES]; /* Table of constants related to possible TIME_REF sources */ -extern const struct ice_time_ref_info_e82x e82x_time_ref[NUM_ICE_TIME_REF_FREQ]; +extern const struct ice_time_ref_info_e82x e82x_time_ref[NUM_ICE_TSPLL_FREQ]; /* Table of constants for Vernier calibration on E822 */ extern const struct ice_vernier_info_e82x e822_vernier[NUM_ICE_PTP_LNK_SPD]; @@ -326,12 +282,10 @@ extern const struct ice_vernier_info_e82x e822_vernier[NUM_ICE_PTP_LNK_SPD]; */ #define ICE_E810_PLL_FREQ 812500000 #define ICE_PTP_NOMINAL_INCVAL_E810 0x13b13b13bULL -#define ICE_E810_OUT_PROP_DELAY_NS 1 -#define ICE_E825C_OUT_PROP_DELAY_NS 11 +#define ICE_E810_E830_SYNC_DELAY 0 /* Device agnostic functions */ u8 ice_get_ptp_src_clock_index(struct ice_hw *hw); -int ice_cgu_cfg_pps_out(struct ice_hw *hw, bool enable); bool ice_ptp_lock(struct ice_hw *hw); void ice_ptp_unlock(struct ice_hw *hw); void ice_ptp_src_cmd(struct ice_hw *hw, enum ice_ptp_tmr_cmd cmd); @@ -360,7 +314,8 @@ void ice_ptp_reset_ts_memory_quad_e82x(struct ice_hw *hw, u8 quad); * * Returns the current TIME_REF from the capabilities structure. */ -static inline enum ice_time_ref_freq ice_e82x_time_ref(const struct ice_hw *hw) + +static inline enum ice_tspll_freq ice_e82x_time_ref(const struct ice_hw *hw) { return hw->func_caps.ts_func_info.time_ref; } @@ -374,26 +329,21 @@ static inline enum ice_time_ref_freq ice_e82x_time_ref(const struct ice_hw *hw) * change, such as an update to the CGU registers. */ static inline void -ice_set_e82x_time_ref(struct ice_hw *hw, enum ice_time_ref_freq time_ref) +ice_set_e82x_time_ref(struct ice_hw *hw, enum ice_tspll_freq time_ref) { hw->func_caps.ts_func_info.time_ref = time_ref; } -static inline u64 ice_e82x_pll_freq(enum ice_time_ref_freq time_ref) +static inline u64 ice_e82x_pll_freq(enum ice_tspll_freq time_ref) { return e82x_time_ref[time_ref].pll_freq; } -static inline u64 ice_e82x_nominal_incval(enum ice_time_ref_freq time_ref) +static inline u64 ice_e82x_nominal_incval(enum ice_tspll_freq time_ref) { return e82x_time_ref[time_ref].nominal_incval; } -static inline u64 ice_e82x_pps_delay(enum ice_time_ref_freq time_ref) -{ - return e82x_time_ref[time_ref].pps_delay; -} - /* E822 Vernier calibration functions */ int ice_stop_phy_timer_e82x(struct ice_hw *hw, u8 port, bool soft_reset); int ice_start_phy_timer_e82x(struct ice_hw *hw, u8 port); @@ -404,8 +354,6 @@ int ice_phy_cfg_intr_e82x(struct ice_hw *hw, u8 quad, bool ena, u8 threshold); /* E810 family functions */ int ice_read_sma_ctrl(struct ice_hw *hw, u8 *data); int ice_write_sma_ctrl(struct ice_hw *hw, u8 data); -int ice_read_pca9575_reg(struct ice_hw *hw, u8 offset, u8 *data); -bool ice_is_pca9575_present(struct ice_hw *hw); int ice_ptp_read_sdp_ac(struct ice_hw *hw, __le16 *entries, uint *num_entries); int ice_cgu_get_num_pins(struct ice_hw *hw, bool input); enum dpll_pin_type ice_cgu_get_pin_type(struct ice_hw *hw, u8 pin, bool input); @@ -424,8 +372,6 @@ int ice_cgu_get_output_pin_state_caps(struct ice_hw *hw, u8 pin_id, int ice_ptp_read_tx_hwtstamp_status_eth56g(struct ice_hw *hw, u32 *ts_status); int ice_stop_phy_timer_eth56g(struct ice_hw *hw, u8 port, bool soft_reset); int ice_start_phy_timer_eth56g(struct ice_hw *hw, u8 port); -int ice_phy_cfg_tx_offset_eth56g(struct ice_hw *hw, u8 port); -int ice_phy_cfg_rx_offset_eth56g(struct ice_hw *hw, u8 port); int ice_phy_cfg_intr_eth56g(struct ice_hw *hw, u8 port, bool ena, u8 threshold); int ice_phy_cfg_ptp_1step_eth56g(struct ice_hw *hw, u8 port); @@ -435,20 +381,6 @@ int ice_phy_cfg_ptp_1step_eth56g(struct ice_hw *hw, u8 port); #define ICE_ETH56G_NOMINAL_THRESH4 0x7777 #define ICE_ETH56G_NOMINAL_TX_THRESH 0x6 -static inline u64 ice_prop_delay(const struct ice_hw *hw) -{ - switch (hw->ptp.phy_model) { - case ICE_PHY_ETH56G: - return ICE_E825C_OUT_PROP_DELAY_NS; - case ICE_PHY_E810: - return ICE_E810_OUT_PROP_DELAY_NS; - case ICE_PHY_E82X: - return ice_e82x_pps_delay(ice_e82x_time_ref(hw)); - default: - return 0; - } -} - /** * ice_get_base_incval - Get base clock increment value * @hw: pointer to the HW struct @@ -457,23 +389,19 @@ static inline u64 ice_prop_delay(const struct ice_hw *hw) */ static inline u64 ice_get_base_incval(struct ice_hw *hw) { - switch (hw->ptp.phy_model) { - case ICE_PHY_ETH56G: - return ICE_ETH56G_NOMINAL_INCVAL; - case ICE_PHY_E810: + switch (hw->mac_type) { + case ICE_MAC_E810: + case ICE_MAC_E830: return ICE_PTP_NOMINAL_INCVAL_E810; - case ICE_PHY_E82X: + case ICE_MAC_GENERIC: return ice_e82x_nominal_incval(ice_e82x_time_ref(hw)); + case ICE_MAC_GENERIC_3K_E825: + return ICE_ETH56G_NOMINAL_INCVAL; default: return 0; } } -static inline bool ice_is_dual(struct ice_hw *hw) -{ - return !!(hw->dev_caps.nac_topo.mode & ICE_NAC_TOPO_DUAL_M); -} - #define PFTSYN_SEM_BYTES 4 #define ICE_PTP_CLOCK_INDEX_0 0x00 @@ -676,19 +604,25 @@ static inline bool ice_is_dual(struct ice_hw *hw) /* E810 timer command register */ #define E810_ETH_GLTSYN_CMD 0x03000344 +/* E830 timer command register */ +#define E830_ETH_GLTSYN_CMD 0x00088814 + +/* E810 PHC time register */ +#define E830_GLTSYN_TIME_L(_tmr_idx) (0x0008A000 + 0x1000 * (_tmr_idx)) + /* Source timer incval macros */ #define INCVAL_HIGH_M 0xFF -/* Timestamp block macros */ +/* PHY 40b registers macros */ +#define PHY_EXT_40B_LOW_M GENMASK(31, 0) +#define PHY_EXT_40B_HIGH_M GENMASK_ULL(39, 32) +#define PHY_40B_LOW_M GENMASK(7, 0) +#define PHY_40B_HIGH_M GENMASK_ULL(39, 8) #define TS_VALID BIT(0) #define TS_LOW_M 0xFFFFFFFF #define TS_HIGH_M 0xFF #define TS_HIGH_S 32 -#define TS_PHY_LOW_M 0xFF -#define TS_PHY_HIGH_M 0xFFFFFFFF -#define TS_PHY_HIGH_S 8 - #define BYTES_PER_IDX_ADDR_L_U 8 #define BYTES_PER_IDX_ADDR_L 4 @@ -799,36 +733,19 @@ static inline bool ice_is_dual(struct ice_hw *hw) #define PHY_MAC_XIF_TS_SFD_ENA_M ICE_M(0x1, 20) #define PHY_MAC_XIF_GMII_TS_SEL_M ICE_M(0x1, 21) -/* GPCS config register */ -#define PHY_GPCS_CONFIG_REG0 0x268 -#define PHY_GPCS_CONFIG_REG0_TX_THR_M ICE_M(0xF, 24) -#define PHY_GPCS_BITSLIP 0x5C - #define PHY_TS_INT_CONFIG_THRESHOLD_M ICE_M(0x3F, 0) #define PHY_TS_INT_CONFIG_ENA_M BIT(6) -/* 1-step PTP config */ -#define PHY_PTP_1STEP_CONFIG 0x270 -#define PHY_PTP_1STEP_T1S_UP64_M ICE_M(0xF, 4) -#define PHY_PTP_1STEP_T1S_DELTA_M ICE_M(0xF, 8) -#define PHY_PTP_1STEP_PEER_DELAY(_port) (0x274 + 4 * (_port)) -#define PHY_PTP_1STEP_PD_ADD_PD_M ICE_M(0x1, 0) -#define PHY_PTP_1STEP_PD_DELAY_M ICE_M(0x3fffffff, 1) -#define PHY_PTP_1STEP_PD_DLY_V_M ICE_M(0x1, 31) - /* Macros to derive offsets for TimeStampLow and TimeStampHigh */ #define PHY_TSTAMP_L(x) (((x) * 8) + 0) #define PHY_TSTAMP_U(x) (((x) * 8) + 4) -#define PHY_REG_REVISION 0x85000 - #define PHY_REG_DESKEW_0 0x94 #define PHY_REG_DESKEW_0_RLEVEL GENMASK(6, 0) #define PHY_REG_DESKEW_0_RLEVEL_FRAC GENMASK(9, 7) #define PHY_REG_DESKEW_0_RLEVEL_FRAC_W 3 #define PHY_REG_DESKEW_0_VALID GENMASK(10, 10) -#define PHY_REG_GPCS_BITSLIP 0x5C #define PHY_REG_SD_BIT_SLIP(_port_offset) (0x29C + 4 * (_port_offset)) #define PHY_REVISION_ETH56G 0x10200 #define PHY_VENDOR_TXLANE_THRESH 0x2000C @@ -848,7 +765,21 @@ static inline bool ice_is_dual(struct ice_hw *hw) #define PHY_MAC_BLOCKTIME 0x50 #define PHY_MAC_MARKERTIME 0x54 #define PHY_MAC_TX_OFFSET 0x58 +#define PHY_GPCS_BITSLIP 0x5C #define PHY_PTP_INT_STATUS 0x7FD140 +/* ETH56G registers shared per quad */ +/* GPCS config register */ +#define PHY_GPCS_CONFIG_REG0 0x268 +#define PHY_GPCS_CONFIG_REG0_TX_THR_M GENMASK(27, 24) +/* 1-step PTP config */ +#define PHY_PTP_1STEP_CONFIG 0x270 +#define PHY_PTP_1STEP_T1S_UP64_M GENMASK(7, 4) +#define PHY_PTP_1STEP_T1S_DELTA_M GENMASK(11, 8) +#define PHY_PTP_1STEP_PEER_DELAY(_quad_lane) (0x274 + 4 * (_quad_lane)) +#define PHY_PTP_1STEP_PD_ADD_PD_M BIT(0) +#define PHY_PTP_1STEP_PD_DELAY_M GENMASK(30, 1) +#define PHY_PTP_1STEP_PD_DLY_V_M BIT(31) + #endif /* _ICE_PTP_HW_H_ */ diff --git a/drivers/net/ethernet/intel/ice/ice_repr.c b/drivers/net/ethernet/intel/ice/ice_repr.c index 970a99a52b..fb7a1b9a43 100644 --- a/drivers/net/ethernet/intel/ice/ice_repr.c +++ b/drivers/net/ethernet/intel/ice/ice_repr.c @@ -4,7 +4,7 @@ #include "ice.h" #include "ice_eswitch.h" #include "devlink/devlink.h" -#include "devlink/devlink_port.h" +#include "devlink/port.h" #include "ice_sriov.h" #include "ice_tc_lib.h" #include "ice_dcb_lib.h" diff --git a/drivers/net/ethernet/intel/ice/ice_sbq_cmd.h b/drivers/net/ethernet/intel/ice/ice_sbq_cmd.h index 3b0054faf7..183dd5457d 100644 --- a/drivers/net/ethernet/intel/ice/ice_sbq_cmd.h +++ b/drivers/net/ethernet/intel/ice/ice_sbq_cmd.h @@ -46,13 +46,10 @@ struct ice_sbq_evt_desc { u8 data[24]; }; -enum ice_sbq_msg_dev { - eth56g_phy_0 = 0x02, - rmn_0 = 0x02, - rmn_1 = 0x03, - rmn_2 = 0x04, - cgu = 0x06, - eth56g_phy_1 = 0x0D, +enum ice_sbq_dev_id { + ice_sbq_dev_phy_0 = 0x02, + ice_sbq_dev_cgu = 0x06, + ice_sbq_dev_phy_0_peer = 0x0D, }; enum ice_sbq_msg_opcode { diff --git a/drivers/net/ethernet/intel/ice/ice_sched.c b/drivers/net/ethernet/intel/ice/ice_sched.c index 6ca13c5dcb..d9d09296d1 100644 --- a/drivers/net/ethernet/intel/ice/ice_sched.c +++ b/drivers/net/ethernet/intel/ice/ice_sched.c @@ -84,6 +84,27 @@ ice_sched_find_node_by_teid(struct ice_sched_node *start_node, u32 teid) return NULL; } +/** + * ice_sched_find_next_vsi_node - find the next node for a given VSI + * @vsi_node: VSI support node to start search with + * + * Return: Next VSI support node, or NULL. + * + * The function returns a pointer to the next node from the VSI layer + * assigned to the given VSI, or NULL if there is no such a node. + */ +static struct ice_sched_node * +ice_sched_find_next_vsi_node(struct ice_sched_node *vsi_node) +{ + unsigned int vsi_handle = vsi_node->vsi_handle; + + while ((vsi_node = vsi_node->sibling) != NULL) + if (vsi_node->vsi_handle == vsi_handle) + break; + + return vsi_node; +} + /** * ice_aqc_send_sched_elem_cmd - send scheduling elements cmd * @hw: pointer to the HW struct @@ -1084,8 +1105,10 @@ ice_sched_add_nodes_to_layer(struct ice_port_info *pi, if (parent->num_children < max_child_nodes) { new_num_nodes = max_child_nodes - parent->num_children; } else { - /* This parent is full, try the next sibling */ - parent = parent->sibling; + /* This parent is full, + * try the next available sibling. + */ + parent = ice_sched_find_next_vsi_node(parent); /* Don't modify the first node TEID memory if the * first node was added already in the above call. * Instead send some temp memory for all other @@ -1528,12 +1551,23 @@ ice_sched_get_free_qparent(struct ice_port_info *pi, u16 vsi_handle, u8 tc, /* get the first queue group node from VSI sub-tree */ qgrp_node = ice_sched_get_first_node(pi, vsi_node, qgrp_layer); while (qgrp_node) { + struct ice_sched_node *next_vsi_node; + /* make sure the qgroup node is part of the VSI subtree */ if (ice_sched_find_node_in_subtree(pi->hw, vsi_node, qgrp_node)) if (qgrp_node->num_children < max_children && qgrp_node->owner == owner) break; qgrp_node = qgrp_node->sibling; + if (qgrp_node) + continue; + + next_vsi_node = ice_sched_find_next_vsi_node(vsi_node); + if (!next_vsi_node) + break; + + vsi_node = next_vsi_node; + qgrp_node = ice_sched_get_first_node(pi, vsi_node, qgrp_layer); } /* Select the best queue group */ @@ -1604,16 +1638,16 @@ ice_sched_get_agg_node(struct ice_port_info *pi, struct ice_sched_node *tc_node, /** * ice_sched_calc_vsi_child_nodes - calculate number of VSI child nodes * @hw: pointer to the HW struct - * @num_qs: number of queues + * @num_new_qs: number of new queues that will be added to the tree * @num_nodes: num nodes array * * This function calculates the number of VSI child nodes based on the * number of queues. */ static void -ice_sched_calc_vsi_child_nodes(struct ice_hw *hw, u16 num_qs, u16 *num_nodes) +ice_sched_calc_vsi_child_nodes(struct ice_hw *hw, u16 num_new_qs, u16 *num_nodes) { - u16 num = num_qs; + u16 num = num_new_qs; u8 i, qgl, vsil; qgl = ice_sched_get_qgrp_layer(hw); @@ -1779,7 +1813,11 @@ ice_sched_add_vsi_support_nodes(struct ice_port_info *pi, u16 vsi_handle, if (!parent) return -EIO; - if (i == vsil) + /* Do not modify the VSI handle for already existing VSI nodes, + * (if no new VSI node was added to the tree). + * Assign the VSI handle only to newly added VSI nodes. + */ + if (i == vsil && num_added) parent->vsi_handle = vsi_handle; } @@ -1812,6 +1850,41 @@ ice_sched_add_vsi_to_topo(struct ice_port_info *pi, u16 vsi_handle, u8 tc) num_nodes); } +/** + * ice_sched_recalc_vsi_support_nodes - recalculate VSI support nodes count + * @hw: pointer to the HW struct + * @vsi_node: pointer to the leftmost VSI node that needs to be extended + * @new_numqs: new number of queues that has to be handled by the VSI + * @new_num_nodes: pointer to nodes count table to modify the VSI layer entry + * + * This function recalculates the number of supported nodes that need to + * be added after adding more Tx queues for a given VSI. + * The number of new VSI support nodes that shall be added will be saved + * to the @new_num_nodes table for the VSI layer. + */ +static void +ice_sched_recalc_vsi_support_nodes(struct ice_hw *hw, + struct ice_sched_node *vsi_node, + unsigned int new_numqs, u16 *new_num_nodes) +{ + u32 vsi_nodes_cnt = 1; + u32 max_queue_cnt = 1; + u32 qgl, vsil; + + qgl = ice_sched_get_qgrp_layer(hw); + vsil = ice_sched_get_vsi_layer(hw); + + for (u32 i = vsil; i <= qgl; i++) + max_queue_cnt *= hw->max_children[i]; + + while ((vsi_node = ice_sched_find_next_vsi_node(vsi_node)) != NULL) + vsi_nodes_cnt++; + + if (new_numqs > (max_queue_cnt * vsi_nodes_cnt)) + new_num_nodes[vsil] = DIV_ROUND_UP(new_numqs, max_queue_cnt) - + vsi_nodes_cnt; +} + /** * ice_sched_update_vsi_child_nodes - update VSI child nodes * @pi: port information structure @@ -1863,15 +1936,25 @@ ice_sched_update_vsi_child_nodes(struct ice_port_info *pi, u16 vsi_handle, return status; } - if (new_numqs) - ice_sched_calc_vsi_child_nodes(hw, new_numqs, new_num_nodes); - /* Keep the max number of queue configuration all the time. Update the - * tree only if number of queues > previous number of queues. This may + ice_sched_recalc_vsi_support_nodes(hw, vsi_node, + new_numqs, new_num_nodes); + ice_sched_calc_vsi_child_nodes(hw, new_numqs - prev_numqs, + new_num_nodes); + + /* Never decrease the number of queues in the tree. Update the tree + * only if number of queues > previous number of queues. This may * leave some extra nodes in the tree if number of queues < previous * number but that wouldn't harm anything. Removing those extra nodes * may complicate the code if those nodes are part of SRL or * individually rate limited. + * Also, add the required VSI support nodes if the existing ones cannot + * handle the requested new number of queues. */ + status = ice_sched_add_vsi_support_nodes(pi, vsi_handle, tc_node, + new_num_nodes); + if (status) + return status; + status = ice_sched_add_vsi_child_nodes(pi, vsi_handle, tc_node, new_num_nodes, owner); if (status) @@ -2012,6 +2095,58 @@ static bool ice_sched_is_leaf_node_present(struct ice_sched_node *node) return (node->info.data.elem_type == ICE_AQC_ELEM_TYPE_LEAF); } +/** + * ice_sched_rm_vsi_subtree - remove all nodes assigned to a given VSI + * @pi: port information structure + * @vsi_node: pointer to the leftmost node of the VSI to be removed + * @owner: LAN or RDMA + * @tc: TC number + * + * Return: Zero in case of success, or -EBUSY if the VSI has leaf nodes in TC. + * + * This function removes all the VSI support nodes associated with a given VSI + * and its LAN or RDMA children nodes from the scheduler tree. + */ +static int +ice_sched_rm_vsi_subtree(struct ice_port_info *pi, + struct ice_sched_node *vsi_node, u8 owner, u8 tc) +{ + u16 vsi_handle = vsi_node->vsi_handle; + bool all_vsi_nodes_removed = true; + int j = 0; + + while (vsi_node) { + struct ice_sched_node *next_vsi_node; + + if (ice_sched_is_leaf_node_present(vsi_node)) { + ice_debug(pi->hw, ICE_DBG_SCHED, "VSI has leaf nodes in TC %d\n", tc); + return -EBUSY; + } + while (j < vsi_node->num_children) { + if (vsi_node->children[j]->owner == owner) + ice_free_sched_node(pi, vsi_node->children[j]); + else + j++; + } + + next_vsi_node = ice_sched_find_next_vsi_node(vsi_node); + + /* remove the VSI if it has no children */ + if (!vsi_node->num_children) + ice_free_sched_node(pi, vsi_node); + else + all_vsi_nodes_removed = false; + + vsi_node = next_vsi_node; + } + + /* clean up aggregator related VSI info if any */ + if (all_vsi_nodes_removed) + ice_sched_rm_agg_vsi_info(pi, vsi_handle); + + return 0; +} + /** * ice_sched_rm_vsi_cfg - remove the VSI and its children nodes * @pi: port information structure @@ -2038,7 +2173,6 @@ ice_sched_rm_vsi_cfg(struct ice_port_info *pi, u16 vsi_handle, u8 owner) ice_for_each_traffic_class(i) { struct ice_sched_node *vsi_node, *tc_node; - u8 j = 0; tc_node = ice_sched_get_tc_node(pi, i); if (!tc_node) @@ -2048,31 +2182,12 @@ ice_sched_rm_vsi_cfg(struct ice_port_info *pi, u16 vsi_handle, u8 owner) if (!vsi_node) continue; - if (ice_sched_is_leaf_node_present(vsi_node)) { - ice_debug(pi->hw, ICE_DBG_SCHED, "VSI has leaf nodes in TC %d\n", i); - status = -EBUSY; + status = ice_sched_rm_vsi_subtree(pi, vsi_node, owner, i); + if (status) goto exit_sched_rm_vsi_cfg; - } - while (j < vsi_node->num_children) { - if (vsi_node->children[j]->owner == owner) { - ice_free_sched_node(pi, vsi_node->children[j]); - /* reset the counter again since the num - * children will be updated after node removal - */ - j = 0; - } else { - j++; - } - } - /* remove the VSI if it has no children */ - if (!vsi_node->num_children) { - ice_free_sched_node(pi, vsi_node); - vsi_ctx->sched.vsi_node[i] = NULL; + vsi_ctx->sched.vsi_node[i] = NULL; - /* clean up aggregator related VSI info if any */ - ice_sched_rm_agg_vsi_info(pi, vsi_handle); - } if (owner == ICE_SCHED_NODE_OWNER_LAN) vsi_ctx->sched.max_lanq[i] = 0; else diff --git a/drivers/net/ethernet/intel/ice/ice_sf_eth.c b/drivers/net/ethernet/intel/ice/ice_sf_eth.c index 75d7147e1c..1a2c94375c 100644 --- a/drivers/net/ethernet/intel/ice/ice_sf_eth.c +++ b/drivers/net/ethernet/intel/ice/ice_sf_eth.c @@ -5,8 +5,8 @@ #include "ice_txrx.h" #include "ice_fltr.h" #include "ice_sf_eth.h" -#include "devlink/devlink_port.h" #include "devlink/devlink.h" +#include "devlink/port.h" static const struct net_device_ops ice_sf_netdev_ops = { .ndo_open = ice_open, diff --git a/drivers/net/ethernet/intel/ice/ice_switch.c b/drivers/net/ethernet/intel/ice/ice_switch.c index 0e740342e2..4a91e0aaf0 100644 --- a/drivers/net/ethernet/intel/ice/ice_switch.c +++ b/drivers/net/ethernet/intel/ice/ice_switch.c @@ -4784,7 +4784,8 @@ ice_find_recp(struct ice_hw *hw, struct ice_prot_lkup_ext *lkup_exts, */ if (found && recp[i].tun_type == rinfo->tun_type && recp[i].need_pass_l2 == rinfo->need_pass_l2 && - recp[i].allow_pass_l2 == rinfo->allow_pass_l2) + recp[i].allow_pass_l2 == rinfo->allow_pass_l2 && + recp[i].priority == rinfo->priority) return i; /* Return the recipe ID */ } } diff --git a/drivers/net/ethernet/intel/ice/ice_tspll.c b/drivers/net/ethernet/intel/ice/ice_tspll.c new file mode 100644 index 0000000000..66320a4ab8 --- /dev/null +++ b/drivers/net/ethernet/intel/ice/ice_tspll.c @@ -0,0 +1,626 @@ +// SPDX-License-Identifier: GPL-2.0 +/* Copyright (c) 2025, Intel Corporation. */ + +#include "ice.h" +#include "ice_lib.h" +#include "ice_ptp_hw.h" + +static const struct +ice_tspll_params_e82x e82x_tspll_params[NUM_ICE_TSPLL_FREQ] = { + [ICE_TSPLL_FREQ_25_000] = { + .refclk_pre_div = 1, + .post_pll_div = 6, + .feedback_div = 197, + .frac_n_div = 2621440, + }, + [ICE_TSPLL_FREQ_122_880] = { + .refclk_pre_div = 5, + .post_pll_div = 7, + .feedback_div = 223, + .frac_n_div = 524288 + }, + [ICE_TSPLL_FREQ_125_000] = { + .refclk_pre_div = 5, + .post_pll_div = 7, + .feedback_div = 223, + .frac_n_div = 524288 + }, + [ICE_TSPLL_FREQ_153_600] = { + .refclk_pre_div = 5, + .post_pll_div = 6, + .feedback_div = 159, + .frac_n_div = 1572864 + }, + [ICE_TSPLL_FREQ_156_250] = { + .refclk_pre_div = 5, + .post_pll_div = 6, + .feedback_div = 159, + .frac_n_div = 1572864 + }, + [ICE_TSPLL_FREQ_245_760] = { + .refclk_pre_div = 10, + .post_pll_div = 7, + .feedback_div = 223, + .frac_n_div = 524288 + }, +}; + +/** + * ice_tspll_clk_freq_str - Convert time_ref_freq to string + * @clk_freq: Clock frequency + * + * Return: specified TIME_REF clock frequency converted to a string. + */ +static const char *ice_tspll_clk_freq_str(enum ice_tspll_freq clk_freq) +{ + switch (clk_freq) { + case ICE_TSPLL_FREQ_25_000: + return "25 MHz"; + case ICE_TSPLL_FREQ_122_880: + return "122.88 MHz"; + case ICE_TSPLL_FREQ_125_000: + return "125 MHz"; + case ICE_TSPLL_FREQ_153_600: + return "153.6 MHz"; + case ICE_TSPLL_FREQ_156_250: + return "156.25 MHz"; + case ICE_TSPLL_FREQ_245_760: + return "245.76 MHz"; + default: + return "Unknown"; + } +} + +/** + * ice_tspll_default_freq - Return default frequency for a MAC type + * @mac_type: MAC type + * + * Return: default TSPLL frequency for a correct MAC type, -ERANGE otherwise. + */ +static enum ice_tspll_freq ice_tspll_default_freq(enum ice_mac_type mac_type) +{ + switch (mac_type) { + case ICE_MAC_GENERIC: + return ICE_TSPLL_FREQ_25_000; + case ICE_MAC_GENERIC_3K_E825: + return ICE_TSPLL_FREQ_156_250; + default: + return -ERANGE; + } +} + +/** + * ice_tspll_check_params - Check if TSPLL params are correct + * @hw: Pointer to the HW struct + * @clk_freq: Clock frequency to program + * @clk_src: Clock source to select (TIME_REF or TCXO) + * + * Return: true if TSPLL params are correct, false otherwise. + */ +static bool ice_tspll_check_params(struct ice_hw *hw, + enum ice_tspll_freq clk_freq, + enum ice_clk_src clk_src) +{ + if (clk_freq >= NUM_ICE_TSPLL_FREQ) { + dev_warn(ice_hw_to_dev(hw), "Invalid TSPLL frequency %u\n", + clk_freq); + return false; + } + + if (clk_src >= NUM_ICE_CLK_SRC) { + dev_warn(ice_hw_to_dev(hw), "Invalid clock source %u\n", + clk_src); + return false; + } + + if ((hw->mac_type == ICE_MAC_GENERIC_3K_E825 || + clk_src == ICE_CLK_SRC_TCXO) && + clk_freq != ice_tspll_default_freq(hw->mac_type)) { + dev_warn(ice_hw_to_dev(hw), "Unsupported frequency for this clock source\n"); + return false; + } + + return true; +} + +/** + * ice_tspll_clk_src_str - Convert time_ref_src to string + * @clk_src: Clock source + * + * Return: specified clock source converted to its string name + */ +static const char *ice_tspll_clk_src_str(enum ice_clk_src clk_src) +{ + switch (clk_src) { + case ICE_CLK_SRC_TCXO: + return "TCXO"; + case ICE_CLK_SRC_TIME_REF: + return "TIME_REF"; + default: + return "Unknown"; + } +} + +/** + * ice_tspll_log_cfg - Log current/new TSPLL configuration + * @hw: Pointer to the HW struct + * @enable: CGU enabled/disabled + * @clk_src: Current clock source + * @tspll_freq: Current clock frequency + * @lock: CGU lock status + * @new_cfg: true if this is a new config + */ +static void ice_tspll_log_cfg(struct ice_hw *hw, bool enable, u8 clk_src, + u8 tspll_freq, bool lock, bool new_cfg) +{ + dev_dbg(ice_hw_to_dev(hw), + "%s TSPLL configuration -- %s, src %s, freq %s, PLL %s\n", + new_cfg ? "New" : "Current", str_enabled_disabled(enable), + ice_tspll_clk_src_str((enum ice_clk_src)clk_src), + ice_tspll_clk_freq_str((enum ice_tspll_freq)tspll_freq), + lock ? "locked" : "unlocked"); +} + +/** + * ice_tspll_cfg_e82x - Configure the Clock Generation Unit TSPLL + * @hw: Pointer to the HW struct + * @clk_freq: Clock frequency to program + * @clk_src: Clock source to select (TIME_REF, or TCXO) + * + * Configure the Clock Generation Unit with the desired clock frequency and + * time reference, enabling the PLL which drives the PTP hardware clock. + * + * Return: + * * %0 - success + * * %-EINVAL - input parameters are incorrect + * * %-EBUSY - failed to lock TSPLL + * * %other - CGU read/write failure + */ +static int ice_tspll_cfg_e82x(struct ice_hw *hw, enum ice_tspll_freq clk_freq, + enum ice_clk_src clk_src) +{ + u32 val, r9, r24; + int err; + + err = ice_read_cgu_reg(hw, ICE_CGU_R9, &r9); + if (err) + return err; + + err = ice_read_cgu_reg(hw, ICE_CGU_R24, &r24); + if (err) + return err; + + err = ice_read_cgu_reg(hw, ICE_CGU_RO_BWM_LF, &val); + if (err) + return err; + + ice_tspll_log_cfg(hw, !!FIELD_GET(ICE_CGU_R23_R24_TSPLL_ENABLE, r24), + FIELD_GET(ICE_CGU_R23_R24_TIME_REF_SEL, r24), + FIELD_GET(ICE_CGU_R9_TIME_REF_FREQ_SEL, r9), + !!FIELD_GET(ICE_CGU_RO_BWM_LF_TRUE_LOCK, val), + false); + + /* Disable the PLL before changing the clock source or frequency */ + if (FIELD_GET(ICE_CGU_R23_R24_TSPLL_ENABLE, r24)) { + r24 &= ~ICE_CGU_R23_R24_TSPLL_ENABLE; + + err = ice_write_cgu_reg(hw, ICE_CGU_R24, r24); + if (err) + return err; + } + + /* Set the frequency */ + r9 &= ~ICE_CGU_R9_TIME_REF_FREQ_SEL; + r9 |= FIELD_PREP(ICE_CGU_R9_TIME_REF_FREQ_SEL, clk_freq); + err = ice_write_cgu_reg(hw, ICE_CGU_R9, r9); + if (err) + return err; + + /* Configure the TSPLL feedback divisor */ + err = ice_read_cgu_reg(hw, ICE_CGU_R19, &val); + if (err) + return err; + + val &= ~(ICE_CGU_R19_TSPLL_FBDIV_INTGR_E82X | ICE_CGU_R19_TSPLL_NDIVRATIO); + val |= FIELD_PREP(ICE_CGU_R19_TSPLL_FBDIV_INTGR_E82X, + e82x_tspll_params[clk_freq].feedback_div); + val |= FIELD_PREP(ICE_CGU_R19_TSPLL_NDIVRATIO, 1); + + err = ice_write_cgu_reg(hw, ICE_CGU_R19, val); + if (err) + return err; + + /* Configure the TSPLL post divisor */ + err = ice_read_cgu_reg(hw, ICE_CGU_R22, &val); + if (err) + return err; + + val &= ~(ICE_CGU_R22_TIME1588CLK_DIV | + ICE_CGU_R22_TIME1588CLK_DIV2); + val |= FIELD_PREP(ICE_CGU_R22_TIME1588CLK_DIV, + e82x_tspll_params[clk_freq].post_pll_div); + + err = ice_write_cgu_reg(hw, ICE_CGU_R22, val); + if (err) + return err; + + /* Configure the TSPLL pre divisor and clock source */ + err = ice_read_cgu_reg(hw, ICE_CGU_R24, &r24); + if (err) + return err; + + r24 &= ~(ICE_CGU_R23_R24_REF1588_CK_DIV | ICE_CGU_R24_FBDIV_FRAC | + ICE_CGU_R23_R24_TIME_REF_SEL); + r24 |= FIELD_PREP(ICE_CGU_R23_R24_REF1588_CK_DIV, + e82x_tspll_params[clk_freq].refclk_pre_div); + r24 |= FIELD_PREP(ICE_CGU_R24_FBDIV_FRAC, + e82x_tspll_params[clk_freq].frac_n_div); + r24 |= FIELD_PREP(ICE_CGU_R23_R24_TIME_REF_SEL, clk_src); + + err = ice_write_cgu_reg(hw, ICE_CGU_R24, r24); + if (err) + return err; + + /* Wait to ensure everything is stable */ + usleep_range(10, 20); + + /* Finally, enable the PLL */ + r24 |= ICE_CGU_R23_R24_TSPLL_ENABLE; + + err = ice_write_cgu_reg(hw, ICE_CGU_R24, r24); + if (err) + return err; + + /* Wait at least 1 ms to verify if the PLL locks */ + usleep_range(USEC_PER_MSEC, 2 * USEC_PER_MSEC); + + err = ice_read_cgu_reg(hw, ICE_CGU_RO_BWM_LF, &val); + if (err) + return err; + + if (!(val & ICE_CGU_RO_BWM_LF_TRUE_LOCK)) { + dev_warn(ice_hw_to_dev(hw), "CGU PLL failed to lock\n"); + return -EBUSY; + } + + err = ice_read_cgu_reg(hw, ICE_CGU_R9, &r9); + if (err) + return err; + err = ice_read_cgu_reg(hw, ICE_CGU_R24, &r24); + if (err) + return err; + + ice_tspll_log_cfg(hw, !!FIELD_GET(ICE_CGU_R23_R24_TSPLL_ENABLE, r24), + FIELD_GET(ICE_CGU_R23_R24_TIME_REF_SEL, r24), + FIELD_GET(ICE_CGU_R9_TIME_REF_FREQ_SEL, r9), + true, true); + + return 0; +} + +/** + * ice_tspll_dis_sticky_bits_e82x - disable TSPLL sticky bits + * @hw: Pointer to the HW struct + * + * Configure the Clock Generation Unit TSPLL sticky bits so they don't latch on + * losing TSPLL lock, but always show current state. + * + * Return: 0 on success, other error codes when failed to read/write CGU. + */ +static int ice_tspll_dis_sticky_bits_e82x(struct ice_hw *hw) +{ + u32 val; + int err; + + err = ice_read_cgu_reg(hw, ICE_CGU_CNTR_BIST, &val); + if (err) + return err; + + val &= ~(ICE_CGU_CNTR_BIST_PLLLOCK_SEL_0 | + ICE_CGU_CNTR_BIST_PLLLOCK_SEL_1); + + return ice_write_cgu_reg(hw, ICE_CGU_CNTR_BIST, val); +} + +/** + * ice_tspll_cfg_e825c - Configure the TSPLL for E825-C + * @hw: Pointer to the HW struct + * @clk_freq: Clock frequency to program + * @clk_src: Clock source to select (TIME_REF, or TCXO) + * + * Configure the Clock Generation Unit with the desired clock frequency and + * time reference, enabling the PLL which drives the PTP hardware clock. + * + * Return: + * * %0 - success + * * %-EINVAL - input parameters are incorrect + * * %-EBUSY - failed to lock TSPLL + * * %other - CGU read/write failure + */ +static int ice_tspll_cfg_e825c(struct ice_hw *hw, enum ice_tspll_freq clk_freq, + enum ice_clk_src clk_src) +{ + u32 val, r9, r23; + int err; + + err = ice_read_cgu_reg(hw, ICE_CGU_R9, &r9); + if (err) + return err; + + err = ice_read_cgu_reg(hw, ICE_CGU_R23, &r23); + if (err) + return err; + + err = ice_read_cgu_reg(hw, ICE_CGU_RO_LOCK, &val); + if (err) + return err; + + ice_tspll_log_cfg(hw, !!FIELD_GET(ICE_CGU_R23_R24_TSPLL_ENABLE, r23), + FIELD_GET(ICE_CGU_R23_R24_TIME_REF_SEL, r23), + FIELD_GET(ICE_CGU_R9_TIME_REF_FREQ_SEL, r9), + !!FIELD_GET(ICE_CGU_RO_LOCK_TRUE_LOCK, val), + false); + + /* Disable the PLL before changing the clock source or frequency */ + if (FIELD_GET(ICE_CGU_R23_R24_TSPLL_ENABLE, r23)) { + r23 &= ~ICE_CGU_R23_R24_TSPLL_ENABLE; + + err = ice_write_cgu_reg(hw, ICE_CGU_R23, r23); + if (err) + return err; + } + + if (FIELD_GET(ICE_CGU_R9_TIME_SYNC_EN, r9)) { + r9 &= ~ICE_CGU_R9_TIME_SYNC_EN; + + err = ice_write_cgu_reg(hw, ICE_CGU_R9, r9); + if (err) + return err; + } + + /* Set the frequency and enable the correct receiver */ + r9 &= ~(ICE_CGU_R9_TIME_REF_FREQ_SEL | ICE_CGU_R9_CLK_EREF0_EN | + ICE_CGU_R9_TIME_REF_EN); + r9 |= FIELD_PREP(ICE_CGU_R9_TIME_REF_FREQ_SEL, clk_freq); + if (clk_src == ICE_CLK_SRC_TCXO) + r9 |= ICE_CGU_R9_CLK_EREF0_EN; + else + r9 |= ICE_CGU_R9_TIME_REF_EN; + r9 |= ICE_CGU_R9_TIME_SYNC_EN; + err = ice_write_cgu_reg(hw, ICE_CGU_R9, r9); + if (err) + return err; + + /* Choose the referenced frequency */ + err = ice_read_cgu_reg(hw, ICE_CGU_R16, &val); + if (err) + return err; + val &= ~ICE_CGU_R16_TSPLL_CK_REFCLKFREQ; + val |= FIELD_PREP(ICE_CGU_R16_TSPLL_CK_REFCLKFREQ, + ICE_TSPLL_CK_REFCLKFREQ_E825); + err = ice_write_cgu_reg(hw, ICE_CGU_R16, val); + if (err) + return err; + + /* Configure the TSPLL feedback divisor */ + err = ice_read_cgu_reg(hw, ICE_CGU_R19, &val); + if (err) + return err; + + val &= ~(ICE_CGU_R19_TSPLL_FBDIV_INTGR_E825 | + ICE_CGU_R19_TSPLL_NDIVRATIO); + val |= FIELD_PREP(ICE_CGU_R19_TSPLL_FBDIV_INTGR_E825, + ICE_TSPLL_FBDIV_INTGR_E825); + val |= FIELD_PREP(ICE_CGU_R19_TSPLL_NDIVRATIO, + ICE_TSPLL_NDIVRATIO_E825); + + err = ice_write_cgu_reg(hw, ICE_CGU_R19, val); + if (err) + return err; + + /* Configure the TSPLL post divisor, these two are constant */ + err = ice_read_cgu_reg(hw, ICE_CGU_R22, &val); + if (err) + return err; + + val &= ~(ICE_CGU_R22_TIME1588CLK_DIV | + ICE_CGU_R22_TIME1588CLK_DIV2); + val |= FIELD_PREP(ICE_CGU_R22_TIME1588CLK_DIV, 5); + + err = ice_write_cgu_reg(hw, ICE_CGU_R22, val); + if (err) + return err; + + /* Configure the TSPLL pre divisor (constant) and clock source */ + err = ice_read_cgu_reg(hw, ICE_CGU_R23, &r23); + if (err) + return err; + + r23 &= ~(ICE_CGU_R23_R24_REF1588_CK_DIV | ICE_CGU_R23_R24_TIME_REF_SEL); + r23 |= FIELD_PREP(ICE_CGU_R23_R24_TIME_REF_SEL, clk_src); + + err = ice_write_cgu_reg(hw, ICE_CGU_R23, r23); + if (err) + return err; + + /* Clear the R24 register. */ + err = ice_write_cgu_reg(hw, ICE_CGU_R24, 0); + if (err) + return err; + + /* Wait to ensure everything is stable */ + usleep_range(10, 20); + + /* Finally, enable the PLL */ + r23 |= ICE_CGU_R23_R24_TSPLL_ENABLE; + + err = ice_write_cgu_reg(hw, ICE_CGU_R23, r23); + if (err) + return err; + + /* Wait at least 1 ms to verify if the PLL locks */ + usleep_range(USEC_PER_MSEC, 2 * USEC_PER_MSEC); + + err = ice_read_cgu_reg(hw, ICE_CGU_RO_LOCK, &val); + if (err) + return err; + + if (!(val & ICE_CGU_RO_LOCK_TRUE_LOCK)) { + dev_warn(ice_hw_to_dev(hw), "CGU PLL failed to lock\n"); + return -EBUSY; + } + + err = ice_read_cgu_reg(hw, ICE_CGU_R9, &r9); + if (err) + return err; + err = ice_read_cgu_reg(hw, ICE_CGU_R23, &r23); + if (err) + return err; + + ice_tspll_log_cfg(hw, !!FIELD_GET(ICE_CGU_R23_R24_TSPLL_ENABLE, r23), + FIELD_GET(ICE_CGU_R23_R24_TIME_REF_SEL, r23), + FIELD_GET(ICE_CGU_R9_TIME_REF_FREQ_SEL, r9), + true, true); + + return 0; +} + +/** + * ice_tspll_dis_sticky_bits_e825c - disable TSPLL sticky bits for E825-C + * @hw: Pointer to the HW struct + * + * Configure the Clock Generation Unit TSPLL sticky bits so they don't latch on + * losing TSPLL lock, but always show current state. + * + * Return: 0 on success, other error codes when failed to read/write CGU. + */ +static int ice_tspll_dis_sticky_bits_e825c(struct ice_hw *hw) +{ + u32 val; + int err; + + err = ice_read_cgu_reg(hw, ICE_CGU_BW_TDC, &val); + if (err) + return err; + + val &= ~ICE_CGU_BW_TDC_PLLLOCK_SEL; + + return ice_write_cgu_reg(hw, ICE_CGU_BW_TDC, val); +} + +/** + * ice_tspll_cfg_pps_out_e825c - Enable/disable 1PPS output and set amplitude + * @hw: pointer to the HW struct + * @enable: true to enable 1PPS output, false to disable it + * + * Return: 0 on success, other negative error code when CGU read/write failed. + */ +int ice_tspll_cfg_pps_out_e825c(struct ice_hw *hw, bool enable) +{ + u32 val; + int err; + + err = ice_read_cgu_reg(hw, ICE_CGU_R9, &val); + if (err) + return err; + + val &= ~(ICE_CGU_R9_ONE_PPS_OUT_EN | ICE_CGU_R9_ONE_PPS_OUT_AMP); + val |= FIELD_PREP(ICE_CGU_R9_ONE_PPS_OUT_EN, enable) | + ICE_CGU_R9_ONE_PPS_OUT_AMP; + + return ice_write_cgu_reg(hw, ICE_CGU_R9, val); +} + +/** + * ice_tspll_cfg - Configure the Clock Generation Unit TSPLL + * @hw: Pointer to the HW struct + * @clk_freq: Clock frequency to program + * @clk_src: Clock source to select (TIME_REF, or TCXO) + * + * Configure the Clock Generation Unit with the desired clock frequency and + * time reference, enabling the TSPLL which drives the PTP hardware clock. + * + * Return: 0 on success, -ERANGE on unsupported MAC type, other negative error + * codes when failed to configure CGU. + */ +static int ice_tspll_cfg(struct ice_hw *hw, enum ice_tspll_freq clk_freq, + enum ice_clk_src clk_src) +{ + switch (hw->mac_type) { + case ICE_MAC_GENERIC: + return ice_tspll_cfg_e82x(hw, clk_freq, clk_src); + case ICE_MAC_GENERIC_3K_E825: + return ice_tspll_cfg_e825c(hw, clk_freq, clk_src); + default: + return -ERANGE; + } +} + +/** + * ice_tspll_dis_sticky_bits - disable TSPLL sticky bits + * @hw: Pointer to the HW struct + * + * Configure the Clock Generation Unit TSPLL sticky bits so they don't latch on + * losing TSPLL lock, but always show current state. + * + * Return: 0 on success, -ERANGE on unsupported MAC type. + */ +static int ice_tspll_dis_sticky_bits(struct ice_hw *hw) +{ + switch (hw->mac_type) { + case ICE_MAC_GENERIC: + return ice_tspll_dis_sticky_bits_e82x(hw); + case ICE_MAC_GENERIC_3K_E825: + return ice_tspll_dis_sticky_bits_e825c(hw); + default: + return -ERANGE; + } +} + +/** + * ice_tspll_init - Initialize TSPLL with settings from firmware + * @hw: Pointer to the HW structure + * + * Initialize the Clock Generation Unit of the E82X/E825 device. + * + * Return: 0 on success, other error codes when failed to read/write/cfg CGU. + */ +int ice_tspll_init(struct ice_hw *hw) +{ + struct ice_ts_func_info *ts_info = &hw->func_caps.ts_func_info; + enum ice_tspll_freq tspll_freq; + enum ice_clk_src clk_src; + int err; + + /* Only E822, E823 and E825 products support TSPLL */ + if (hw->mac_type != ICE_MAC_GENERIC && + hw->mac_type != ICE_MAC_GENERIC_3K_E825) + return 0; + + tspll_freq = (enum ice_tspll_freq)ts_info->time_ref; + clk_src = (enum ice_clk_src)ts_info->clk_src; + if (!ice_tspll_check_params(hw, tspll_freq, clk_src)) + return -EINVAL; + + /* Disable sticky lock detection so lock status reported is accurate */ + err = ice_tspll_dis_sticky_bits(hw); + if (err) + return err; + + /* Configure the TSPLL using the parameters from the function + * capabilities. + */ + err = ice_tspll_cfg(hw, tspll_freq, clk_src); + if (err) { + dev_warn(ice_hw_to_dev(hw), "Failed to lock TSPLL to predefined frequency. Retrying with fallback frequency.\n"); + + /* Try to lock to internal TCXO as a fallback. */ + tspll_freq = ice_tspll_default_freq(hw->mac_type); + clk_src = ICE_CLK_SRC_TCXO; + err = ice_tspll_cfg(hw, tspll_freq, clk_src); + if (err) + dev_warn(ice_hw_to_dev(hw), "Failed to lock TSPLL to fallback frequency.\n"); + } + + return err; +} diff --git a/drivers/net/ethernet/intel/ice/ice_tspll.h b/drivers/net/ethernet/intel/ice/ice_tspll.h new file mode 100644 index 0000000000..c0b1232cc0 --- /dev/null +++ b/drivers/net/ethernet/intel/ice/ice_tspll.h @@ -0,0 +1,31 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +/* Copyright (c) 2025, Intel Corporation. */ + +#ifndef _ICE_TSPLL_H_ +#define _ICE_TSPLL_H_ + +/** + * struct ice_tspll_params_e82x - E82X TSPLL parameters + * @refclk_pre_div: Reference clock pre-divisor + * @post_pll_div: Post PLL divisor + * @feedback_div: Feedback divisor + * @frac_n_div: Fractional divisor + * + * Clock Generation Unit parameters used to program the PLL based on the + * selected TIME_REF/TCXO frequency. + */ +struct ice_tspll_params_e82x { + u8 refclk_pre_div; + u8 post_pll_div; + u8 feedback_div; + u32 frac_n_div; +}; + +#define ICE_TSPLL_CK_REFCLKFREQ_E825 0x1F +#define ICE_TSPLL_NDIVRATIO_E825 5 +#define ICE_TSPLL_FBDIV_INTGR_E825 256 + +int ice_tspll_cfg_pps_out_e825c(struct ice_hw *hw, bool enable); +int ice_tspll_init(struct ice_hw *hw); + +#endif /* _ICE_TSPLL_H_ */ diff --git a/drivers/net/ethernet/intel/ice/ice_txrx.c b/drivers/net/ethernet/intel/ice/ice_txrx.c index f12fb3a2b6..75e4a87519 100644 --- a/drivers/net/ethernet/intel/ice/ice_txrx.c +++ b/drivers/net/ethernet/intel/ice/ice_txrx.c @@ -865,10 +865,6 @@ ice_add_xdp_frag(struct ice_rx_ring *rx_ring, struct xdp_buff *xdp, __skb_fill_page_desc_noacc(sinfo, sinfo->nr_frags++, rx_buf->page, rx_buf->page_offset, size); sinfo->xdp_frags_size += size; - /* remember frag count before XDP prog execution; bpf_xdp_adjust_tail() - * can pop off frags but driver has to handle it on its own - */ - rx_ring->nr_frags = sinfo->nr_frags; if (page_is_pfmemalloc(rx_buf->page)) xdp_buff_set_frag_pfmemalloc(xdp); @@ -939,20 +935,20 @@ ice_get_rx_buf(struct ice_rx_ring *rx_ring, const unsigned int size, /** * ice_get_pgcnts - grab page_count() for gathered fragments * @rx_ring: Rx descriptor ring to store the page counts on + * @ntc: the next to clean element (not included in this frame!) * * This function is intended to be called right before running XDP * program so that the page recycling mechanism will be able to take * a correct decision regarding underlying pages; this is done in such * way as XDP program can change the refcount of page */ -static void ice_get_pgcnts(struct ice_rx_ring *rx_ring) +static void ice_get_pgcnts(struct ice_rx_ring *rx_ring, unsigned int ntc) { - u32 nr_frags = rx_ring->nr_frags + 1; u32 idx = rx_ring->first_desc; struct ice_rx_buf *rx_buf; u32 cnt = rx_ring->count; - for (int i = 0; i < nr_frags; i++) { + while (idx != ntc) { rx_buf = &rx_ring->rx_buf[idx]; rx_buf->pgcnt = page_count(rx_buf->page); @@ -1125,62 +1121,51 @@ ice_put_rx_buf(struct ice_rx_ring *rx_ring, struct ice_rx_buf *rx_buf) } /** - * ice_put_rx_mbuf - ice_put_rx_buf() caller, for all frame frags + * ice_put_rx_mbuf - ice_put_rx_buf() caller, for all buffers in frame * @rx_ring: Rx ring with all the auxiliary data * @xdp: XDP buffer carrying linear + frags part - * @xdp_xmit: XDP_TX/XDP_REDIRECT verdict storage - * @ntc: a current next_to_clean value to be stored at rx_ring + * @ntc: the next to clean element (not included in this frame!) * @verdict: return code from XDP program execution * - * Walk through gathered fragments and satisfy internal page - * recycle mechanism; we take here an action related to verdict - * returned by XDP program; + * Called after XDP program is completed, or on error with verdict set to + * ICE_XDP_CONSUMED. + * + * Walk through buffers from first_desc to the end of the frame, releasing + * buffers and satisfying internal page recycle mechanism. The action depends + * on verdict from XDP program. */ static void ice_put_rx_mbuf(struct ice_rx_ring *rx_ring, struct xdp_buff *xdp, - u32 *xdp_xmit, u32 ntc, u32 verdict) + u32 ntc, u32 verdict) { - u32 nr_frags = rx_ring->nr_frags + 1; u32 idx = rx_ring->first_desc; u32 cnt = rx_ring->count; - u32 post_xdp_frags = 1; struct ice_rx_buf *buf; - int i; + u32 xdp_frags = 0; + int i = 0; if (unlikely(xdp_buff_has_frags(xdp))) - post_xdp_frags += xdp_get_shared_info_from_buff(xdp)->nr_frags; + xdp_frags = xdp_get_shared_info_from_buff(xdp)->nr_frags; - for (i = 0; i < post_xdp_frags; i++) { + while (idx != ntc) { buf = &rx_ring->rx_buf[idx]; + if (++idx == cnt) + idx = 0; - if (verdict & (ICE_XDP_TX | ICE_XDP_REDIR)) { + /* An XDP program could release fragments from the end of the + * buffer. For these, we need to keep the pagecnt_bias as-is. + * To do this, only adjust pagecnt_bias for fragments up to + * the total remaining after the XDP program has run. + */ + if (verdict != ICE_XDP_CONSUMED) ice_rx_buf_adjust_pg_offset(buf, xdp->frame_sz); - *xdp_xmit |= verdict; - } else if (verdict & ICE_XDP_CONSUMED) { + else if (i++ <= xdp_frags) buf->pagecnt_bias++; - } else if (verdict == ICE_XDP_PASS) { - ice_rx_buf_adjust_pg_offset(buf, xdp->frame_sz); - } ice_put_rx_buf(rx_ring, buf); - - if (++idx == cnt) - idx = 0; - } - /* handle buffers that represented frags released by XDP prog; - * for these we keep pagecnt_bias as-is; refcount from struct page - * has been decremented within XDP prog and we do not have to increase - * the biased refcnt - */ - for (; i < nr_frags; i++) { - buf = &rx_ring->rx_buf[idx]; - ice_put_rx_buf(rx_ring, buf); - if (++idx == cnt) - idx = 0; } xdp->data = NULL; rx_ring->first_desc = ntc; - rx_ring->nr_frags = 0; } /** @@ -1260,6 +1245,10 @@ int ice_clean_rx_irq(struct ice_rx_ring *rx_ring, int budget) /* retrieve a buffer from the ring */ rx_buf = ice_get_rx_buf(rx_ring, size, ntc); + /* Increment ntc before calls to ice_put_rx_mbuf() */ + if (++ntc == cnt) + ntc = 0; + if (!xdp->data) { void *hard_start; @@ -1268,24 +1257,23 @@ int ice_clean_rx_irq(struct ice_rx_ring *rx_ring, int budget) xdp_prepare_buff(xdp, hard_start, offset, size, !!offset); xdp_buff_clear_frags_flag(xdp); } else if (ice_add_xdp_frag(rx_ring, xdp, rx_buf, size)) { - ice_put_rx_mbuf(rx_ring, xdp, NULL, ntc, ICE_XDP_CONSUMED); + ice_put_rx_mbuf(rx_ring, xdp, ntc, ICE_XDP_CONSUMED); break; } - if (++ntc == cnt) - ntc = 0; /* skip if it is NOP desc */ if (ice_is_non_eop(rx_ring, rx_desc)) continue; - ice_get_pgcnts(rx_ring); + ice_get_pgcnts(rx_ring, ntc); xdp_verdict = ice_run_xdp(rx_ring, xdp, xdp_prog, xdp_ring, rx_desc); if (xdp_verdict == ICE_XDP_PASS) goto construct_skb; total_rx_bytes += xdp_get_buff_len(xdp); total_rx_pkts++; - ice_put_rx_mbuf(rx_ring, xdp, &xdp_xmit, ntc, xdp_verdict); + ice_put_rx_mbuf(rx_ring, xdp, ntc, xdp_verdict); + xdp_xmit |= xdp_verdict & (ICE_XDP_TX | ICE_XDP_REDIR); continue; construct_skb: @@ -1298,7 +1286,7 @@ construct_skb: rx_ring->ring_stats->rx_stats.alloc_page_failed++; xdp_verdict = ICE_XDP_CONSUMED; } - ice_put_rx_mbuf(rx_ring, xdp, &xdp_xmit, ntc, xdp_verdict); + ice_put_rx_mbuf(rx_ring, xdp, ntc, xdp_verdict); if (!skb) break; @@ -1809,6 +1797,7 @@ dma_error: static int ice_tx_csum(struct ice_tx_buf *first, struct ice_tx_offload_params *off) { + const struct ice_tx_ring *tx_ring = off->tx_ring; u32 l4_len = 0, l3_len = 0, l2_len = 0; struct sk_buff *skb = first->skb; union { @@ -1958,6 +1947,30 @@ int ice_tx_csum(struct ice_tx_buf *first, struct ice_tx_offload_params *off) l3_len = l4.hdr - ip.hdr; offset |= (l3_len / 4) << ICE_TX_DESC_LEN_IPLEN_S; + if ((tx_ring->netdev->features & NETIF_F_HW_CSUM) && + !(first->tx_flags & ICE_TX_FLAGS_TSO) && + !skb_csum_is_sctp(skb)) { + /* Set GCS */ + u16 csum_start = (skb->csum_start - skb->mac_header) / 2; + u16 csum_offset = skb->csum_offset / 2; + u16 gcs_params; + + gcs_params = FIELD_PREP(ICE_TX_GCS_DESC_START_M, csum_start) | + FIELD_PREP(ICE_TX_GCS_DESC_OFFSET_M, csum_offset) | + FIELD_PREP(ICE_TX_GCS_DESC_TYPE_M, + ICE_TX_GCS_DESC_CSUM_PSH); + + /* Unlike legacy HW checksums, GCS requires a context + * descriptor. + */ + off->cd_qw1 |= ICE_TX_DESC_DTYPE_CTX; + off->cd_gcs_params = gcs_params; + /* Fill out CSO info in data descriptors */ + off->td_offset |= offset; + off->td_cmd |= cmd; + return 1; + } + /* Enable L4 checksum offloads */ switch (l4_proto) { case IPPROTO_TCP: @@ -2424,7 +2437,9 @@ ice_xmit_frame_ring(struct sk_buff *skb, struct ice_tx_ring *tx_ring) ICE_TXD_CTX_QW1_CMD_S); ice_tstamp(tx_ring, skb, first, &offload); - if (ice_is_switchdev_running(vsi->back) && vsi->type != ICE_VSI_SF) + if ((ice_is_switchdev_running(vsi->back) || + ice_lag_is_switchdev_running(vsi->back)) && + vsi->type != ICE_VSI_SF) ice_eswitch_set_target_vsi(skb, &offload); if (offload.cd_qw1 & ICE_TX_DESC_DTYPE_CTX) { @@ -2439,7 +2454,7 @@ ice_xmit_frame_ring(struct sk_buff *skb, struct ice_tx_ring *tx_ring) /* setup context descriptor */ cdesc->tunneling_params = cpu_to_le32(offload.cd_tunnel_params); cdesc->l2tag2 = cpu_to_le16(offload.cd_l2tag2); - cdesc->rsvd = cpu_to_le16(0); + cdesc->gcs = cpu_to_le16(offload.cd_gcs_params); cdesc->qw1 = cpu_to_le64(offload.cd_qw1); } diff --git a/drivers/net/ethernet/intel/ice/ice_txrx.h b/drivers/net/ethernet/intel/ice/ice_txrx.h index 7130992d41..07155e615f 100644 --- a/drivers/net/ethernet/intel/ice/ice_txrx.h +++ b/drivers/net/ethernet/intel/ice/ice_txrx.h @@ -193,6 +193,7 @@ struct ice_tx_offload_params { u32 td_l2tag1; u32 cd_tunnel_params; u16 cd_l2tag2; + u16 cd_gcs_params; u8 header_len; }; @@ -357,14 +358,15 @@ struct ice_rx_ring { struct ice_tx_ring *xdp_ring; struct ice_rx_ring *next; /* pointer to next ring in q_vector */ struct xsk_buff_pool *xsk_pool; - u32 nr_frags; - dma_addr_t dma; /* physical address of ring */ + u16 max_frame; u16 rx_buf_len; + dma_addr_t dma; /* physical address of ring */ u8 dcb_tc; /* Traffic class of ring */ u8 ptp_rx; #define ICE_RX_FLAGS_RING_BUILD_SKB BIT(1) #define ICE_RX_FLAGS_CRC_STRIP_DIS BIT(2) #define ICE_RX_FLAGS_MULTIDEV BIT(3) +#define ICE_RX_FLAGS_RING_GCS BIT(4) u8 flags; /* CL5 - 5th cacheline starts here */ struct xdp_rxq_info xdp_rxq; @@ -405,6 +407,7 @@ struct ice_tx_ring { #define ICE_TX_FLAGS_RING_VLAN_L2TAG2 BIT(2) u8 flags; u8 dcb_tc; /* Traffic class of ring */ + u16 quanta_prof_id; } ____cacheline_internodealigned_in_smp; static inline bool ice_ring_uses_build_skb(struct ice_rx_ring *ring) diff --git a/drivers/net/ethernet/intel/ice/ice_txrx_lib.c b/drivers/net/ethernet/intel/ice/ice_txrx_lib.c index 2719f0e209..45cfaabc41 100644 --- a/drivers/net/ethernet/intel/ice/ice_txrx_lib.c +++ b/drivers/net/ethernet/intel/ice/ice_txrx_lib.c @@ -80,6 +80,23 @@ ice_rx_hash_to_skb(const struct ice_rx_ring *rx_ring, libeth_rx_pt_set_hash(skb, hash, decoded); } +/** + * ice_rx_gcs - Set generic checksum in skb + * @skb: skb currently being received and modified + * @rx_desc: receive descriptor + */ +static void ice_rx_gcs(struct sk_buff *skb, + const union ice_32b_rx_flex_desc *rx_desc) +{ + const struct ice_32b_rx_flex_desc_nic *desc; + u16 csum; + + desc = (struct ice_32b_rx_flex_desc_nic *)rx_desc; + skb->ip_summed = CHECKSUM_COMPLETE; + csum = (__force u16)desc->raw_csum; + skb->csum = csum_unfold((__force __sum16)swab16(csum)); +} + /** * ice_rx_csum - Indicate in skb if checksum is good * @ring: the ring we care about @@ -107,6 +124,15 @@ ice_rx_csum(struct ice_rx_ring *ring, struct sk_buff *skb, rx_status0 = le16_to_cpu(rx_desc->wb.status_error0); rx_status1 = le16_to_cpu(rx_desc->wb.status_error1); + if ((ring->flags & ICE_RX_FLAGS_RING_GCS) && + rx_desc->wb.rxdid == ICE_RXDID_FLEX_NIC && + (decoded.inner_prot == LIBETH_RX_PT_INNER_TCP || + decoded.inner_prot == LIBETH_RX_PT_INNER_UDP || + decoded.inner_prot == LIBETH_RX_PT_INNER_ICMP)) { + ice_rx_gcs(skb, rx_desc); + return; + } + /* check if HW has decoded the packet and checksum */ if (!(rx_status0 & BIT(ICE_RX_FLEX_DESC_STATUS0_L3L4P_S))) return; diff --git a/drivers/net/ethernet/intel/ice/ice_txrx_lib.h b/drivers/net/ethernet/intel/ice/ice_txrx_lib.h index f6c2b16ab4..6cf32b4041 100644 --- a/drivers/net/ethernet/intel/ice/ice_txrx_lib.h +++ b/drivers/net/ethernet/intel/ice/ice_txrx_lib.h @@ -111,7 +111,6 @@ static inline u32 ice_set_rs_bit(const struct ice_tx_ring *xdp_ring) } void ice_finalize_xdp_rx(struct ice_tx_ring *xdp_ring, unsigned int xdp_res, u32 first_idx); -int ice_xmit_xdp_buff(struct xdp_buff *xdp, struct ice_tx_ring *xdp_ring); int __ice_xmit_xdp_ring(struct xdp_buff *xdp, struct ice_tx_ring *xdp_ring, bool frame); void ice_release_rx_desc(struct ice_rx_ring *rx_ring, u16 val); diff --git a/drivers/net/ethernet/intel/ice/ice_type.h b/drivers/net/ethernet/intel/ice/ice_type.h index 6e59ce991f..e5db73b14a 100644 --- a/drivers/net/ethernet/intel/ice/ice_type.h +++ b/drivers/net/ethernet/intel/ice/ice_type.h @@ -325,17 +325,17 @@ struct ice_hw_common_caps { #define ICE_TS_TMR_IDX_ASSOC_M BIT(24) /* TIME_REF clock rate specification */ -enum ice_time_ref_freq { - ICE_TIME_REF_FREQ_25_000 = 0, - ICE_TIME_REF_FREQ_122_880 = 1, - ICE_TIME_REF_FREQ_125_000 = 2, - ICE_TIME_REF_FREQ_153_600 = 3, - ICE_TIME_REF_FREQ_156_250 = 4, - ICE_TIME_REF_FREQ_245_760 = 5, +enum ice_tspll_freq { + ICE_TSPLL_FREQ_25_000 = 0, + ICE_TSPLL_FREQ_122_880 = 1, + ICE_TSPLL_FREQ_125_000 = 2, + ICE_TSPLL_FREQ_153_600 = 3, + ICE_TSPLL_FREQ_156_250 = 4, + ICE_TSPLL_FREQ_245_760 = 5, - NUM_ICE_TIME_REF_FREQ, + NUM_ICE_TSPLL_FREQ, - ICE_TIME_REF_FREQ_INVALID = -1, + ICE_TSPLL_FREQ_INVALID = -1, }; /* Clock source specification */ @@ -348,7 +348,7 @@ enum ice_clk_src { struct ice_ts_func_info { /* Function specific info */ - enum ice_time_ref_freq time_ref; + enum ice_tspll_freq time_ref; u8 clk_freq; u8 clk_src; u8 tmr_index_assoc; @@ -861,7 +861,6 @@ struct ice_e810_params { struct ice_eth56g_params { u8 num_phys; - u8 phy_addr[2]; bool onestep_ena; bool sfd_ena; u32 peer_delay; @@ -872,14 +871,6 @@ union ice_phy_params { struct ice_eth56g_params eth56g; }; -/* PHY model */ -enum ice_phy_model { - ICE_PHY_UNSUP = -1, - ICE_PHY_E810 = 1, - ICE_PHY_E82X, - ICE_PHY_ETH56G, -}; - /* Global Link Topology */ enum ice_global_link_topo { ICE_LINK_TOPO_UP_TO_2_LINKS, @@ -889,11 +880,9 @@ enum ice_global_link_topo { }; struct ice_ptp_hw { - enum ice_phy_model phy_model; union ice_phy_params phy; u8 num_lports; u8 ports_per_phy; - bool is_2x50g_muxed_topo; }; /* Port hardware description */ @@ -917,6 +906,7 @@ struct ice_hw { u8 revision_id; u8 pf_id; /* device profile info */ + u8 logical_pf_id; u16 max_burst_size; /* driver sets this value */ @@ -980,6 +970,7 @@ struct ice_hw { u8 intrl_gran; struct ice_ptp_hw ptp; + s8 lane_num; /* Active package version (currently active) */ struct ice_pkg_ver active_pkg_ver; @@ -1227,4 +1218,9 @@ struct ice_aq_get_set_rss_lut_params { #define ICE_FW_API_REPORT_DFLT_CFG_MIN 7 #define ICE_FW_API_REPORT_DFLT_CFG_PATCH 3 +/* AQ API version for Health Status support */ +#define ICE_FW_API_HEALTH_REPORT_MAJ 1 +#define ICE_FW_API_HEALTH_REPORT_MIN 7 +#define ICE_FW_API_HEALTH_REPORT_PATCH 6 + #endif /* _ICE_TYPE_H_ */ diff --git a/drivers/net/ethernet/intel/ice/ice_vf_lib.h b/drivers/net/ethernet/intel/ice/ice_vf_lib.h index be42668996..799b2c1f11 100644 --- a/drivers/net/ethernet/intel/ice/ice_vf_lib.h +++ b/drivers/net/ethernet/intel/ice/ice_vf_lib.h @@ -59,6 +59,13 @@ struct ice_fdir_prof_info { u64 fdir_active_cnt; }; +struct ice_vf_qs_bw { + u32 committed; + u32 peak; + u16 queue_id; + u8 tc; +}; + /* VF operations */ struct ice_vf_ops { enum ice_disq_rst_src reset_type; @@ -117,6 +124,9 @@ struct ice_vf { u8 spoofchk:1; u8 link_forced:1; u8 link_up:1; /* only valid if VF link is forced */ + + u32 ptp_caps; + unsigned int min_tx_rate; /* Minimum Tx bandwidth limit in Mbps */ unsigned int max_tx_rate; /* Maximum Tx bandwidth limit in Mbps */ DECLARE_BITMAP(vf_states, ICE_VF_STATES_NBITS); /* VF runtime states */ @@ -140,6 +150,7 @@ struct ice_vf { struct devlink_port devlink_port; u16 num_msix; /* num of MSI-X configured on this VF */ + struct ice_vf_qs_bw qs_bw[ICE_MAX_RSS_QS_PER_VF]; }; /* Flags for controlling behavior of ice_reset_vf */ diff --git a/drivers/net/ethernet/intel/ice/ice_virtchnl.c b/drivers/net/ethernet/intel/ice/ice_virtchnl.c index 3c86d0c2fe..ee5abdea6b 100644 --- a/drivers/net/ethernet/intel/ice/ice_virtchnl.c +++ b/drivers/net/ethernet/intel/ice/ice_virtchnl.c @@ -495,6 +495,12 @@ static int ice_vc_get_vf_res_msg(struct ice_vf *vf, u8 *msg) if (vf->driver_caps & VIRTCHNL_VF_OFFLOAD_USO) vfres->vf_cap_flags |= VIRTCHNL_VF_OFFLOAD_USO; + if (vf->driver_caps & VIRTCHNL_VF_OFFLOAD_QOS) + vfres->vf_cap_flags |= VIRTCHNL_VF_OFFLOAD_QOS; + + if (vf->driver_caps & VIRTCHNL_VF_CAP_PTP) + vfres->vf_cap_flags |= VIRTCHNL_VF_CAP_PTP; + vfres->num_vsis = 1; /* Tx and Rx queue are equal for VF */ vfres->num_queue_pairs = vsi->num_txq; @@ -559,7 +565,7 @@ bool ice_vc_isvalid_vsi_id(struct ice_vf *vf, u16 vsi_id) * * check for the valid queue ID */ -static bool ice_vc_isvalid_q_id(struct ice_vsi *vsi, u8 qid) +static bool ice_vc_isvalid_q_id(struct ice_vsi *vsi, u16 qid) { /* allocated Tx and Rx queues should be always equal for VF VSI */ return qid < vsi->alloc_txq; @@ -1034,6 +1040,191 @@ error_param: NULL, 0); } +/** + * ice_vc_get_qos_caps - Get current QoS caps from PF + * @vf: pointer to the VF info + * + * Get VF's QoS capabilities, such as TC number, arbiter and + * bandwidth from PF. + * + * Return: 0 on success or negative error value. + */ +static int ice_vc_get_qos_caps(struct ice_vf *vf) +{ + enum virtchnl_status_code v_ret = VIRTCHNL_STATUS_SUCCESS; + struct virtchnl_qos_cap_list *cap_list = NULL; + u8 tc_prio[ICE_MAX_TRAFFIC_CLASS] = { 0 }; + struct virtchnl_qos_cap_elem *cfg = NULL; + struct ice_vsi_ctx *vsi_ctx; + struct ice_pf *pf = vf->pf; + struct ice_port_info *pi; + struct ice_vsi *vsi; + u8 numtc, tc; + u16 len = 0; + int ret, i; + + if (!test_bit(ICE_VF_STATE_ACTIVE, vf->vf_states)) { + v_ret = VIRTCHNL_STATUS_ERR_PARAM; + goto err; + } + + vsi = ice_get_vf_vsi(vf); + if (!vsi) { + v_ret = VIRTCHNL_STATUS_ERR_PARAM; + goto err; + } + + pi = pf->hw.port_info; + numtc = vsi->tc_cfg.numtc; + + vsi_ctx = ice_get_vsi_ctx(pi->hw, vf->lan_vsi_idx); + if (!vsi_ctx) { + v_ret = VIRTCHNL_STATUS_ERR_PARAM; + goto err; + } + + len = struct_size(cap_list, cap, numtc); + cap_list = kzalloc(len, GFP_KERNEL); + if (!cap_list) { + v_ret = VIRTCHNL_STATUS_ERR_NO_MEMORY; + len = 0; + goto err; + } + + cap_list->vsi_id = vsi->vsi_num; + cap_list->num_elem = numtc; + + /* Store the UP2TC configuration from DCB to a user priority bitmap + * of each TC. Each element of prio_of_tc represents one TC. Each + * bitmap indicates the user priorities belong to this TC. + */ + for (i = 0; i < ICE_MAX_USER_PRIORITY; i++) { + tc = pi->qos_cfg.local_dcbx_cfg.etscfg.prio_table[i]; + tc_prio[tc] |= BIT(i); + } + + for (i = 0; i < numtc; i++) { + cfg = &cap_list->cap[i]; + cfg->tc_num = i; + cfg->tc_prio = tc_prio[i]; + cfg->arbiter = pi->qos_cfg.local_dcbx_cfg.etscfg.tsatable[i]; + cfg->weight = VIRTCHNL_STRICT_WEIGHT; + cfg->type = VIRTCHNL_BW_SHAPER; + cfg->shaper.committed = vsi_ctx->sched.bw_t_info[i].cir_bw.bw; + cfg->shaper.peak = vsi_ctx->sched.bw_t_info[i].eir_bw.bw; + } + +err: + ret = ice_vc_send_msg_to_vf(vf, VIRTCHNL_OP_GET_QOS_CAPS, v_ret, + (u8 *)cap_list, len); + kfree(cap_list); + return ret; +} + +/** + * ice_vf_cfg_qs_bw - Configure per queue bandwidth + * @vf: pointer to the VF info + * @num_queues: number of queues to be configured + * + * Configure per queue bandwidth. + * + * Return: 0 on success or negative error value. + */ +static int ice_vf_cfg_qs_bw(struct ice_vf *vf, u16 num_queues) +{ + struct ice_hw *hw = &vf->pf->hw; + struct ice_vsi *vsi; + int ret; + u16 i; + + vsi = ice_get_vf_vsi(vf); + if (!vsi) + return -EINVAL; + + for (i = 0; i < num_queues; i++) { + u32 p_rate, min_rate; + u8 tc; + + p_rate = vf->qs_bw[i].peak; + min_rate = vf->qs_bw[i].committed; + tc = vf->qs_bw[i].tc; + if (p_rate) + ret = ice_cfg_q_bw_lmt(hw->port_info, vsi->idx, tc, + vf->qs_bw[i].queue_id, + ICE_MAX_BW, p_rate); + else + ret = ice_cfg_q_bw_dflt_lmt(hw->port_info, vsi->idx, tc, + vf->qs_bw[i].queue_id, + ICE_MAX_BW); + if (ret) + return ret; + + if (min_rate) + ret = ice_cfg_q_bw_lmt(hw->port_info, vsi->idx, tc, + vf->qs_bw[i].queue_id, + ICE_MIN_BW, min_rate); + else + ret = ice_cfg_q_bw_dflt_lmt(hw->port_info, vsi->idx, tc, + vf->qs_bw[i].queue_id, + ICE_MIN_BW); + + if (ret) + return ret; + } + + return 0; +} + +/** + * ice_vf_cfg_q_quanta_profile - Configure quanta profile + * @vf: pointer to the VF info + * @quanta_prof_idx: pointer to the quanta profile index + * @quanta_size: quanta size to be set + * + * This function chooses available quanta profile and configures the register. + * The quanta profile is evenly divided by the number of device ports, and then + * available to the specific PF and VFs. The first profile for each PF is a + * reserved default profile. Only quanta size of the rest unused profile can be + * modified. + * + * Return: 0 on success or negative error value. + */ +static int ice_vf_cfg_q_quanta_profile(struct ice_vf *vf, u16 quanta_size, + u16 *quanta_prof_idx) +{ + const u16 n_desc = calc_quanta_desc(quanta_size); + struct ice_hw *hw = &vf->pf->hw; + const u16 n_cmd = 2 * n_desc; + struct ice_pf *pf = vf->pf; + u16 per_pf, begin_id; + u8 n_used; + u32 reg; + + begin_id = (GLCOMM_QUANTA_PROF_MAX_INDEX + 1) / hw->dev_caps.num_funcs * + hw->logical_pf_id; + + if (quanta_size == ICE_DFLT_QUANTA) { + *quanta_prof_idx = begin_id; + } else { + per_pf = (GLCOMM_QUANTA_PROF_MAX_INDEX + 1) / + hw->dev_caps.num_funcs; + n_used = pf->num_quanta_prof_used; + if (n_used < per_pf) { + *quanta_prof_idx = begin_id + 1 + n_used; + pf->num_quanta_prof_used++; + } else { + return -EINVAL; + } + } + + reg = FIELD_PREP(GLCOMM_QUANTA_PROF_QUANTA_SIZE_M, quanta_size) | + FIELD_PREP(GLCOMM_QUANTA_PROF_MAX_CMD_M, n_cmd) | + FIELD_PREP(GLCOMM_QUANTA_PROF_MAX_DESC_M, n_desc); + wr32(hw, GLCOMM_QUANTA_PROF(*quanta_prof_idx), reg); + + return 0; +} + /** * ice_vc_cfg_promiscuous_mode_msg * @vf: pointer to the VF info @@ -1635,6 +1826,164 @@ error_param: NULL, 0); } +/** + * ice_vc_cfg_q_bw - Configure per queue bandwidth + * @vf: pointer to the VF info + * @msg: pointer to the msg buffer which holds the command descriptor + * + * Configure VF queues bandwidth. + * + * Return: 0 on success or negative error value. + */ +static int ice_vc_cfg_q_bw(struct ice_vf *vf, u8 *msg) +{ + enum virtchnl_status_code v_ret = VIRTCHNL_STATUS_SUCCESS; + struct virtchnl_queues_bw_cfg *qbw = + (struct virtchnl_queues_bw_cfg *)msg; + struct ice_vsi *vsi; + u16 i; + + if (!test_bit(ICE_VF_STATE_ACTIVE, vf->vf_states) || + !ice_vc_isvalid_vsi_id(vf, qbw->vsi_id)) { + v_ret = VIRTCHNL_STATUS_ERR_PARAM; + goto err; + } + + vsi = ice_get_vf_vsi(vf); + if (!vsi) { + v_ret = VIRTCHNL_STATUS_ERR_PARAM; + goto err; + } + + if (qbw->num_queues > ICE_MAX_RSS_QS_PER_VF || + qbw->num_queues > min_t(u16, vsi->alloc_txq, vsi->alloc_rxq)) { + dev_err(ice_pf_to_dev(vf->pf), "VF-%d trying to configure more than allocated number of queues: %d\n", + vf->vf_id, min_t(u16, vsi->alloc_txq, vsi->alloc_rxq)); + v_ret = VIRTCHNL_STATUS_ERR_PARAM; + goto err; + } + + for (i = 0; i < qbw->num_queues; i++) { + if (qbw->cfg[i].shaper.peak != 0 && vf->max_tx_rate != 0 && + qbw->cfg[i].shaper.peak > vf->max_tx_rate) { + dev_warn(ice_pf_to_dev(vf->pf), "The maximum queue %d rate limit configuration may not take effect because the maximum TX rate for VF-%d is %d\n", + qbw->cfg[i].queue_id, vf->vf_id, + vf->max_tx_rate); + v_ret = VIRTCHNL_STATUS_ERR_PARAM; + goto err; + } + if (qbw->cfg[i].shaper.committed != 0 && vf->min_tx_rate != 0 && + qbw->cfg[i].shaper.committed < vf->min_tx_rate) { + dev_warn(ice_pf_to_dev(vf->pf), "The minimum queue %d rate limit configuration may not take effect because the minimum TX rate for VF-%d is %d\n", + qbw->cfg[i].queue_id, vf->vf_id, + vf->min_tx_rate); + v_ret = VIRTCHNL_STATUS_ERR_PARAM; + goto err; + } + if (qbw->cfg[i].queue_id > vf->num_vf_qs) { + dev_warn(ice_pf_to_dev(vf->pf), "VF-%d trying to configure invalid queue_id\n", + vf->vf_id); + v_ret = VIRTCHNL_STATUS_ERR_PARAM; + goto err; + } + if (qbw->cfg[i].tc >= ICE_MAX_TRAFFIC_CLASS) { + dev_warn(ice_pf_to_dev(vf->pf), "VF-%d trying to configure a traffic class higher than allowed\n", + vf->vf_id); + v_ret = VIRTCHNL_STATUS_ERR_PARAM; + goto err; + } + } + + for (i = 0; i < qbw->num_queues; i++) { + vf->qs_bw[i].queue_id = qbw->cfg[i].queue_id; + vf->qs_bw[i].peak = qbw->cfg[i].shaper.peak; + vf->qs_bw[i].committed = qbw->cfg[i].shaper.committed; + vf->qs_bw[i].tc = qbw->cfg[i].tc; + } + + if (ice_vf_cfg_qs_bw(vf, qbw->num_queues)) + v_ret = VIRTCHNL_STATUS_ERR_PARAM; + +err: + /* send the response to the VF */ + return ice_vc_send_msg_to_vf(vf, VIRTCHNL_OP_CONFIG_QUEUE_BW, + v_ret, NULL, 0); +} + +/** + * ice_vc_cfg_q_quanta - Configure per queue quanta + * @vf: pointer to the VF info + * @msg: pointer to the msg buffer which holds the command descriptor + * + * Configure VF queues quanta. + * + * Return: 0 on success or negative error value. + */ +static int ice_vc_cfg_q_quanta(struct ice_vf *vf, u8 *msg) +{ + u16 quanta_prof_id, quanta_size, start_qid, num_queues, end_qid, i; + enum virtchnl_status_code v_ret = VIRTCHNL_STATUS_SUCCESS; + struct virtchnl_quanta_cfg *qquanta = + (struct virtchnl_quanta_cfg *)msg; + struct ice_vsi *vsi; + int ret; + + start_qid = qquanta->queue_select.start_queue_id; + num_queues = qquanta->queue_select.num_queues; + + if (check_add_overflow(start_qid, num_queues, &end_qid)) { + v_ret = VIRTCHNL_STATUS_ERR_PARAM; + goto err; + } + + if (!test_bit(ICE_VF_STATE_ACTIVE, vf->vf_states)) { + v_ret = VIRTCHNL_STATUS_ERR_PARAM; + goto err; + } + + vsi = ice_get_vf_vsi(vf); + if (!vsi) { + v_ret = VIRTCHNL_STATUS_ERR_PARAM; + goto err; + } + + if (end_qid > ICE_MAX_RSS_QS_PER_VF || + end_qid > min_t(u16, vsi->alloc_txq, vsi->alloc_rxq)) { + dev_err(ice_pf_to_dev(vf->pf), "VF-%d trying to configure more than allocated number of queues: %d\n", + vf->vf_id, min_t(u16, vsi->alloc_txq, vsi->alloc_rxq)); + v_ret = VIRTCHNL_STATUS_ERR_PARAM; + goto err; + } + + quanta_size = qquanta->quanta_size; + if (quanta_size > ICE_MAX_QUANTA_SIZE || + quanta_size < ICE_MIN_QUANTA_SIZE) { + v_ret = VIRTCHNL_STATUS_ERR_PARAM; + goto err; + } + + if (quanta_size % 64) { + dev_err(ice_pf_to_dev(vf->pf), "quanta size should be the product of 64\n"); + v_ret = VIRTCHNL_STATUS_ERR_PARAM; + goto err; + } + + ret = ice_vf_cfg_q_quanta_profile(vf, quanta_size, + &quanta_prof_id); + if (ret) { + v_ret = VIRTCHNL_STATUS_ERR_NOT_SUPPORTED; + goto err; + } + + for (i = start_qid; i < end_qid; i++) + vsi->tx_rings[i]->quanta_prof_id = quanta_prof_id; + +err: + /* send the response to the VF */ + return ice_vc_send_msg_to_vf(vf, VIRTCHNL_OP_CONFIG_QUANTA, + v_ret, NULL, 0); +} + /** * ice_vc_cfg_qs_msg * @vf: pointer to the VF info @@ -1652,6 +2001,7 @@ static int ice_vc_cfg_qs_msg(struct ice_vf *vf, u8 *msg) struct ice_vsi *vsi; u8 act_prt, pri_prt; int i = -1, q_idx; + bool ena_ts; lag = pf->lag; mutex_lock(&pf->lag_mutex); @@ -1715,8 +2065,8 @@ static int ice_vc_cfg_qs_msg(struct ice_vf *vf, u8 *msg) /* copy Tx queue info from VF into VSI */ if (qpi->txq.ring_len > 0) { - vsi->tx_rings[i]->dma = qpi->txq.dma_ring_addr; - vsi->tx_rings[i]->count = qpi->txq.ring_len; + vsi->tx_rings[q_idx]->dma = qpi->txq.dma_ring_addr; + vsi->tx_rings[q_idx]->count = qpi->txq.ring_len; /* Disable any existing queue first */ if (ice_vf_vsi_dis_single_txq(vf, vsi, q_idx)) @@ -1725,7 +2075,7 @@ static int ice_vc_cfg_qs_msg(struct ice_vf *vf, u8 *msg) /* Configure a queue with the requested settings */ if (ice_vsi_cfg_single_txq(vsi, vsi->tx_rings, q_idx)) { dev_warn(ice_pf_to_dev(pf), "VF-%d failed to configure TX queue %d\n", - vf->vf_id, i); + vf->vf_id, q_idx); goto error_param; } } @@ -1733,39 +2083,37 @@ static int ice_vc_cfg_qs_msg(struct ice_vf *vf, u8 *msg) /* copy Rx queue info from VF into VSI */ if (qpi->rxq.ring_len > 0) { u16 max_frame_size = ice_vc_get_max_frame_size(vf); + struct ice_rx_ring *ring = vsi->rx_rings[q_idx]; u32 rxdid; - vsi->rx_rings[i]->dma = qpi->rxq.dma_ring_addr; - vsi->rx_rings[i]->count = qpi->rxq.ring_len; + ring->dma = qpi->rxq.dma_ring_addr; + ring->count = qpi->rxq.ring_len; if (qpi->rxq.crc_disable) - vsi->rx_rings[q_idx]->flags |= - ICE_RX_FLAGS_CRC_STRIP_DIS; + ring->flags |= ICE_RX_FLAGS_CRC_STRIP_DIS; else - vsi->rx_rings[q_idx]->flags &= - ~ICE_RX_FLAGS_CRC_STRIP_DIS; + ring->flags &= ~ICE_RX_FLAGS_CRC_STRIP_DIS; if (qpi->rxq.databuffer_size != 0 && (qpi->rxq.databuffer_size > ((16 * 1024) - 128) || qpi->rxq.databuffer_size < 1024)) goto error_param; - vsi->rx_buf_len = qpi->rxq.databuffer_size; - vsi->rx_rings[i]->rx_buf_len = vsi->rx_buf_len; + ring->rx_buf_len = qpi->rxq.databuffer_size; if (qpi->rxq.max_pkt_size > max_frame_size || qpi->rxq.max_pkt_size < 64) goto error_param; - vsi->max_frame = qpi->rxq.max_pkt_size; + ring->max_frame = qpi->rxq.max_pkt_size; /* add space for the port VLAN since the VF driver is * not expected to account for it in the MTU * calculation */ if (ice_vf_is_port_vlan_ena(vf)) - vsi->max_frame += VLAN_HLEN; + ring->max_frame += VLAN_HLEN; if (ice_vsi_cfg_single_rxq(vsi, q_idx)) { dev_warn(ice_pf_to_dev(pf), "VF-%d failed to configure RX queue %d\n", - vf->vf_id, i); + vf->vf_id, q_idx); goto error_param; } @@ -1783,9 +2131,14 @@ static int ice_vc_cfg_qs_msg(struct ice_vf *vf, u8 *msg) rxdid = ICE_RXDID_LEGACY_1; } + ena_ts = ((vf->driver_caps & + VIRTCHNL_VF_OFFLOAD_RX_FLEX_DESC) && + (vf->driver_caps & VIRTCHNL_VF_CAP_PTP) && + (qpi->rxq.flags & VIRTCHNL_PTP_RX_TSTAMP)); + ice_write_qrxflxp_cntxt(&vsi->back->hw, - vsi->rxq_map[q_idx], - rxdid, 0x03, false); + vsi->rxq_map[q_idx], rxdid, + ICE_RXDID_PRIO, ena_ts); } } @@ -2233,17 +2586,27 @@ static bool ice_is_vlan_promisc_allowed(struct ice_vf *vf) /** * ice_vf_ena_vlan_promisc - Enable Tx/Rx VLAN promiscuous for the VLAN + * @vf: VF to enable VLAN promisc on * @vsi: VF's VSI used to enable VLAN promiscuous mode * @vlan: VLAN used to enable VLAN promiscuous * * This function should only be called if VLAN promiscuous mode is allowed, * which can be determined via ice_is_vlan_promisc_allowed(). */ -static int ice_vf_ena_vlan_promisc(struct ice_vsi *vsi, struct ice_vlan *vlan) +static int ice_vf_ena_vlan_promisc(struct ice_vf *vf, struct ice_vsi *vsi, + struct ice_vlan *vlan) { - u8 promisc_m = ICE_PROMISC_VLAN_TX | ICE_PROMISC_VLAN_RX; + u8 promisc_m = 0; int status; + if (test_bit(ICE_VF_STATE_UC_PROMISC, vf->vf_states)) + promisc_m |= ICE_UCAST_VLAN_PROMISC_BITS; + if (test_bit(ICE_VF_STATE_MC_PROMISC, vf->vf_states)) + promisc_m |= ICE_MCAST_VLAN_PROMISC_BITS; + + if (!promisc_m) + return 0; + status = ice_fltr_set_vsi_promisc(&vsi->back->hw, vsi->idx, promisc_m, vlan->vid); if (status && status != -EEXIST) @@ -2262,7 +2625,7 @@ static int ice_vf_ena_vlan_promisc(struct ice_vsi *vsi, struct ice_vlan *vlan) */ static int ice_vf_dis_vlan_promisc(struct ice_vsi *vsi, struct ice_vlan *vlan) { - u8 promisc_m = ICE_PROMISC_VLAN_TX | ICE_PROMISC_VLAN_RX; + u8 promisc_m = ICE_UCAST_VLAN_PROMISC_BITS | ICE_MCAST_VLAN_PROMISC_BITS; int status; status = ice_fltr_clear_vsi_promisc(&vsi->back->hw, vsi->idx, promisc_m, @@ -2417,7 +2780,7 @@ static int ice_vc_process_vlan_msg(struct ice_vf *vf, u8 *msg, bool add_v) goto error_param; } } else if (vlan_promisc) { - status = ice_vf_ena_vlan_promisc(vsi, &vlan); + status = ice_vf_ena_vlan_promisc(vf, vsi, &vlan); if (status) { v_ret = VIRTCHNL_STATUS_ERR_PARAM; dev_err(dev, "Enable Unicast/multicast promiscuous mode on VLAN ID:%d failed error-%d\n", @@ -2700,12 +3063,8 @@ err: static int ice_vc_query_rxdid(struct ice_vf *vf) { enum virtchnl_status_code v_ret = VIRTCHNL_STATUS_SUCCESS; - struct virtchnl_supported_rxdids *rxdid = NULL; - struct ice_hw *hw = &vf->pf->hw; + struct virtchnl_supported_rxdids rxdid = {}; struct ice_pf *pf = vf->pf; - int len = 0; - int ret, i; - u32 regval; if (!test_bit(ICE_VF_STATE_ACTIVE, vf->vf_states)) { v_ret = VIRTCHNL_STATUS_ERR_PARAM; @@ -2717,35 +3076,11 @@ static int ice_vc_query_rxdid(struct ice_vf *vf) goto err; } - len = sizeof(struct virtchnl_supported_rxdids); - rxdid = kzalloc(len, GFP_KERNEL); - if (!rxdid) { - v_ret = VIRTCHNL_STATUS_ERR_NO_MEMORY; - len = 0; - goto err; - } - - /* RXDIDs supported by DDP package can be read from the register - * to get the supported RXDID bitmap. But the legacy 32byte RXDID - * is not listed in DDP package, add it in the bitmap manually. - * Legacy 16byte descriptor is not supported. - */ - rxdid->supported_rxdids |= BIT(ICE_RXDID_LEGACY_1); - - for (i = ICE_RXDID_FLEX_NIC; i < ICE_FLEX_DESC_RXDID_MAX_NUM; i++) { - regval = rd32(hw, GLFLXP_RXDID_FLAGS(i, 0)); - if ((regval >> GLFLXP_RXDID_FLAGS_FLEXIFLAG_4N_S) - & GLFLXP_RXDID_FLAGS_FLEXIFLAG_4N_M) - rxdid->supported_rxdids |= BIT(i); - } - - pf->supported_rxdids = rxdid->supported_rxdids; + rxdid.supported_rxdids = pf->supported_rxdids; err: - ret = ice_vc_send_msg_to_vf(vf, VIRTCHNL_OP_GET_SUPPORTED_RXDIDS, - v_ret, (u8 *)rxdid, len); - kfree(rxdid); - return ret; + return ice_vc_send_msg_to_vf(vf, VIRTCHNL_OP_GET_SUPPORTED_RXDIDS, + v_ret, (u8 *)&rxdid, sizeof(rxdid)); } /** @@ -3254,7 +3589,7 @@ ice_vc_add_vlans(struct ice_vf *vf, struct ice_vsi *vsi, return err; if (vlan_promisc) { - err = ice_vf_ena_vlan_promisc(vsi, &vlan); + err = ice_vf_ena_vlan_promisc(vf, vsi, &vlan); if (err) return err; } @@ -3282,7 +3617,8 @@ ice_vc_add_vlans(struct ice_vf *vf, struct ice_vsi *vsi, */ if (!ice_is_dvm_ena(&vsi->back->hw)) { if (vlan_promisc) { - err = ice_vf_ena_vlan_promisc(vsi, &vlan); + err = ice_vf_ena_vlan_promisc(vf, vsi, + &vlan); if (err) return err; } @@ -3788,6 +4124,59 @@ out: v_ret, NULL, 0); } +static int ice_vc_get_ptp_cap(struct ice_vf *vf, + const struct virtchnl_ptp_caps *msg) +{ + enum virtchnl_status_code v_ret = VIRTCHNL_STATUS_ERR_PARAM; + u32 caps = VIRTCHNL_1588_PTP_CAP_RX_TSTAMP | + VIRTCHNL_1588_PTP_CAP_READ_PHC; + + if (!test_bit(ICE_VF_STATE_ACTIVE, vf->vf_states)) + goto err; + + v_ret = VIRTCHNL_STATUS_SUCCESS; + + if (msg->caps & caps) + vf->ptp_caps = caps; + +err: + /* send the response back to the VF */ + return ice_vc_send_msg_to_vf(vf, VIRTCHNL_OP_1588_PTP_GET_CAPS, v_ret, + (u8 *)&vf->ptp_caps, + sizeof(struct virtchnl_ptp_caps)); +} + +static int ice_vc_get_phc_time(struct ice_vf *vf) +{ + enum virtchnl_status_code v_ret = VIRTCHNL_STATUS_ERR_PARAM; + struct virtchnl_phc_time *phc_time = NULL; + struct ice_pf *pf = vf->pf; + u32 len = 0; + int ret; + + if (!test_bit(ICE_VF_STATE_ACTIVE, vf->vf_states)) + goto err; + + v_ret = VIRTCHNL_STATUS_SUCCESS; + + phc_time = kzalloc(sizeof(*phc_time), GFP_KERNEL); + if (!phc_time) { + v_ret = VIRTCHNL_STATUS_ERR_NO_MEMORY; + goto err; + } + + len = sizeof(*phc_time); + + phc_time->time = ice_ptp_read_src_clk_reg(pf, NULL); + +err: + /* send the response back to the VF */ + ret = ice_vc_send_msg_to_vf(vf, VIRTCHNL_OP_1588_PTP_GET_TIME, v_ret, + (u8 *)phc_time, len); + kfree(phc_time); + return ret; +} + static const struct ice_virtchnl_ops ice_virtchnl_dflt_ops = { .get_ver_msg = ice_vc_get_ver_msg, .get_vf_res_msg = ice_vc_get_vf_res_msg, @@ -3821,6 +4210,14 @@ static const struct ice_virtchnl_ops ice_virtchnl_dflt_ops = { .dis_vlan_stripping_v2_msg = ice_vc_dis_vlan_stripping_v2_msg, .ena_vlan_insertion_v2_msg = ice_vc_ena_vlan_insertion_v2_msg, .dis_vlan_insertion_v2_msg = ice_vc_dis_vlan_insertion_v2_msg, + .get_qos_caps = ice_vc_get_qos_caps, + .cfg_q_bw = ice_vc_cfg_q_bw, + .cfg_q_quanta = ice_vc_cfg_q_quanta, + .get_ptp_cap = ice_vc_get_ptp_cap, + .get_phc_time = ice_vc_get_phc_time, + /* If you add a new op here please make sure to add it to + * ice_virtchnl_repr_ops as well. + */ }; /** @@ -3878,7 +4275,6 @@ static int ice_vc_repr_add_mac(struct ice_vf *vf, u8 *msg) } ice_vfhw_mac_add(vf, &al->list[i]); - vf->num_mac++; break; } @@ -3951,6 +4347,11 @@ static const struct ice_virtchnl_ops ice_virtchnl_repr_ops = { .dis_vlan_stripping_v2_msg = ice_vc_dis_vlan_stripping_v2_msg, .ena_vlan_insertion_v2_msg = ice_vc_ena_vlan_insertion_v2_msg, .dis_vlan_insertion_v2_msg = ice_vc_dis_vlan_insertion_v2_msg, + .get_qos_caps = ice_vc_get_qos_caps, + .cfg_q_bw = ice_vc_cfg_q_bw, + .cfg_q_quanta = ice_vc_cfg_q_quanta, + .get_ptp_cap = ice_vc_get_ptp_cap, + .get_phc_time = ice_vc_get_phc_time, }; /** @@ -4179,6 +4580,21 @@ error_handler: case VIRTCHNL_OP_DISABLE_VLAN_INSERTION_V2: err = ops->dis_vlan_insertion_v2_msg(vf, msg); break; + case VIRTCHNL_OP_GET_QOS_CAPS: + err = ops->get_qos_caps(vf); + break; + case VIRTCHNL_OP_CONFIG_QUEUE_BW: + err = ops->cfg_q_bw(vf, msg); + break; + case VIRTCHNL_OP_CONFIG_QUANTA: + err = ops->cfg_q_quanta(vf, msg); + break; + case VIRTCHNL_OP_1588_PTP_GET_CAPS: + err = ops->get_ptp_cap(vf, (const void *)msg); + break; + case VIRTCHNL_OP_1588_PTP_GET_TIME: + err = ops->get_phc_time(vf); + break; case VIRTCHNL_OP_UNKNOWN: default: dev_err(dev, "Unsupported opcode %d from VF %d\n", v_opcode, diff --git a/drivers/net/ethernet/intel/ice/ice_virtchnl.h b/drivers/net/ethernet/intel/ice/ice_virtchnl.h index 3a41158691..222990f229 100644 --- a/drivers/net/ethernet/intel/ice/ice_virtchnl.h +++ b/drivers/net/ethernet/intel/ice/ice_virtchnl.h @@ -13,12 +13,22 @@ /* Restrict number of MAC Addr and VLAN that non-trusted VF can programmed */ #define ICE_MAX_VLAN_PER_VF 8 +#define ICE_DFLT_QUANTA 1024 +#define ICE_MAX_QUANTA_SIZE 4096 +#define ICE_MIN_QUANTA_SIZE 256 + +#define calc_quanta_desc(x) \ + max_t(u16, 12, min_t(u16, 63, (((x) + 66) / 132) * 2 + 4)) + /* MAC filters: 1 is reserved for the VF's default/perm_addr/LAA MAC, 1 for * broadcast, and 16 for additional unicast/multicast filters */ #define ICE_MAX_MACADDR_PER_VF 18 #define ICE_FLEX_DESC_RXDID_MAX_NUM 64 +/* Priority to be compared against previous priority from the pipe */ +#define ICE_RXDID_PRIO 0x03 + /* VFs only get a single VSI. For ice hardware, the VF does not need to know * its VSI index. However, the virtchnl interface requires a VSI number, * mainly due to legacy hardware. @@ -61,6 +71,13 @@ struct ice_virtchnl_ops { int (*dis_vlan_stripping_v2_msg)(struct ice_vf *vf, u8 *msg); int (*ena_vlan_insertion_v2_msg)(struct ice_vf *vf, u8 *msg); int (*dis_vlan_insertion_v2_msg)(struct ice_vf *vf, u8 *msg); + int (*get_qos_caps)(struct ice_vf *vf); + int (*cfg_q_tc_map)(struct ice_vf *vf, u8 *msg); + int (*cfg_q_bw)(struct ice_vf *vf, u8 *msg); + int (*cfg_q_quanta)(struct ice_vf *vf, u8 *msg); + int (*get_ptp_cap)(struct ice_vf *vf, + const struct virtchnl_ptp_caps *msg); + int (*get_phc_time)(struct ice_vf *vf); }; #ifdef CONFIG_PCI_IOV diff --git a/drivers/net/ethernet/intel/ice/ice_virtchnl_allowlist.c b/drivers/net/ethernet/intel/ice/ice_virtchnl_allowlist.c index d796dbd2a4..a3d1579a61 100644 --- a/drivers/net/ethernet/intel/ice/ice_virtchnl_allowlist.c +++ b/drivers/net/ethernet/intel/ice/ice_virtchnl_allowlist.c @@ -84,6 +84,17 @@ static const u32 fdir_pf_allowlist_opcodes[] = { VIRTCHNL_OP_ADD_FDIR_FILTER, VIRTCHNL_OP_DEL_FDIR_FILTER, }; +/* VIRTCHNL_VF_CAP_PTP */ +static const u32 ptp_allowlist_opcodes[] = { + VIRTCHNL_OP_1588_PTP_GET_CAPS, + VIRTCHNL_OP_1588_PTP_GET_TIME, +}; + +static const u32 tc_allowlist_opcodes[] = { + VIRTCHNL_OP_GET_QOS_CAPS, VIRTCHNL_OP_CONFIG_QUEUE_BW, + VIRTCHNL_OP_CONFIG_QUANTA, +}; + struct allowlist_opcode_info { const u32 *opcodes; size_t size; @@ -104,6 +115,8 @@ static const struct allowlist_opcode_info allowlist_opcodes[] = { ALLOW_ITEM(VIRTCHNL_VF_OFFLOAD_ADV_RSS_PF, adv_rss_pf_allowlist_opcodes), ALLOW_ITEM(VIRTCHNL_VF_OFFLOAD_FDIR_PF, fdir_pf_allowlist_opcodes), ALLOW_ITEM(VIRTCHNL_VF_OFFLOAD_VLAN_V2, vlan_v2_allowlist_opcodes), + ALLOW_ITEM(VIRTCHNL_VF_OFFLOAD_QOS, tc_allowlist_opcodes), + ALLOW_ITEM(VIRTCHNL_VF_CAP_PTP, ptp_allowlist_opcodes), }; /** diff --git a/drivers/net/ethernet/intel/ice/ice_virtchnl_fdir.c b/drivers/net/ethernet/intel/ice/ice_virtchnl_fdir.c index 14e3f0f89c..f90f545b31 100644 --- a/drivers/net/ethernet/intel/ice/ice_virtchnl_fdir.c +++ b/drivers/net/ethernet/intel/ice/ice_virtchnl_fdir.c @@ -832,21 +832,27 @@ ice_vc_fdir_parse_raw(struct ice_vf *vf, struct virtchnl_proto_hdrs *proto, struct virtchnl_fdir_fltr_conf *conf) { - u8 *pkt_buf, *msk_buf __free(kfree); + u8 *pkt_buf, *msk_buf __free(kfree) = NULL; struct ice_parser_result rslt; struct ice_pf *pf = vf->pf; + u16 pkt_len, udp_port = 0; struct ice_parser *psr; int status = -ENOMEM; struct ice_hw *hw; - u16 udp_port = 0; - pkt_buf = kzalloc(proto->raw.pkt_len, GFP_KERNEL); - msk_buf = kzalloc(proto->raw.pkt_len, GFP_KERNEL); + pkt_len = proto->raw.pkt_len; + + if (!pkt_len || pkt_len > VIRTCHNL_MAX_SIZE_RAW_PACKET) + return -EINVAL; + + pkt_buf = kzalloc(pkt_len, GFP_KERNEL); + msk_buf = kzalloc(pkt_len, GFP_KERNEL); + if (!pkt_buf || !msk_buf) goto err_mem_alloc; - memcpy(pkt_buf, proto->raw.spec, proto->raw.pkt_len); - memcpy(msk_buf, proto->raw.mask, proto->raw.pkt_len); + memcpy(pkt_buf, proto->raw.spec, pkt_len); + memcpy(msk_buf, proto->raw.mask, pkt_len); hw = &pf->hw; @@ -862,7 +868,7 @@ ice_vc_fdir_parse_raw(struct ice_vf *vf, if (ice_get_open_tunnel_port(hw, &udp_port, TNL_VXLAN)) ice_parser_vxlan_tunnel_set(psr, udp_port, true); - status = ice_parser_run(psr, pkt_buf, proto->raw.pkt_len, &rslt); + status = ice_parser_run(psr, pkt_buf, pkt_len, &rslt); if (status) goto err_parser_destroy; @@ -876,7 +882,7 @@ ice_vc_fdir_parse_raw(struct ice_vf *vf, } status = ice_parser_profile_init(&rslt, pkt_buf, msk_buf, - proto->raw.pkt_len, ICE_BLK_FD, + pkt_len, ICE_BLK_FD, conf->prof); if (status) goto err_parser_profile_init; @@ -885,7 +891,7 @@ ice_vc_fdir_parse_raw(struct ice_vf *vf, ice_parser_profile_dump(hw, conf->prof); /* Store raw flow info into @conf */ - conf->pkt_len = proto->raw.pkt_len; + conf->pkt_len = pkt_len; conf->pkt_buf = pkt_buf; conf->parser_ena = true; @@ -2091,6 +2097,11 @@ int ice_vc_add_fdir_fltr(struct ice_vf *vf, u8 *msg) pf = vf->pf; dev = ice_pf_to_dev(pf); vf_vsi = ice_get_vf_vsi(vf); + if (!vf_vsi) { + dev_err(dev, "Can not get FDIR vf_vsi for VF %u\n", vf->vf_id); + v_ret = VIRTCHNL_STATUS_ERR_PARAM; + goto err_exit; + } #define ICE_VF_MAX_FDIR_FILTERS 128 if (!ice_fdir_num_avail_fltr(&pf->hw, vf_vsi) || diff --git a/drivers/net/ethernet/intel/ice/ice_xsk.c b/drivers/net/ethernet/intel/ice/ice_xsk.c index 334ae945d6..5e28e14f86 100644 --- a/drivers/net/ethernet/intel/ice/ice_xsk.c +++ b/drivers/net/ethernet/intel/ice/ice_xsk.c @@ -2,6 +2,7 @@ /* Copyright (c) 2019, Intel Corporation. */ #include +#include #include #include #include "ice.h" @@ -1017,7 +1018,8 @@ static void ice_xmit_pkt_batch(struct ice_tx_ring *xdp_ring, struct ice_tx_desc *tx_desc; u32 i; - loop_unrolled_for(i = 0; i < PKTS_PER_BATCH; i++) { + unrolled_count(PKTS_PER_BATCH) + for (i = 0; i < PKTS_PER_BATCH; i++) { dma_addr_t dma; dma = xsk_buff_raw_get_dma(xsk_pool, descs[i].addr); diff --git a/drivers/net/ethernet/intel/ice/ice_xsk.h b/drivers/net/ethernet/intel/ice/ice_xsk.h index 45adeb5132..8dc5d55e26 100644 --- a/drivers/net/ethernet/intel/ice/ice_xsk.h +++ b/drivers/net/ethernet/intel/ice/ice_xsk.h @@ -7,14 +7,6 @@ #define PKTS_PER_BATCH 8 -#ifdef __clang__ -#define loop_unrolled_for _Pragma("clang loop unroll_count(8)") for -#elif __GNUC__ >= 8 -#define loop_unrolled_for _Pragma("GCC unroll 8") for -#else -#define loop_unrolled_for for -#endif - struct ice_vsi; #ifdef CONFIG_XDP_SOCKETS diff --git a/drivers/net/ethernet/intel/igc/igc.h b/drivers/net/ethernet/intel/igc/igc.h index eac0f966e0..323db1e2be 100644 --- a/drivers/net/ethernet/intel/igc/igc.h +++ b/drivers/net/ethernet/intel/igc/igc.h @@ -319,6 +319,7 @@ struct igc_adapter { struct timespec64 prev_ptp_time; /* Pre-reset PTP clock */ ktime_t ptp_reset_start; /* Reset time in clock mono */ struct system_time_snapshot snapshot; + struct mutex ptm_lock; /* Only allow one PTM transaction at a time */ char fw_version[32]; diff --git a/drivers/net/ethernet/intel/igc/igc_defines.h b/drivers/net/ethernet/intel/igc/igc_defines.h index 8e449904aa..d19325b0e6 100644 --- a/drivers/net/ethernet/intel/igc/igc_defines.h +++ b/drivers/net/ethernet/intel/igc/igc_defines.h @@ -574,7 +574,10 @@ #define IGC_PTM_CTRL_SHRT_CYC(usec) (((usec) & 0x3f) << 2) #define IGC_PTM_CTRL_PTM_TO(usec) (((usec) & 0xff) << 8) -#define IGC_PTM_SHORT_CYC_DEFAULT 1 /* Default short cycle interval */ +/* A short cycle time of 1us theoretically should work, but appears to be too + * short in practice. + */ +#define IGC_PTM_SHORT_CYC_DEFAULT 4 /* Default short cycle interval */ #define IGC_PTM_CYC_TIME_DEFAULT 5 /* Default PTM cycle time */ #define IGC_PTM_TIMEOUT_DEFAULT 255 /* Default timeout for PTM errors */ @@ -593,6 +596,7 @@ #define IGC_PTM_STAT_T4M1_OVFL BIT(3) /* T4 minus T1 overflow */ #define IGC_PTM_STAT_ADJUST_1ST BIT(4) /* 1588 timer adjusted during 1st PTM cycle */ #define IGC_PTM_STAT_ADJUST_CYC BIT(5) /* 1588 timer adjusted during non-1st PTM cycle */ +#define IGC_PTM_STAT_ALL GENMASK(5, 0) /* Used to clear all status */ /* PCIe PTM Cycle Control */ #define IGC_PTM_CYCLE_CTRL_CYC_TIME(msec) ((msec) & 0x3ff) /* PTM Cycle Time (msec) */ diff --git a/drivers/net/ethernet/intel/igc/igc_main.c b/drivers/net/ethernet/intel/igc/igc_main.c index 6e70bca15d..cc89b72c85 100644 --- a/drivers/net/ethernet/intel/igc/igc_main.c +++ b/drivers/net/ethernet/intel/igc/igc_main.c @@ -7169,6 +7169,7 @@ static int igc_probe(struct pci_dev *pdev, err_register: igc_release_hw_control(adapter); + igc_ptp_stop(adapter); err_eeprom: if (!igc_check_reset_block(hw)) igc_reset_phy(hw); diff --git a/drivers/net/ethernet/intel/igc/igc_ptp.c b/drivers/net/ethernet/intel/igc/igc_ptp.c index 946edbad43..efc7b30e42 100644 --- a/drivers/net/ethernet/intel/igc/igc_ptp.c +++ b/drivers/net/ethernet/intel/igc/igc_ptp.c @@ -974,45 +974,62 @@ static void igc_ptm_log_error(struct igc_adapter *adapter, u32 ptm_stat) } } +/* The PTM lock: adapter->ptm_lock must be held when calling igc_ptm_trigger() */ +static void igc_ptm_trigger(struct igc_hw *hw) +{ + u32 ctrl; + + /* To "manually" start the PTM cycle we need to set the + * trigger (TRIG) bit + */ + ctrl = rd32(IGC_PTM_CTRL); + ctrl |= IGC_PTM_CTRL_TRIG; + wr32(IGC_PTM_CTRL, ctrl); + /* Perform flush after write to CTRL register otherwise + * transaction may not start + */ + wrfl(); +} + +/* The PTM lock: adapter->ptm_lock must be held when calling igc_ptm_reset() */ +static void igc_ptm_reset(struct igc_hw *hw) +{ + u32 ctrl; + + ctrl = rd32(IGC_PTM_CTRL); + ctrl &= ~IGC_PTM_CTRL_TRIG; + wr32(IGC_PTM_CTRL, ctrl); + /* Write to clear all status */ + wr32(IGC_PTM_STAT, IGC_PTM_STAT_ALL); +} + static int igc_phc_get_syncdevicetime(ktime_t *device, struct system_counterval_t *system, void *ctx) { - u32 stat, t2_curr_h, t2_curr_l, ctrl; struct igc_adapter *adapter = ctx; struct igc_hw *hw = &adapter->hw; + u32 stat, t2_curr_h, t2_curr_l; int err, count = 100; ktime_t t1, t2_curr; - /* Get a snapshot of system clocks to use as historic value. */ - ktime_get_snapshot(&adapter->snapshot); - + /* Doing this in a loop because in the event of a + * badly timed (ha!) system clock adjustment, we may + * get PTM errors from the PCI root, but these errors + * are transitory. Repeating the process returns valid + * data eventually. + */ do { - /* Doing this in a loop because in the event of a - * badly timed (ha!) system clock adjustment, we may - * get PTM errors from the PCI root, but these errors - * are transitory. Repeating the process returns valid - * data eventually. - */ + /* Get a snapshot of system clocks to use as historic value. */ + ktime_get_snapshot(&adapter->snapshot); - /* To "manually" start the PTM cycle we need to clear and - * then set again the TRIG bit. - */ - ctrl = rd32(IGC_PTM_CTRL); - ctrl &= ~IGC_PTM_CTRL_TRIG; - wr32(IGC_PTM_CTRL, ctrl); - ctrl |= IGC_PTM_CTRL_TRIG; - wr32(IGC_PTM_CTRL, ctrl); - - /* The cycle only starts "for real" when software notifies - * that it has read the registers, this is done by setting - * VALID bit. - */ - wr32(IGC_PTM_STAT, IGC_PTM_STAT_VALID); + igc_ptm_trigger(hw); err = readx_poll_timeout(rd32, IGC_PTM_STAT, stat, stat, IGC_PTM_STAT_SLEEP, IGC_PTM_STAT_TIMEOUT); + igc_ptm_reset(hw); + if (err < 0) { netdev_err(adapter->netdev, "Timeout reading IGC_PTM_STAT register\n"); return err; @@ -1021,15 +1038,7 @@ static int igc_phc_get_syncdevicetime(ktime_t *device, if ((stat & IGC_PTM_STAT_VALID) == IGC_PTM_STAT_VALID) break; - if (stat & ~IGC_PTM_STAT_VALID) { - /* An error occurred, log it. */ - igc_ptm_log_error(adapter, stat); - /* The STAT register is write-1-to-clear (W1C), - * so write the previous error status to clear it. - */ - wr32(IGC_PTM_STAT, stat); - continue; - } + igc_ptm_log_error(adapter, stat); } while (--count); if (!count) { @@ -1061,9 +1070,16 @@ static int igc_ptp_getcrosststamp(struct ptp_clock_info *ptp, { struct igc_adapter *adapter = container_of(ptp, struct igc_adapter, ptp_caps); + int ret; - return get_device_system_crosststamp(igc_phc_get_syncdevicetime, - adapter, &adapter->snapshot, cts); + /* This blocks until any in progress PTM transactions complete */ + mutex_lock(&adapter->ptm_lock); + + ret = get_device_system_crosststamp(igc_phc_get_syncdevicetime, + adapter, &adapter->snapshot, cts); + mutex_unlock(&adapter->ptm_lock); + + return ret; } static int igc_ptp_getcyclesx64(struct ptp_clock_info *ptp, @@ -1162,6 +1178,7 @@ void igc_ptp_init(struct igc_adapter *adapter) spin_lock_init(&adapter->ptp_tx_lock); spin_lock_init(&adapter->free_timer_lock); spin_lock_init(&adapter->tmreg_lock); + mutex_init(&adapter->ptm_lock); adapter->tstamp_config.rx_filter = HWTSTAMP_FILTER_NONE; adapter->tstamp_config.tx_type = HWTSTAMP_TX_OFF; @@ -1174,6 +1191,7 @@ void igc_ptp_init(struct igc_adapter *adapter) if (IS_ERR(adapter->ptp_clock)) { adapter->ptp_clock = NULL; netdev_err(netdev, "ptp_clock_register failed\n"); + mutex_destroy(&adapter->ptm_lock); } else if (adapter->ptp_clock) { netdev_info(netdev, "PHC added\n"); adapter->ptp_flags |= IGC_PTP_ENABLED; @@ -1203,10 +1221,12 @@ static void igc_ptm_stop(struct igc_adapter *adapter) struct igc_hw *hw = &adapter->hw; u32 ctrl; + mutex_lock(&adapter->ptm_lock); ctrl = rd32(IGC_PTM_CTRL); ctrl &= ~IGC_PTM_CTRL_EN; wr32(IGC_PTM_CTRL, ctrl); + mutex_unlock(&adapter->ptm_lock); } /** @@ -1237,13 +1257,18 @@ void igc_ptp_suspend(struct igc_adapter *adapter) **/ void igc_ptp_stop(struct igc_adapter *adapter) { + if (!(adapter->ptp_flags & IGC_PTP_ENABLED)) + return; + igc_ptp_suspend(adapter); + adapter->ptp_flags &= ~IGC_PTP_ENABLED; if (adapter->ptp_clock) { ptp_clock_unregister(adapter->ptp_clock); netdev_info(adapter->netdev, "PHC removed\n"); adapter->ptp_flags &= ~IGC_PTP_ENABLED; } + mutex_destroy(&adapter->ptm_lock); } /** @@ -1255,13 +1280,18 @@ void igc_ptp_stop(struct igc_adapter *adapter) void igc_ptp_reset(struct igc_adapter *adapter) { struct igc_hw *hw = &adapter->hw; - u32 cycle_ctrl, ctrl; + u32 cycle_ctrl, ctrl, stat; unsigned long flags; u32 timadj; + if (!(adapter->ptp_flags & IGC_PTP_ENABLED)) + return; + /* reset the tstamp_config */ igc_ptp_set_timestamp_mode(adapter, &adapter->tstamp_config); + mutex_lock(&adapter->ptm_lock); + spin_lock_irqsave(&adapter->tmreg_lock, flags); switch (adapter->hw.mac.type) { @@ -1290,14 +1320,19 @@ void igc_ptp_reset(struct igc_adapter *adapter) ctrl = IGC_PTM_CTRL_EN | IGC_PTM_CTRL_START_NOW | IGC_PTM_CTRL_SHRT_CYC(IGC_PTM_SHORT_CYC_DEFAULT) | - IGC_PTM_CTRL_PTM_TO(IGC_PTM_TIMEOUT_DEFAULT) | - IGC_PTM_CTRL_TRIG; + IGC_PTM_CTRL_PTM_TO(IGC_PTM_TIMEOUT_DEFAULT); wr32(IGC_PTM_CTRL, ctrl); /* Force the first cycle to run. */ - wr32(IGC_PTM_STAT, IGC_PTM_STAT_VALID); + igc_ptm_trigger(hw); + if (readx_poll_timeout_atomic(rd32, IGC_PTM_STAT, stat, + stat, IGC_PTM_STAT_SLEEP, + IGC_PTM_STAT_TIMEOUT)) + netdev_err(adapter->netdev, "Timeout reading IGC_PTM_STAT register\n"); + + igc_ptm_reset(hw); break; default: /* No work to do. */ @@ -1314,5 +1349,7 @@ void igc_ptp_reset(struct igc_adapter *adapter) out: spin_unlock_irqrestore(&adapter->tmreg_lock, flags); + mutex_unlock(&adapter->ptm_lock); + wrfl(); } diff --git a/fs/coredump.c b/fs/coredump.c index 45737b43dd..2b8c36c966 100644 --- a/fs/coredump.c +++ b/fs/coredump.c @@ -63,6 +63,7 @@ static void free_vma_snapshot(struct coredump_params *cprm); static int core_uses_pid; static unsigned int core_pipe_limit; +static unsigned int core_sort_vma; static char core_pattern[CORENAME_MAX_SIZE] = "core"; static int core_name_size = CORENAME_MAX_SIZE; unsigned int core_file_note_size_limit = CORE_FILE_NOTE_SIZE_DEFAULT; @@ -1025,6 +1026,15 @@ static struct ctl_table coredump_sysctls[] = { .extra1 = (unsigned int *)&core_file_note_size_min, .extra2 = (unsigned int *)&core_file_note_size_max, }, + { + .procname = "core_sort_vma", + .data = &core_sort_vma, + .maxlen = sizeof(int), + .mode = 0644, + .proc_handler = proc_douintvec_minmax, + .extra1 = SYSCTL_ZERO, + .extra2 = SYSCTL_ONE, + }, }; static int __init init_fs_coredump_sysctls(void) @@ -1255,8 +1265,9 @@ static bool dump_vma_snapshot(struct coredump_params *cprm) cprm->vma_data_size += m->dump_size; } - sort(cprm->vma_meta, cprm->vma_count, sizeof(*cprm->vma_meta), - cmp_vma_size, NULL); + if (core_sort_vma) + sort(cprm->vma_meta, cprm->vma_count, sizeof(*cprm->vma_meta), + cmp_vma_size, NULL); return true; } diff --git a/include/asm-generic/codetag.lds.h b/include/asm-generic/codetag.lds.h index 64f536b803..372c320c50 100644 --- a/include/asm-generic/codetag.lds.h +++ b/include/asm-generic/codetag.lds.h @@ -11,4 +11,23 @@ #define CODETAG_SECTIONS() \ SECTION_WITH_BOUNDARIES(alloc_tags) +/* + * Module codetags which aren't used after module unload, therefore have the + * same lifespan as the module and can be safely unloaded with the module. + */ +#define MOD_CODETAG_SECTIONS() + +#define MOD_SEPARATE_CODETAG_SECTION(_name) \ + .codetag.##_name : { \ + SECTION_WITH_BOUNDARIES(_name) \ + } + +/* + * For codetags which might be used after module unload, therefore might stay + * longer in memory. Each such codetag type has its own section so that we can + * unload them individually once unused. + */ +#define MOD_SEPARATE_CODETAG_SECTIONS() \ + MOD_SEPARATE_CODETAG_SECTION(alloc_tags) + #endif /* __ASM_GENERIC_CODETAG_LDS_H */ diff --git a/include/asm-generic/text-patching.h b/include/asm-generic/text-patching.h new file mode 100644 index 0000000000..2245c641b7 --- /dev/null +++ b/include/asm-generic/text-patching.h @@ -0,0 +1,5 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +#ifndef _ASM_GENERIC_TEXT_PATCHING_H +#define _ASM_GENERIC_TEXT_PATCHING_H + +#endif /* _ASM_GENERIC_TEXT_PATCHING_H */ diff --git a/include/config/PACKING b/include/config/PACKING new file mode 100644 index 0000000000..e69de29bb2 diff --git a/include/config/PACKING_KUNIT_TEST b/include/config/PACKING_KUNIT_TEST new file mode 100644 index 0000000000..e69de29bb2 diff --git a/include/config/auto.conf b/include/config/auto.conf index 9cf28fac16..f530e698bc 100644 --- a/include/config/auto.conf +++ b/include/config/auto.conf @@ -1910,6 +1910,7 @@ CONFIG_FTRACE_SYSCALLS=y CONFIG_CAN_VXCAN=m CONFIG_CRYPTO_MD5=y CONFIG_SND_DRIVERS=y +CONFIG_PACKING_KUNIT_TEST=m CONFIG_SND_AC97_POWER_SAVE_DEFAULT=0 CONFIG_DMATEST=m CONFIG_ATH9K_HTC=m @@ -2403,6 +2404,7 @@ CONFIG_RTLWIFI_DEBUG=y CONFIG_ZSWAP_COMPRESSOR_DEFAULT_LZO=y CONFIG_CRYPTO_TWOFISH=m CONFIG_HAVE_ARCH_PREL32_RELOCATIONS=y +CONFIG_PACKING=y CONFIG_FUSION_SPI=m CONFIG_IPV6_MIP6=m CONFIG_MHI_NET=m diff --git a/include/generated/autoconf.h b/include/generated/autoconf.h index 24c51dafe3..1cfffbd92d 100644 --- a/include/generated/autoconf.h +++ b/include/generated/autoconf.h @@ -1910,6 +1910,7 @@ #define CONFIG_CAN_VXCAN_MODULE 1 #define CONFIG_CRYPTO_MD5 1 #define CONFIG_SND_DRIVERS 1 +#define CONFIG_PACKING_KUNIT_TEST_MODULE 1 #define CONFIG_SND_AC97_POWER_SAVE_DEFAULT 0 #define CONFIG_DMATEST_MODULE 1 #define CONFIG_ATH9K_HTC_MODULE 1 @@ -2403,6 +2404,7 @@ #define CONFIG_ZSWAP_COMPRESSOR_DEFAULT_LZO 1 #define CONFIG_CRYPTO_TWOFISH_MODULE 1 #define CONFIG_HAVE_ARCH_PREL32_RELOCATIONS 1 +#define CONFIG_PACKING 1 #define CONFIG_FUSION_SPI_MODULE 1 #define CONFIG_IPV6_MIP6_MODULE 1 #define CONFIG_MHI_NET_MODULE 1 diff --git a/include/generated/rustc_cfg b/include/generated/rustc_cfg index fb22fddc3e..1d8a74a1fc 100644 --- a/include/generated/rustc_cfg +++ b/include/generated/rustc_cfg @@ -3749,6 +3749,8 @@ --cfg=CONFIG_CRYPTO_MD5="y" --cfg=CONFIG_SND_DRIVERS --cfg=CONFIG_SND_DRIVERS="y" +--cfg=CONFIG_PACKING_KUNIT_TEST +--cfg=CONFIG_PACKING_KUNIT_TEST="m" --cfg=CONFIG_SND_AC97_POWER_SAVE_DEFAULT="0" --cfg=CONFIG_DMATEST --cfg=CONFIG_DMATEST="m" @@ -4721,6 +4723,8 @@ --cfg=CONFIG_CRYPTO_TWOFISH="m" --cfg=CONFIG_HAVE_ARCH_PREL32_RELOCATIONS --cfg=CONFIG_HAVE_ARCH_PREL32_RELOCATIONS="y" +--cfg=CONFIG_PACKING +--cfg=CONFIG_PACKING="y" --cfg=CONFIG_FUSION_SPI --cfg=CONFIG_FUSION_SPI="m" --cfg=CONFIG_IPV6_MIP6 diff --git a/include/linux/alloc_tag.h b/include/linux/alloc_tag.h index 941deffc59..55d30543c4 100644 --- a/include/linux/alloc_tag.h +++ b/include/linux/alloc_tag.h @@ -30,6 +30,13 @@ struct alloc_tag { struct alloc_tag_counters __percpu *counters; } __aligned(8); +struct alloc_tag_module_section { + unsigned long start_addr; + unsigned long end_addr; + /* used size */ + unsigned long size; +}; + #ifdef CONFIG_MEM_ALLOC_PROFILING_DEBUG #define CODETAG_EMPTY ((void *)1) @@ -54,6 +61,8 @@ static inline void set_codetag_empty(union codetag_ref *ref) {} #ifdef CONFIG_MEM_ALLOC_PROFILING +#define ALLOC_TAG_SECTION_NAME "alloc_tags" + struct codetag_bytes { struct codetag *ct; s64 bytes; @@ -76,7 +85,7 @@ DECLARE_PER_CPU(struct alloc_tag_counters, _shared_alloc_tag); #define DEFINE_ALLOC_TAG(_alloc_tag) \ static struct alloc_tag _alloc_tag __used __aligned(8) \ - __section("alloc_tags") = { \ + __section(ALLOC_TAG_SECTION_NAME) = { \ .ct = CODE_TAG_INIT, \ .counters = &_shared_alloc_tag }; @@ -85,7 +94,7 @@ DECLARE_PER_CPU(struct alloc_tag_counters, _shared_alloc_tag); #define DEFINE_ALLOC_TAG(_alloc_tag) \ static DEFINE_PER_CPU(struct alloc_tag_counters, _alloc_tag_cntr); \ static struct alloc_tag _alloc_tag __used __aligned(8) \ - __section("alloc_tags") = { \ + __section(ALLOC_TAG_SECTION_NAME) = { \ .ct = CODE_TAG_INIT, \ .counters = &_alloc_tag_cntr }; diff --git a/include/linux/avf/virtchnl.h b/include/linux/avf/virtchnl.h index f41395264d..c2b6f6802f 100644 --- a/include/linux/avf/virtchnl.h +++ b/include/linux/avf/virtchnl.h @@ -89,6 +89,9 @@ enum virtchnl_rx_hsplit { VIRTCHNL_RX_HSPLIT_SPLIT_SCTP = 8, }; +enum virtchnl_bw_limit_type { + VIRTCHNL_BW_SHAPER = 0, +}; /* END GENERIC DEFINES */ /* Opcodes for VF-PF communication. These are placed in the v_opcode field @@ -151,6 +154,14 @@ enum virtchnl_ops { VIRTCHNL_OP_DISABLE_VLAN_STRIPPING_V2 = 55, VIRTCHNL_OP_ENABLE_VLAN_INSERTION_V2 = 56, VIRTCHNL_OP_DISABLE_VLAN_INSERTION_V2 = 57, + /* opcode 58 and 59 are reserved */ + VIRTCHNL_OP_1588_PTP_GET_CAPS = 60, + VIRTCHNL_OP_1588_PTP_GET_TIME = 61, + /* opcode 62 - 65 are reserved */ + VIRTCHNL_OP_GET_QOS_CAPS = 66, + /* opcode 68 through 111 are reserved */ + VIRTCHNL_OP_CONFIG_QUEUE_BW = 112, + VIRTCHNL_OP_CONFIG_QUANTA = 113, VIRTCHNL_OP_MAX, }; @@ -261,6 +272,8 @@ VIRTCHNL_CHECK_STRUCT_LEN(16, virtchnl_vsi_resource); #define VIRTCHNL_VF_OFFLOAD_RX_FLEX_DESC BIT(26) #define VIRTCHNL_VF_OFFLOAD_ADV_RSS_PF BIT(27) #define VIRTCHNL_VF_OFFLOAD_FDIR_PF BIT(28) +#define VIRTCHNL_VF_OFFLOAD_QOS BIT(29) +#define VIRTCHNL_VF_CAP_PTP BIT(31) #define VF_BASE_MODE_OFFLOADS (VIRTCHNL_VF_OFFLOAD_L2 | \ VIRTCHNL_VF_OFFLOAD_VLAN | \ @@ -300,6 +313,60 @@ struct virtchnl_txq_info { VIRTCHNL_CHECK_STRUCT_LEN(24, virtchnl_txq_info); +/* RX descriptor IDs (range from 0 to 63) */ +enum virtchnl_rx_desc_ids { + VIRTCHNL_RXDID_0_16B_BASE = 0, + VIRTCHNL_RXDID_1_32B_BASE = 1, + VIRTCHNL_RXDID_2_FLEX_SQ_NIC = 2, + VIRTCHNL_RXDID_3_FLEX_SQ_SW = 3, + VIRTCHNL_RXDID_4_FLEX_SQ_NIC_VEB = 4, + VIRTCHNL_RXDID_5_FLEX_SQ_NIC_ACL = 5, + VIRTCHNL_RXDID_6_FLEX_SQ_NIC_2 = 6, + VIRTCHNL_RXDID_7_HW_RSVD = 7, + /* 8 through 15 are reserved */ + VIRTCHNL_RXDID_16_COMMS_GENERIC = 16, + VIRTCHNL_RXDID_17_COMMS_AUX_VLAN = 17, + VIRTCHNL_RXDID_18_COMMS_AUX_IPV4 = 18, + VIRTCHNL_RXDID_19_COMMS_AUX_IPV6 = 19, + VIRTCHNL_RXDID_20_COMMS_AUX_FLOW = 20, + VIRTCHNL_RXDID_21_COMMS_AUX_TCP = 21, + /* 22 through 63 are reserved */ +}; + +#define VIRTCHNL_RXDID_BIT(x) BIT_ULL(VIRTCHNL_RXDID_##x) + +/* RX descriptor ID bitmasks */ +enum virtchnl_rx_desc_id_bitmasks { + VIRTCHNL_RXDID_0_16B_BASE_M = VIRTCHNL_RXDID_BIT(0_16B_BASE), + VIRTCHNL_RXDID_1_32B_BASE_M = VIRTCHNL_RXDID_BIT(1_32B_BASE), + VIRTCHNL_RXDID_2_FLEX_SQ_NIC_M = VIRTCHNL_RXDID_BIT(2_FLEX_SQ_NIC), + VIRTCHNL_RXDID_3_FLEX_SQ_SW_M = VIRTCHNL_RXDID_BIT(3_FLEX_SQ_SW), + VIRTCHNL_RXDID_4_FLEX_SQ_NIC_VEB_M = VIRTCHNL_RXDID_BIT(4_FLEX_SQ_NIC_VEB), + VIRTCHNL_RXDID_5_FLEX_SQ_NIC_ACL_M = VIRTCHNL_RXDID_BIT(5_FLEX_SQ_NIC_ACL), + VIRTCHNL_RXDID_6_FLEX_SQ_NIC_2_M = VIRTCHNL_RXDID_BIT(6_FLEX_SQ_NIC_2), + VIRTCHNL_RXDID_7_HW_RSVD_M = VIRTCHNL_RXDID_BIT(7_HW_RSVD), + /* 8 through 15 are reserved */ + VIRTCHNL_RXDID_16_COMMS_GENERIC_M = VIRTCHNL_RXDID_BIT(16_COMMS_GENERIC), + VIRTCHNL_RXDID_17_COMMS_AUX_VLAN_M = VIRTCHNL_RXDID_BIT(17_COMMS_AUX_VLAN), + VIRTCHNL_RXDID_18_COMMS_AUX_IPV4_M = VIRTCHNL_RXDID_BIT(18_COMMS_AUX_IPV4), + VIRTCHNL_RXDID_19_COMMS_AUX_IPV6_M = VIRTCHNL_RXDID_BIT(19_COMMS_AUX_IPV6), + VIRTCHNL_RXDID_20_COMMS_AUX_FLOW_M = VIRTCHNL_RXDID_BIT(20_COMMS_AUX_FLOW), + VIRTCHNL_RXDID_21_COMMS_AUX_TCP_M = VIRTCHNL_RXDID_BIT(21_COMMS_AUX_TCP), + /* 22 through 63 are reserved */ +}; + +/* virtchnl_rxq_info_flags - definition of bits in the flags field of the + * virtchnl_rxq_info structure. + * + * @VIRTCHNL_PTP_RX_TSTAMP: request to enable Rx timestamping + * + * Other flag bits are currently reserved and they may be extended in the + * future. + */ +enum virtchnl_rxq_info_flags { + VIRTCHNL_PTP_RX_TSTAMP = BIT(0), +}; + /* VIRTCHNL_OP_CONFIG_RX_QUEUE * VF sends this message to set up parameters for one RX queue. * External data buffer contains one instance of virtchnl_rxq_info. @@ -322,8 +389,14 @@ struct virtchnl_rxq_info { u32 databuffer_size; u32 max_pkt_size; u8 crc_disable; - u8 rxdid; - u8 pad1[2]; + /* see enum virtchnl_rx_desc_ids; + * only used when VIRTCHNL_VF_OFFLOAD_RX_FLEX_DESC is supported. Note + * that when the offload is not supported, the descriptor format aligns + * with VIRTCHNL_RXDID_1_32B_BASE. + */ + enum virtchnl_rx_desc_ids rxdid:8; + enum virtchnl_rxq_info_flags flags:8; /* see virtchnl_rxq_info_flags */ + u8 pad1; u64 dma_ring_addr; /* see enum virtchnl_rx_hsplit; deprecated with AVF 1.0 */ @@ -1024,6 +1097,7 @@ struct virtchnl_filter { VIRTCHNL_CHECK_STRUCT_LEN(272, virtchnl_filter); struct virtchnl_supported_rxdids { + /* see enum virtchnl_rx_desc_id_bitmasks */ u64 supported_rxdids; }; @@ -1274,7 +1348,7 @@ struct virtchnl_proto_hdrs { * 2 - from the second inner layer * .... **/ - int count; /* the proto layers must < VIRTCHNL_MAX_NUM_PROTO_HDRS */ + u32 count; /* the proto layers must < VIRTCHNL_MAX_NUM_PROTO_HDRS */ union { struct virtchnl_proto_hdr proto_hdr[VIRTCHNL_MAX_NUM_PROTO_HDRS]; @@ -1326,7 +1400,7 @@ VIRTCHNL_CHECK_STRUCT_LEN(36, virtchnl_filter_action); struct virtchnl_filter_action_set { /* action number must be less then VIRTCHNL_MAX_NUM_ACTIONS */ - int count; + u32 count; struct virtchnl_filter_action actions[VIRTCHNL_MAX_NUM_ACTIONS]; }; @@ -1416,6 +1490,141 @@ struct virtchnl_fdir_del { VIRTCHNL_CHECK_STRUCT_LEN(12, virtchnl_fdir_del); +#define VIRTCHNL_1588_PTP_CAP_RX_TSTAMP BIT(1) +#define VIRTCHNL_1588_PTP_CAP_READ_PHC BIT(2) + +/** + * struct virtchnl_ptp_caps - Defines the PTP caps available to the VF. + * @caps: On send, VF sets what capabilities it requests. On reply, PF + * indicates what has been enabled for this VF. The PF shall not set + * bits which were not requested by the VF. + * @rsvd: Reserved bits for future extension. + * + * Structure that defines the PTP capabilities available to the VF. The VF + * sends VIRTCHNL_OP_1588_PTP_GET_CAPS, and must fill in the ptp_caps field + * indicating what capabilities it is requesting. The PF will respond with the + * same message with the virtchnl_ptp_caps structure indicating what is + * enabled for the VF. + * + * VIRTCHNL_1588_PTP_CAP_RX_TSTAMP indicates that the VF receive queues have + * receive timestamps enabled in the flexible descriptors. Note that this + * requires a VF to also negotiate to enable advanced flexible descriptors in + * the receive path instead of the default legacy descriptor format. + * + * VIRTCHNL_1588_PTP_CAP_READ_PHC indicates that the VF may read the PHC time + * via the VIRTCHNL_OP_1588_PTP_GET_TIME command. + * + * Note that in the future, additional capability flags may be added which + * indicate additional extended support. All fields marked as reserved by this + * header will be set to zero. VF implementations should verify this to ensure + * that future extensions do not break compatibility. + */ +struct virtchnl_ptp_caps { + u32 caps; + u8 rsvd[44]; +}; + +VIRTCHNL_CHECK_STRUCT_LEN(48, virtchnl_ptp_caps); + +/** + * struct virtchnl_phc_time - Contains the 64bits of PHC clock time in ns. + * @time: PHC time in nanoseconds + * @rsvd: Reserved for future extension + * + * Structure received with VIRTCHNL_OP_1588_PTP_GET_TIME. Contains the 64bits + * of PHC clock time in nanoseconds. + * + * VIRTCHNL_OP_1588_PTP_GET_TIME may be sent to request the current time of + * the PHC. This op is available in case direct access via the PHC registers + * is not available. + */ +struct virtchnl_phc_time { + u64 time; + u8 rsvd[8]; +}; + +VIRTCHNL_CHECK_STRUCT_LEN(16, virtchnl_phc_time); + +struct virtchnl_shaper_bw { + /* Unit is Kbps */ + u32 committed; + u32 peak; +}; + +VIRTCHNL_CHECK_STRUCT_LEN(8, virtchnl_shaper_bw); + +/* VIRTCHNL_OP_GET_QOS_CAPS + * VF sends this message to get its QoS Caps, such as + * TC number, Arbiter and Bandwidth. + */ +struct virtchnl_qos_cap_elem { + u8 tc_num; + u8 tc_prio; +#define VIRTCHNL_ABITER_STRICT 0 +#define VIRTCHNL_ABITER_ETS 2 + u8 arbiter; +#define VIRTCHNL_STRICT_WEIGHT 1 + u8 weight; + enum virtchnl_bw_limit_type type; + union { + struct virtchnl_shaper_bw shaper; + u8 pad2[32]; + }; +}; + +VIRTCHNL_CHECK_STRUCT_LEN(40, virtchnl_qos_cap_elem); + +struct virtchnl_qos_cap_list { + u16 vsi_id; + u16 num_elem; + struct virtchnl_qos_cap_elem cap[]; +}; + +VIRTCHNL_CHECK_STRUCT_LEN(4, virtchnl_qos_cap_list); +#define virtchnl_qos_cap_list_LEGACY_SIZEOF 44 + +/* VIRTCHNL_OP_CONFIG_QUEUE_BW */ +struct virtchnl_queue_bw { + u16 queue_id; + u8 tc; + u8 pad; + struct virtchnl_shaper_bw shaper; +}; + +VIRTCHNL_CHECK_STRUCT_LEN(12, virtchnl_queue_bw); + +struct virtchnl_queues_bw_cfg { + u16 vsi_id; + u16 num_queues; + struct virtchnl_queue_bw cfg[]; +}; + +VIRTCHNL_CHECK_STRUCT_LEN(4, virtchnl_queues_bw_cfg); +#define virtchnl_queues_bw_cfg_LEGACY_SIZEOF 16 + +enum virtchnl_queue_type { + VIRTCHNL_QUEUE_TYPE_TX = 0, + VIRTCHNL_QUEUE_TYPE_RX = 1, +}; + +/* structure to specify a chunk of contiguous queues */ +struct virtchnl_queue_chunk { + /* see enum virtchnl_queue_type */ + s32 type; + u16 start_queue_id; + u16 num_queues; +}; + +VIRTCHNL_CHECK_STRUCT_LEN(8, virtchnl_queue_chunk); + +struct virtchnl_quanta_cfg { + u16 quanta_size; + u16 pad; + struct virtchnl_queue_chunk queue_select; +}; + +VIRTCHNL_CHECK_STRUCT_LEN(12, virtchnl_quanta_cfg); + #define __vss_byone(p, member, count, old) \ (struct_size(p, member, count) + (old - 1 - struct_size(p, member, 0))) @@ -1438,6 +1647,8 @@ VIRTCHNL_CHECK_STRUCT_LEN(12, virtchnl_fdir_del); __vss(virtchnl_vlan_filter_list_v2, __vss_byelem, p, m, c), \ __vss(virtchnl_tc_info, __vss_byelem, p, m, c), \ __vss(virtchnl_rdma_qvlist_info, __vss_byelem, p, m, c), \ + __vss(virtchnl_qos_cap_list, __vss_byelem, p, m, c), \ + __vss(virtchnl_queues_bw_cfg, __vss_byelem, p, m, c), \ __vss(virtchnl_rss_key, __vss_byone, p, m, c), \ __vss(virtchnl_rss_lut, __vss_byone, p, m, c)) @@ -1637,6 +1848,41 @@ virtchnl_vc_validate_vf_msg(struct virtchnl_version_info *ver, u32 v_opcode, case VIRTCHNL_OP_DISABLE_VLAN_INSERTION_V2: valid_len = sizeof(struct virtchnl_vlan_setting); break; + case VIRTCHNL_OP_GET_QOS_CAPS: + break; + case VIRTCHNL_OP_CONFIG_QUEUE_BW: + valid_len = virtchnl_queues_bw_cfg_LEGACY_SIZEOF; + if (msglen >= valid_len) { + struct virtchnl_queues_bw_cfg *q_bw = + (struct virtchnl_queues_bw_cfg *)msg; + + valid_len = virtchnl_struct_size(q_bw, cfg, + q_bw->num_queues); + if (q_bw->num_queues == 0) { + err_msg_format = true; + break; + } + } + break; + case VIRTCHNL_OP_CONFIG_QUANTA: + valid_len = sizeof(struct virtchnl_quanta_cfg); + if (msglen >= valid_len) { + struct virtchnl_quanta_cfg *q_quanta = + (struct virtchnl_quanta_cfg *)msg; + + if (q_quanta->quanta_size == 0 || + q_quanta->queue_select.num_queues == 0) { + err_msg_format = true; + break; + } + } + break; + case VIRTCHNL_OP_1588_PTP_GET_CAPS: + valid_len = sizeof(struct virtchnl_ptp_caps); + break; + case VIRTCHNL_OP_1588_PTP_GET_TIME: + valid_len = sizeof(struct virtchnl_phc_time); + break; /* These are always errors coming from the VF. */ case VIRTCHNL_OP_EVENT: case VIRTCHNL_OP_UNKNOWN: diff --git a/include/linux/codetag.h b/include/linux/codetag.h index c2a579ccd4..d10bd9810d 100644 --- a/include/linux/codetag.h +++ b/include/linux/codetag.h @@ -35,8 +35,15 @@ struct codetag_type_desc { size_t tag_size; void (*module_load)(struct codetag_type *cttype, struct codetag_module *cmod); - bool (*module_unload)(struct codetag_type *cttype, + void (*module_unload)(struct codetag_type *cttype, struct codetag_module *cmod); +#ifdef CONFIG_MODULES + void (*module_replaced)(struct module *mod, struct module *new_mod); + bool (*needs_section_mem)(struct module *mod, unsigned long size); + void *(*alloc_section_mem)(struct module *mod, unsigned long size, + unsigned int prepend, unsigned long align); + void (*free_section_mem)(struct module *mod, bool used); +#endif }; struct codetag_iterator { @@ -71,11 +78,31 @@ struct codetag_type * codetag_register_type(const struct codetag_type_desc *desc); #if defined(CONFIG_CODE_TAGGING) && defined(CONFIG_MODULES) + +bool codetag_needs_module_section(struct module *mod, const char *name, + unsigned long size); +void *codetag_alloc_module_section(struct module *mod, const char *name, + unsigned long size, unsigned int prepend, + unsigned long align); +void codetag_free_module_sections(struct module *mod); +void codetag_module_replaced(struct module *mod, struct module *new_mod); void codetag_load_module(struct module *mod); -bool codetag_unload_module(struct module *mod); -#else +void codetag_unload_module(struct module *mod); + +#else /* defined(CONFIG_CODE_TAGGING) && defined(CONFIG_MODULES) */ + +static inline bool +codetag_needs_module_section(struct module *mod, const char *name, + unsigned long size) { return false; } +static inline void * +codetag_alloc_module_section(struct module *mod, const char *name, + unsigned long size, unsigned int prepend, + unsigned long align) { return NULL; } +static inline void codetag_free_module_sections(struct module *mod) {} +static inline void codetag_module_replaced(struct module *mod, struct module *new_mod) {} static inline void codetag_load_module(struct module *mod) {} -static inline bool codetag_unload_module(struct module *mod) { return true; } -#endif +static inline void codetag_unload_module(struct module *mod) {} + +#endif /* defined(CONFIG_CODE_TAGGING) && defined(CONFIG_MODULES) */ #endif /* _LINUX_CODETAG_H */ diff --git a/include/linux/cpu.h b/include/linux/cpu.h index bdcec17324..cc668a054d 100644 --- a/include/linux/cpu.h +++ b/include/linux/cpu.h @@ -77,6 +77,8 @@ extern ssize_t cpu_show_gds(struct device *dev, struct device_attribute *attr, char *buf); extern ssize_t cpu_show_reg_file_data_sampling(struct device *dev, struct device_attribute *attr, char *buf); +extern ssize_t cpu_show_indirect_target_selection(struct device *dev, + struct device_attribute *attr, char *buf); extern __printf(4, 5) struct device *cpu_device_create(struct device *parent, void *drvdata, diff --git a/include/linux/execmem.h b/include/linux/execmem.h index 32cef11441..3fa49a533c 100644 --- a/include/linux/execmem.h +++ b/include/linux/execmem.h @@ -46,11 +46,58 @@ enum execmem_type { /** * enum execmem_range_flags - options for executable memory allocations * @EXECMEM_KASAN_SHADOW: allocate kasan shadow + * @EXECMEM_ROX_CACHE: allocations should use ROX cache of huge pages */ enum execmem_range_flags { EXECMEM_KASAN_SHADOW = (1 << 0), + EXECMEM_ROX_CACHE = (1 << 1), }; +#ifdef CONFIG_ARCH_HAS_EXECMEM_ROX +/** + * execmem_fill_trapping_insns - set memory to contain instructions that + * will trap + * @ptr: pointer to memory to fill + * @size: size of the range to fill + * @writable: is the memory poited by @ptr is writable or ROX + * + * A hook for architecures to fill execmem ranges with invalid instructions. + * Architectures that use EXECMEM_ROX_CACHE must implement this. + */ +void execmem_fill_trapping_insns(void *ptr, size_t size, bool writable); + +/** + * execmem_make_temp_rw - temporarily remap region with read-write + * permissions + * @ptr: address of the region to remap + * @size: size of the region to remap + * + * Remaps a part of the cached large page in the ROX cache in the range + * [@ptr, @ptr + @size) as writable and not executable. The caller must + * have exclusive ownership of this range and ensure nothing will try to + * execute code in this range. + * + * Return: 0 on success or negative error code on failure. + */ +int execmem_make_temp_rw(void *ptr, size_t size); + +/** + * execmem_restore_rox - restore read-only-execute permissions + * @ptr: address of the region to remap + * @size: size of the region to remap + * + * Restores read-only-execute permissions on a range [@ptr, @ptr + @size) + * after it was temporarily remapped as writable. Relies on architecture + * implementation of set_memory_rox() to restore mapping using large pages. + * + * Return: 0 on success or negative error code on failure. + */ +int execmem_restore_rox(void *ptr, size_t size); +#else +static inline int execmem_make_temp_rw(void *ptr, size_t size) { return 0; } +static inline int execmem_restore_rox(void *ptr, size_t size) { return 0; } +#endif + /** * struct execmem_range - definition of an address space suitable for code and * related data allocations @@ -123,6 +170,27 @@ void *execmem_alloc(enum execmem_type type, size_t size); */ void execmem_free(void *ptr); +/** + * execmem_update_copy - copy an update to executable memory + * @dst: destination address to update + * @src: source address containing the data + * @size: how many bytes of memory shold be copied + * + * Copy @size bytes from @src to @dst using text poking if the memory at + * @dst is read-only. + * + * Return: a pointer to @dst or NULL on error + */ +void *execmem_update_copy(void *dst, const void *src, size_t size); + +/** + * execmem_is_rox - check if execmem is read-only + * @type - the execmem type to check + * + * Return: %true if the @type is read-only, %false if it's writable + */ +bool execmem_is_rox(enum execmem_type type); + #if defined(CONFIG_EXECMEM) && !defined(CONFIG_ARCH_WANTS_EXECMEM_LATE) void execmem_init(void); #else diff --git a/include/linux/maple_tree.h b/include/linux/maple_tree.h index c2c1100408..e7e2caa1a9 100644 --- a/include/linux/maple_tree.h +++ b/include/linux/maple_tree.h @@ -592,6 +592,20 @@ static __always_inline void mas_reset(struct ma_state *mas) #define mas_for_each(__mas, __entry, __max) \ while (((__entry) = mas_find((__mas), (__max))) != NULL) +/** + * mas_for_each_rev() - Iterate over a range of the maple tree in reverse order. + * @__mas: Maple Tree operation state (maple_state) + * @__entry: Entry retrieved from the tree + * @__min: minimum index to retrieve from the tree + * + * When returned, mas->index and mas->last will hold the entire range for the + * entry. + * + * Note: may return the zero entry. + */ +#define mas_for_each_rev(__mas, __entry, __min) \ + while (((__entry) = mas_find_rev((__mas), (__min))) != NULL) + #ifdef CONFIG_DEBUG_MAPLE_TREE enum mt_dump_format { mt_dump_dec, diff --git a/include/linux/mod_devicetable.h b/include/linux/mod_devicetable.h index 4338b1b4ac..bd7e60c0b7 100644 --- a/include/linux/mod_devicetable.h +++ b/include/linux/mod_devicetable.h @@ -692,6 +692,7 @@ struct x86_cpu_id { __u16 feature; /* bit index */ /* Solely for kernel-internal use: DO NOT EXPORT to userspace! */ __u16 flags; + __u8 type; kernel_ulong_t driver_data; }; @@ -700,7 +701,10 @@ struct x86_cpu_id { #define X86_FAMILY_ANY 0 #define X86_MODEL_ANY 0 #define X86_STEPPING_ANY 0 +#define X86_STEP_MIN 0 +#define X86_STEP_MAX 0xf #define X86_FEATURE_ANY 0 /* Same as FPU, you can't test for that */ +#define X86_CPU_TYPE_ANY 0 /* * Generic table type for matching CPU features. diff --git a/include/linux/module.h b/include/linux/module.h index bcc0377c71..fb512e9d5a 100644 --- a/include/linux/module.h +++ b/include/linux/module.h @@ -368,6 +368,7 @@ enum mod_mem_type { struct module_memory { void *base; unsigned int size; + RH_KABI_FILL_HOLE(bool is_rox) #ifdef CONFIG_MODULES_TREE_LOOKUP struct mod_tree_node mtn; diff --git a/include/linux/packing.h b/include/linux/packing.h index 8d6571feb9..0589d70bbe 100644 --- a/include/linux/packing.h +++ b/include/linux/packing.h @@ -8,6 +8,83 @@ #include #include +#define GEN_PACKED_FIELD_STRUCT(__type) \ + struct packed_field_ ## __type { \ + __type startbit; \ + __type endbit; \ + __type offset; \ + __type size; \ + } + +/* struct packed_field_u8. Use with bit offsets < 256, buffers < 32B and + * unpacked structures < 256B. + */ +GEN_PACKED_FIELD_STRUCT(u8); + +/* struct packed_field_u16. Use with bit offsets < 65536, buffers < 8KB and + * unpacked structures < 64KB. + */ +GEN_PACKED_FIELD_STRUCT(u16); + +#define PACKED_FIELD(start, end, struct_name, struct_field) \ +{ \ + (start), \ + (end), \ + offsetof(struct_name, struct_field), \ + sizeof_field(struct_name, struct_field), \ +} + +#define CHECK_PACKED_FIELD_OVERLAP(fields, index1, index2) ({ \ + typeof(&(fields)[0]) __f = (fields); \ + typeof(__f[0]) _f1 = __f[index1]; typeof(__f[0]) _f2 = __f[index2]; \ + const bool _ascending = __f[0].startbit < __f[1].startbit; \ + BUILD_BUG_ON_MSG(_ascending && _f1.startbit >= _f2.startbit, \ + __stringify(fields) " field " __stringify(index2) \ + " breaks ascending order"); \ + BUILD_BUG_ON_MSG(!_ascending && _f1.startbit <= _f2.startbit, \ + __stringify(fields) " field " __stringify(index2) \ + " breaks descending order"); \ + BUILD_BUG_ON_MSG(max(_f1.endbit, _f2.endbit) <= \ + min(_f1.startbit, _f2.startbit), \ + __stringify(fields) " field " __stringify(index2) \ + " overlaps with previous field"); \ +}) + +#define CHECK_PACKED_FIELD(fields, index) ({ \ + typeof(&(fields)[0]) _f = (fields); \ + typeof(_f[0]) __f = _f[index]; \ + BUILD_BUG_ON_MSG(__f.startbit < __f.endbit, \ + __stringify(fields) " field " __stringify(index) \ + " start bit must not be smaller than end bit"); \ + BUILD_BUG_ON_MSG(__f.size != 1 && __f.size != 2 && \ + __f.size != 4 && __f.size != 8, \ + __stringify(fields) " field " __stringify(index) \ + " has unsupported unpacked storage size"); \ + BUILD_BUG_ON_MSG(__f.startbit - __f.endbit >= BITS_PER_BYTE * __f.size, \ + __stringify(fields) " field " __stringify(index) \ + " exceeds unpacked storage size"); \ + __builtin_choose_expr(index != 0, \ + CHECK_PACKED_FIELD_OVERLAP(fields, index - 1, index), \ + 1); \ +}) + +/* Note that the packed fields may be either in ascending or descending order. + * Thus, we must check that both the first and last field wit within the + * packed buffer size. + */ +#define CHECK_PACKED_FIELDS_SIZE(fields, pbuflen) ({ \ + typeof(&(fields)[0]) _f = (fields); \ + typeof(pbuflen) _len = (pbuflen); \ + const size_t num_fields = ARRAY_SIZE(fields); \ + BUILD_BUG_ON_MSG(!__builtin_constant_p(_len), \ + __stringify(fields) " pbuflen " __stringify(pbuflen) \ + " must be a compile time constant"); \ + BUILD_BUG_ON_MSG(_f[0].startbit >= BITS_PER_BYTE * _len, \ + __stringify(fields) " first field exceeds packed buffer size"); \ + BUILD_BUG_ON_MSG(_f[num_fields - 1].startbit >= BITS_PER_BYTE * _len, \ + __stringify(fields) " last field exceeds packed buffer size"); \ +}) + #define QUIRK_MSB_ON_THE_RIGHT BIT(0) #define QUIRK_LITTLE_ENDIAN BIT(1) #define QUIRK_LSW32_IS_FIRST BIT(2) @@ -17,33 +94,361 @@ enum packing_op { UNPACK, }; -/** - * packing - Convert numbers (currently u64) between a packed and an unpacked - * format. Unpacked means laid out in memory in the CPU's native - * understanding of integers, while packed means anything else that - * requires translation. - * - * @pbuf: Pointer to a buffer holding the packed value. - * @uval: Pointer to an u64 holding the unpacked value. - * @startbit: The index (in logical notation, compensated for quirks) where - * the packed value starts within pbuf. Must be larger than, or - * equal to, endbit. - * @endbit: The index (in logical notation, compensated for quirks) where - * the packed value ends within pbuf. Must be smaller than, or equal - * to, startbit. - * @op: If PACK, then uval will be treated as const pointer and copied (packed) - * into pbuf, between startbit and endbit. - * If UNPACK, then pbuf will be treated as const pointer and the logical - * value between startbit and endbit will be copied (unpacked) to uval. - * @quirks: A bit mask of QUIRK_LITTLE_ENDIAN, QUIRK_LSW32_IS_FIRST and - * QUIRK_MSB_ON_THE_RIGHT. - * - * Return: 0 on success, EINVAL or ERANGE if called incorrectly. Assuming - * correct usage, return code may be discarded. - * If op is PACK, pbuf is modified. - * If op is UNPACK, uval is modified. - */ int packing(void *pbuf, u64 *uval, int startbit, int endbit, size_t pbuflen, enum packing_op op, u8 quirks); +int pack(void *pbuf, u64 uval, size_t startbit, size_t endbit, size_t pbuflen, + u8 quirks); + +int unpack(const void *pbuf, u64 *uval, size_t startbit, size_t endbit, + size_t pbuflen, u8 quirks); + +void pack_fields_u8(void *pbuf, size_t pbuflen, const void *ustruct, + const struct packed_field_u8 *fields, size_t num_fields, + u8 quirks); + +void pack_fields_u16(void *pbuf, size_t pbuflen, const void *ustruct, + const struct packed_field_u16 *fields, size_t num_fields, + u8 quirks); + +void unpack_fields_u8(const void *pbuf, size_t pbuflen, void *ustruct, + const struct packed_field_u8 *fields, size_t num_fields, + u8 quirks); + +void unpack_fields_u16(const void *pbuf, size_t pbuflen, void *ustruct, + const struct packed_field_u16 *fields, size_t num_fields, + u8 quirks); + +/* Do not hand-edit the following packed field check macros! + * + * They are generated using scripts/gen_packed_field_checks.c, which may be + * built via "make scripts_gen_packed_field_checks". If larger macro sizes are + * needed in the future, please use this program to re-generate the macros and + * insert them here. + */ + +#define CHECK_PACKED_FIELDS_1(fields) \ + CHECK_PACKED_FIELD(fields, 0) + +#define CHECK_PACKED_FIELDS_2(fields) do { \ + CHECK_PACKED_FIELDS_1(fields); \ + CHECK_PACKED_FIELD(fields, 1); \ +} while (0) + +#define CHECK_PACKED_FIELDS_3(fields) do { \ + CHECK_PACKED_FIELDS_2(fields); \ + CHECK_PACKED_FIELD(fields, 2); \ +} while (0) + +#define CHECK_PACKED_FIELDS_4(fields) do { \ + CHECK_PACKED_FIELDS_3(fields); \ + CHECK_PACKED_FIELD(fields, 3); \ +} while (0) + +#define CHECK_PACKED_FIELDS_5(fields) do { \ + CHECK_PACKED_FIELDS_4(fields); \ + CHECK_PACKED_FIELD(fields, 4); \ +} while (0) + +#define CHECK_PACKED_FIELDS_6(fields) do { \ + CHECK_PACKED_FIELDS_5(fields); \ + CHECK_PACKED_FIELD(fields, 5); \ +} while (0) + +#define CHECK_PACKED_FIELDS_7(fields) do { \ + CHECK_PACKED_FIELDS_6(fields); \ + CHECK_PACKED_FIELD(fields, 6); \ +} while (0) + +#define CHECK_PACKED_FIELDS_8(fields) do { \ + CHECK_PACKED_FIELDS_7(fields); \ + CHECK_PACKED_FIELD(fields, 7); \ +} while (0) + +#define CHECK_PACKED_FIELDS_9(fields) do { \ + CHECK_PACKED_FIELDS_8(fields); \ + CHECK_PACKED_FIELD(fields, 8); \ +} while (0) + +#define CHECK_PACKED_FIELDS_10(fields) do { \ + CHECK_PACKED_FIELDS_9(fields); \ + CHECK_PACKED_FIELD(fields, 9); \ +} while (0) + +#define CHECK_PACKED_FIELDS_11(fields) do { \ + CHECK_PACKED_FIELDS_10(fields); \ + CHECK_PACKED_FIELD(fields, 10); \ +} while (0) + +#define CHECK_PACKED_FIELDS_12(fields) do { \ + CHECK_PACKED_FIELDS_11(fields); \ + CHECK_PACKED_FIELD(fields, 11); \ +} while (0) + +#define CHECK_PACKED_FIELDS_13(fields) do { \ + CHECK_PACKED_FIELDS_12(fields); \ + CHECK_PACKED_FIELD(fields, 12); \ +} while (0) + +#define CHECK_PACKED_FIELDS_14(fields) do { \ + CHECK_PACKED_FIELDS_13(fields); \ + CHECK_PACKED_FIELD(fields, 13); \ +} while (0) + +#define CHECK_PACKED_FIELDS_15(fields) do { \ + CHECK_PACKED_FIELDS_14(fields); \ + CHECK_PACKED_FIELD(fields, 14); \ +} while (0) + +#define CHECK_PACKED_FIELDS_16(fields) do { \ + CHECK_PACKED_FIELDS_15(fields); \ + CHECK_PACKED_FIELD(fields, 15); \ +} while (0) + +#define CHECK_PACKED_FIELDS_17(fields) do { \ + CHECK_PACKED_FIELDS_16(fields); \ + CHECK_PACKED_FIELD(fields, 16); \ +} while (0) + +#define CHECK_PACKED_FIELDS_18(fields) do { \ + CHECK_PACKED_FIELDS_17(fields); \ + CHECK_PACKED_FIELD(fields, 17); \ +} while (0) + +#define CHECK_PACKED_FIELDS_19(fields) do { \ + CHECK_PACKED_FIELDS_18(fields); \ + CHECK_PACKED_FIELD(fields, 18); \ +} while (0) + +#define CHECK_PACKED_FIELDS_20(fields) do { \ + CHECK_PACKED_FIELDS_19(fields); \ + CHECK_PACKED_FIELD(fields, 19); \ +} while (0) + +#define CHECK_PACKED_FIELDS_21(fields) do { \ + CHECK_PACKED_FIELDS_20(fields); \ + CHECK_PACKED_FIELD(fields, 20); \ +} while (0) + +#define CHECK_PACKED_FIELDS_22(fields) do { \ + CHECK_PACKED_FIELDS_21(fields); \ + CHECK_PACKED_FIELD(fields, 21); \ +} while (0) + +#define CHECK_PACKED_FIELDS_23(fields) do { \ + CHECK_PACKED_FIELDS_22(fields); \ + CHECK_PACKED_FIELD(fields, 22); \ +} while (0) + +#define CHECK_PACKED_FIELDS_24(fields) do { \ + CHECK_PACKED_FIELDS_23(fields); \ + CHECK_PACKED_FIELD(fields, 23); \ +} while (0) + +#define CHECK_PACKED_FIELDS_25(fields) do { \ + CHECK_PACKED_FIELDS_24(fields); \ + CHECK_PACKED_FIELD(fields, 24); \ +} while (0) + +#define CHECK_PACKED_FIELDS_26(fields) do { \ + CHECK_PACKED_FIELDS_25(fields); \ + CHECK_PACKED_FIELD(fields, 25); \ +} while (0) + +#define CHECK_PACKED_FIELDS_27(fields) do { \ + CHECK_PACKED_FIELDS_26(fields); \ + CHECK_PACKED_FIELD(fields, 26); \ +} while (0) + +#define CHECK_PACKED_FIELDS_28(fields) do { \ + CHECK_PACKED_FIELDS_27(fields); \ + CHECK_PACKED_FIELD(fields, 27); \ +} while (0) + +#define CHECK_PACKED_FIELDS_29(fields) do { \ + CHECK_PACKED_FIELDS_28(fields); \ + CHECK_PACKED_FIELD(fields, 28); \ +} while (0) + +#define CHECK_PACKED_FIELDS_30(fields) do { \ + CHECK_PACKED_FIELDS_29(fields); \ + CHECK_PACKED_FIELD(fields, 29); \ +} while (0) + +#define CHECK_PACKED_FIELDS_31(fields) do { \ + CHECK_PACKED_FIELDS_30(fields); \ + CHECK_PACKED_FIELD(fields, 30); \ +} while (0) + +#define CHECK_PACKED_FIELDS_32(fields) do { \ + CHECK_PACKED_FIELDS_31(fields); \ + CHECK_PACKED_FIELD(fields, 31); \ +} while (0) + +#define CHECK_PACKED_FIELDS_33(fields) do { \ + CHECK_PACKED_FIELDS_32(fields); \ + CHECK_PACKED_FIELD(fields, 32); \ +} while (0) + +#define CHECK_PACKED_FIELDS_34(fields) do { \ + CHECK_PACKED_FIELDS_33(fields); \ + CHECK_PACKED_FIELD(fields, 33); \ +} while (0) + +#define CHECK_PACKED_FIELDS_35(fields) do { \ + CHECK_PACKED_FIELDS_34(fields); \ + CHECK_PACKED_FIELD(fields, 34); \ +} while (0) + +#define CHECK_PACKED_FIELDS_36(fields) do { \ + CHECK_PACKED_FIELDS_35(fields); \ + CHECK_PACKED_FIELD(fields, 35); \ +} while (0) + +#define CHECK_PACKED_FIELDS_37(fields) do { \ + CHECK_PACKED_FIELDS_36(fields); \ + CHECK_PACKED_FIELD(fields, 36); \ +} while (0) + +#define CHECK_PACKED_FIELDS_38(fields) do { \ + CHECK_PACKED_FIELDS_37(fields); \ + CHECK_PACKED_FIELD(fields, 37); \ +} while (0) + +#define CHECK_PACKED_FIELDS_39(fields) do { \ + CHECK_PACKED_FIELDS_38(fields); \ + CHECK_PACKED_FIELD(fields, 38); \ +} while (0) + +#define CHECK_PACKED_FIELDS_40(fields) do { \ + CHECK_PACKED_FIELDS_39(fields); \ + CHECK_PACKED_FIELD(fields, 39); \ +} while (0) + +#define CHECK_PACKED_FIELDS_41(fields) do { \ + CHECK_PACKED_FIELDS_40(fields); \ + CHECK_PACKED_FIELD(fields, 40); \ +} while (0) + +#define CHECK_PACKED_FIELDS_42(fields) do { \ + CHECK_PACKED_FIELDS_41(fields); \ + CHECK_PACKED_FIELD(fields, 41); \ +} while (0) + +#define CHECK_PACKED_FIELDS_43(fields) do { \ + CHECK_PACKED_FIELDS_42(fields); \ + CHECK_PACKED_FIELD(fields, 42); \ +} while (0) + +#define CHECK_PACKED_FIELDS_44(fields) do { \ + CHECK_PACKED_FIELDS_43(fields); \ + CHECK_PACKED_FIELD(fields, 43); \ +} while (0) + +#define CHECK_PACKED_FIELDS_45(fields) do { \ + CHECK_PACKED_FIELDS_44(fields); \ + CHECK_PACKED_FIELD(fields, 44); \ +} while (0) + +#define CHECK_PACKED_FIELDS_46(fields) do { \ + CHECK_PACKED_FIELDS_45(fields); \ + CHECK_PACKED_FIELD(fields, 45); \ +} while (0) + +#define CHECK_PACKED_FIELDS_47(fields) do { \ + CHECK_PACKED_FIELDS_46(fields); \ + CHECK_PACKED_FIELD(fields, 46); \ +} while (0) + +#define CHECK_PACKED_FIELDS_48(fields) do { \ + CHECK_PACKED_FIELDS_47(fields); \ + CHECK_PACKED_FIELD(fields, 47); \ +} while (0) + +#define CHECK_PACKED_FIELDS_49(fields) do { \ + CHECK_PACKED_FIELDS_48(fields); \ + CHECK_PACKED_FIELD(fields, 48); \ +} while (0) + +#define CHECK_PACKED_FIELDS_50(fields) do { \ + CHECK_PACKED_FIELDS_49(fields); \ + CHECK_PACKED_FIELD(fields, 49); \ +} while (0) + +#define CHECK_PACKED_FIELDS(fields) \ + __builtin_choose_expr(ARRAY_SIZE(fields) == 1, ({ CHECK_PACKED_FIELDS_1(fields); }), \ + __builtin_choose_expr(ARRAY_SIZE(fields) == 2, ({ CHECK_PACKED_FIELDS_2(fields); }), \ + __builtin_choose_expr(ARRAY_SIZE(fields) == 3, ({ CHECK_PACKED_FIELDS_3(fields); }), \ + __builtin_choose_expr(ARRAY_SIZE(fields) == 4, ({ CHECK_PACKED_FIELDS_4(fields); }), \ + __builtin_choose_expr(ARRAY_SIZE(fields) == 5, ({ CHECK_PACKED_FIELDS_5(fields); }), \ + __builtin_choose_expr(ARRAY_SIZE(fields) == 6, ({ CHECK_PACKED_FIELDS_6(fields); }), \ + __builtin_choose_expr(ARRAY_SIZE(fields) == 7, ({ CHECK_PACKED_FIELDS_7(fields); }), \ + __builtin_choose_expr(ARRAY_SIZE(fields) == 8, ({ CHECK_PACKED_FIELDS_8(fields); }), \ + __builtin_choose_expr(ARRAY_SIZE(fields) == 9, ({ CHECK_PACKED_FIELDS_9(fields); }), \ + __builtin_choose_expr(ARRAY_SIZE(fields) == 10, ({ CHECK_PACKED_FIELDS_10(fields); }), \ + __builtin_choose_expr(ARRAY_SIZE(fields) == 11, ({ CHECK_PACKED_FIELDS_11(fields); }), \ + __builtin_choose_expr(ARRAY_SIZE(fields) == 12, ({ CHECK_PACKED_FIELDS_12(fields); }), \ + __builtin_choose_expr(ARRAY_SIZE(fields) == 13, ({ CHECK_PACKED_FIELDS_13(fields); }), \ + __builtin_choose_expr(ARRAY_SIZE(fields) == 14, ({ CHECK_PACKED_FIELDS_14(fields); }), \ + __builtin_choose_expr(ARRAY_SIZE(fields) == 15, ({ CHECK_PACKED_FIELDS_15(fields); }), \ + __builtin_choose_expr(ARRAY_SIZE(fields) == 16, ({ CHECK_PACKED_FIELDS_16(fields); }), \ + __builtin_choose_expr(ARRAY_SIZE(fields) == 17, ({ CHECK_PACKED_FIELDS_17(fields); }), \ + __builtin_choose_expr(ARRAY_SIZE(fields) == 18, ({ CHECK_PACKED_FIELDS_18(fields); }), \ + __builtin_choose_expr(ARRAY_SIZE(fields) == 19, ({ CHECK_PACKED_FIELDS_19(fields); }), \ + __builtin_choose_expr(ARRAY_SIZE(fields) == 20, ({ CHECK_PACKED_FIELDS_20(fields); }), \ + __builtin_choose_expr(ARRAY_SIZE(fields) == 21, ({ CHECK_PACKED_FIELDS_21(fields); }), \ + __builtin_choose_expr(ARRAY_SIZE(fields) == 22, ({ CHECK_PACKED_FIELDS_22(fields); }), \ + __builtin_choose_expr(ARRAY_SIZE(fields) == 23, ({ CHECK_PACKED_FIELDS_23(fields); }), \ + __builtin_choose_expr(ARRAY_SIZE(fields) == 24, ({ CHECK_PACKED_FIELDS_24(fields); }), \ + __builtin_choose_expr(ARRAY_SIZE(fields) == 25, ({ CHECK_PACKED_FIELDS_25(fields); }), \ + __builtin_choose_expr(ARRAY_SIZE(fields) == 26, ({ CHECK_PACKED_FIELDS_26(fields); }), \ + __builtin_choose_expr(ARRAY_SIZE(fields) == 27, ({ CHECK_PACKED_FIELDS_27(fields); }), \ + __builtin_choose_expr(ARRAY_SIZE(fields) == 28, ({ CHECK_PACKED_FIELDS_28(fields); }), \ + __builtin_choose_expr(ARRAY_SIZE(fields) == 29, ({ CHECK_PACKED_FIELDS_29(fields); }), \ + __builtin_choose_expr(ARRAY_SIZE(fields) == 30, ({ CHECK_PACKED_FIELDS_30(fields); }), \ + __builtin_choose_expr(ARRAY_SIZE(fields) == 31, ({ CHECK_PACKED_FIELDS_31(fields); }), \ + __builtin_choose_expr(ARRAY_SIZE(fields) == 32, ({ CHECK_PACKED_FIELDS_32(fields); }), \ + __builtin_choose_expr(ARRAY_SIZE(fields) == 33, ({ CHECK_PACKED_FIELDS_33(fields); }), \ + __builtin_choose_expr(ARRAY_SIZE(fields) == 34, ({ CHECK_PACKED_FIELDS_34(fields); }), \ + __builtin_choose_expr(ARRAY_SIZE(fields) == 35, ({ CHECK_PACKED_FIELDS_35(fields); }), \ + __builtin_choose_expr(ARRAY_SIZE(fields) == 36, ({ CHECK_PACKED_FIELDS_36(fields); }), \ + __builtin_choose_expr(ARRAY_SIZE(fields) == 37, ({ CHECK_PACKED_FIELDS_37(fields); }), \ + __builtin_choose_expr(ARRAY_SIZE(fields) == 38, ({ CHECK_PACKED_FIELDS_38(fields); }), \ + __builtin_choose_expr(ARRAY_SIZE(fields) == 39, ({ CHECK_PACKED_FIELDS_39(fields); }), \ + __builtin_choose_expr(ARRAY_SIZE(fields) == 40, ({ CHECK_PACKED_FIELDS_40(fields); }), \ + __builtin_choose_expr(ARRAY_SIZE(fields) == 41, ({ CHECK_PACKED_FIELDS_41(fields); }), \ + __builtin_choose_expr(ARRAY_SIZE(fields) == 42, ({ CHECK_PACKED_FIELDS_42(fields); }), \ + __builtin_choose_expr(ARRAY_SIZE(fields) == 43, ({ CHECK_PACKED_FIELDS_43(fields); }), \ + __builtin_choose_expr(ARRAY_SIZE(fields) == 44, ({ CHECK_PACKED_FIELDS_44(fields); }), \ + __builtin_choose_expr(ARRAY_SIZE(fields) == 45, ({ CHECK_PACKED_FIELDS_45(fields); }), \ + __builtin_choose_expr(ARRAY_SIZE(fields) == 46, ({ CHECK_PACKED_FIELDS_46(fields); }), \ + __builtin_choose_expr(ARRAY_SIZE(fields) == 47, ({ CHECK_PACKED_FIELDS_47(fields); }), \ + __builtin_choose_expr(ARRAY_SIZE(fields) == 48, ({ CHECK_PACKED_FIELDS_48(fields); }), \ + __builtin_choose_expr(ARRAY_SIZE(fields) == 49, ({ CHECK_PACKED_FIELDS_49(fields); }), \ + __builtin_choose_expr(ARRAY_SIZE(fields) == 50, ({ CHECK_PACKED_FIELDS_50(fields); }), \ + ({ BUILD_BUG_ON_MSG(1, "CHECK_PACKED_FIELDS() must be regenerated to support array sizes larger than 50."); }) \ +)))))))))))))))))))))))))))))))))))))))))))))))))) + +/* End of generated content */ + +#define pack_fields(pbuf, pbuflen, ustruct, fields, quirks) \ + ({ \ + CHECK_PACKED_FIELDS(fields); \ + CHECK_PACKED_FIELDS_SIZE((fields), (pbuflen)); \ + _Generic((fields), \ + const struct packed_field_u8 * : pack_fields_u8, \ + const struct packed_field_u16 * : pack_fields_u16 \ + )((pbuf), (pbuflen), (ustruct), (fields), ARRAY_SIZE(fields), (quirks)); \ + }) + +#define unpack_fields(pbuf, pbuflen, ustruct, fields, quirks) \ + ({ \ + CHECK_PACKED_FIELDS(fields); \ + CHECK_PACKED_FIELDS_SIZE((fields), (pbuflen)); \ + _Generic((fields), \ + const struct packed_field_u8 * : unpack_fields_u8, \ + const struct packed_field_u16 * : unpack_fields_u16 \ + )((pbuf), (pbuflen), (ustruct), (fields), ARRAY_SIZE(fields), (quirks)); \ + }) + #endif diff --git a/include/linux/set_memory.h b/include/linux/set_memory.h index e7aec20fb4..3030d9245f 100644 --- a/include/linux/set_memory.h +++ b/include/linux/set_memory.h @@ -34,6 +34,12 @@ static inline int set_direct_map_default_noflush(struct page *page) return 0; } +static inline int set_direct_map_valid_noflush(struct page *page, + unsigned nr, bool valid) +{ + return 0; +} + static inline bool kernel_page_present(struct page *page) { return true; diff --git a/include/linux/text-patching.h b/include/linux/text-patching.h new file mode 100644 index 0000000000..ad5877ab08 --- /dev/null +++ b/include/linux/text-patching.h @@ -0,0 +1,15 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +#ifndef _LINUX_TEXT_PATCHING_H +#define _LINUX_TEXT_PATCHING_H + +#include + +#ifndef text_poke_copy +static inline void *text_poke_copy(void *dst, const void *src, size_t len) +{ + return memcpy(dst, src, len); +} +#define text_poke_copy text_poke_copy +#endif + +#endif /* _LINUX_TEXT_PATCHING_H */ diff --git a/include/linux/unroll.h b/include/linux/unroll.h index d42fd63663..863fb69f6a 100644 --- a/include/linux/unroll.h +++ b/include/linux/unroll.h @@ -9,6 +9,50 @@ #include +#ifdef CONFIG_CC_IS_CLANG +#define __pick_unrolled(x, y) _Pragma(#x) +#elif CONFIG_GCC_VERSION >= 80000 +#define __pick_unrolled(x, y) _Pragma(#y) +#else +#define __pick_unrolled(x, y) /* not supported */ +#endif + +/** + * unrolled - loop attributes to ask the compiler to unroll it + * + * Usage: + * + * #define BATCH 8 + * + * unrolled_count(BATCH) + * for (u32 i = 0; i < BATCH; i++) + * // loop body without cross-iteration dependencies + * + * This is only a hint and the compiler is free to disable unrolling if it + * thinks the count is suboptimal and may hurt performance and/or hugely + * increase object code size. + * Not having any cross-iteration dependencies (i.e. when iter x + 1 depends + * on what iter x will do with variables) is not a strict requirement, but + * provides best performance and object code size. + * Available only on Clang and GCC 8.x onwards. + */ + +/* Ask the compiler to pick an optimal unroll count, Clang only */ +#define unrolled \ + __pick_unrolled(clang loop unroll(enable), /* nothing */) + +/* Unroll each @n iterations of the loop */ +#define unrolled_count(n) \ + __pick_unrolled(clang loop unroll_count(n), GCC unroll n) + +/* Unroll the whole loop */ +#define unrolled_full \ + __pick_unrolled(clang loop unroll(full), GCC unroll 65534) + +/* Never unroll the loop */ +#define unrolled_none \ + __pick_unrolled(clang loop unroll(disable), GCC unroll 1) + #define UNROLL(N, MACRO, args...) CONCATENATE(__UNROLL_, N)(MACRO, args) #define __UNROLL_0(MACRO, args...) diff --git a/include/linux/vm_event_item.h b/include/linux/vm_event_item.h index f70d095809..5a37cb2b6f 100644 --- a/include/linux/vm_event_item.h +++ b/include/linux/vm_event_item.h @@ -151,6 +151,8 @@ enum vm_event_item { PGPGIN, PGPGOUT, PSWPIN, PSWPOUT, #ifdef CONFIG_X86 DIRECT_MAP_LEVEL2_SPLIT, DIRECT_MAP_LEVEL3_SPLIT, + DIRECT_MAP_LEVEL2_COLLAPSE, + DIRECT_MAP_LEVEL3_COLLAPSE, #endif #ifdef CONFIG_PER_VMA_LOCK_STATS VMA_LOCK_SUCCESS, diff --git a/include/linux/vmalloc.h b/include/linux/vmalloc.h index ad2ce7a6ab..27408f21e5 100644 --- a/include/linux/vmalloc.h +++ b/include/linux/vmalloc.h @@ -134,12 +134,6 @@ extern void vm_unmap_ram(const void *mem, unsigned int count); extern void *vm_map_ram(struct page **pages, unsigned int count, int node); extern void vm_unmap_aliases(void); -#ifdef CONFIG_MMU -extern unsigned long vmalloc_nr_pages(void); -#else -static inline unsigned long vmalloc_nr_pages(void) { return 0; } -#endif - extern void *vmalloc_noprof(unsigned long size) __alloc_size(1); #define vmalloc(...) alloc_hooks(vmalloc_noprof(__VA_ARGS__)) @@ -266,12 +260,29 @@ static inline bool is_vm_area_hugepages(const void *addr) #endif } +/* for /proc/kcore */ +long vread_iter(struct iov_iter *iter, const char *addr, size_t count); + +/* + * Internals. Don't use.. + */ +__init void vm_area_add_early(struct vm_struct *vm); +__init void vm_area_register_early(struct vm_struct *vm, size_t align); + +int register_vmap_purge_notifier(struct notifier_block *nb); +int unregister_vmap_purge_notifier(struct notifier_block *nb); + #ifdef CONFIG_MMU +#define VMALLOC_TOTAL (VMALLOC_END - VMALLOC_START) + +unsigned long vmalloc_nr_pages(void); + int vm_area_map_pages(struct vm_struct *area, unsigned long start, unsigned long end, struct page **pages); void vm_area_unmap_pages(struct vm_struct *area, unsigned long start, unsigned long end); void vunmap_range(unsigned long addr, unsigned long end); + static inline void set_vm_flush_reset_perms(void *addr) { struct vm_struct *vm = find_vm_area(addr); @@ -279,24 +290,14 @@ static inline void set_vm_flush_reset_perms(void *addr) if (vm) vm->flags |= VM_FLUSH_RESET_PERMS; } +#else /* !CONFIG_MMU */ +#define VMALLOC_TOTAL 0UL -#else -static inline void set_vm_flush_reset_perms(void *addr) -{ -} -#endif +static inline unsigned long vmalloc_nr_pages(void) { return 0; } +static inline void set_vm_flush_reset_perms(void *addr) {} +#endif /* CONFIG_MMU */ -/* for /proc/kcore */ -extern long vread_iter(struct iov_iter *iter, const char *addr, size_t count); - -/* - * Internals. Don't use.. - */ -extern __init void vm_area_add_early(struct vm_struct *vm); -extern __init void vm_area_register_early(struct vm_struct *vm, size_t align); - -#ifdef CONFIG_SMP -# ifdef CONFIG_MMU +#if defined(CONFIG_MMU) && defined(CONFIG_SMP) struct vm_struct **pcpu_get_vm_areas(const unsigned long *offsets, const size_t *sizes, int nr_vms, size_t align); @@ -311,22 +312,9 @@ pcpu_get_vm_areas(const unsigned long *offsets, return NULL; } -static inline void -pcpu_free_vm_areas(struct vm_struct **vms, int nr_vms) -{ -} -# endif +static inline void pcpu_free_vm_areas(struct vm_struct **vms, int nr_vms) {} #endif -#ifdef CONFIG_MMU -#define VMALLOC_TOTAL (VMALLOC_END - VMALLOC_START) -#else -#define VMALLOC_TOTAL 0UL -#endif - -int register_vmap_purge_notifier(struct notifier_block *nb); -int unregister_vmap_purge_notifier(struct notifier_block *nb); - #if defined(CONFIG_MMU) && defined(CONFIG_PRINTK) bool vmalloc_dump_obj(void *object); #else diff --git a/include/net/devlink.h b/include/net/devlink.h index db5eff6cb6..7f5b365547 100644 --- a/include/net/devlink.h +++ b/include/net/devlink.h @@ -1261,6 +1261,18 @@ enum devlink_trap_group_generic_id { .min_burst = _min_burst, \ } +#define devlink_fmsg_put(fmsg, name, value) ( \ + _Generic((value), \ + bool : devlink_fmsg_bool_pair_put, \ + u8 : devlink_fmsg_u8_pair_put, \ + u16 : devlink_fmsg_u32_pair_put, \ + u32 : devlink_fmsg_u32_pair_put, \ + u64 : devlink_fmsg_u64_pair_put, \ + int : devlink_fmsg_u32_pair_put, \ + char * : devlink_fmsg_string_pair_put, \ + const char * : devlink_fmsg_string_pair_put) \ + (fmsg, name, (value))) + enum { /* device supports reload operations */ DEVLINK_F_RELOAD = 1UL << 0, @@ -2007,6 +2019,7 @@ int devlink_compat_switch_id_get(struct net_device *dev, int devlink_nl_port_handle_fill(struct sk_buff *msg, struct devlink_port *devlink_port); size_t devlink_nl_port_handle_size(struct devlink_port *devlink_port); +void devlink_fmsg_dump_skb(struct devlink_fmsg *fmsg, const struct sk_buff *skb); #else diff --git a/kernel/module/debug_kmemleak.c b/kernel/module/debug_kmemleak.c index b4cc03842d..df873dad04 100644 --- a/kernel/module/debug_kmemleak.c +++ b/kernel/module/debug_kmemleak.c @@ -14,7 +14,8 @@ void kmemleak_load_module(const struct module *mod, { /* only scan writable, non-executable sections */ for_each_mod_mem_type(type) { - if (type != MOD_DATA && type != MOD_INIT_DATA) + if (type != MOD_DATA && type != MOD_INIT_DATA && + !mod->mem[type].is_rox) kmemleak_no_scan(mod->mem[type].base); } } diff --git a/kernel/module/main.c b/kernel/module/main.c index 974e44d946..6d9cbdb97b 100644 --- a/kernel/module/main.c +++ b/kernel/module/main.c @@ -1210,6 +1210,17 @@ static int module_memory_alloc(struct module *mod, enum mod_mem_type type) if (!ptr) return -ENOMEM; + if (execmem_is_rox(execmem_type)) { + int err = execmem_make_temp_rw(ptr, size); + + if (err) { + execmem_free(ptr); + return -ENOMEM; + } + + mod->mem[type].is_rox = true; + } + /* * The pointer to these blocks of memory are stored on the module * structure and we keep that around so long as the module is @@ -1221,7 +1232,8 @@ static int module_memory_alloc(struct module *mod, enum mod_mem_type type) * *do* eventually get freed, but let's just keep things simple * and avoid *any* false positives. */ - kmemleak_not_leak(ptr); + if (!mod->mem[type].is_rox) + kmemleak_not_leak(ptr); memset(ptr, 0, size); mod->mem[type].base = ptr; @@ -1229,18 +1241,24 @@ static int module_memory_alloc(struct module *mod, enum mod_mem_type type) return 0; } -static void module_memory_free(struct module *mod, enum mod_mem_type type, - bool unload_codetags) +static void module_memory_restore_rox(struct module *mod) { - void *ptr = mod->mem[type].base; + for_class_mod_mem_type(type, text) { + struct module_memory *mem = &mod->mem[type]; - if (!unload_codetags && mod_mem_type_is_core_data(type)) - return; - - execmem_free(ptr); + if (mem->is_rox) + execmem_restore_rox(mem->base, mem->size); + } } -static void free_mod_mem(struct module *mod, bool unload_codetags) +static void module_memory_free(struct module *mod, enum mod_mem_type type) +{ + struct module_memory *mem = &mod->mem[type]; + + execmem_free(mem->base); +} + +static void free_mod_mem(struct module *mod) { for_each_mod_mem_type(type) { struct module_memory *mod_mem = &mod->mem[type]; @@ -1251,25 +1269,20 @@ static void free_mod_mem(struct module *mod, bool unload_codetags) /* Free lock-classes; relies on the preceding sync_rcu(). */ lockdep_free_key_range(mod_mem->base, mod_mem->size); if (mod_mem->size) - module_memory_free(mod, type, unload_codetags); + module_memory_free(mod, type); } /* MOD_DATA hosts mod, so free it at last */ lockdep_free_key_range(mod->mem[MOD_DATA].base, mod->mem[MOD_DATA].size); - module_memory_free(mod, MOD_DATA, unload_codetags); + module_memory_free(mod, MOD_DATA); } /* Free a module, remove from lists, etc. */ static void free_module(struct module *mod) { - bool unload_codetags; - trace_module_free(mod); - unload_codetags = codetag_unload_module(mod); - if (!unload_codetags) - pr_warn("%s: memory allocation(s) from the module still alive, cannot unload cleanly\n", - mod->name); + codetag_unload_module(mod); mod_sysfs_teardown(mod); @@ -1312,7 +1325,7 @@ static void free_module(struct module *mod) kfree(mod->args); percpu_modfree(mod); - free_mod_mem(mod, unload_codetags); + free_mod_mem(mod); } void *__symbol_get(const char *symbol) @@ -1577,6 +1590,20 @@ static void __layout_sections(struct module *mod, struct load_info *info, bool i if (WARN_ON_ONCE(type == MOD_INVALID)) continue; + /* + * Do not allocate codetag memory as we load it into + * preallocated contiguous memory. + */ + if (codetag_needs_module_section(mod, sname, s->sh_size)) { + /* + * s->sh_entsize won't be used but populate the + * type field to avoid confusion. + */ + s->sh_entsize = ((unsigned long)(type) & SH_ENTSIZE_TYPE_MASK) + << SH_ENTSIZE_TYPE_SHIFT; + continue; + } + s->sh_entsize = module_get_offset_and_type(mod, type, s, i); pr_debug("\t%s\n", sname); } @@ -2251,6 +2278,7 @@ static int move_module(struct module *mod, struct load_info *info) int i; enum mod_mem_type t = 0; int ret = -ENOMEM; + bool codetag_section_found = false; for_each_mod_mem_type(type) { if (!mod->mem[type].size) { @@ -2261,7 +2289,7 @@ static int move_module(struct module *mod, struct load_info *info) ret = module_memory_alloc(mod, type); if (ret) { t = type; - goto out_enomem; + goto out_err; } } @@ -2270,12 +2298,34 @@ static int move_module(struct module *mod, struct load_info *info) for (i = 0; i < info->hdr->e_shnum; i++) { void *dest; Elf_Shdr *shdr = &info->sechdrs[i]; - enum mod_mem_type type = shdr->sh_entsize >> SH_ENTSIZE_TYPE_SHIFT; + const char *sname; if (!(shdr->sh_flags & SHF_ALLOC)) continue; - dest = mod->mem[type].base + (shdr->sh_entsize & SH_ENTSIZE_OFFSET_MASK); + sname = info->secstrings + shdr->sh_name; + /* + * Load codetag sections separately as they might still be used + * after module unload. + */ + if (codetag_needs_module_section(mod, sname, shdr->sh_size)) { + dest = codetag_alloc_module_section(mod, sname, shdr->sh_size, + arch_mod_section_prepend(mod, i), shdr->sh_addralign); + if (WARN_ON(!dest)) { + ret = -EINVAL; + goto out_err; + } + if (IS_ERR(dest)) { + ret = PTR_ERR(dest); + goto out_err; + } + codetag_section_found = true; + } else { + enum mod_mem_type type = shdr->sh_entsize >> SH_ENTSIZE_TYPE_SHIFT; + unsigned long offset = shdr->sh_entsize & SH_ENTSIZE_OFFSET_MASK; + + dest = mod->mem[type].base + offset; + } if (shdr->sh_type != SHT_NOBITS) { /* @@ -2287,7 +2337,7 @@ static int move_module(struct module *mod, struct load_info *info) if (i == info->index.mod && (WARN_ON_ONCE(shdr->sh_size != sizeof(struct module)))) { ret = -ENOEXEC; - goto out_enomem; + goto out_err; } memcpy(dest, (void *)shdr->sh_addr, shdr->sh_size); } @@ -2303,9 +2353,13 @@ static int move_module(struct module *mod, struct load_info *info) } return 0; -out_enomem: +out_err: + module_memory_restore_rox(mod); for (t--; t >= 0; t--) - module_memory_free(mod, t, true); + module_memory_free(mod, t); + if (codetag_section_found) + codetag_free_module_sections(mod); + return ret; } @@ -2426,6 +2480,8 @@ static struct module *layout_and_allocate(struct load_info *info, int flags) /* Module has been copied to its final place now: return it. */ mod = (void *)info->sechdrs[info->index.mod].sh_addr; kmemleak_load_module(mod, info); + codetag_module_replaced(info->mod, mod); + return mod; } @@ -2435,7 +2491,7 @@ static void module_deallocate(struct module *mod, struct load_info *info) percpu_modfree(mod); module_arch_freeing_init(mod); - free_mod_mem(mod, true); + free_mod_mem(mod); } int __weak module_finalize(const Elf_Ehdr *hdr, @@ -3059,6 +3115,7 @@ static int load_module(struct load_info *info, const char __user *uargs, mod->mem[type].size); } + module_memory_restore_rox(mod); module_deallocate(mod, info); free_copy: /* diff --git a/kernel/module/strict_rwx.c b/kernel/module/strict_rwx.c index c45caa4690..ce47b6346f 100644 --- a/kernel/module/strict_rwx.c +++ b/kernel/module/strict_rwx.c @@ -9,6 +9,7 @@ #include #include #include +#include #include "internal.h" static int module_set_memory(const struct module *mod, enum mod_mem_type type, @@ -32,9 +33,12 @@ static int module_set_memory(const struct module *mod, enum mod_mem_type type, int module_enable_text_rox(const struct module *mod) { for_class_mod_mem_type(type, text) { + const struct module_memory *mem = &mod->mem[type]; int ret; - if (IS_ENABLED(CONFIG_STRICT_MODULE_RWX)) + if (mem->is_rox) + ret = execmem_restore_rox(mem->base, mem->size); + else if (IS_ENABLED(CONFIG_STRICT_MODULE_RWX)) ret = module_set_memory(mod, type, set_memory_rox); else ret = module_set_memory(mod, type, set_memory_x); diff --git a/lib/Kconfig b/lib/Kconfig index b38849af6f..50d85f38b5 100644 --- a/lib/Kconfig +++ b/lib/Kconfig @@ -40,6 +40,18 @@ config PACKING When in doubt, say N. +config PACKING_KUNIT_TEST + tristate "KUnit tests for packing library" if !KUNIT_ALL_TESTS + depends on PACKING && KUNIT + default KUNIT_ALL_TESTS + help + This builds KUnit tests for the packing library. + + For more information on KUnit and unit tests in general, + please refer to the KUnit documentation in Documentation/dev-tools/kunit/. + + When in doubt, say N. + config BITREVERSE tristate diff --git a/lib/Makefile b/lib/Makefile index 773adf88af..811ba12c8c 100644 --- a/lib/Makefile +++ b/lib/Makefile @@ -154,6 +154,7 @@ obj-$(CONFIG_DEBUG_OBJECTS) += debugobjects.o obj-$(CONFIG_BITREVERSE) += bitrev.o obj-$(CONFIG_LINEAR_RANGES) += linear_ranges.o obj-$(CONFIG_PACKING) += packing.o +obj-$(CONFIG_PACKING_KUNIT_TEST) += packing_test.o obj-$(CONFIG_CRC_CCITT) += crc-ccitt.o obj-$(CONFIG_CRC16) += crc16.o obj-$(CONFIG_CRC_T10DIF)+= crc-t10dif.o diff --git a/lib/alloc_tag.c b/lib/alloc_tag.c index 81e5f9a70f..5f9cd1642d 100644 --- a/lib/alloc_tag.c +++ b/lib/alloc_tag.c @@ -1,5 +1,6 @@ // SPDX-License-Identifier: GPL-2.0-only #include +#include #include #include #include @@ -8,6 +9,15 @@ #include #include +#define ALLOCINFO_FILE_NAME "allocinfo" +#define MODULE_ALLOC_TAG_VMAP_SIZE (100000UL * sizeof(struct alloc_tag)) + +#ifdef CONFIG_MEM_ALLOC_PROFILING_ENABLED_BY_DEFAULT +static bool mem_profiling_support __meminitdata = true; +#else +static bool mem_profiling_support __meminitdata; +#endif + static struct codetag_type *alloc_tag_cttype; DEFINE_PER_CPU(struct alloc_tag_counters, _shared_alloc_tag); @@ -144,41 +154,247 @@ size_t alloc_tag_top_users(struct codetag_bytes *tags, size_t count, bool can_sl return nr; } +static void __init shutdown_mem_profiling(void) +{ + if (mem_alloc_profiling_enabled()) + static_branch_disable(&mem_alloc_profiling_key); + + if (!mem_profiling_support) + return; + + mem_profiling_support = false; +} + static void __init procfs_init(void) { - proc_create_seq("allocinfo", 0400, NULL, &allocinfo_seq_op); + if (!mem_profiling_support) + return; + + if (!proc_create_seq(ALLOCINFO_FILE_NAME, 0400, NULL, &allocinfo_seq_op)) { + pr_err("Failed to create %s file\n", ALLOCINFO_FILE_NAME); + shutdown_mem_profiling(); + } } -static bool alloc_tag_module_unload(struct codetag_type *cttype, - struct codetag_module *cmod) +#ifdef CONFIG_MODULES + +static struct maple_tree mod_area_mt = MTREE_INIT(mod_area_mt, MT_FLAGS_ALLOC_RANGE); +/* A dummy object used to indicate an unloaded module */ +static struct module unloaded_mod; +/* A dummy object used to indicate a module prepended area */ +static struct module prepend_mod; + +static struct alloc_tag_module_section module_tags; + +static bool needs_section_mem(struct module *mod, unsigned long size) { - struct codetag_iterator iter = codetag_get_ct_iter(cttype); - struct alloc_tag_counters counter; - bool module_unused = true; - struct alloc_tag *tag; - struct codetag *ct; + return size >= sizeof(struct alloc_tag); +} - for (ct = codetag_next_ct(&iter); ct; ct = codetag_next_ct(&iter)) { - if (iter.cmod != cmod) - continue; +static struct alloc_tag *find_used_tag(struct alloc_tag *from, struct alloc_tag *to) +{ + while (from <= to) { + struct alloc_tag_counters counter; - tag = ct_to_alloc_tag(ct); - counter = alloc_tag_read(tag); - - if (WARN(counter.bytes, - "%s:%u module %s func:%s has %llu allocated at module unload", - ct->filename, ct->lineno, ct->modname, ct->function, counter.bytes)) - module_unused = false; + counter = alloc_tag_read(from); + if (counter.bytes) + return from; + from++; } - return module_unused; + return NULL; } -#ifdef CONFIG_MEM_ALLOC_PROFILING_ENABLED_BY_DEFAULT -static bool mem_profiling_support __meminitdata = true; -#else -static bool mem_profiling_support __meminitdata; -#endif +/* Called with mod_area_mt locked */ +static void clean_unused_module_areas_locked(void) +{ + MA_STATE(mas, &mod_area_mt, 0, module_tags.size); + struct module *val; + + mas_for_each(&mas, val, module_tags.size) { + if (val != &unloaded_mod) + continue; + + /* Release area if all tags are unused */ + if (!find_used_tag((struct alloc_tag *)(module_tags.start_addr + mas.index), + (struct alloc_tag *)(module_tags.start_addr + mas.last))) + mas_erase(&mas); + } +} + +/* Called with mod_area_mt locked */ +static bool find_aligned_area(struct ma_state *mas, unsigned long section_size, + unsigned long size, unsigned int prepend, unsigned long align) +{ + bool cleanup_done = false; + +repeat: + /* Try finding exact size and hope the start is aligned */ + if (!mas_empty_area(mas, 0, section_size - 1, prepend + size)) { + if (IS_ALIGNED(mas->index + prepend, align)) + return true; + + /* Try finding larger area to align later */ + mas_reset(mas); + if (!mas_empty_area(mas, 0, section_size - 1, + size + prepend + align - 1)) + return true; + } + + /* No free area, try cleanup stale data and repeat the search once */ + if (!cleanup_done) { + clean_unused_module_areas_locked(); + cleanup_done = true; + mas_reset(mas); + goto repeat; + } + + return false; +} + +static void *reserve_module_tags(struct module *mod, unsigned long size, + unsigned int prepend, unsigned long align) +{ + unsigned long section_size = module_tags.end_addr - module_tags.start_addr; + MA_STATE(mas, &mod_area_mt, 0, section_size - 1); + unsigned long offset; + void *ret = NULL; + + /* If no tags return error */ + if (size < sizeof(struct alloc_tag)) + return ERR_PTR(-EINVAL); + + /* + * align is always power of 2, so we can use IS_ALIGNED and ALIGN. + * align 0 or 1 means no alignment, to simplify set to 1. + */ + if (!align) + align = 1; + + mas_lock(&mas); + if (!find_aligned_area(&mas, section_size, size, prepend, align)) { + ret = ERR_PTR(-ENOMEM); + goto unlock; + } + + /* Mark found area as reserved */ + offset = mas.index; + offset += prepend; + offset = ALIGN(offset, align); + if (offset != mas.index) { + unsigned long pad_start = mas.index; + + mas.last = offset - 1; + mas_store(&mas, &prepend_mod); + if (mas_is_err(&mas)) { + ret = ERR_PTR(xa_err(mas.node)); + goto unlock; + } + mas.index = offset; + mas.last = offset + size - 1; + mas_store(&mas, mod); + if (mas_is_err(&mas)) { + mas.index = pad_start; + mas_erase(&mas); + ret = ERR_PTR(xa_err(mas.node)); + } + } else { + mas.last = offset + size - 1; + mas_store(&mas, mod); + if (mas_is_err(&mas)) + ret = ERR_PTR(xa_err(mas.node)); + } +unlock: + mas_unlock(&mas); + + if (IS_ERR(ret)) + return ret; + + if (module_tags.size < offset + size) + module_tags.size = offset + size; + + return (struct alloc_tag *)(module_tags.start_addr + offset); +} + +static void release_module_tags(struct module *mod, bool used) +{ + MA_STATE(mas, &mod_area_mt, module_tags.size, module_tags.size); + struct alloc_tag *tag; + struct module *val; + + mas_lock(&mas); + mas_for_each_rev(&mas, val, 0) + if (val == mod) + break; + + if (!val) /* module not found */ + goto out; + + if (!used) + goto release_area; + + /* Find out if the area is used */ + tag = find_used_tag((struct alloc_tag *)(module_tags.start_addr + mas.index), + (struct alloc_tag *)(module_tags.start_addr + mas.last)); + if (tag) { + struct alloc_tag_counters counter = alloc_tag_read(tag); + + pr_info("%s:%u module %s func:%s has %llu allocated at module unload\n", + tag->ct.filename, tag->ct.lineno, tag->ct.modname, + tag->ct.function, counter.bytes); + } else { + used = false; + } +release_area: + mas_store(&mas, used ? &unloaded_mod : NULL); + val = mas_prev_range(&mas, 0); + if (val == &prepend_mod) + mas_store(&mas, NULL); +out: + mas_unlock(&mas); +} + +static void replace_module(struct module *mod, struct module *new_mod) +{ + MA_STATE(mas, &mod_area_mt, 0, module_tags.size); + struct module *val; + + mas_lock(&mas); + mas_for_each(&mas, val, module_tags.size) { + if (val != mod) + continue; + + mas_store_gfp(&mas, new_mod, GFP_KERNEL); + break; + } + mas_unlock(&mas); +} + +static int __init alloc_mod_tags_mem(void) +{ + /* Allocate space to copy allocation tags */ + module_tags.start_addr = (unsigned long)execmem_alloc(EXECMEM_MODULE_DATA, + MODULE_ALLOC_TAG_VMAP_SIZE); + if (!module_tags.start_addr) + return -ENOMEM; + + module_tags.end_addr = module_tags.start_addr + MODULE_ALLOC_TAG_VMAP_SIZE; + + return 0; +} + +static void __init free_mod_tags_mem(void) +{ + execmem_free((void *)module_tags.start_addr); + module_tags.start_addr = 0; +} + +#else /* CONFIG_MODULES */ + +static inline int alloc_mod_tags_mem(void) { return 0; } +static inline void free_mod_tags_mem(void) {} + +#endif /* CONFIG_MODULES */ static int __init setup_early_mem_profiling(char *str) { @@ -255,14 +471,26 @@ static inline void sysctl_init(void) {} static int __init alloc_tag_init(void) { const struct codetag_type_desc desc = { - .section = "alloc_tags", - .tag_size = sizeof(struct alloc_tag), - .module_unload = alloc_tag_module_unload, + .section = ALLOC_TAG_SECTION_NAME, + .tag_size = sizeof(struct alloc_tag), +#ifdef CONFIG_MODULES + .needs_section_mem = needs_section_mem, + .alloc_section_mem = reserve_module_tags, + .free_section_mem = release_module_tags, + .module_replaced = replace_module, +#endif }; + int res; + + res = alloc_mod_tags_mem(); + if (res) + return res; alloc_tag_cttype = codetag_register_type(&desc); - if (IS_ERR(alloc_tag_cttype)) + if (IS_ERR(alloc_tag_cttype)) { + free_mod_tags_mem(); return PTR_ERR(alloc_tag_cttype); + } sysctl_init(); procfs_init(); diff --git a/lib/codetag.c b/lib/codetag.c index d1fbbb7c2e..7455b966ca 100644 --- a/lib/codetag.c +++ b/lib/codetag.c @@ -207,6 +207,94 @@ static int codetag_module_init(struct codetag_type *cttype, struct module *mod) } #ifdef CONFIG_MODULES +#define CODETAG_SECTION_PREFIX ".codetag." + +/* Some codetag types need a separate module section */ +bool codetag_needs_module_section(struct module *mod, const char *name, + unsigned long size) +{ + const char *type_name; + struct codetag_type *cttype; + bool ret = false; + + if (strncmp(name, CODETAG_SECTION_PREFIX, strlen(CODETAG_SECTION_PREFIX))) + return false; + + type_name = name + strlen(CODETAG_SECTION_PREFIX); + mutex_lock(&codetag_lock); + list_for_each_entry(cttype, &codetag_types, link) { + if (strcmp(type_name, cttype->desc.section) == 0) { + if (!cttype->desc.needs_section_mem) + break; + + down_write(&cttype->mod_lock); + ret = cttype->desc.needs_section_mem(mod, size); + up_write(&cttype->mod_lock); + break; + } + } + mutex_unlock(&codetag_lock); + + return ret; +} + +void *codetag_alloc_module_section(struct module *mod, const char *name, + unsigned long size, unsigned int prepend, + unsigned long align) +{ + const char *type_name = name + strlen(CODETAG_SECTION_PREFIX); + struct codetag_type *cttype; + void *ret = ERR_PTR(-EINVAL); + + mutex_lock(&codetag_lock); + list_for_each_entry(cttype, &codetag_types, link) { + if (strcmp(type_name, cttype->desc.section) == 0) { + if (WARN_ON(!cttype->desc.alloc_section_mem)) + break; + + down_write(&cttype->mod_lock); + ret = cttype->desc.alloc_section_mem(mod, size, prepend, align); + up_write(&cttype->mod_lock); + break; + } + } + mutex_unlock(&codetag_lock); + + return ret; +} + +void codetag_free_module_sections(struct module *mod) +{ + struct codetag_type *cttype; + + mutex_lock(&codetag_lock); + list_for_each_entry(cttype, &codetag_types, link) { + if (!cttype->desc.free_section_mem) + continue; + + down_write(&cttype->mod_lock); + cttype->desc.free_section_mem(mod, false); + up_write(&cttype->mod_lock); + } + mutex_unlock(&codetag_lock); +} + +void codetag_module_replaced(struct module *mod, struct module *new_mod) +{ + struct codetag_type *cttype; + + mutex_lock(&codetag_lock); + list_for_each_entry(cttype, &codetag_types, link) { + if (!cttype->desc.module_replaced) + continue; + + down_write(&cttype->mod_lock); + cttype->desc.module_replaced(mod, new_mod); + up_write(&cttype->mod_lock); + } + mutex_unlock(&codetag_lock); +} + void codetag_load_module(struct module *mod) { struct codetag_type *cttype; @@ -220,13 +308,12 @@ void codetag_load_module(struct module *mod) mutex_unlock(&codetag_lock); } -bool codetag_unload_module(struct module *mod) +void codetag_unload_module(struct module *mod) { struct codetag_type *cttype; - bool unload_ok = true; if (!mod) - return true; + return; /* await any module's kfree_rcu() operations to complete */ kvfree_rcu_barrier(); @@ -246,18 +333,17 @@ bool codetag_unload_module(struct module *mod) } if (found) { if (cttype->desc.module_unload) - if (!cttype->desc.module_unload(cttype, cmod)) - unload_ok = false; + cttype->desc.module_unload(cttype, cmod); cttype->count -= range_size(cttype, &cmod->range); idr_remove(&cttype->mod_idr, mod_id); kfree(cmod); } up_write(&cttype->mod_lock); + if (found && cttype->desc.free_section_mem) + cttype->desc.free_section_mem(mod, true); } mutex_unlock(&codetag_lock); - - return unload_ok; } #endif /* CONFIG_MODULES */ diff --git a/lib/packing.c b/lib/packing.c index 3f656167c1..bb1643d9e6 100644 --- a/lib/packing.c +++ b/lib/packing.c @@ -5,47 +5,310 @@ #include #include #include +#include #include #include #include -static int get_le_offset(int offset) -{ - int closest_multiple_of_4; +#define __pack_fields(pbuf, pbuflen, ustruct, fields, num_fields, quirks) \ + ({ \ + for (size_t i = 0; i < (num_fields); i++) { \ + typeof(&(fields)[0]) field = &(fields)[i]; \ + u64 uval; \ + \ + uval = ustruct_field_to_u64(ustruct, field->offset, field->size); \ + \ + __pack(pbuf, uval, field->startbit, field->endbit, \ + pbuflen, quirks); \ + } \ + }) - closest_multiple_of_4 = (offset / 4) * 4; - offset -= closest_multiple_of_4; - return closest_multiple_of_4 + (3 - offset); +#define __unpack_fields(pbuf, pbuflen, ustruct, fields, num_fields, quirks) \ + ({ \ + for (size_t i = 0; i < (num_fields); i++) { \ + typeof(&(fields)[0]) field = &fields[i]; \ + u64 uval; \ + \ + __unpack(pbuf, &uval, field->startbit, field->endbit, \ + pbuflen, quirks); \ + \ + u64_to_ustruct_field(ustruct, field->offset, field->size, uval); \ + } \ + }) + +/** + * calculate_box_addr - Determine physical location of byte in buffer + * @box: Index of byte within buffer seen as a logical big-endian big number + * @len: Size of buffer in bytes + * @quirks: mask of QUIRK_LSW32_IS_FIRST and QUIRK_LITTLE_ENDIAN + * + * Function interprets the buffer as a @len byte sized big number, and returns + * the physical offset of the @box logical octet within it. Internally, it + * treats the big number as groups of 4 bytes. If @len is not a multiple of 4, + * the last group may be shorter. + * + * @QUIRK_LSW32_IS_FIRST gives the ordering of groups of 4 octets relative to + * each other. If set, the most significant group of 4 octets is last in the + * buffer (and may be truncated if @len is not a multiple of 4). + * + * @QUIRK_LITTLE_ENDIAN gives the ordering of bytes within each group of 4. + * If set, the most significant byte is last in the group. If @len takes the + * form of 4k+3, the last group will only be able to represent 24 bits, and its + * most significant octet is byte 2. + * + * Return: the physical offset into the buffer corresponding to the logical box. + */ +static size_t calculate_box_addr(size_t box, size_t len, u8 quirks) +{ + size_t offset_of_group, offset_in_group, this_group = box / 4; + size_t group_size; + + if (quirks & QUIRK_LSW32_IS_FIRST) + offset_of_group = this_group * 4; + else + offset_of_group = len - ((this_group + 1) * 4); + + group_size = min(4, len - offset_of_group); + + if (quirks & QUIRK_LITTLE_ENDIAN) + offset_in_group = box - this_group * 4; + else + offset_in_group = group_size - (box - this_group * 4) - 1; + + return offset_of_group + offset_in_group; } -static int get_reverse_lsw32_offset(int offset, size_t len) +static void __pack(void *pbuf, u64 uval, size_t startbit, size_t endbit, + size_t pbuflen, u8 quirks) { - int closest_multiple_of_4; - int word_index; + /* Logical byte indices corresponding to the + * start and end of the field. + */ + int plogical_first_u8 = startbit / BITS_PER_BYTE; + int plogical_last_u8 = endbit / BITS_PER_BYTE; + int value_width = startbit - endbit + 1; + int box; - word_index = offset / 4; - closest_multiple_of_4 = word_index * 4; - offset -= closest_multiple_of_4; - word_index = (len / 4) - word_index - 1; - return word_index * 4 + offset; + /* Check if "uval" fits in "value_width" bits. + * The test only works for value_width < 64, but in the latter case, + * any 64-bit uval will surely fit. + */ + WARN(value_width < 64 && uval >= (1ull << value_width), + "Cannot store 0x%llx inside bits %zu-%zu - will truncate\n", + uval, startbit, endbit); + + /* Iterate through an idealistic view of the pbuf as an u64 with + * no quirks, u8 by u8 (aligned at u8 boundaries), from high to low + * logical bit significance. "box" denotes the current logical u8. + */ + for (box = plogical_first_u8; box >= plogical_last_u8; box--) { + /* Bit indices into the currently accessed 8-bit box */ + size_t box_start_bit, box_end_bit, box_addr; + u8 box_mask; + /* Corresponding bits from the unpacked u64 parameter */ + size_t proj_start_bit, proj_end_bit; + u64 proj_mask; + u64 pval; + + /* This u8 may need to be accessed in its entirety + * (from bit 7 to bit 0), or not, depending on the + * input arguments startbit and endbit. + */ + if (box == plogical_first_u8) + box_start_bit = startbit % BITS_PER_BYTE; + else + box_start_bit = 7; + if (box == plogical_last_u8) + box_end_bit = endbit % BITS_PER_BYTE; + else + box_end_bit = 0; + + /* We have determined the box bit start and end. + * Now we calculate where this (masked) u8 box would fit + * in the unpacked (CPU-readable) u64 - the u8 box's + * projection onto the unpacked u64. Though the + * box is u8, the projection is u64 because it may fall + * anywhere within the unpacked u64. + */ + proj_start_bit = ((box * BITS_PER_BYTE) + box_start_bit) - endbit; + proj_end_bit = ((box * BITS_PER_BYTE) + box_end_bit) - endbit; + proj_mask = GENMASK_ULL(proj_start_bit, proj_end_bit); + box_mask = GENMASK(box_start_bit, box_end_bit); + + /* Determine the offset of the u8 box inside the pbuf, + * adjusted for quirks. The adjusted box_addr will be used for + * effective addressing inside the pbuf (so it's not + * logical any longer). + */ + box_addr = calculate_box_addr(box, pbuflen, quirks); + + /* Write to pbuf, read from uval */ + pval = uval & proj_mask; + pval >>= proj_end_bit; + pval <<= box_end_bit; + + if (quirks & QUIRK_MSB_ON_THE_RIGHT) { + pval = bitrev8(pval); + box_mask = bitrev8(box_mask); + } + + ((u8 *)pbuf)[box_addr] &= ~box_mask; + ((u8 *)pbuf)[box_addr] |= pval; + } } -static void adjust_for_msb_right_quirk(u64 *to_write, int *box_start_bit, - int *box_end_bit, u8 *box_mask) +/** + * pack - Pack u64 number into bitfield of buffer. + * + * @pbuf: Pointer to a buffer holding the packed value. + * @uval: CPU-readable unpacked value to pack. + * @startbit: The index (in logical notation, compensated for quirks) where + * the packed value starts within pbuf. Must be larger than, or + * equal to, endbit. + * @endbit: The index (in logical notation, compensated for quirks) where + * the packed value ends within pbuf. Must be smaller than, or equal + * to, startbit. + * @pbuflen: The length in bytes of the packed buffer pointed to by @pbuf. + * @quirks: A bit mask of QUIRK_LITTLE_ENDIAN, QUIRK_LSW32_IS_FIRST and + * QUIRK_MSB_ON_THE_RIGHT. + * + * Return: 0 on success, EINVAL or ERANGE if called incorrectly. Assuming + * correct usage, return code may be discarded. The @pbuf memory will + * be modified on success. + */ +int pack(void *pbuf, u64 uval, size_t startbit, size_t endbit, size_t pbuflen, + u8 quirks) { - int box_bit_width = *box_start_bit - *box_end_bit + 1; - int new_box_start_bit, new_box_end_bit; + /* startbit is expected to be larger than endbit, and both are + * expected to be within the logically addressable range of the buffer. + */ + if (unlikely(startbit < endbit || startbit >= BITS_PER_BYTE * pbuflen)) + /* Invalid function call */ + return -EINVAL; - *to_write >>= *box_end_bit; - *to_write = bitrev8(*to_write) >> (8 - box_bit_width); - *to_write <<= *box_end_bit; + if (unlikely(startbit - endbit >= 64)) + return -ERANGE; - new_box_end_bit = box_bit_width - *box_start_bit - 1; - new_box_start_bit = box_bit_width - *box_end_bit - 1; - *box_mask = GENMASK_ULL(new_box_start_bit, new_box_end_bit); - *box_start_bit = new_box_start_bit; - *box_end_bit = new_box_end_bit; + __pack(pbuf, uval, startbit, endbit, pbuflen, quirks); + + return 0; } +EXPORT_SYMBOL(pack); + +static void __unpack(const void *pbuf, u64 *uval, size_t startbit, size_t endbit, + size_t pbuflen, u8 quirks) +{ + /* Logical byte indices corresponding to the + * start and end of the field. + */ + int plogical_first_u8 = startbit / BITS_PER_BYTE; + int plogical_last_u8 = endbit / BITS_PER_BYTE; + int box; + + /* Initialize parameter */ + *uval = 0; + + /* Iterate through an idealistic view of the pbuf as an u64 with + * no quirks, u8 by u8 (aligned at u8 boundaries), from high to low + * logical bit significance. "box" denotes the current logical u8. + */ + for (box = plogical_first_u8; box >= plogical_last_u8; box--) { + /* Bit indices into the currently accessed 8-bit box */ + size_t box_start_bit, box_end_bit, box_addr; + u8 box_mask; + /* Corresponding bits from the unpacked u64 parameter */ + size_t proj_start_bit, proj_end_bit; + u64 proj_mask; + u64 pval; + + /* This u8 may need to be accessed in its entirety + * (from bit 7 to bit 0), or not, depending on the + * input arguments startbit and endbit. + */ + if (box == plogical_first_u8) + box_start_bit = startbit % BITS_PER_BYTE; + else + box_start_bit = 7; + if (box == plogical_last_u8) + box_end_bit = endbit % BITS_PER_BYTE; + else + box_end_bit = 0; + + /* We have determined the box bit start and end. + * Now we calculate where this (masked) u8 box would fit + * in the unpacked (CPU-readable) u64 - the u8 box's + * projection onto the unpacked u64. Though the + * box is u8, the projection is u64 because it may fall + * anywhere within the unpacked u64. + */ + proj_start_bit = ((box * BITS_PER_BYTE) + box_start_bit) - endbit; + proj_end_bit = ((box * BITS_PER_BYTE) + box_end_bit) - endbit; + proj_mask = GENMASK_ULL(proj_start_bit, proj_end_bit); + box_mask = GENMASK(box_start_bit, box_end_bit); + + /* Determine the offset of the u8 box inside the pbuf, + * adjusted for quirks. The adjusted box_addr will be used for + * effective addressing inside the pbuf (so it's not + * logical any longer). + */ + box_addr = calculate_box_addr(box, pbuflen, quirks); + + /* Read from pbuf, write to uval */ + pval = ((u8 *)pbuf)[box_addr]; + + if (quirks & QUIRK_MSB_ON_THE_RIGHT) + pval = bitrev8(pval); + + pval &= box_mask; + + pval >>= box_end_bit; + pval <<= proj_end_bit; + *uval &= ~proj_mask; + *uval |= pval; + } +} + +/** + * unpack - Unpack u64 number from packed buffer. + * + * @pbuf: Pointer to a buffer holding the packed value. + * @uval: Pointer to an u64 holding the unpacked value. + * @startbit: The index (in logical notation, compensated for quirks) where + * the packed value starts within pbuf. Must be larger than, or + * equal to, endbit. + * @endbit: The index (in logical notation, compensated for quirks) where + * the packed value ends within pbuf. Must be smaller than, or equal + * to, startbit. + * @pbuflen: The length in bytes of the packed buffer pointed to by @pbuf. + * @quirks: A bit mask of QUIRK_LITTLE_ENDIAN, QUIRK_LSW32_IS_FIRST and + * QUIRK_MSB_ON_THE_RIGHT. + * + * Return: 0 on success, EINVAL or ERANGE if called incorrectly. Assuming + * correct usage, return code may be discarded. The @uval will be + * modified on success. + */ +int unpack(const void *pbuf, u64 *uval, size_t startbit, size_t endbit, + size_t pbuflen, u8 quirks) +{ + /* width of the field to access in the pbuf */ + u64 value_width; + + /* startbit is expected to be larger than endbit, and both are + * expected to be within the logically addressable range of the buffer. + */ + if (startbit < endbit || startbit >= BITS_PER_BYTE * pbuflen) + /* Invalid function call */ + return -EINVAL; + + value_width = startbit - endbit + 1; + if (value_width > 64) + return -ERANGE; + + __unpack(pbuf, uval, startbit, endbit, pbuflen, quirks); + + return 0; +} +EXPORT_SYMBOL(unpack); /** * packing - Convert numbers (currently u64) between a packed and an unpacked @@ -69,6 +332,8 @@ static void adjust_for_msb_right_quirk(u64 *to_write, int *box_start_bit, * @quirks: A bit mask of QUIRK_LITTLE_ENDIAN, QUIRK_LSW32_IS_FIRST and * QUIRK_MSB_ON_THE_RIGHT. * + * Note: this is deprecated, prefer to use pack() or unpack() in new code. + * * Return: 0 on success, EINVAL or ERANGE if called incorrectly. Assuming * correct usage, return code may be discarded. * If op is PACK, pbuf is modified. @@ -77,125 +342,137 @@ static void adjust_for_msb_right_quirk(u64 *to_write, int *box_start_bit, int packing(void *pbuf, u64 *uval, int startbit, int endbit, size_t pbuflen, enum packing_op op, u8 quirks) { - /* Number of bits for storing "uval" - * also width of the field to access in the pbuf - */ - u64 value_width; - /* Logical byte indices corresponding to the - * start and end of the field. - */ - int plogical_first_u8, plogical_last_u8, box; + if (op == PACK) + return pack(pbuf, *uval, startbit, endbit, pbuflen, quirks); - /* startbit is expected to be larger than endbit */ - if (startbit < endbit) - /* Invalid function call */ - return -EINVAL; - - value_width = startbit - endbit + 1; - if (value_width > 64) - return -ERANGE; - - /* Check if "uval" fits in "value_width" bits. - * If value_width is 64, the check will fail, but any - * 64-bit uval will surely fit. - */ - if (op == PACK && value_width < 64 && (*uval >= (1ull << value_width))) - /* Cannot store "uval" inside "value_width" bits. - * Truncating "uval" is most certainly not desirable, - * so simply erroring out is appropriate. - */ - return -ERANGE; - - /* Initialize parameter */ - if (op == UNPACK) - *uval = 0; - - /* Iterate through an idealistic view of the pbuf as an u64 with - * no quirks, u8 by u8 (aligned at u8 boundaries), from high to low - * logical bit significance. "box" denotes the current logical u8. - */ - plogical_first_u8 = startbit / 8; - plogical_last_u8 = endbit / 8; - - for (box = plogical_first_u8; box >= plogical_last_u8; box--) { - /* Bit indices into the currently accessed 8-bit box */ - int box_start_bit, box_end_bit, box_addr; - u8 box_mask; - /* Corresponding bits from the unpacked u64 parameter */ - int proj_start_bit, proj_end_bit; - u64 proj_mask; - - /* This u8 may need to be accessed in its entirety - * (from bit 7 to bit 0), or not, depending on the - * input arguments startbit and endbit. - */ - if (box == plogical_first_u8) - box_start_bit = startbit % 8; - else - box_start_bit = 7; - if (box == plogical_last_u8) - box_end_bit = endbit % 8; - else - box_end_bit = 0; - - /* We have determined the box bit start and end. - * Now we calculate where this (masked) u8 box would fit - * in the unpacked (CPU-readable) u64 - the u8 box's - * projection onto the unpacked u64. Though the - * box is u8, the projection is u64 because it may fall - * anywhere within the unpacked u64. - */ - proj_start_bit = ((box * 8) + box_start_bit) - endbit; - proj_end_bit = ((box * 8) + box_end_bit) - endbit; - proj_mask = GENMASK_ULL(proj_start_bit, proj_end_bit); - box_mask = GENMASK_ULL(box_start_bit, box_end_bit); - - /* Determine the offset of the u8 box inside the pbuf, - * adjusted for quirks. The adjusted box_addr will be used for - * effective addressing inside the pbuf (so it's not - * logical any longer). - */ - box_addr = pbuflen - box - 1; - if (quirks & QUIRK_LITTLE_ENDIAN) - box_addr = get_le_offset(box_addr); - if (quirks & QUIRK_LSW32_IS_FIRST) - box_addr = get_reverse_lsw32_offset(box_addr, - pbuflen); - - if (op == UNPACK) { - u64 pval; - - /* Read from pbuf, write to uval */ - pval = ((u8 *)pbuf)[box_addr] & box_mask; - if (quirks & QUIRK_MSB_ON_THE_RIGHT) - adjust_for_msb_right_quirk(&pval, - &box_start_bit, - &box_end_bit, - &box_mask); - - pval >>= box_end_bit; - pval <<= proj_end_bit; - *uval &= ~proj_mask; - *uval |= pval; - } else { - u64 pval; - - /* Write to pbuf, read from uval */ - pval = (*uval) & proj_mask; - pval >>= proj_end_bit; - if (quirks & QUIRK_MSB_ON_THE_RIGHT) - adjust_for_msb_right_quirk(&pval, - &box_start_bit, - &box_end_bit, - &box_mask); - - pval <<= box_end_bit; - ((u8 *)pbuf)[box_addr] &= ~box_mask; - ((u8 *)pbuf)[box_addr] |= pval; - } - } - return 0; + return unpack(pbuf, uval, startbit, endbit, pbuflen, quirks); } EXPORT_SYMBOL(packing); +static u64 ustruct_field_to_u64(const void *ustruct, size_t field_offset, + size_t field_size) +{ + switch (field_size) { + case 1: + return *((u8 *)(ustruct + field_offset)); + case 2: + return *((u16 *)(ustruct + field_offset)); + case 4: + return *((u32 *)(ustruct + field_offset)); + default: + return *((u64 *)(ustruct + field_offset)); + } +} + +static void u64_to_ustruct_field(void *ustruct, size_t field_offset, + size_t field_size, u64 uval) +{ + switch (field_size) { + case 1: + *((u8 *)(ustruct + field_offset)) = uval; + break; + case 2: + *((u16 *)(ustruct + field_offset)) = uval; + break; + case 4: + *((u32 *)(ustruct + field_offset)) = uval; + break; + default: + *((u64 *)(ustruct + field_offset)) = uval; + break; + } +} + +/** + * pack_fields_u8 - Pack array of fields + * + * @pbuf: Pointer to a buffer holding the packed value. + * @pbuflen: The length in bytes of the packed buffer pointed to by @pbuf. + * @ustruct: Pointer to CPU-readable structure holding the unpacked value. + * It is expected (but not checked) that this has the same data type + * as all struct packed_field_u8 definitions. + * @fields: Array of packed_field_u8 field definition. They must not overlap. + * @num_fields: Length of @fields array. + * @quirks: A bit mask of QUIRK_LITTLE_ENDIAN, QUIRK_LSW32_IS_FIRST and + * QUIRK_MSB_ON_THE_RIGHT. + * + * Use the pack_fields() macro instead of calling this directly. + */ +void pack_fields_u8(void *pbuf, size_t pbuflen, const void *ustruct, + const struct packed_field_u8 *fields, size_t num_fields, + u8 quirks) +{ + __pack_fields(pbuf, pbuflen, ustruct, fields, num_fields, quirks); +} +EXPORT_SYMBOL(pack_fields_u8); + +/** + * pack_fields_u16 - Pack array of fields + * + * @pbuf: Pointer to a buffer holding the packed value. + * @pbuflen: The length in bytes of the packed buffer pointed to by @pbuf. + * @ustruct: Pointer to CPU-readable structure holding the unpacked value. + * It is expected (but not checked) that this has the same data type + * as all struct packed_field_u16 definitions. + * @fields: Array of packed_field_u16 field definitions. They must not overlap. + * @num_fields: Length of @fields array. + * @quirks: A bit mask of QUIRK_LITTLE_ENDIAN, QUIRK_LSW32_IS_FIRST and + * QUIRK_MSB_ON_THE_RIGHT. + * + * Use the pack_fields() macro instead of calling this directly. + */ +void pack_fields_u16(void *pbuf, size_t pbuflen, const void *ustruct, + const struct packed_field_u16 *fields, size_t num_fields, + u8 quirks) +{ + __pack_fields(pbuf, pbuflen, ustruct, fields, num_fields, quirks); +} +EXPORT_SYMBOL(pack_fields_u16); + +/** + * unpack_fields_u8 - Unpack array of fields + * + * @pbuf: Pointer to a buffer holding the packed value. + * @pbuflen: The length in bytes of the packed buffer pointed to by @pbuf. + * @ustruct: Pointer to CPU-readable structure holding the unpacked value. + * It is expected (but not checked) that this has the same data type + * as all struct packed_field_u8 definitions. + * @fields: Array of packed_field_u8 field definitions. They must not overlap. + * @num_fields: Length of @fields array. + * @quirks: A bit mask of QUIRK_LITTLE_ENDIAN, QUIRK_LSW32_IS_FIRST and + * QUIRK_MSB_ON_THE_RIGHT. + * + * Use the unpack_fields() macro instead of calling this directly. + */ +void unpack_fields_u8(const void *pbuf, size_t pbuflen, void *ustruct, + const struct packed_field_u8 *fields, size_t num_fields, + u8 quirks) +{ + __unpack_fields(pbuf, pbuflen, ustruct, fields, num_fields, quirks); +} +EXPORT_SYMBOL(unpack_fields_u8); + +/** + * unpack_fields_u16 - Unpack array of fields + * + * @pbuf: Pointer to a buffer holding the packed value. + * @pbuflen: The length in bytes of the packed buffer pointed to by @pbuf. + * @ustruct: Pointer to CPU-readable structure holding the unpacked value. + * It is expected (but not checked) that this has the same data type + * as all struct packed_field_u16 definitions. + * @fields: Array of packed_field_u16 field definitions. They must not overlap. + * @num_fields: Length of @fields array. + * @quirks: A bit mask of QUIRK_LITTLE_ENDIAN, QUIRK_LSW32_IS_FIRST and + * QUIRK_MSB_ON_THE_RIGHT. + * + * Use the unpack_fields() macro instead of calling this directly. + */ +void unpack_fields_u16(const void *pbuf, size_t pbuflen, void *ustruct, + const struct packed_field_u16 *fields, size_t num_fields, + u8 quirks) +{ + __unpack_fields(pbuf, pbuflen, ustruct, fields, num_fields, quirks); +} +EXPORT_SYMBOL(unpack_fields_u16); + MODULE_DESCRIPTION("Generic bitfield packing and unpacking"); diff --git a/lib/packing_test.c b/lib/packing_test.c new file mode 100644 index 0000000000..ce3b83d33b --- /dev/null +++ b/lib/packing_test.c @@ -0,0 +1,474 @@ +// SPDX-License-Identifier: GPL-2.0 +/* Copyright (c) 2024, Vladimir Oltean + * Copyright (c) 2024, Intel Corporation. + */ +#include +#include + +struct packing_test_case { + const char *desc; + const u8 *pbuf; + size_t pbuf_size; + u64 uval; + size_t start_bit; + size_t end_bit; + u8 quirks; +}; + +#define NO_QUIRKS 0 + +/** + * PBUF - Initialize .pbuf and .pbuf_size + * @array: elements of constant physical buffer + * + * Initializes the .pbuf and .pbuf_size fields of a struct packing_test_case + * with a constant array of the specified elements. + */ +#define PBUF(array...) \ + .pbuf = (const u8[]){ array }, \ + .pbuf_size = sizeof((const u8 []){ array }) + +static const struct packing_test_case cases[] = { + /* These tests pack and unpack a magic 64-bit value + * (0xcafedeadbeefcafe) at a fixed logical offset (32) within an + * otherwise zero array of 128 bits (16 bytes). They test all possible + * bit layouts of the 128 bit buffer. + */ + { + .desc = "no quirks, 16 bytes", + PBUF(0x00, 0x00, 0x00, 0x00, 0xca, 0xfe, 0xde, 0xad, + 0xbe, 0xef, 0xca, 0xfe, 0x00, 0x00, 0x00, 0x00), + .uval = 0xcafedeadbeefcafe, + .start_bit = 95, + .end_bit = 32, + .quirks = NO_QUIRKS, + }, + { + .desc = "lsw32 first, 16 bytes", + PBUF(0x00, 0x00, 0x00, 0x00, 0xbe, 0xef, 0xca, 0xfe, + 0xca, 0xfe, 0xde, 0xad, 0x00, 0x00, 0x00, 0x00), + .uval = 0xcafedeadbeefcafe, + .start_bit = 95, + .end_bit = 32, + .quirks = QUIRK_LSW32_IS_FIRST, + }, + { + .desc = "little endian words, 16 bytes", + PBUF(0x00, 0x00, 0x00, 0x00, 0xad, 0xde, 0xfe, 0xca, + 0xfe, 0xca, 0xef, 0xbe, 0x00, 0x00, 0x00, 0x00), + .uval = 0xcafedeadbeefcafe, + .start_bit = 95, + .end_bit = 32, + .quirks = QUIRK_LITTLE_ENDIAN, + }, + { + .desc = "lsw32 first + little endian words, 16 bytes", + PBUF(0x00, 0x00, 0x00, 0x00, 0xfe, 0xca, 0xef, 0xbe, + 0xad, 0xde, 0xfe, 0xca, 0x00, 0x00, 0x00, 0x00), + .uval = 0xcafedeadbeefcafe, + .start_bit = 95, + .end_bit = 32, + .quirks = QUIRK_LSW32_IS_FIRST | QUIRK_LITTLE_ENDIAN, + }, + { + .desc = "msb right, 16 bytes", + PBUF(0x00, 0x00, 0x00, 0x00, 0x53, 0x7f, 0x7b, 0xb5, + 0x7d, 0xf7, 0x53, 0x7f, 0x00, 0x00, 0x00, 0x00), + .uval = 0xcafedeadbeefcafe, + .start_bit = 95, + .end_bit = 32, + .quirks = QUIRK_MSB_ON_THE_RIGHT, + }, + { + .desc = "msb right + lsw32 first, 16 bytes", + PBUF(0x00, 0x00, 0x00, 0x00, 0x7d, 0xf7, 0x53, 0x7f, + 0x53, 0x7f, 0x7b, 0xb5, 0x00, 0x00, 0x00, 0x00), + .uval = 0xcafedeadbeefcafe, + .start_bit = 95, + .end_bit = 32, + .quirks = QUIRK_MSB_ON_THE_RIGHT | QUIRK_LSW32_IS_FIRST, + }, + { + .desc = "msb right + little endian words, 16 bytes", + PBUF(0x00, 0x00, 0x00, 0x00, 0xb5, 0x7b, 0x7f, 0x53, + 0x7f, 0x53, 0xf7, 0x7d, 0x00, 0x00, 0x00, 0x00), + .uval = 0xcafedeadbeefcafe, + .start_bit = 95, + .end_bit = 32, + .quirks = QUIRK_MSB_ON_THE_RIGHT | QUIRK_LITTLE_ENDIAN, + }, + { + .desc = "msb right + lsw32 first + little endian words, 16 bytes", + PBUF(0x00, 0x00, 0x00, 0x00, 0x7f, 0x53, 0xf7, 0x7d, + 0xb5, 0x7b, 0x7f, 0x53, 0x00, 0x00, 0x00, 0x00), + .uval = 0xcafedeadbeefcafe, + .start_bit = 95, + .end_bit = 32, + .quirks = QUIRK_MSB_ON_THE_RIGHT | QUIRK_LSW32_IS_FIRST | QUIRK_LITTLE_ENDIAN, + }, + /* These tests pack and unpack a magic 64-bit value + * (0xcafedeadbeefcafe) at a fixed logical offset (32) within an + * otherwise zero array of varying size from 18 bytes to 24 bytes. + */ + { + .desc = "no quirks, 18 bytes", + PBUF(0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0xca, 0xfe, + 0xde, 0xad, 0xbe, 0xef, 0xca, 0xfe, 0x00, 0x00, + 0x00, 0x00), + .uval = 0xcafedeadbeefcafe, + .start_bit = 95, + .end_bit = 32, + .quirks = NO_QUIRKS, + }, + { + .desc = "no quirks, 19 bytes", + PBUF(0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0xca, + 0xfe, 0xde, 0xad, 0xbe, 0xef, 0xca, 0xfe, 0x00, + 0x00, 0x00, 0x00), + .uval = 0xcafedeadbeefcafe, + .start_bit = 95, + .end_bit = 32, + .quirks = NO_QUIRKS, + }, + { + .desc = "no quirks, 20 bytes", + PBUF(0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0xca, 0xfe, 0xde, 0xad, 0xbe, 0xef, 0xca, 0xfe, + 0x00, 0x00, 0x00, 0x00), + .uval = 0xcafedeadbeefcafe, + .start_bit = 95, + .end_bit = 32, + .quirks = NO_QUIRKS, + }, + { + .desc = "no quirks, 22 bytes", + PBUF(0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0xca, 0xfe, 0xde, 0xad, 0xbe, 0xef, + 0xca, 0xfe, 0x00, 0x00, 0x00, 0x00), + .uval = 0xcafedeadbeefcafe, + .start_bit = 95, + .end_bit = 32, + .quirks = NO_QUIRKS, + }, + { + .desc = "no quirks, 24 bytes", + PBUF(0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0xca, 0xfe, 0xde, 0xad, + 0xbe, 0xef, 0xca, 0xfe, 0x00, 0x00, 0x00, 0x00), + .uval = 0xcafedeadbeefcafe, + .start_bit = 95, + .end_bit = 32, + .quirks = NO_QUIRKS, + }, + { + .desc = "lsw32 first + little endian words, 18 bytes", + PBUF(0x00, 0x00, 0x00, 0x00, 0xfe, 0xca, 0xef, 0xbe, + 0xad, 0xde, 0xfe, 0xca, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00), + .uval = 0xcafedeadbeefcafe, + .start_bit = 95, + .end_bit = 32, + .quirks = QUIRK_LSW32_IS_FIRST | QUIRK_LITTLE_ENDIAN, + }, + { + .desc = "lsw32 first + little endian words, 19 bytes", + PBUF(0x00, 0x00, 0x00, 0x00, 0xfe, 0xca, 0xef, 0xbe, + 0xad, 0xde, 0xfe, 0xca, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00), + .uval = 0xcafedeadbeefcafe, + .start_bit = 95, + .end_bit = 32, + .quirks = QUIRK_LSW32_IS_FIRST | QUIRK_LITTLE_ENDIAN, + }, + { + .desc = "lsw32 first + little endian words, 20 bytes", + PBUF(0x00, 0x00, 0x00, 0x00, 0xfe, 0xca, 0xef, 0xbe, + 0xad, 0xde, 0xfe, 0xca, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00), + .uval = 0xcafedeadbeefcafe, + .start_bit = 95, + .end_bit = 32, + .quirks = QUIRK_LSW32_IS_FIRST | QUIRK_LITTLE_ENDIAN, + }, + { + .desc = "lsw32 first + little endian words, 22 bytes", + PBUF(0x00, 0x00, 0x00, 0x00, 0xfe, 0xca, 0xef, 0xbe, + 0xad, 0xde, 0xfe, 0xca, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00), + .uval = 0xcafedeadbeefcafe, + .start_bit = 95, + .end_bit = 32, + .quirks = QUIRK_LSW32_IS_FIRST | QUIRK_LITTLE_ENDIAN, + }, + { + .desc = "lsw32 first + little endian words, 24 bytes", + PBUF(0x00, 0x00, 0x00, 0x00, 0xfe, 0xca, 0xef, 0xbe, + 0xad, 0xde, 0xfe, 0xca, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00), + .uval = 0xcafedeadbeefcafe, + .start_bit = 95, + .end_bit = 32, + .quirks = QUIRK_LSW32_IS_FIRST | QUIRK_LITTLE_ENDIAN, + }, + /* These tests pack and unpack a magic 64-bit value + * (0x1122334455667788) at an odd starting bit (43) within an + * otherwise zero array of 128 bits (16 bytes). They test all possible + * bit layouts of the 128 bit buffer. + */ + { + .desc = "no quirks, 16 bytes, non-aligned", + PBUF(0x00, 0x00, 0x00, 0x89, 0x11, 0x9a, 0x22, 0xab, + 0x33, 0xbc, 0x40, 0x00, 0x00, 0x00, 0x00, 0x00), + .uval = 0x1122334455667788, + .start_bit = 106, + .end_bit = 43, + .quirks = NO_QUIRKS, + }, + { + .desc = "lsw32 first, 16 bytes, non-aligned", + PBUF(0x00, 0x00, 0x00, 0x00, 0x33, 0xbc, 0x40, 0x00, + 0x11, 0x9a, 0x22, 0xab, 0x00, 0x00, 0x00, 0x89), + .uval = 0x1122334455667788, + .start_bit = 106, + .end_bit = 43, + .quirks = QUIRK_LSW32_IS_FIRST, + }, + { + .desc = "little endian words, 16 bytes, non-aligned", + PBUF(0x89, 0x00, 0x00, 0x00, 0xab, 0x22, 0x9a, 0x11, + 0x00, 0x40, 0xbc, 0x33, 0x00, 0x00, 0x00, 0x00), + .uval = 0x1122334455667788, + .start_bit = 106, + .end_bit = 43, + .quirks = QUIRK_LITTLE_ENDIAN, + }, + { + .desc = "lsw32 first + little endian words, 16 bytes, non-aligned", + PBUF(0x00, 0x00, 0x00, 0x00, 0x00, 0x40, 0xbc, 0x33, + 0xab, 0x22, 0x9a, 0x11, 0x89, 0x00, 0x00, 0x00), + .uval = 0x1122334455667788, + .start_bit = 106, + .end_bit = 43, + .quirks = QUIRK_LSW32_IS_FIRST | QUIRK_LITTLE_ENDIAN, + }, + { + .desc = "msb right, 16 bytes, non-aligned", + PBUF(0x00, 0x00, 0x00, 0x91, 0x88, 0x59, 0x44, 0xd5, + 0xcc, 0x3d, 0x02, 0x00, 0x00, 0x00, 0x00, 0x00), + .uval = 0x1122334455667788, + .start_bit = 106, + .end_bit = 43, + .quirks = QUIRK_MSB_ON_THE_RIGHT, + }, + { + .desc = "msb right + lsw32 first, 16 bytes, non-aligned", + PBUF(0x00, 0x00, 0x00, 0x00, 0xcc, 0x3d, 0x02, 0x00, + 0x88, 0x59, 0x44, 0xd5, 0x00, 0x00, 0x00, 0x91), + .uval = 0x1122334455667788, + .start_bit = 106, + .end_bit = 43, + .quirks = QUIRK_MSB_ON_THE_RIGHT | QUIRK_LSW32_IS_FIRST, + }, + { + .desc = "msb right + little endian words, 16 bytes, non-aligned", + PBUF(0x91, 0x00, 0x00, 0x00, 0xd5, 0x44, 0x59, 0x88, + 0x00, 0x02, 0x3d, 0xcc, 0x00, 0x00, 0x00, 0x00), + .uval = 0x1122334455667788, + .start_bit = 106, + .end_bit = 43, + .quirks = QUIRK_MSB_ON_THE_RIGHT | QUIRK_LITTLE_ENDIAN, + }, + { + .desc = "msb right + lsw32 first + little endian words, 16 bytes, non-aligned", + PBUF(0x00, 0x00, 0x00, 0x00, 0x00, 0x02, 0x3d, 0xcc, + 0xd5, 0x44, 0x59, 0x88, 0x91, 0x00, 0x00, 0x00), + .uval = 0x1122334455667788, + .start_bit = 106, + .end_bit = 43, + .quirks = QUIRK_MSB_ON_THE_RIGHT | QUIRK_LSW32_IS_FIRST | QUIRK_LITTLE_ENDIAN, + }, + /* These tests pack and unpack a u64 with all bits set + * (0xffffffffffffffff) at an odd starting bit (43) within an + * otherwise zero array of 128 bits (16 bytes). They test all possible + * bit layouts of the 128 bit buffer. + */ + { + .desc = "no quirks, 16 bytes, non-aligned, 0xff", + PBUF(0x00, 0x00, 0x07, 0xff, 0xff, 0xff, 0xff, 0xff, + 0xff, 0xff, 0xf8, 0x00, 0x00, 0x00, 0x00, 0x00), + .uval = 0xffffffffffffffff, + .start_bit = 106, + .end_bit = 43, + .quirks = NO_QUIRKS, + }, + { + .desc = "lsw32 first, 16 bytes, non-aligned, 0xff", + PBUF(0x00, 0x00, 0x00, 0x00, 0xff, 0xff, 0xf8, 0x00, + 0xff, 0xff, 0xff, 0xff, 0x00, 0x00, 0x07, 0xff), + .uval = 0xffffffffffffffff, + .start_bit = 106, + .end_bit = 43, + .quirks = QUIRK_LSW32_IS_FIRST, + }, + { + .desc = "little endian words, 16 bytes, non-aligned, 0xff", + PBUF(0xff, 0x07, 0x00, 0x00, 0xff, 0xff, 0xff, 0xff, + 0x00, 0xf8, 0xff, 0xff, 0x00, 0x00, 0x00, 0x00), + .uval = 0xffffffffffffffff, + .start_bit = 106, + .end_bit = 43, + .quirks = QUIRK_LITTLE_ENDIAN, + }, + { + .desc = "lsw32 first + little endian words, 16 bytes, non-aligned, 0xff", + PBUF(0x00, 0x00, 0x00, 0x00, 0x00, 0xf8, 0xff, 0xff, + 0xff, 0xff, 0xff, 0xff, 0xff, 0x07, 0x00, 0x00), + .uval = 0xffffffffffffffff, + .start_bit = 106, + .end_bit = 43, + .quirks = QUIRK_LSW32_IS_FIRST | QUIRK_LITTLE_ENDIAN, + }, + { + .desc = "msb right, 16 bytes, non-aligned, 0xff", + PBUF(0x00, 0x00, 0xe0, 0xff, 0xff, 0xff, 0xff, 0xff, + 0xff, 0xff, 0x1f, 0x00, 0x00, 0x00, 0x00, 0x00), + .uval = 0xffffffffffffffff, + .start_bit = 106, + .end_bit = 43, + .quirks = QUIRK_MSB_ON_THE_RIGHT, + }, + { + .desc = "msb right + lsw32 first, 16 bytes, non-aligned, 0xff", + PBUF(0x00, 0x00, 0x00, 0x00, 0xff, 0xff, 0x1f, 0x00, + 0xff, 0xff, 0xff, 0xff, 0x00, 0x00, 0xe0, 0xff), + .uval = 0xffffffffffffffff, + .start_bit = 106, + .end_bit = 43, + .quirks = QUIRK_MSB_ON_THE_RIGHT | QUIRK_LSW32_IS_FIRST, + }, + { + .desc = "msb right + little endian words, 16 bytes, non-aligned, 0xff", + PBUF(0xff, 0xe0, 0x00, 0x00, 0xff, 0xff, 0xff, 0xff, + 0x00, 0x1f, 0xff, 0xff, 0x00, 0x00, 0x00, 0x00), + .uval = 0xffffffffffffffff, + .start_bit = 106, + .end_bit = 43, + .quirks = QUIRK_MSB_ON_THE_RIGHT | QUIRK_LITTLE_ENDIAN, + }, + { + .desc = "msb right + lsw32 first + little endian words, 16 bytes, non-aligned, 0xff", + PBUF(0x00, 0x00, 0x00, 0x00, 0x00, 0x1f, 0xff, 0xff, + 0xff, 0xff, 0xff, 0xff, 0xff, 0xe0, 0x00, 0x00), + .uval = 0xffffffffffffffff, + .start_bit = 106, + .end_bit = 43, + .quirks = QUIRK_MSB_ON_THE_RIGHT | QUIRK_LSW32_IS_FIRST | QUIRK_LITTLE_ENDIAN, + }, +}; + +KUNIT_ARRAY_PARAM_DESC(packing, cases, desc); + +static void packing_test_pack(struct kunit *test) +{ + const struct packing_test_case *params = test->param_value; + u8 *pbuf; + int err; + + pbuf = kunit_kzalloc(test, params->pbuf_size, GFP_KERNEL); + KUNIT_ASSERT_NOT_NULL(test, pbuf); + + err = pack(pbuf, params->uval, params->start_bit, params->end_bit, + params->pbuf_size, params->quirks); + + KUNIT_EXPECT_EQ_MSG(test, err, 0, "pack() returned %pe\n", ERR_PTR(err)); + KUNIT_EXPECT_MEMEQ(test, pbuf, params->pbuf, params->pbuf_size); +} + +static void packing_test_unpack(struct kunit *test) +{ + const struct packing_test_case *params = test->param_value; + u64 uval; + int err; + + err = unpack(params->pbuf, &uval, params->start_bit, params->end_bit, + params->pbuf_size, params->quirks); + KUNIT_EXPECT_EQ_MSG(test, err, 0, "unpack() returned %pe\n", ERR_PTR(err)); + KUNIT_EXPECT_EQ(test, uval, params->uval); +} + +#define PACKED_BUF_SIZE 8 + +typedef struct __packed { u8 buf[PACKED_BUF_SIZE]; } packed_buf_t; + +struct test_data { + u32 field3; + u16 field2; + u16 field4; + u16 field6; + u8 field1; + u8 field5; +}; + +static const struct packed_field_u8 test_fields[] = { + PACKED_FIELD(63, 61, struct test_data, field1), + PACKED_FIELD(60, 52, struct test_data, field2), + PACKED_FIELD(51, 28, struct test_data, field3), + PACKED_FIELD(27, 14, struct test_data, field4), + PACKED_FIELD(13, 9, struct test_data, field5), + PACKED_FIELD(8, 0, struct test_data, field6), +}; + +static void packing_test_pack_fields(struct kunit *test) +{ + const struct test_data data = { + .field1 = 0x2, + .field2 = 0x100, + .field3 = 0xF00050, + .field4 = 0x7D3, + .field5 = 0x9, + .field6 = 0x10B, + }; + packed_buf_t expect = { + .buf = { 0x50, 0x0F, 0x00, 0x05, 0x01, 0xF4, 0xD3, 0x0B }, + }; + packed_buf_t buf = {}; + + pack_fields(&buf, sizeof(buf), &data, test_fields, 0); + + KUNIT_EXPECT_MEMEQ(test, &expect, &buf, sizeof(buf)); +} + +static void packing_test_unpack_fields(struct kunit *test) +{ + const packed_buf_t buf = { + .buf = { 0x17, 0x28, 0x10, 0x19, 0x3D, 0xA9, 0x07, 0x9C }, + }; + struct test_data data = {}; + + unpack_fields(&buf, sizeof(buf), &data, test_fields, 0); + + KUNIT_EXPECT_EQ(test, 0, data.field1); + KUNIT_EXPECT_EQ(test, 0x172, data.field2); + KUNIT_EXPECT_EQ(test, 0x810193, data.field3); + KUNIT_EXPECT_EQ(test, 0x36A4, data.field4); + KUNIT_EXPECT_EQ(test, 0x3, data.field5); + KUNIT_EXPECT_EQ(test, 0x19C, data.field6); +} + +static struct kunit_case packing_test_cases[] = { + KUNIT_CASE_PARAM(packing_test_pack, packing_gen_params), + KUNIT_CASE_PARAM(packing_test_unpack, packing_gen_params), + KUNIT_CASE(packing_test_pack_fields), + KUNIT_CASE(packing_test_unpack_fields), + {}, +}; + +static struct kunit_suite packing_test_suite = { + .name = "packing", + .test_cases = packing_test_cases, +}; + +kunit_test_suite(packing_test_suite); + +MODULE_LICENSE("GPL"); +MODULE_DESCRIPTION("KUnit tests for packing library"); diff --git a/mm/execmem.c b/mm/execmem.c index 0c4b36bc6d..ac22b2c57b 100644 --- a/mm/execmem.c +++ b/mm/execmem.c @@ -6,28 +6,41 @@ * Copyright (C) 2024 Mike Rapoport IBM. */ +#define pr_fmt(fmt) "execmem: " fmt + #include +#include #include #include +#include +#include #include +#include + +#include + +#include "internal.h" static struct execmem_info *execmem_info __ro_after_init; static struct execmem_info default_execmem_info __ro_after_init; -static void *__execmem_alloc(struct execmem_range *range, size_t size) +#ifdef CONFIG_MMU +static void *execmem_vmalloc(struct execmem_range *range, size_t size, + pgprot_t pgprot, unsigned long vm_flags) { bool kasan = range->flags & EXECMEM_KASAN_SHADOW; - unsigned long vm_flags = VM_FLUSH_RESET_PERMS; gfp_t gfp_flags = GFP_KERNEL | __GFP_NOWARN; + unsigned int align = range->alignment; unsigned long start = range->start; unsigned long end = range->end; - unsigned int align = range->alignment; - pgprot_t pgprot = range->pgprot; void *p; if (kasan) vm_flags |= VM_DEFER_KMEMLEAK; + if (vm_flags & VM_ALLOW_HUGE_VMAP) + align = PMD_SIZE; + p = __vmalloc_node_range(size, align, start, end, gfp_flags, pgprot, vm_flags, NUMA_NO_NODE, __builtin_return_address(0)); @@ -40,7 +53,7 @@ static void *__execmem_alloc(struct execmem_range *range, size_t size) } if (!p) { - pr_warn_ratelimited("execmem: unable to allocate memory\n"); + pr_warn_ratelimited("unable to allocate memory\n"); return NULL; } @@ -49,14 +62,311 @@ static void *__execmem_alloc(struct execmem_range *range, size_t size) return NULL; } - return kasan_reset_tag(p); + return p; } +#else +static void *execmem_vmalloc(struct execmem_range *range, size_t size, + pgprot_t pgprot, unsigned long vm_flags) +{ + return vmalloc(size); +} +#endif /* CONFIG_MMU */ + +#ifdef CONFIG_ARCH_HAS_EXECMEM_ROX +struct execmem_cache { + struct mutex mutex; + struct maple_tree busy_areas; + struct maple_tree free_areas; +}; + +static struct execmem_cache execmem_cache = { + .mutex = __MUTEX_INITIALIZER(execmem_cache.mutex), + .busy_areas = MTREE_INIT_EXT(busy_areas, MT_FLAGS_LOCK_EXTERN, + execmem_cache.mutex), + .free_areas = MTREE_INIT_EXT(free_areas, MT_FLAGS_LOCK_EXTERN, + execmem_cache.mutex), +}; + +static inline unsigned long mas_range_len(struct ma_state *mas) +{ + return mas->last - mas->index + 1; +} + +static int execmem_set_direct_map_valid(struct vm_struct *vm, bool valid) +{ + unsigned int nr = (1 << get_vm_area_page_order(vm)); + unsigned int updated = 0; + int err = 0; + + for (int i = 0; i < vm->nr_pages; i += nr) { + err = set_direct_map_valid_noflush(vm->pages[i], nr, valid); + if (err) + goto err_restore; + updated += nr; + } + + return 0; + +err_restore: + for (int i = 0; i < updated; i += nr) + set_direct_map_valid_noflush(vm->pages[i], nr, !valid); + + return err; +} + +static void execmem_cache_clean(struct work_struct *work) +{ + struct maple_tree *free_areas = &execmem_cache.free_areas; + struct mutex *mutex = &execmem_cache.mutex; + MA_STATE(mas, free_areas, 0, ULONG_MAX); + void *area; + + mutex_lock(mutex); + mas_for_each(&mas, area, ULONG_MAX) { + size_t size = mas_range_len(&mas); + + if (IS_ALIGNED(size, PMD_SIZE) && + IS_ALIGNED(mas.index, PMD_SIZE)) { + struct vm_struct *vm = find_vm_area(area); + + execmem_set_direct_map_valid(vm, true); + mas_store_gfp(&mas, NULL, GFP_KERNEL); + vfree(area); + } + } + mutex_unlock(mutex); +} + +static DECLARE_WORK(execmem_cache_clean_work, execmem_cache_clean); + +static int execmem_cache_add(void *ptr, size_t size) +{ + struct maple_tree *free_areas = &execmem_cache.free_areas; + struct mutex *mutex = &execmem_cache.mutex; + unsigned long addr = (unsigned long)ptr; + MA_STATE(mas, free_areas, addr - 1, addr + 1); + unsigned long lower, upper; + void *area = NULL; + int err; + + lower = addr; + upper = addr + size - 1; + + mutex_lock(mutex); + area = mas_walk(&mas); + if (area && mas.last == addr - 1) + lower = mas.index; + + area = mas_next(&mas, ULONG_MAX); + if (area && mas.index == addr + size) + upper = mas.last; + + mas_set_range(&mas, lower, upper); + err = mas_store_gfp(&mas, (void *)lower, GFP_KERNEL); + mutex_unlock(mutex); + if (err) + return err; + + return 0; +} + +static bool within_range(struct execmem_range *range, struct ma_state *mas, + size_t size) +{ + unsigned long addr = mas->index; + + if (addr >= range->start && addr + size < range->end) + return true; + + if (range->fallback_start && + addr >= range->fallback_start && addr + size < range->fallback_end) + return true; + + return false; +} + +static void *__execmem_cache_alloc(struct execmem_range *range, size_t size) +{ + struct maple_tree *free_areas = &execmem_cache.free_areas; + struct maple_tree *busy_areas = &execmem_cache.busy_areas; + MA_STATE(mas_free, free_areas, 0, ULONG_MAX); + MA_STATE(mas_busy, busy_areas, 0, ULONG_MAX); + struct mutex *mutex = &execmem_cache.mutex; + unsigned long addr, last, area_size = 0; + void *area, *ptr = NULL; + int err; + + mutex_lock(mutex); + mas_for_each(&mas_free, area, ULONG_MAX) { + area_size = mas_range_len(&mas_free); + + if (area_size >= size && within_range(range, &mas_free, size)) + break; + } + + if (area_size < size) + goto out_unlock; + + addr = mas_free.index; + last = mas_free.last; + + /* insert allocated size to busy_areas at range [addr, addr + size) */ + mas_set_range(&mas_busy, addr, addr + size - 1); + err = mas_store_gfp(&mas_busy, (void *)addr, GFP_KERNEL); + if (err) + goto out_unlock; + + mas_store_gfp(&mas_free, NULL, GFP_KERNEL); + if (area_size > size) { + void *ptr = (void *)(addr + size); + + /* + * re-insert remaining free size to free_areas at range + * [addr + size, last] + */ + mas_set_range(&mas_free, addr + size, last); + err = mas_store_gfp(&mas_free, ptr, GFP_KERNEL); + if (err) { + mas_store_gfp(&mas_busy, NULL, GFP_KERNEL); + goto out_unlock; + } + } + ptr = (void *)addr; + +out_unlock: + mutex_unlock(mutex); + return ptr; +} + +static int execmem_cache_populate(struct execmem_range *range, size_t size) +{ + unsigned long vm_flags = VM_ALLOW_HUGE_VMAP; + struct vm_struct *vm; + size_t alloc_size; + int err = -ENOMEM; + void *p; + + alloc_size = round_up(size, PMD_SIZE); + p = execmem_vmalloc(range, alloc_size, PAGE_KERNEL, vm_flags); + if (!p) + return err; + + vm = find_vm_area(p); + if (!vm) + goto err_free_mem; + + /* fill memory with instructions that will trap */ + execmem_fill_trapping_insns(p, alloc_size, /* writable = */ true); + + err = set_memory_rox((unsigned long)p, vm->nr_pages); + if (err) + goto err_free_mem; + + err = execmem_cache_add(p, alloc_size); + if (err) + goto err_reset_direct_map; + + return 0; + +err_reset_direct_map: + execmem_set_direct_map_valid(vm, true); +err_free_mem: + vfree(p); + return err; +} + +static void *execmem_cache_alloc(struct execmem_range *range, size_t size) +{ + void *p; + int err; + + p = __execmem_cache_alloc(range, size); + if (p) + return p; + + err = execmem_cache_populate(range, size); + if (err) + return NULL; + + return __execmem_cache_alloc(range, size); +} + +static bool execmem_cache_free(void *ptr) +{ + struct maple_tree *busy_areas = &execmem_cache.busy_areas; + struct mutex *mutex = &execmem_cache.mutex; + unsigned long addr = (unsigned long)ptr; + MA_STATE(mas, busy_areas, addr, addr); + size_t size; + void *area; + + mutex_lock(mutex); + area = mas_walk(&mas); + if (!area) { + mutex_unlock(mutex); + return false; + } + size = mas_range_len(&mas); + + mas_store_gfp(&mas, NULL, GFP_KERNEL); + mutex_unlock(mutex); + + execmem_fill_trapping_insns(ptr, size, /* writable = */ false); + + execmem_cache_add(ptr, size); + + schedule_work(&execmem_cache_clean_work); + + return true; +} + +int execmem_make_temp_rw(void *ptr, size_t size) +{ + unsigned int nr = PAGE_ALIGN(size) >> PAGE_SHIFT; + unsigned long addr = (unsigned long)ptr; + int ret; + + ret = set_memory_nx(addr, nr); + if (ret) + return ret; + + return set_memory_rw(addr, nr); +} + +int execmem_restore_rox(void *ptr, size_t size) +{ + unsigned int nr = PAGE_ALIGN(size) >> PAGE_SHIFT; + unsigned long addr = (unsigned long)ptr; + + return set_memory_rox(addr, nr); +} + +#else /* CONFIG_ARCH_HAS_EXECMEM_ROX */ +static void *execmem_cache_alloc(struct execmem_range *range, size_t size) +{ + return NULL; +} + +static bool execmem_cache_free(void *ptr) +{ + return false; +} +#endif /* CONFIG_ARCH_HAS_EXECMEM_ROX */ void *execmem_alloc(enum execmem_type type, size_t size) { struct execmem_range *range = &execmem_info->ranges[type]; + bool use_cache = range->flags & EXECMEM_ROX_CACHE; + unsigned long vm_flags = VM_FLUSH_RESET_PERMS; + pgprot_t pgprot = range->pgprot; + void *p; - return __execmem_alloc(range, size); + if (use_cache) + p = execmem_cache_alloc(range, size); + else + p = execmem_vmalloc(range, size, pgprot, vm_flags); + + return kasan_reset_tag(p); } void execmem_free(void *ptr) @@ -66,7 +376,19 @@ void execmem_free(void *ptr) * supported by vmalloc. */ WARN_ON(in_interrupt()); - vfree(ptr); + + if (!execmem_cache_free(ptr)) + vfree(ptr); +} + +void *execmem_update_copy(void *dst, const void *src, size_t size) +{ + return text_poke_copy(dst, src, size); +} + +bool execmem_is_rox(enum execmem_type type) +{ + return !!(execmem_info->ranges[type].flags & EXECMEM_ROX_CACHE); } static bool execmem_validate(struct execmem_info *info) @@ -78,6 +400,17 @@ static bool execmem_validate(struct execmem_info *info) return false; } + if (!IS_ENABLED(CONFIG_ARCH_HAS_EXECMEM_ROX)) { + for (int i = EXECMEM_DEFAULT; i < EXECMEM_TYPE_MAX; i++) { + r = &info->ranges[i]; + + if (r->flags & EXECMEM_ROX_CACHE) { + pr_warn_once("ROX cache is not supported\n"); + r->flags &= ~EXECMEM_ROX_CACHE; + } + } + } + return true; } diff --git a/mm/internal.h b/mm/internal.h index 64c2eb0b16..95997108a7 100644 --- a/mm/internal.h +++ b/mm/internal.h @@ -1234,6 +1234,7 @@ size_t splice_folio_into_pipe(struct pipe_inode_info *pipe, void __init vmalloc_init(void); int __must_check vmap_pages_range_noflush(unsigned long addr, unsigned long end, pgprot_t prot, struct page **pages, unsigned int page_shift); +unsigned int get_vm_area_page_order(struct vm_struct *vm); #else static inline void vmalloc_init(void) { diff --git a/mm/vmalloc.c b/mm/vmalloc.c index 5480b77f41..74c0a5eae2 100644 --- a/mm/vmalloc.c +++ b/mm/vmalloc.c @@ -3023,6 +3023,11 @@ static inline unsigned int vm_area_page_order(struct vm_struct *vm) #endif } +unsigned int get_vm_area_page_order(struct vm_struct *vm) +{ + return vm_area_page_order(vm); +} + static inline void set_vm_area_page_order(struct vm_struct *vm, unsigned int order) { #ifdef CONFIG_HAVE_ARCH_HUGE_VMALLOC @@ -3779,8 +3784,6 @@ void *__vmalloc_node_range_noprof(unsigned long size, unsigned long align, } if (vmap_allow_huge && (vm_flags & VM_ALLOW_HUGE_VMAP)) { - unsigned long size_per_node; - /* * Try huge pages. Only try for PAGE_KERNEL allocations, * others like modules don't yet expect huge pages in @@ -3788,13 +3791,10 @@ void *__vmalloc_node_range_noprof(unsigned long size, unsigned long align, * supporting them. */ - size_per_node = size; - if (node == NUMA_NO_NODE) - size_per_node /= num_online_nodes(); - if (arch_vmap_pmd_supported(prot) && size_per_node >= PMD_SIZE) + if (arch_vmap_pmd_supported(prot) && size >= PMD_SIZE) shift = PMD_SHIFT; else - shift = arch_vmap_pte_supported_shift(size_per_node); + shift = arch_vmap_pte_supported_shift(size); align = max(real_align, 1UL << shift); size = ALIGN(real_size, 1UL << shift); diff --git a/mm/vmstat.c b/mm/vmstat.c index ac6a5aa34e..d87007210b 100644 --- a/mm/vmstat.c +++ b/mm/vmstat.c @@ -1432,6 +1432,8 @@ const char * const vmstat_text[] = { #ifdef CONFIG_X86 "direct_map_level2_splits", "direct_map_level3_splits", + "direct_map_level2_collapses", + "direct_map_level3_collapses", #endif #ifdef CONFIG_PER_VMA_LOCK_STATS "vma_lock_success", diff --git a/net/devlink/health.c b/net/devlink/health.c index acb8c0e174..8ff71376ef 100644 --- a/net/devlink/health.c +++ b/net/devlink/health.c @@ -1241,3 +1241,70 @@ int devlink_nl_health_reporter_test_doit(struct sk_buff *skb, return reporter->ops->test(reporter, info->extack); } + +/** + * devlink_fmsg_dump_skb - Dump sk_buffer structure + * @fmsg: devlink formatted message pointer + * @skb: pointer to skb + * + * Dump diagnostic information about sk_buff structure, like headroom, length, + * tailroom, MAC, etc. + */ +void devlink_fmsg_dump_skb(struct devlink_fmsg *fmsg, const struct sk_buff *skb) +{ + struct skb_shared_info *sh = skb_shinfo(skb); + struct sock *sk = skb->sk; + bool has_mac, has_trans; + + has_mac = skb_mac_header_was_set(skb); + has_trans = skb_transport_header_was_set(skb); + + devlink_fmsg_pair_nest_start(fmsg, "skb"); + devlink_fmsg_obj_nest_start(fmsg); + devlink_fmsg_put(fmsg, "actual len", skb->len); + devlink_fmsg_put(fmsg, "head len", skb_headlen(skb)); + devlink_fmsg_put(fmsg, "data len", skb->data_len); + devlink_fmsg_put(fmsg, "tail len", skb_tailroom(skb)); + devlink_fmsg_put(fmsg, "MAC", has_mac ? skb->mac_header : -1); + devlink_fmsg_put(fmsg, "MAC len", + has_mac ? skb_mac_header_len(skb) : -1); + devlink_fmsg_put(fmsg, "network hdr", skb->network_header); + devlink_fmsg_put(fmsg, "network hdr len", + has_trans ? skb_network_header_len(skb) : -1); + devlink_fmsg_put(fmsg, "transport hdr", + has_trans ? skb->transport_header : -1); + devlink_fmsg_put(fmsg, "csum", (__force u32)skb->csum); + devlink_fmsg_put(fmsg, "csum_ip_summed", (u8)skb->ip_summed); + devlink_fmsg_put(fmsg, "csum_complete_sw", !!skb->csum_complete_sw); + devlink_fmsg_put(fmsg, "csum_valid", !!skb->csum_valid); + devlink_fmsg_put(fmsg, "csum_level", (u8)skb->csum_level); + devlink_fmsg_put(fmsg, "sw_hash", !!skb->sw_hash); + devlink_fmsg_put(fmsg, "l4_hash", !!skb->l4_hash); + devlink_fmsg_put(fmsg, "proto", ntohs(skb->protocol)); + devlink_fmsg_put(fmsg, "pkt_type", (u8)skb->pkt_type); + devlink_fmsg_put(fmsg, "iif", skb->skb_iif); + + if (sk) { + devlink_fmsg_pair_nest_start(fmsg, "sk"); + devlink_fmsg_obj_nest_start(fmsg); + devlink_fmsg_put(fmsg, "family", sk->sk_type); + devlink_fmsg_put(fmsg, "type", sk->sk_type); + devlink_fmsg_put(fmsg, "proto", sk->sk_protocol); + devlink_fmsg_obj_nest_end(fmsg); + devlink_fmsg_pair_nest_end(fmsg); + } + + devlink_fmsg_obj_nest_end(fmsg); + devlink_fmsg_pair_nest_end(fmsg); + + devlink_fmsg_pair_nest_start(fmsg, "shinfo"); + devlink_fmsg_obj_nest_start(fmsg); + devlink_fmsg_put(fmsg, "tx_flags", sh->tx_flags); + devlink_fmsg_put(fmsg, "nr_frags", sh->nr_frags); + devlink_fmsg_put(fmsg, "gso_size", sh->gso_size); + devlink_fmsg_put(fmsg, "gso_type", sh->gso_type); + devlink_fmsg_put(fmsg, "gso_segs", sh->gso_segs); + devlink_fmsg_obj_nest_end(fmsg); + devlink_fmsg_pair_nest_end(fmsg); +} +EXPORT_SYMBOL_GPL(devlink_fmsg_dump_skb); diff --git a/net/wireless/scan.c b/net/wireless/scan.c index d0aed41ded..bb518e37f8 100644 --- a/net/wireless/scan.c +++ b/net/wireless/scan.c @@ -1871,7 +1871,8 @@ cfg80211_update_known_bss(struct cfg80211_registered_device *rdev, */ f = rcu_access_pointer(new->pub.beacon_ies); - kfree_rcu((struct cfg80211_bss_ies *)f, rcu_head); + if (!new->pub.hidden_beacon_bss) + kfree_rcu((struct cfg80211_bss_ies *)f, rcu_head); return false; } diff --git a/redhat/configs/common/generic/x86/CONFIG_MITIGATION_ITS b/redhat/configs/common/generic/x86/CONFIG_MITIGATION_ITS new file mode 100644 index 0000000000..4251b771b9 --- /dev/null +++ b/redhat/configs/common/generic/x86/CONFIG_MITIGATION_ITS @@ -0,0 +1 @@ +CONFIG_MITIGATION_ITS=y diff --git a/redhat/configs/rhel/generic/CONFIG_PACKING b/redhat/configs/rhel/generic/CONFIG_PACKING index 6af3d64ddb..6a11756bf6 100644 --- a/redhat/configs/rhel/generic/CONFIG_PACKING +++ b/redhat/configs/rhel/generic/CONFIG_PACKING @@ -1 +1 @@ -# CONFIG_PACKING is not set +CONFIG_PACKING=y diff --git a/redhat/configs/rhel/generic/CONFIG_PACKING_KUNIT_TEST b/redhat/configs/rhel/generic/CONFIG_PACKING_KUNIT_TEST new file mode 100644 index 0000000000..e2b176d3bf --- /dev/null +++ b/redhat/configs/rhel/generic/CONFIG_PACKING_KUNIT_TEST @@ -0,0 +1 @@ +CONFIG_PACKING_KUNIT_TEST=m diff --git a/redhat/kernel.changelog-10.0 b/redhat/kernel.changelog-10.0 index ee7bdeb55c..0ffa7be1c7 100644 --- a/redhat/kernel.changelog-10.0 +++ b/redhat/kernel.changelog-10.0 @@ -1,3 +1,214 @@ +* Mon Nov 03 2025 Jan Stancek [6.12.0-55.43.1.el10_0] +- wifi: cfg80211: fix use-after-free in cmp_bss() (CKI Backport Bot) [RHEL-122878] {CVE-2025-39864} +- igc: fix lock order in igc_ptp_reset (CKI Backport Bot) [RHEL-118667] +- igc: add lock preventing multiple simultaneous PTM transactions (CKI Backport Bot) [RHEL-118667] +- igc: cleanup PTP module if probe fails (CKI Backport Bot) [RHEL-118667] +- igc: handle the IGC_PTP_ENABLED flag correctly (CKI Backport Bot) [RHEL-118667] +- igc: move ktime snapshot into PTM retry loop (CKI Backport Bot) [RHEL-118667] +- igc: increase wait time before retrying PTM (CKI Backport Bot) [RHEL-118667] +- igc: fix PTM cycle trigger logic (CKI Backport Bot) [RHEL-118667] +- coredump: Only sort VMAs when core_sort_vma sysctl is set (Herton R. Krzesinski) [RHEL-113363] +- cxgb4: Avoid removal of uninserted tid (CKI Backport Bot) [RHEL-112148] +- powerpc/pseries/iommu: memory notifier incorrectly adds TCEs for pmemory (Mamatha Inamdar) [RHEL-103014] +- tools arch x86: Sync the msr-index.h copy with the kernel sources (Waiman Long) [RHEL-92175] +- Revert "mm/execmem: Unify early execmem_cache behaviour" (Waiman Long) [RHEL-92175] +- x86/Kconfig: only enable ROX cache in execmem when STRICT_MODULE_RWX is set (Waiman Long) [RHEL-92175] +- x86/mm/pat: don't collapse pages without PSE set (Waiman Long) [RHEL-92175] +- tools arch x86: Sync the msr-index.h copy with the kernel sources (Waiman Long) [RHEL-92175] +- arm64: proton-pack: Add new CPUs 'k' values for branch mitigation (Waiman Long) [RHEL-92175] +- arm64: bpf: Only mitigate cBPF programs loaded by unprivileged users (Waiman Long) [RHEL-92175] {CVE-2025-37963} +- arm64: bpf: Add BHB mitigation to the epilogue for cBPF programs (Waiman Long) [RHEL-92175] {CVE-2025-37948} +- arm64: proton-pack: Expose whether the branchy loop k value (Waiman Long) [RHEL-92175] +- arm64: proton-pack: Expose whether the platform is mitigated by firmware (Waiman Long) [RHEL-92175] +- arm64: insn: Add support for encoding DSB (Waiman Long) [RHEL-92175] +- selftest/x86/bugs: Add selftests for ITS (Waiman Long) [RHEL-92175] {CVE-2024-28956} +- x86/ibt: Keep IBT disabled during alternative patching (Waiman Long) [RHEL-92175] {CVE-2024-28956} +- mm/execmem: Unify early execmem_cache behaviour (Waiman Long) [RHEL-92175] {CVE-2024-28956} +- x86/its: Align RETs in BHB clear sequence to avoid thunking (Waiman Long) [RHEL-92175] {CVE-2024-28956} +- x86/its: Add support for RSB stuffing mitigation (Waiman Long) [RHEL-92175] {CVE-2024-28956} +- x86/its: Add "vmexit" option to skip mitigation on some CPUs (Waiman Long) [RHEL-92175] {CVE-2024-28956} +- x86/its: Enable Indirect Target Selection mitigation (Waiman Long) [RHEL-92175] {CVE-2024-28956} +- redhat/configs: Enable CONFIG_MITIGATION_ITS for x86 (Waiman Long) [RHEL-92175] +- x86/its: Add support for ITS-safe return thunk (Waiman Long) [RHEL-92175] {CVE-2024-28956} +- x86/its: Add support for ITS-safe indirect thunk (Waiman Long) [RHEL-92175] {CVE-2024-28956} +- x86/its: Enumerate Indirect Target Selection (ITS) bug (Waiman Long) [RHEL-92175] {CVE-2024-28956} +- Documentation: x86/bugs/its: Add ITS documentation (Waiman Long) [RHEL-92175] {CVE-2024-28956} +- KVM: SVM: Set/clear SRSO's BP_SPEC_REDUCE on 0 <=> 1 VM count transitions (Waiman Long) [RHEL-92175] +- x86/bugs: Add RSB mitigation document (Waiman Long) [RHEL-92175] +- x86/bugs: Don't fill RSB on context switch with eIBRS (Waiman Long) [RHEL-92175] +- x86/bugs: Don't fill RSB on VMEXIT with eIBRS+retpoline (Waiman Long) [RHEL-92175] +- x86/bugs: Fix RSB clearing in indirect_branch_prediction_barrier() (Waiman Long) [RHEL-92175] +- x86/bugs: Use SBPB in write_ibpb() if applicable (Waiman Long) [RHEL-92175] +- x86/bugs: Rename entry_ibpb() to write_ibpb() (Waiman Long) [RHEL-92175] +- x86/bugs: Make spectre user default depend on MITIGATION_SPECTRE_V2 (Waiman Long) [RHEL-92175] +- x86/bugs: Use the cpu_smt_possible() helper instead of open-coded code (Waiman Long) [RHEL-92175] +- x86/bugs: Add AUTO mitigations for mds/taa/mmio/rfds (Waiman Long) [RHEL-92175] +- x86/bugs: Relocate mds/taa/mmio/rfds defines (Waiman Long) [RHEL-92175] +- x86/bugs: Add X86_BUG_SPECTRE_V2_USER (Waiman Long) [RHEL-92175] +- x86/bugs: Remove X86_FEATURE_USE_IBPB (Waiman Long) [RHEL-92175] +- KVM: nVMX: Always use IBPB to properly virtualize IBRS (Waiman Long) [RHEL-92175] +- x86/bugs: Use a static branch to guard IBPB on vCPU switch (Waiman Long) [RHEL-92175] +- x86/bugs: Remove the X86_FEATURE_USE_IBPB check in ib_prctl_set() (Waiman Long) [RHEL-92175] +- x86/mm: Remove X86_FEATURE_USE_IBPB checks in cond_mitigation() (Waiman Long) [RHEL-92175] +- x86/bugs: Move the X86_FEATURE_USE_IBPB check into callers (Waiman Long) [RHEL-92175] +- x86/bugs: KVM: Add support for SRSO_MSR_FIX (Waiman Long) [RHEL-92175] +- arm64: errata: Assume that unknown CPUs _are_ vulnerable to Spectre BHB (Waiman Long) [RHEL-92175] +- arm64: errata: Add QCOM_KRYO_4XX_GOLD to the spectre_bhb_k24_list (Waiman Long) [RHEL-92175] +- x86/rfds: Exclude P-only parts from the RFDS affected list (Waiman Long) [RHEL-92175] +- x86/cpu: Update x86_match_cpu() to also use cpu-type (Waiman Long) [RHEL-92175] +- x86/cpu: Add cpu_type to struct x86_cpu_id (Waiman Long) [RHEL-92175] +- x86/cpu: Shorten CPU matching macro (Waiman Long) [RHEL-92175] +- x86/cpu: Fix the description of X86_MATCH_VFM_STEPS() (Waiman Long) [RHEL-92175] +- module: don't annotate ROX memory as kmemleak_not_leak() (Waiman Long) [RHEL-92175] +- module: drop unused module_writable_address() (Waiman Long) [RHEL-92175] +- Revert "x86/module: prepare module loading for ROX allocations of text" (Waiman Long) [RHEL-92175] +- module: switch to execmem API for remapping as RW and restoring ROX (Waiman Long) [RHEL-92175] +- execmem: add API for temporal remapping as RW and restoring ROX afterwards (Waiman Long) [RHEL-92175] +- execmem: don't remove ROX cache from the direct map (Waiman Long) [RHEL-92175] +- x86/mm/pat: restore large ROX pages after fragmentation (Waiman Long) [RHEL-92175] +- x86/mm/pat: drop duplicate variable in cpa_flush() (Waiman Long) [RHEL-92175] +- x86/mm/pat: cpa-test: fix length for CPA_ARRAY test (Waiman Long) [RHEL-92175] +- x86/cpu/kvm: SRSO: Fix possible missing IBPB on VM-Exit (Waiman Long) [RHEL-92175] +- x86/cpu: Fix typo in x86_match_cpu()'s doc (Waiman Long) [RHEL-92175] +- x86/cpu: Expose only stepping min/max interface (Waiman Long) [RHEL-92175] +- x86/cpu: Introduce new microcode matching helper (Waiman Long) [RHEL-92175] +- KVM: x86: Advertise SRSO_USER_KERNEL_NO to userspace (Waiman Long) [RHEL-92175] +- x86/bugs: Add SRSO_USER_KERNEL_NO support (Waiman Long) [RHEL-92175] +- module: fix writing of livepatch relocations in ROX text (Waiman Long) [RHEL-92175] +- x86/execmem: fix ROX cache usage in Xen PV guests (Waiman Long) [RHEL-92175] +- alloc_tag: load module tags into separate contiguous memory (Waiman Long) [RHEL-92175] +- alloc_tag: introduce shutdown_mem_profiling helper function (Waiman Long) [RHEL-92175] +- maple_tree: add mas_for_each_rev() helper (Waiman Long) [RHEL-92175] +- x86/module: enable ROX caches for module text on 64 bit (Waiman Long) [RHEL-92175] +- execmem: add support for cache of large ROX pages (Waiman Long) [RHEL-92175] +- x86/module: prepare module loading for ROX allocations of text (Waiman Long) [RHEL-92175] +- arch: introduce set_direct_map_valid_noflush() (Waiman Long) [RHEL-92175] +- module: prepare to handle ROX allocations for text (Waiman Long) [RHEL-92175] +- asm-generic: introduce text-patching.h (Waiman Long) [RHEL-92175] +- mm: vmalloc: don't account for number of nodes for HUGE_VMAP allocations (Waiman Long) [RHEL-92175] +- mm: vmalloc: group declarations depending on CONFIG_MMU together (Waiman Long) [RHEL-92175] +- x86/cpu: Add CPU type to struct cpuinfo_topology (Waiman Long) [RHEL-92175] +- x86/cpufeatures: Add X86_FEATURE_AMD_HETEROGENEOUS_CORES (Waiman Long) [RHEL-92175] +- ice: fix Rx page leak on multi-buffer frames (Petr Oros) [RHEL-116542] +- ice: fix NULL access of tx->in_use in ice_ll_ts_intr (Petr Oros) [RHEL-116534] +- ice: fix NULL access of tx->in_use in ice_ptp_ts_irq (Petr Oros) [RHEL-116534] +- ice: default to TIME_REF instead of TXCO on E825-C (Petr Oros) [RHEL-107466] +- ice: move TSPLL init calls to ice_ptp.c (Petr Oros) [RHEL-107466] +- ice: fall back to TCXO on TSPLL lock fail (Petr Oros) [RHEL-107466] +- ice: wait before enabling TSPLL (Petr Oros) [RHEL-107466] +- ice: add multiple TSPLL helpers (Petr Oros) [RHEL-107466] +- ice: use bitfields instead of unions for CGU regs (Petr Oros) [RHEL-107466] +- ice: read TSPLL registers again before reporting status (Petr Oros) [RHEL-107466] +- ice: clear time_sync_en field for E825-C during reprogramming (Petr Oros) [RHEL-107466] +- ice: add TSPLL log config helper (Petr Oros) [RHEL-107466] +- ice: use designated initializers for TSPLL consts (Petr Oros) [RHEL-107466] +- ice: remove ice_tspll_params_e825 definitions (Petr Oros) [RHEL-107466] +- ice: fix E825-C TSPLL register definitions (Petr Oros) [RHEL-107466] +- ice: rename TSPLL and CGU functions and definitions (Petr Oros) [RHEL-107466] +- ice: move TSPLL functions to a separate file (Petr Oros) [RHEL-107466] +- ice: enable timesync operation on 2xNAC E825 devices (Petr Oros) [RHEL-107466] +- ice: refactor ice_sbq_msg_dev enum (Petr Oros) [RHEL-107466] +- ice: remove SW side band access workaround for E825 (Petr Oros) [RHEL-107466] +- ice/ptp: fix crosstimestamp reporting (Petr Oros) [RHEL-115351] +- ice: fix rebuilding the Tx scheduler tree for large queue counts (Petr Oros) [RHEL-115351] +- ice: create new Tx scheduler nodes for new queues only (Petr Oros) [RHEL-115351] +- ice: fix Tx scheduler error handling in XDP callback (Petr Oros) [RHEL-115351] +- ice: Fix LACP bonds without SRIOV environment (Petr Oros) [RHEL-115351] +- ice: fix vf->num_mac count with port representors (Petr Oros) [RHEL-115351] +- ice: Check VF VSI Pointer Value in ice_vc_add_fdir_fltr() (Petr Oros) [RHEL-115351] +- ice: fix Get Tx Topology AQ command error on E830 (Petr Oros) [RHEL-115351] +- ice: fix using untrusted value of pkt_len in ice_vc_fdir_parse_raw() (Petr Oros) [RHEL-115351] +- ice: fix input validation for virtchnl BW (Petr Oros) [RHEL-115351] +- ice: validate queue quanta parameters to prevent OOB access (Petr Oros) [RHEL-115351] +- ice: stop truncating queue ids when checking (Petr Oros) [RHEL-115351] +- virtchnl: make proto and filter action count unsigned (Petr Oros) [RHEL-115351] +- ice: fix reservation of resources for RDMA when disabled (Petr Oros) [RHEL-115351] +- ice: health.c: fix compilation on gcc 7.5 (Petr Oros) [RHEL-115351] +- ice: E825C PHY register cleanup (Petr Oros) [RHEL-115351] +- ice: Refactor E825C PHY registers info struct (Petr Oros) [RHEL-115351] +- ice: rename ice_ptp_init_phc_eth56g function (Petr Oros) [RHEL-115351] +- ice: Add E830 checksum offload support (Petr Oros) [RHEL-115351] +- ice: register devlink prior to creating health reporters (Petr Oros) [RHEL-115351] +- ice: Fix switchdev slow-path in LAG (Petr Oros) [RHEL-115351] +- ice: fix memory leak in aRFS after reset (Petr Oros) [RHEL-115351] +- ice: do not configure destination override for switchdev (Petr Oros) [RHEL-115351] +- ice: dpll: Remove newline at the end of a netlink error message (Petr Oros) [RHEL-115351] +- virtchnl: add enumeration for the rxdid format (Petr Oros) [RHEL-115351] +- ice: support Rx timestamp on flex descriptor (Petr Oros) [RHEL-115351] +- virtchnl: add support for enabling PTP on iAVF (Petr Oros) [RHEL-115351] +- ice: refactor ice_fdir_create_dflt_rules() function (Petr Oros) [RHEL-115351] +- ice: Implement PTP support for E830 devices (Petr Oros) [RHEL-115351] +- ice: Refactor ice_ptp_init_tx_* (Petr Oros) [RHEL-115351] +- ice: Add unified ice_capture_crosststamp (Petr Oros) [RHEL-115351] +- ice: Process TSYN IRQ in a separate function (Petr Oros) [RHEL-115351] +- ice: Use FIELD_PREP for timestamp values (Petr Oros) [RHEL-115351] +- ice: Remove unnecessary ice_is_e8xx() functions (Petr Oros) [RHEL-115351] +- ice: Don't check device type when checking GNSS presence (Petr Oros) [RHEL-115351] +- ice: use generic unrolled_count() macro (Petr Oros) [RHEL-115351] +- ice: count combined queues using Rx/Tx count (Petr Oros) [RHEL-115351] +- ice: Add check for devm_kzalloc() (Petr Oros) [RHEL-115351] +- ice: remove invalid parameter of equalizer (Petr Oros) [RHEL-115351] +- ice: fix ice_parser_rt::bst_key array size (Petr Oros) [RHEL-115351] +- ice: Add in/out PTP pin delays (Petr Oros) [RHEL-115351] +- ice: use string choice helpers (Petr Oros) [RHEL-115351] +- ice: add fw and port health reporters (Petr Oros) [RHEL-115351] +- ice: add recipe priority check in search (Petr Oros) [RHEL-115351] +- ice: Add MDD logging via devlink health (Petr Oros) [RHEL-115351] +- ice: add Tx hang devlink health reporter (Petr Oros) [RHEL-115351] +- ice: rename devlink_port.[ch] to port.[ch] (Petr Oros) [RHEL-115351] +- ice: cleanup Rx queue context programming functions (Petr Oros) [RHEL-115351] +- ice: move prefetch enable to ice_setup_rx_ctx (Petr Oros) [RHEL-115351] +- ice: reduce size of queue context fields (Petr Oros) [RHEL-115351] +- ice: use for Tx and Rx queue context data (Petr Oros) [RHEL-115351] +- ice: use structures to keep track of queue context size (Petr Oros) [RHEL-115351] +- ice: remove int_q_state from ice_tlan_ctx (Petr Oros) [RHEL-115351] +- ice: fix incorrect PHY settings for 100 GB/s (Petr Oros) [RHEL-115351] +- ice: fix max values for dpll pin phase adjust (Petr Oros) [RHEL-115351] +- ice: Fix VLAN pruning in switchdev mode (Petr Oros) [RHEL-115351] +- ice: Fix NULL pointer dereference in switchdev (Petr Oros) [RHEL-115351] +- ice: fix PHY timestamp extraction for ETH56G (Petr Oros) [RHEL-115351] +- ice: Unbind the workqueue (Petr Oros) [RHEL-115351] +- ice: use stack variable for virtchnl_supported_rxdids (Petr Oros) [RHEL-115351] +- ice: initialize pf->supported_rxdids immediately after loading DDP (Petr Oros) [RHEL-115351] +- ice: only allow Tx promiscuous for multicast (Petr Oros) [RHEL-115351] +- ice: support optional flags in signature segment header (Petr Oros) [RHEL-115351] +- ice: refactor "last" segment of DDP pkg (Petr Oros) [RHEL-115351] +- ice: extend dump serdes equalizer values feature (Petr Oros) [RHEL-115351] +- ice: rework of dump serdes equalizer values feature (Petr Oros) [RHEL-115351] +- ice: Support VF queue rate limit and quanta size configuration (Petr Oros) [RHEL-115351] +- virtchnl: fix m68k build. (Petr Oros) [RHEL-115351] +- virtchnl: support queue rate limit and quanta size configuration (Petr Oros) [RHEL-115351] +- ice: Cleanup unused declarations (Petr Oros) [RHEL-115351] +- ice: Use common error handling code in two functions (Petr Oros) [RHEL-115351] +- ice: Make use of assign_bit() API (Petr Oros) [RHEL-115351] +- ice: store max_frame and rx_buf_len only in ice_rx_ring (Petr Oros) [RHEL-115351] +- ice: consistently use q_idx in ice_vc_cfg_qs_msg() (Petr Oros) [RHEL-115351] +- ice: Implement ethtool reset support (Petr Oros) [RHEL-115351] +- ice: Add correct PHY lane assignment (Petr Oros) [RHEL-115351] +- ice: Fix ETH56G FC-FEC Rx offset value (Petr Oros) [RHEL-115351] +- ice: Fix quad registers read on E825 (Petr Oros) [RHEL-115351] +- ice: Fix E825 initialization (Petr Oros) [RHEL-115351] +- unroll: add generic loop unroll helpers (CKI Backport Bot) [RHEL-115351] +- devlink: add devlink_fmsg_dump_skb() function (Petr Oros) [RHEL-115351] +- devlink: add devlink_fmsg_put() macro (Petr Oros) [RHEL-115351] +- redhat: configs: enable CONFIG_PACKING (Petr Oros) [RHEL-115351] +- lib: packing: catch kunit_kzalloc() failure in the pack() test (CKI Backport Bot) [RHEL-115351] +- lib: packing: document recently added APIs (CKI Backport Bot) [RHEL-115351] +- lib: packing: add pack_fields() and unpack_fields() (CKI Backport Bot) [RHEL-115351] +- lib: packing: demote truncation error in pack() to a warning in __pack() (CKI Backport Bot) [RHEL-115351] +- lib: packing: create __pack() and __unpack() variants without error checking (CKI Backport Bot) [RHEL-115351] +- lib: packing: use GENMASK() for box_mask (CKI Backport Bot) [RHEL-115351] +- lib: packing: use BITS_PER_BYTE instead of 8 (CKI Backport Bot) [RHEL-115351] +- lib: packing: fix QUIRK_MSB_ON_THE_RIGHT behavior (CKI Backport Bot) [RHEL-115351] +- lib: packing: add additional KUnit tests (CKI Backport Bot) [RHEL-115351] +- lib: packing: add KUnit tests adapted from selftests (CKI Backport Bot) [RHEL-115351] +- lib: packing: duplicate pack() and unpack() implementations (CKI Backport Bot) [RHEL-115351] +- lib: packing: add pack() and unpack() wrappers over packing() (CKI Backport Bot) [RHEL-115351] +- lib: packing: remove kernel-doc from header file (CKI Backport Bot) [RHEL-115351] +- lib: packing: adjust definitions and implementation for arbitrary buffer lengths (CKI Backport Bot) [RHEL-115351] +- lib: packing: refuse operating on bit indices which exceed size of buffer (CKI Backport Bot) [RHEL-115351] +Resolves: RHEL-103014, RHEL-107466, RHEL-112148, RHEL-113363, RHEL-115351, RHEL-116534, RHEL-116542, RHEL-118667, RHEL-122878, RHEL-92175 + * Sat Oct 25 2025 CKI KWF Bot [6.12.0-55.42.1.el10_0] - platform/x86/intel: power-domains: Use topology_logical_package_id() for package ID (CKI Backport Bot) [RHEL-123288] - KVM: arm64: Disable MPAM visibility by default and ignore VMM writes (Gavin Shan) [RHEL-121690] diff --git a/scripts/Makefile b/scripts/Makefile index 6bcda4b9d0..546e8175e1 100644 --- a/scripts/Makefile +++ b/scripts/Makefile @@ -47,7 +47,7 @@ HOSTCFLAGS_sorttable.o += -DMCOUNT_SORT_ENABLED endif # The following programs are only built on demand -hostprogs += unifdef +hostprogs += unifdef gen_packed_field_checks # The module linker script is preprocessed on demand targets += module.lds diff --git a/scripts/gen_packed_field_checks.c b/scripts/gen_packed_field_checks.c new file mode 100644 index 0000000000..60042b7616 --- /dev/null +++ b/scripts/gen_packed_field_checks.c @@ -0,0 +1,37 @@ +// SPDX-License-Identifier: GPL-2.0 +// Copyright (c) 2024, Intel Corporation +#include +#include + +#define MAX_PACKED_FIELD_SIZE 50 + +int main(int argc, char **argv) +{ + /* The first macro doesn't need a 'do {} while(0)' loop */ + printf("#define CHECK_PACKED_FIELDS_1(fields) \\\n"); + printf("\tCHECK_PACKED_FIELD(fields, 0)\n\n"); + + /* Remaining macros require a do/while loop, and are implemented + * recursively by calling the previous iteration's macro. + */ + for (int i = 2; i <= MAX_PACKED_FIELD_SIZE; i++) { + printf("#define CHECK_PACKED_FIELDS_%d(fields) do { \\\n", i); + printf("\tCHECK_PACKED_FIELDS_%d(fields); \\\n", i - 1); + printf("\tCHECK_PACKED_FIELD(fields, %d); \\\n", i - 1); + printf("} while (0)\n\n"); + } + + printf("#define CHECK_PACKED_FIELDS(fields) \\\n"); + + for (int i = 1; i <= MAX_PACKED_FIELD_SIZE; i++) + printf("\t__builtin_choose_expr(ARRAY_SIZE(fields) == %d, ({ CHECK_PACKED_FIELDS_%d(fields); }), \\\n", + i, i); + + printf("\t({ BUILD_BUG_ON_MSG(1, \"CHECK_PACKED_FIELDS() must be regenerated to support array sizes larger than %d.\"); }) \\\n", + MAX_PACKED_FIELD_SIZE); + + for (int i = 1; i <= MAX_PACKED_FIELD_SIZE; i++) + printf(")"); + + printf("\n"); +} diff --git a/scripts/module.lds.S b/scripts/module.lds.S index 3f43edef81..711c6e0299 100644 --- a/scripts/module.lds.S +++ b/scripts/module.lds.S @@ -50,7 +50,7 @@ SECTIONS { .data : { *(.data .data.[0-9a-zA-Z_]*) *(.data..L*) - CODETAG_SECTIONS() + MOD_CODETAG_SECTIONS() } .rodata : { @@ -59,9 +59,10 @@ SECTIONS { } #else .data : { - CODETAG_SECTIONS() + MOD_CODETAG_SECTIONS() } #endif + MOD_SEPARATE_CODETAG_SECTIONS() } /* bring in arch-specific sections */ diff --git a/tools/arch/x86/include/asm/cpufeatures.h b/tools/arch/x86/include/asm/cpufeatures.h index dd4682857c..1fe09d2bb5 100644 --- a/tools/arch/x86/include/asm/cpufeatures.h +++ b/tools/arch/x86/include/asm/cpufeatures.h @@ -210,7 +210,6 @@ #define X86_FEATURE_MBA ( 7*32+18) /* "mba" Memory Bandwidth Allocation */ #define X86_FEATURE_RSB_CTXSW ( 7*32+19) /* Fill RSB on context switches */ #define X86_FEATURE_PERFMON_V2 ( 7*32+20) /* "perfmon_v2" AMD Performance Monitoring Version 2 */ -#define X86_FEATURE_USE_IBPB ( 7*32+21) /* Indirect Branch Prediction Barrier enabled */ #define X86_FEATURE_USE_IBRS_FW ( 7*32+22) /* Use IBRS during runtime firmware calls */ #define X86_FEATURE_SPEC_STORE_BYPASS_DISABLE ( 7*32+23) /* Disable Speculative Store Bypass. */ #define X86_FEATURE_LS_CFG_SSBD ( 7*32+24) /* AMD SSBD implementation via LS_CFG MSR */ diff --git a/tools/arch/x86/include/asm/msr-index.h b/tools/arch/x86/include/asm/msr-index.h index 3ae84c3b8e..5a76bffc32 100644 --- a/tools/arch/x86/include/asm/msr-index.h +++ b/tools/arch/x86/include/asm/msr-index.h @@ -209,6 +209,14 @@ * VERW clears CPU Register * File. */ +#define ARCH_CAP_ITS_NO BIT_ULL(62) /* + * Not susceptible to + * Indirect Target Selection. + * This bit is not set by + * HW, but is synthesized by + * VMMs for guests to know + * their affected status. + */ #define MSR_IA32_FLUSH_CMD 0x0000010b #define L1D_FLUSH BIT(0) /* @@ -522,7 +530,7 @@ #define MSR_HWP_CAPABILITIES 0x00000771 #define MSR_HWP_REQUEST_PKG 0x00000772 #define MSR_HWP_INTERRUPT 0x00000773 -#define MSR_HWP_REQUEST 0x00000774 +#define MSR_HWP_REQUEST 0x00000774 #define MSR_HWP_STATUS 0x00000777 /* CPUID.6.EAX */ @@ -539,16 +547,16 @@ #define HWP_LOWEST_PERF(x) (((x) >> 24) & 0xff) /* IA32_HWP_REQUEST */ -#define HWP_MIN_PERF(x) (x & 0xff) -#define HWP_MAX_PERF(x) ((x & 0xff) << 8) +#define HWP_MIN_PERF(x) (x & 0xff) +#define HWP_MAX_PERF(x) ((x & 0xff) << 8) #define HWP_DESIRED_PERF(x) ((x & 0xff) << 16) -#define HWP_ENERGY_PERF_PREFERENCE(x) (((unsigned long long) x & 0xff) << 24) +#define HWP_ENERGY_PERF_PREFERENCE(x) (((u64)x & 0xff) << 24) #define HWP_EPP_PERFORMANCE 0x00 #define HWP_EPP_BALANCE_PERFORMANCE 0x80 #define HWP_EPP_BALANCE_POWERSAVE 0xC0 #define HWP_EPP_POWERSAVE 0xFF -#define HWP_ACTIVITY_WINDOW(x) ((unsigned long long)(x & 0xff3) << 32) -#define HWP_PACKAGE_CONTROL(x) ((unsigned long long)(x & 0x1) << 42) +#define HWP_ACTIVITY_WINDOW(x) ((u64)(x & 0xff3) << 32) +#define HWP_PACKAGE_CONTROL(x) ((u64)(x & 0x1) << 42) /* IA32_HWP_STATUS */ #define HWP_GUARANTEED_CHANGE(x) (x & 0x1) @@ -591,7 +599,11 @@ /* V6 PMON MSR range */ #define MSR_IA32_PMC_V6_GP0_CTR 0x1900 #define MSR_IA32_PMC_V6_GP0_CFG_A 0x1901 +#define MSR_IA32_PMC_V6_GP0_CFG_B 0x1902 +#define MSR_IA32_PMC_V6_GP0_CFG_C 0x1903 #define MSR_IA32_PMC_V6_FX0_CTR 0x1980 +#define MSR_IA32_PMC_V6_FX0_CFG_B 0x1982 +#define MSR_IA32_PMC_V6_FX0_CFG_C 0x1983 #define MSR_IA32_PMC_V6_STEP 4 /* KeyID partitioning between MKTME and TDX */ diff --git a/tools/testing/selftests/Makefile b/tools/testing/selftests/Makefile index 363d031a16..9cf769d415 100644 --- a/tools/testing/selftests/Makefile +++ b/tools/testing/selftests/Makefile @@ -115,6 +115,7 @@ TARGETS += user_events TARGETS += vDSO TARGETS += mm TARGETS += x86 +TARGETS += x86/bugs TARGETS += zram #Please keep the TARGETS list alphabetically sorted # Run "make quicktest=1 run_tests" or diff --git a/tools/testing/selftests/x86/bugs/Makefile b/tools/testing/selftests/x86/bugs/Makefile new file mode 100644 index 0000000000..8ff2d7226c --- /dev/null +++ b/tools/testing/selftests/x86/bugs/Makefile @@ -0,0 +1,3 @@ +TEST_PROGS := its_sysfs.py its_permutations.py its_indirect_alignment.py its_ret_alignment.py +TEST_FILES := common.py +include ../../lib.mk diff --git a/tools/testing/selftests/x86/bugs/common.py b/tools/testing/selftests/x86/bugs/common.py new file mode 100755 index 0000000000..fed5092b1c --- /dev/null +++ b/tools/testing/selftests/x86/bugs/common.py @@ -0,0 +1,164 @@ +#! /usr/bin/python3 -sP +# SPDX-License-Identifier: GPL-2.0 +# +# Copyright (c) 2025 Intel Corporation +# +# This contains kselftest framework adapted common functions for testing +# mitigation for x86 bugs. + +import os, sys, re, shutil + +sys.path.insert(0, '../../kselftest') +import ksft + +def read_file(path): + if not os.path.exists(path): + return None + with open(path, 'r') as file: + return file.read().strip() + +def cpuinfo_has(arg): + cpuinfo = read_file('/proc/cpuinfo') + if arg in cpuinfo: + return True + return False + +def cmdline_has(arg): + cmdline = read_file('/proc/cmdline') + if arg in cmdline: + return True + return False + +def cmdline_has_either(args): + cmdline = read_file('/proc/cmdline') + for arg in args: + if arg in cmdline: + return True + return False + +def cmdline_has_none(args): + return not cmdline_has_either(args) + +def cmdline_has_all(args): + cmdline = read_file('/proc/cmdline') + for arg in args: + if arg not in cmdline: + return False + return True + +def get_sysfs(bug): + return read_file("/sys/devices/system/cpu/vulnerabilities/" + bug) + +def sysfs_has(bug, mitigation): + status = get_sysfs(bug) + if mitigation in status: + return True + return False + +def sysfs_has_either(bugs, mitigations): + for bug in bugs: + for mitigation in mitigations: + if sysfs_has(bug, mitigation): + return True + return False + +def sysfs_has_none(bugs, mitigations): + return not sysfs_has_either(bugs, mitigations) + +def sysfs_has_all(bugs, mitigations): + for bug in bugs: + for mitigation in mitigations: + if not sysfs_has(bug, mitigation): + return False + return True + +def bug_check_pass(bug, found): + ksft.print_msg(f"\nFound: {found}") + # ksft.print_msg(f"\ncmdline: {read_file('/proc/cmdline')}") + ksft.test_result_pass(f'{bug}: {found}') + +def bug_check_fail(bug, found, expected): + ksft.print_msg(f'\nFound:\t {found}') + ksft.print_msg(f'Expected:\t {expected}') + ksft.print_msg(f"\ncmdline: {read_file('/proc/cmdline')}") + ksft.test_result_fail(f'{bug}: {found}') + +def bug_status_unknown(bug, found): + ksft.print_msg(f'\nUnknown status: {found}') + ksft.print_msg(f"\ncmdline: {read_file('/proc/cmdline')}") + ksft.test_result_fail(f'{bug}: {found}') + +def basic_checks_sufficient(bug, mitigation): + if not mitigation: + bug_status_unknown(bug, "None") + return True + elif mitigation == "Not affected": + ksft.test_result_pass(bug) + return True + elif mitigation == "Vulnerable": + if cmdline_has_either([f'{bug}=off', 'mitigations=off']): + bug_check_pass(bug, mitigation) + return True + return False + +def get_section_info(vmlinux, section_name): + from elftools.elf.elffile import ELFFile + with open(vmlinux, 'rb') as f: + elffile = ELFFile(f) + section = elffile.get_section_by_name(section_name) + if section is None: + ksft.print_msg("Available sections in vmlinux:") + for sec in elffile.iter_sections(): + ksft.print_msg(sec.name) + raise ValueError(f"Section {section_name} not found in {vmlinux}") + return section['sh_addr'], section['sh_offset'], section['sh_size'] + +def get_patch_sites(vmlinux, offset, size): + import struct + output = [] + with open(vmlinux, 'rb') as f: + f.seek(offset) + i = 0 + while i < size: + data = f.read(4) # s32 + if not data: + break + sym_offset = struct.unpack(' 1: + arg_vmlinux = os.sys.argv[1] + if not os.path.exists(arg_vmlinux): + ksft.test_result_fail(f"its_indirect_alignment.py: vmlinux not found at argument path: {arg_vmlinux}") + ksft.exit_fail() + os.makedirs(f"/usr/lib/debug/lib/modules/{os.uname().release}", exist_ok=True) + os.system(f'cp {arg_vmlinux} /usr/lib/debug/lib/modules/$(uname -r)/vmlinux') + +vmlinux = f"/usr/lib/debug/lib/modules/{os.uname().release}/vmlinux" +if not os.path.exists(vmlinux): + ksft.test_result_fail(f"its_indirect_alignment.py: vmlinux not found at {vmlinux}") + ksft.exit_fail() + +ksft.print_msg(f"Using vmlinux: {vmlinux}") + +retpolines_start_vmlinux, retpolines_sec_offset, size = c.get_section_info(vmlinux, '.retpoline_sites') +ksft.print_msg(f"vmlinux: Section .retpoline_sites (0x{retpolines_start_vmlinux:x}) found at 0x{retpolines_sec_offset:x} with size 0x{size:x}") + +sites_offset = c.get_patch_sites(vmlinux, retpolines_sec_offset, size) +total_retpoline_tests = len(sites_offset) +ksft.print_msg(f"Found {total_retpoline_tests} retpoline sites") + +prog = c.get_runtime_kernel() +retpolines_start_kcore = prog.symbol('__retpoline_sites').address +ksft.print_msg(f'kcore: __retpoline_sites: 0x{retpolines_start_kcore:x}') + +x86_indirect_its_thunk_r15 = prog.symbol('__x86_indirect_its_thunk_r15').address +ksft.print_msg(f'kcore: __x86_indirect_its_thunk_r15: 0x{x86_indirect_its_thunk_r15:x}') + +tests_passed = 0 +tests_failed = 0 +tests_unknown = 0 + +with open(vmlinux, 'rb') as f: + elffile = ELFFile(f) + text_section = elffile.get_section_by_name('.text') + + for i in range(0, len(sites_offset)): + site = retpolines_start_kcore + sites_offset[i] + vmlinux_site = retpolines_start_vmlinux + sites_offset[i] + passed = unknown = failed = False + try: + vmlinux_insn = c.get_instruction_from_vmlinux(elffile, text_section, text_section['sh_addr'], vmlinux_site) + kcore_insn = list(cap.disasm(prog.read(site, 16), site))[0] + operand = kcore_insn.op_str + insn_end = site + kcore_insn.size - 1 # TODO handle Jcc.32 __x86_indirect_thunk_\reg + safe_site = insn_end & 0x20 + site_status = "" if safe_site else "(unsafe)" + + ksft.print_msg(f"\nSite {i}: {identify_address(prog, site)} <0x{site:x}> {site_status}") + ksft.print_msg(f"\tvmlinux: 0x{vmlinux_insn.address:x}:\t{vmlinux_insn.mnemonic}\t{vmlinux_insn.op_str}") + ksft.print_msg(f"\tkcore: 0x{kcore_insn.address:x}:\t{kcore_insn.mnemonic}\t{kcore_insn.op_str}") + + if (site & 0x20) ^ (insn_end & 0x20): + ksft.print_msg(f"\tSite at safe/unsafe boundary: {str(kcore_insn.bytes)} {kcore_insn.mnemonic} {operand}") + if safe_site: + tests_passed += 1 + passed = True + ksft.print_msg(f"\tPASSED: At safe address") + continue + + if operand.startswith('0xffffffff'): + thunk = int(operand, 16) + if thunk > x86_indirect_its_thunk_r15: + insn_at_thunk = list(cap.disasm(prog.read(thunk, 16), thunk))[0] + operand += ' -> ' + insn_at_thunk.mnemonic + ' ' + insn_at_thunk.op_str + ' ' + if 'jmp' in insn_at_thunk.mnemonic and thunk & 0x20: + ksft.print_msg(f"\tPASSED: Found {operand} at safe address") + passed = True + if not passed: + if kcore_insn.operands[0].type == capstone.CS_OP_IMM: + operand += ' <' + prog.symbol(int(operand, 16)) + '>' + if '__x86_indirect_its_thunk_' in operand: + ksft.print_msg(f"\tPASSED: Found {operand}") + else: + ksft.print_msg(f"\tPASSED: Found direct branch: {kcore_insn}, ITS thunk not required.") + passed = True + else: + unknown = True + if passed: + tests_passed += 1 + elif unknown: + ksft.print_msg(f"UNKNOWN: unexpected operand: {kcore_insn}") + tests_unknown += 1 + else: + ksft.print_msg(f'\t************* FAILED *************') + ksft.print_msg(f"\tFound {kcore_insn.bytes} {kcore_insn.mnemonic} {operand}") + ksft.print_msg(f'\t**********************************') + tests_failed += 1 + except Exception as e: + ksft.print_msg(f"UNKNOWN: An unexpected error occurred: {e}") + tests_unknown += 1 + +ksft.print_msg(f"\n\nSummary:") +ksft.print_msg(f"PASS: \t{tests_passed} \t/ {total_retpoline_tests}") +ksft.print_msg(f"FAIL: \t{tests_failed} \t/ {total_retpoline_tests}") +ksft.print_msg(f"UNKNOWN: \t{tests_unknown} \t/ {total_retpoline_tests}") + +if tests_failed == 0: + ksft.test_result_pass("All ITS return thunk sites passed") +else: + ksft.test_result_fail(f"{tests_failed} ITS return thunk sites failed") +ksft.finished() diff --git a/tools/testing/selftests/x86/bugs/its_permutations.py b/tools/testing/selftests/x86/bugs/its_permutations.py new file mode 100755 index 0000000000..65c17530fa --- /dev/null +++ b/tools/testing/selftests/x86/bugs/its_permutations.py @@ -0,0 +1,109 @@ +#! /usr/bin/python3 -sP +# SPDX-License-Identifier: GPL-2.0 +# +# Copyright (c) 2025 Intel Corporation +# +# Test for indirect target selection (ITS) cmdline permutations with other bugs +# like spectre_v2 and retbleed. + +import os, sys, subprocess, itertools, re, shutil + +test_dir = os.path.dirname(os.path.realpath(__file__)) +sys.path.insert(0, test_dir + '/../../kselftest') +import ksft +import common as c + +bug = "indirect_target_selection" +mitigation = c.get_sysfs(bug) + +if not mitigation or "Not affected" in mitigation: + ksft.test_result_skip("Skipping its_permutations.py: not applicable") + ksft.finished() + +if shutil.which('vng') is None: + ksft.test_result_skip("Skipping its_permutations.py: virtme-ng ('vng') not found in PATH.") + ksft.finished() + +TEST = f"{test_dir}/its_sysfs.py" +default_kparam = ['clearcpuid=hypervisor', 'panic=5', 'panic_on_warn=1', 'oops=panic', 'nmi_watchdog=1', 'hung_task_panic=1'] + +DEBUG = " -v " + +# Install dependencies +# https://github.com/arighi/virtme-ng +# apt install virtme-ng +BOOT_CMD = f"vng --run {test_dir}/../../../../../arch/x86/boot/bzImage " +#BOOT_CMD += DEBUG + +bug = "indirect_target_selection" + +input_options = { + 'indirect_target_selection' : ['off', 'on', 'stuff', 'vmexit'], + 'retbleed' : ['off', 'stuff', 'auto'], + 'spectre_v2' : ['off', 'on', 'eibrs', 'retpoline', 'ibrs', 'eibrs,retpoline'], +} + +def pretty_print(output): + OKBLUE = '\033[94m' + OKGREEN = '\033[92m' + WARNING = '\033[93m' + FAIL = '\033[91m' + ENDC = '\033[0m' + BOLD = '\033[1m' + + # Define patterns and their corresponding colors + patterns = { + r"^ok \d+": OKGREEN, + r"^not ok \d+": FAIL, + r"^# Testing .*": OKBLUE, + r"^# Found: .*": WARNING, + r"^# Totals: .*": BOLD, + r"pass:([1-9]\d*)": OKGREEN, + r"fail:([1-9]\d*)": FAIL, + r"skip:([1-9]\d*)": WARNING, + } + + # Apply colors based on patterns + for pattern, color in patterns.items(): + output = re.sub(pattern, lambda match: f"{color}{match.group(0)}{ENDC}", output, flags=re.MULTILINE) + + print(output) + +combinations = list(itertools.product(*input_options.values())) +ksft.print_header() +ksft.set_plan(len(combinations)) + +logs = "" + +for combination in combinations: + append = "" + log = "" + for p in default_kparam: + append += f' --append={p}' + command = BOOT_CMD + append + test_params = "" + for i, key in enumerate(input_options.keys()): + param = f'{key}={combination[i]}' + test_params += f' {param}' + command += f" --append={param}" + command += f" -- {TEST}" + test_name = f"{bug} {test_params}" + pretty_print(f'# Testing {test_name}') + t = subprocess.Popen(command, shell=True, stdout=subprocess.PIPE, stderr=subprocess.STDOUT) + t.wait() + output, _ = t.communicate() + if t.returncode == 0: + ksft.test_result_pass(test_name) + else: + ksft.test_result_fail(test_name) + output = output.decode() + log += f" {output}" + pretty_print(log) + logs += output + "\n" + +# Optionally use tappy to parse the output +# apt install python3-tappy +with open("logs.txt", "w") as f: + f.write(logs) + +ksft.finished() diff --git a/tools/testing/selftests/x86/bugs/its_ret_alignment.py b/tools/testing/selftests/x86/bugs/its_ret_alignment.py new file mode 100755 index 0000000000..c722cdbff6 --- /dev/null +++ b/tools/testing/selftests/x86/bugs/its_ret_alignment.py @@ -0,0 +1,139 @@ +#! /usr/bin/python3 -sP +# SPDX-License-Identifier: GPL-2.0 +# +# Copyright (c) 2025 Intel Corporation +# +# Test for indirect target selection (ITS) mitigation. +# +# Tests if the RETs are correctly patched by evaluating the +# vmlinux .return_sites in /proc/kcore. +# +# Install dependencies +# add-apt-repository ppa:michel-slm/kernel-utils +# apt update +# apt install -y python3-drgn python3-pyelftools python3-capstone +# +# Run on target machine +# mkdir -p /usr/lib/debug/lib/modules/$(uname -r) +# cp $VMLINUX /usr/lib/debug/lib/modules/$(uname -r)/vmlinux +# +# Usage: ./its_ret_alignment.py + +import os, sys, argparse +from pathlib import Path + +this_dir = os.path.dirname(os.path.realpath(__file__)) +sys.path.insert(0, this_dir + '/../../kselftest') +import ksft +import common as c + +bug = "indirect_target_selection" +mitigation = c.get_sysfs(bug) +if not mitigation or "Aligned branch/return thunks" not in mitigation: + ksft.test_result_skip("Skipping its_ret_alignment.py: Aligned branch/return thunks not enabled") + ksft.finished() + +c.check_dependencies_or_skip(['drgn', 'elftools', 'capstone'], script_name="its_ret_alignment.py") + +from elftools.elf.elffile import ELFFile +from drgn.helpers.common.memory import identify_address + +cap = c.init_capstone() + +if len(os.sys.argv) > 1: + arg_vmlinux = os.sys.argv[1] + if not os.path.exists(arg_vmlinux): + ksft.test_result_fail(f"its_ret_alignment.py: vmlinux not found at user-supplied path: {arg_vmlinux}") + ksft.exit_fail() + os.makedirs(f"/usr/lib/debug/lib/modules/{os.uname().release}", exist_ok=True) + os.system(f'cp {arg_vmlinux} /usr/lib/debug/lib/modules/$(uname -r)/vmlinux') + +vmlinux = f"/usr/lib/debug/lib/modules/{os.uname().release}/vmlinux" +if not os.path.exists(vmlinux): + ksft.test_result_fail(f"its_ret_alignment.py: vmlinux not found at {vmlinux}") + ksft.exit_fail() + +ksft.print_msg(f"Using vmlinux: {vmlinux}") + +rethunks_start_vmlinux, rethunks_sec_offset, size = c.get_section_info(vmlinux, '.return_sites') +ksft.print_msg(f"vmlinux: Section .return_sites (0x{rethunks_start_vmlinux:x}) found at 0x{rethunks_sec_offset:x} with size 0x{size:x}") + +sites_offset = c.get_patch_sites(vmlinux, rethunks_sec_offset, size) +total_rethunk_tests = len(sites_offset) +ksft.print_msg(f"Found {total_rethunk_tests} rethunk sites") + +prog = c.get_runtime_kernel() +rethunks_start_kcore = prog.symbol('__return_sites').address +ksft.print_msg(f'kcore: __rethunk_sites: 0x{rethunks_start_kcore:x}') + +its_return_thunk = prog.symbol('its_return_thunk').address +ksft.print_msg(f'kcore: its_return_thunk: 0x{its_return_thunk:x}') + +tests_passed = 0 +tests_failed = 0 +tests_unknown = 0 +tests_skipped = 0 + +with open(vmlinux, 'rb') as f: + elffile = ELFFile(f) + text_section = elffile.get_section_by_name('.text') + + for i in range(len(sites_offset)): + site = rethunks_start_kcore + sites_offset[i] + vmlinux_site = rethunks_start_vmlinux + sites_offset[i] + try: + passed = unknown = failed = skipped = False + + symbol = identify_address(prog, site) + vmlinux_insn = c.get_instruction_from_vmlinux(elffile, text_section, text_section['sh_addr'], vmlinux_site) + kcore_insn = list(cap.disasm(prog.read(site, 16), site))[0] + + insn_end = site + kcore_insn.size - 1 + + safe_site = insn_end & 0x20 + site_status = "" if safe_site else "(unsafe)" + + ksft.print_msg(f"\nSite {i}: {symbol} <0x{site:x}> {site_status}") + ksft.print_msg(f"\tvmlinux: 0x{vmlinux_insn.address:x}:\t{vmlinux_insn.mnemonic}\t{vmlinux_insn.op_str}") + ksft.print_msg(f"\tkcore: 0x{kcore_insn.address:x}:\t{kcore_insn.mnemonic}\t{kcore_insn.op_str}") + + if safe_site: + tests_passed += 1 + passed = True + ksft.print_msg(f"\tPASSED: At safe address") + continue + + if "jmp" in kcore_insn.mnemonic: + passed = True + elif "ret" not in kcore_insn.mnemonic: + skipped = True + + if passed: + ksft.print_msg(f"\tPASSED: Found {kcore_insn.mnemonic} {kcore_insn.op_str}") + tests_passed += 1 + elif skipped: + ksft.print_msg(f"\tSKIPPED: Found '{kcore_insn.mnemonic}'") + tests_skipped += 1 + elif unknown: + ksft.print_msg(f"UNKNOWN: An unknown instruction: {kcore_insn}") + tests_unknown += 1 + else: + ksft.print_msg(f'\t************* FAILED *************') + ksft.print_msg(f"\tFound {kcore_insn.mnemonic} {kcore_insn.op_str}") + ksft.print_msg(f'\t**********************************') + tests_failed += 1 + except Exception as e: + ksft.print_msg(f"UNKNOWN: An unexpected error occurred: {e}") + tests_unknown += 1 + +ksft.print_msg(f"\n\nSummary:") +ksft.print_msg(f"PASSED: \t{tests_passed} \t/ {total_rethunk_tests}") +ksft.print_msg(f"FAILED: \t{tests_failed} \t/ {total_rethunk_tests}") +ksft.print_msg(f"SKIPPED: \t{tests_skipped} \t/ {total_rethunk_tests}") +ksft.print_msg(f"UNKNOWN: \t{tests_unknown} \t/ {total_rethunk_tests}") + +if tests_failed == 0: + ksft.test_result_pass("All ITS return thunk sites passed.") +else: + ksft.test_result_fail(f"{tests_failed} failed sites need ITS return thunks.") +ksft.finished() diff --git a/tools/testing/selftests/x86/bugs/its_sysfs.py b/tools/testing/selftests/x86/bugs/its_sysfs.py new file mode 100755 index 0000000000..47bafbe605 --- /dev/null +++ b/tools/testing/selftests/x86/bugs/its_sysfs.py @@ -0,0 +1,65 @@ +#! /usr/bin/python3 -sP +# SPDX-License-Identifier: GPL-2.0 +# +# Copyright (c) 2025 Intel Corporation +# +# Test for Indirect Target Selection(ITS) mitigation sysfs status. + +import sys, os, re +this_dir = os.path.dirname(os.path.realpath(__file__)) +sys.path.insert(0, this_dir + '/../../kselftest') +import ksft + +from common import * + +bug = "indirect_target_selection" +mitigation = get_sysfs(bug) + +ITS_MITIGATION_ALIGNED_THUNKS = "Mitigation: Aligned branch/return thunks" +ITS_MITIGATION_RETPOLINE_STUFF = "Mitigation: Retpolines, Stuffing RSB" +ITS_MITIGATION_VMEXIT_ONLY = "Mitigation: Vulnerable, KVM: Not affected" +ITS_MITIGATION_VULNERABLE = "Vulnerable" + +def check_mitigation(): + if mitigation == ITS_MITIGATION_ALIGNED_THUNKS: + if cmdline_has(f'{bug}=stuff') and sysfs_has("spectre_v2", "Retpolines"): + bug_check_fail(bug, ITS_MITIGATION_ALIGNED_THUNKS, ITS_MITIGATION_RETPOLINE_STUFF) + return + if cmdline_has(f'{bug}=vmexit') and cpuinfo_has('its_native_only'): + bug_check_fail(bug, ITS_MITIGATION_ALIGNED_THUNKS, ITS_MITIGATION_VMEXIT_ONLY) + return + bug_check_pass(bug, ITS_MITIGATION_ALIGNED_THUNKS) + return + + if mitigation == ITS_MITIGATION_RETPOLINE_STUFF: + if cmdline_has(f'{bug}=stuff') and sysfs_has("spectre_v2", "Retpolines"): + bug_check_pass(bug, ITS_MITIGATION_RETPOLINE_STUFF) + return + if sysfs_has('retbleed', 'Stuffing'): + bug_check_pass(bug, ITS_MITIGATION_RETPOLINE_STUFF) + return + bug_check_fail(bug, ITS_MITIGATION_RETPOLINE_STUFF, ITS_MITIGATION_ALIGNED_THUNKS) + + if mitigation == ITS_MITIGATION_VMEXIT_ONLY: + if cmdline_has(f'{bug}=vmexit') and cpuinfo_has('its_native_only'): + bug_check_pass(bug, ITS_MITIGATION_VMEXIT_ONLY) + return + bug_check_fail(bug, ITS_MITIGATION_VMEXIT_ONLY, ITS_MITIGATION_ALIGNED_THUNKS) + + if mitigation == ITS_MITIGATION_VULNERABLE: + if sysfs_has("spectre_v2", "Vulnerable"): + bug_check_pass(bug, ITS_MITIGATION_VULNERABLE) + else: + bug_check_fail(bug, "Mitigation", ITS_MITIGATION_VULNERABLE) + + bug_status_unknown(bug, mitigation) + return + +ksft.print_header() +ksft.set_plan(1) +ksft.print_msg(f'{bug}: {mitigation} ...') + +if not basic_checks_sufficient(bug, mitigation): + check_mitigation() + +ksft.finished()