Import of kernel-6.12.0-124.8.1.el10_1

2025-11-25 04:09:18 +00:00 · 2025-11-25 04:09:18 +00:00 · 8e33fb9082
commit 8e33fb9082
parent 9c95bdb733
18058 changed files with 751910 additions and 319577 deletions
--- a/COPYING-6.12.0-124.8.1.el10
+++ b/COPYING-6.12.0-124.8.1.el10
--- a/Documentation/ABI/stable/sysfs-block
+++ b/Documentation/ABI/stable/sysfs-block
@ -109,6 +109,10 @@ Contact:	Martin K. Petersen <martin.petersen@oracle.com>
 Description:
 		Indicates whether a storage device is capable of storing
 		integrity metadata. Set if the device is T10 PI-capable.
+		This flag is set to 1 if the storage media is formatted
+		with T10 Protection Information. If the storage media is
+		not formatted with T10 Protection Information, this flag
+		is set to 0.


 What:		/sys/block/<disk>/integrity/format
@ -117,6 +121,13 @@ Contact:	Martin K. Petersen <martin.petersen@oracle.com>
 Description:
 		Metadata format for integrity capable block device.
 		E.g. T10-DIF-TYPE1-CRC.
+		This field describes the type of T10 Protection Information
+		that the block device can send and receive.
+		If the device can store application integrity metadata but
+		no T10 Protection Information profile is used, this field
+		contains "nop".
+		If the device does not support integrity metadata, this
+		field contains "none".


 What:		/sys/block/<disk>/integrity/protection_interval_bytes
@ -142,7 +153,17 @@ Date:		June 2008
 Contact:	Martin K. Petersen <martin.petersen@oracle.com>
 Description:
 		Number of bytes of integrity tag space available per
-		512 bytes of data.
+		protection_interval_bytes, which is typically
+		the device's logical block size.
+		This field describes the size of the application tag
+		if the storage device is formatted with T10 Protection
+		Information and permits use of the application tag.
+		The tag_size is reported in bytes and indicates the
+		space available for adding an opaque tag to each block
+		(protection_interval_bytes).
+		If the device does not support T10 Protection Information
+		(even if the device provides application integrity
+		metadata space), this field is set to 0.


 What:		/sys/block/<disk>/integrity/write_generate
@ -229,6 +250,17 @@ Description:
 		encryption, refer to Documentation/block/inline-encryption.rst.


+What:		/sys/block/<disk>/queue/crypto/hw_wrapped_keys
+Date:		February 2025
+Contact:	linux-block@vger.kernel.org
+Description:
+		[RO] The presence of this file indicates that the device
+		supports hardware-wrapped inline encryption keys, i.e. key blobs
+		that can only be unwrapped and used by dedicated hardware.  For
+		more information about hardware-wrapped inline encryption keys,
+		see Documentation/block/inline-encryption.rst.
+
+
 What:		/sys/block/<disk>/queue/crypto/max_dun_bits
 Date:		February 2022
 Contact:	linux-block@vger.kernel.org
@ -267,6 +299,15 @@ Description:
 		use with inline encryption.


+What:		/sys/block/<disk>/queue/crypto/raw_keys
+Date:		February 2025
+Contact:	linux-block@vger.kernel.org
+Description:
+		[RO] The presence of this file indicates that the device
+		supports raw inline encryption keys, i.e. keys that are managed
+		in raw, plaintext form in software.
+
+
 What:		/sys/block/<disk>/queue/dax
 Date:		June 2016
 Contact:	linux-block@vger.kernel.org
@ -424,6 +465,13 @@ Description:
 		[RW] This file is used to control (on/off) the iostats
 		accounting of the disk.

+What:		/sys/block/<disk>/queue/iostats_passthrough
+Date:		October 2024
+Contact:	linux-block@vger.kernel.org
+Description:
+		[RW] This file is used to control (on/off) the iostats
+		accounting of the disk for passthrough commands.
+

 What:		/sys/block/<disk>/queue/logical_block_size
 Date:		May 2009
--- a/Documentation/ABI/stable/sysfs-class-bluetooth
+++ b/Documentation/ABI/stable/sysfs-class-bluetooth
@ -0,0 +1,9 @@
+What: 		/sys/class/bluetooth/hci<index>/reset
+Date:		14-Jan-2025
+KernelVersion:	6.13
+Contact:	linux-bluetooth@vger.kernel.org
+Description: 	This write-only attribute allows users to trigger the vendor reset
+		method on the Bluetooth device when arbitrary data is written.
+		The reset may or may not be done through the device transport
+		(e.g., UART/USB), and can also be done through an out-of-band
+		approach such as GPIO.
--- a/Documentation/ABI/stable/sysfs-devices-node
+++ b/Documentation/ABI/stable/sysfs-devices-node
@ -177,6 +177,12 @@ Description:
 		The cache write policy: 0 for write-back, 1 for write-through,
 		other or unknown.

+What:		/sys/devices/system/node/nodeX/memory_side_cache/indexY/address_mode
+Date:		March 2025
+Contact:	Dave Jiang <dave.jiang@intel.com>
+Description:
+		The address mode: 0 for reserved, 1 for extended-linear.
+
 What:		/sys/devices/system/node/nodeX/x86/sgx_total_bytes
 Date:		November 2021
 Contact:	Jarkko Sakkinen <jarkko@kernel.org>
--- a/Documentation/ABI/testing/configfs-usb-gadget-uvc
+++ b/Documentation/ABI/testing/configfs-usb-gadget-uvc
@ -342,6 +342,70 @@ Description:	Specific uncompressed frame descriptors
 					   support
 		=========================  =====================================

+What:           /config/usb-gadget/gadget/functions/uvc.name/streaming/framebased
+Date:           Sept 2024
+KernelVersion:  5.15
+Description:    Framebased format descriptors
+
+What:           /config/usb-gadget/gadget/functions/uvc.name/streaming/framebased/name
+Date:           Sept 2024
+KernelVersion:  5.15
+Description:    Specific framebased format descriptors
+
+                ==================      =======================================
+                bFormatIndex            unique id for this format descriptor;
+                                        only defined after parent header is
+                                        linked into the streaming class;
+                                        read-only
+                bmaControls             this format's data for bmaControls in
+                                        the streaming header
+                bmInterlaceFlags        specifies interlace information,
+                                        read-only
+                bAspectRatioY           the X dimension of the picture aspect
+                                        ratio, read-only
+                bAspectRatioX           the Y dimension of the picture aspect
+                                        ratio, read-only
+                bDefaultFrameIndex      optimum frame index for this stream
+                bBitsPerPixel           number of bits per pixel used to
+                                        specify color in the decoded video
+                                        frame
+                guidFormat              globally unique id used to identify
+                                        stream-encoding format
+                ==================      =======================================
+
+What:           /config/usb-gadget/gadget/functions/uvc.name/streaming/framebased/name/name
+Date:           Sept 2024
+KernelVersion:  5.15
+Description:    Specific framebased frame descriptors
+
+                =========================  =====================================
+                bFrameIndex                unique id for this framedescriptor;
+                                           only defined after parent format is
+                                           linked into the streaming header;
+                                           read-only
+                dwFrameInterval            indicates how frame interval can be
+                                           programmed; a number of values
+                                           separated by newline can be specified
+                dwDefaultFrameInterval     the frame interval the device would
+                                           like to use as default
+                dwBytesPerLine             Specifies the number of bytes per line
+                                           of video for packed fixed frame size
+                                           formats, allowing the receiver to
+                                           perform stride alignment of the video.
+                                           If the bVariableSize value (above) is
+                                           TRUE (1), or if the format does not
+                                           permit such alignment, this value shall
+                                           be set to zero (0).
+                dwMaxBitRate               the maximum bit rate at the shortest
+                                           frame interval in bps
+                dwMinBitRate               the minimum bit rate at the longest
+                                           frame interval in bps
+                wHeight                    height of decoded bitmap frame in px
+                wWidth                     width of decoded bitmam frame in px
+                bmCapabilities             still image support, fixed frame-rate
+                                           support
+                =========================  =====================================
+
 What:		/config/usb-gadget/gadget/functions/uvc.name/streaming/header
 Date:		Dec 2014
 KernelVersion:	4.0
--- a/Documentation/ABI/testing/debugfs-hisi-migration
+++ b/Documentation/ABI/testing/debugfs-hisi-migration
@ -0,0 +1,25 @@
+What:		/sys/kernel/debug/vfio/<device>/migration/hisi_acc/dev_data
+Date:		Jan 2025
+KernelVersion:  6.13
+Contact:	Longfang Liu <liulongfang@huawei.com>
+Description:	Read the configuration data and some status data
+		required for device live migration. These data include device
+		status data, queue configuration data, some task configuration
+		data and device attribute data. The output format of the data
+		is defined by the live migration driver.
+
+What:		/sys/kernel/debug/vfio/<device>/migration/hisi_acc/migf_data
+Date:		Jan 2025
+KernelVersion:  6.13
+Contact:	Longfang Liu <liulongfang@huawei.com>
+Description:	Read the data from the last completed live migration.
+		This data includes the same device status data as in "dev_data".
+		The migf_data is the dev_data that is migrated.
+
+What:		/sys/kernel/debug/vfio/<device>/migration/hisi_acc/cmd_state
+Date:		Jan 2025
+KernelVersion:  6.13
+Contact:	Longfang Liu <liulongfang@huawei.com>
+Description:	Used to obtain the device command sending and receiving
+		channel status. Returns failure or success logs based on the
+		results.
--- a/Documentation/ABI/testing/sysfs-bus-event_source-devices-vpa-pmu
+++ b/Documentation/ABI/testing/sysfs-bus-event_source-devices-vpa-pmu
@ -0,0 +1,25 @@
+What:           /sys/bus/event_source/devices/vpa_pmu/format
+Date:           November 2024
+Contact:        Linux on PowerPC Developer List <linuxppc-dev@lists.ozlabs.org>
+Description:    Read-only. Attribute group to describe the magic bits
+                that go into perf_event_attr.config for a particular pmu.
+                (See ABI/testing/sysfs-bus-event_source-devices-format).
+
+                Each attribute under this group defines a bit range of the
+                perf_event_attr.config. Supported attribute are listed
+                below::
+
+                  event = "config:0-31" - event ID
+
+                For example::
+
+                  l1_to_l2_lat = "event=0x1"
+
+What:           /sys/bus/event_source/devices/vpa_pmu/events
+Date:           November 2024
+Contact:        Linux on PowerPC Developer List <linuxppc-dev@lists.ozlabs.org>
+Description:    Read-only. Attribute group to describe performance monitoring
+                events for the Virtual Processor Area events. Each attribute
+                in this group describes a single performance monitoring event
+                supported by vpa_pmu. The name of the file is the name of
+                the event (See ABI/testing/sysfs-bus-event_source-devices-events).
--- a/Documentation/ABI/testing/sysfs-bus-pci
+++ b/Documentation/ABI/testing/sysfs-bus-pci
@ -163,6 +163,17 @@ Description:
 		will be present in sysfs.  Writing 1 to this file
 		will perform reset.

+What:		/sys/bus/pci/devices/.../reset_subordinate
+Date:		October 2024
+Contact:	linux-pci@vger.kernel.org
+Description:
+		This is visible only for bridge devices. If you want to reset
+		all devices attached through the subordinate bus of a specific
+		bridge device, writing 1 to this will try to do it.  This will
+		affect all devices attached to the system through this bridge
+		similiar to writing 1 to their individual "reset" file, so use
+		with caution.
+
 What:		/sys/bus/pci/devices/.../vpd
 Date:		February 2008
 Contact:	Ben Hutchings <bwh@kernel.org>
--- a/Documentation/ABI/testing/sysfs-class-typec
+++ b/Documentation/ABI/testing/sysfs-class-typec
@ -149,6 +149,19 @@ Description:
 		advertise to the partner. The currently used capabilities are in
 		brackets. Selection happens by writing to the file.

+What:		/sys/class/typec/<port>/usb_capability
+Date:		November 2024
+Contact:	Heikki Krogerus <heikki.krogerus@linux.intel.com>
+Description:	Lists the supported USB Modes. The default USB mode that is used
+		next time with the Enter_USB Message is in brackets. The default
+		mode can be changed by writing to the file when supported by the
+		driver.
+
+		Valid values:
+		- usb2 (USB 2.0)
+		- usb3 (USB 3.2)
+		- usb4 (USB4)
+
 USB Type-C partner devices (eg. /sys/class/typec/port0-partner/)

 What:		/sys/class/typec/<port>-partner/accessory_mode
@ -220,6 +233,20 @@ Description:
 		directory exists, it will have an attribute file for every VDO
 		in Discover Identity command result.

+What:		/sys/class/typec/<port>-partner/usb_mode
+Date:		November 2024
+Contact:	Heikki Krogerus <heikki.krogerus@linux.intel.com>
+Description:	The USB Modes that the partner device supports. The active mode
+		is displayed in brackets. The active USB mode can be changed by
+		writing to this file when the port driver is able to send Data
+		Reset Message to the partner. That requires USB Power Delivery
+		contract between the partner and the port.
+
+		Valid values:
+		- usb2 (USB 2.0)
+		- usb3 (USB 3.2)
+		- usb4 (USB4)
+
 USB Type-C cable devices (eg. /sys/class/typec/port0-cable/)

 Note: Electronically Marked Cables will have a device also for one cable plug
--- a/Documentation/ABI/testing/sysfs-devices-system-cpu
+++ b/Documentation/ABI/testing/sysfs-devices-system-cpu
@ -523,6 +523,7 @@ What:		/sys/devices/system/cpu/vulnerabilities
 		/sys/devices/system/cpu/vulnerabilities/spectre_v1
 		/sys/devices/system/cpu/vulnerabilities/spectre_v2
 		/sys/devices/system/cpu/vulnerabilities/srbds
+		/sys/devices/system/cpu/vulnerabilities/tsa
 		/sys/devices/system/cpu/vulnerabilities/tsx_async_abort
 Date:		January 2018
 Contact:	Linux kernel mailing list <linux-kernel@vger.kernel.org>
--- a/Documentation/ABI/testing/sysfs-driver-amd-sfh
+++ b/Documentation/ABI/testing/sysfs-driver-amd-sfh
@ -0,0 +1,13 @@
+What:		/sys/bus/pci/drivers/pcie_mp2_amd/*/hpd
+Date:		April 2025
+Contact:	mario.limonciello@amd.com
+Description:
+		Human presence detection (HPD) enable/disable.
+		When HPD is enabled, the device will be able to detect the
+		presence of a human and will send an interrupt that can be
+		used to wake the system from a low power state.
+		When HPD is disabled, the device will not be able to detect
+		the presence of a human.
+
+		Access: Read/Write
+		Valid values: enabled/disabled
--- a/Documentation/ABI/testing/sysfs-driver-hid-appletb-kbd
+++ b/Documentation/ABI/testing/sysfs-driver-hid-appletb-kbd
@ -0,0 +1,13 @@
+What:		/sys/bus/hid/drivers/hid-appletb-kbd/<dev>/mode
+Date:		September, 2023
+KernelVersion:	6.5
+Contact:	linux-input@vger.kernel.org
+Description:
+		The set of keys displayed on the Touch Bar.
+		Valid values are:
+		== =================
+		0  Escape key only
+		1  Function keys
+		2  Media/brightness keys
+		3  None
+		== =================
--- a/Documentation/ABI/testing/sysfs-driver-hid-corsair-void
+++ b/Documentation/ABI/testing/sysfs-driver-hid-corsair-void
@ -0,0 +1,38 @@
+What:		/sys/bus/hid/drivers/hid-corsair-void/<dev>/fw_version_headset
+Date:		January 2024
+KernelVersion:	6.13
+Contact:	Stuart Hayhurst <stuart.a.hayhurst@gmail.com>
+Description:	(R) The firmware version of the headset
+			* Returns -ENODATA if no version was reported
+
+What:		/sys/bus/hid/drivers/hid-corsair-void/<dev>/fw_version_receiver
+Date:		January 2024
+KernelVersion:	6.13
+Contact:	Stuart Hayhurst <stuart.a.hayhurst@gmail.com>
+Description:	(R) The firmware version of the receiver
+
+What:		/sys/bus/hid/drivers/hid-corsair-void/<dev>/microphone_up
+Date:		July 2023
+KernelVersion:	6.13
+Contact:	Stuart Hayhurst <stuart.a.hayhurst@gmail.com>
+Description:	(R) Get the physical position of the microphone
+			* 1 -> Microphone up
+			* 0 -> Microphone down
+
+What:		/sys/bus/hid/drivers/hid-corsair-void/<dev>/send_alert
+Date:		July 2023
+KernelVersion:	6.13
+Contact:	Stuart Hayhurst <stuart.a.hayhurst@gmail.com>
+Description:	(W) Play a built-in notification from the headset (0 / 1)
+
+What:		/sys/bus/hid/drivers/hid-corsair-void/<dev>/set_sidetone
+Date:		December 2023
+KernelVersion:	6.13
+Contact:	Stuart Hayhurst <stuart.a.hayhurst@gmail.com>
+Description:	(W) Set the sidetone volume (0 - sidetone_max)
+
+What:		/sys/bus/hid/drivers/hid-corsair-void/<dev>/sidetone_max
+Date:		July 2024
+KernelVersion:	6.13
+Contact:	Stuart Hayhurst <stuart.a.hayhurst@gmail.com>
+Description:	(R) Report the maximum sidetone volume
--- a/Documentation/ABI/testing/sysfs-kernel-livepatch
+++ b/Documentation/ABI/testing/sysfs-kernel-livepatch
@ -55,6 +55,15 @@ Description:
 		An attribute which indicates whether the patch supports
 		atomic-replace.

+What:		/sys/kernel/livepatch/<patch>/stack_order
+Date:		Jan 2025
+KernelVersion:	6.14.0
+Description:
+		This attribute specifies the sequence in which live patch modules
+		are applied to the system. If multiple live patches modify the same
+		function, the implementation with the biggest 'stack_order' number
+		is used, unless a transition is currently in progress.
+
 What:		/sys/kernel/livepatch/<patch>/<object>
 Date:		Nov 2014
 KernelVersion:	3.19.0
--- a/Documentation/PCI/endpoint/pci-endpoint.rst
+++ b/Documentation/PCI/endpoint/pci-endpoint.rst
@ -117,6 +117,35 @@ by the PCI endpoint function driver.
   The PCI endpoint function driver should use pci_epc_mem_free_addr() to
   free the memory space allocated using pci_epc_mem_alloc_addr().

+* pci_epc_map_addr()
+
+  A PCI endpoint function driver should use pci_epc_map_addr() to map to a RC
+  PCI address the CPU address of local memory obtained with
+  pci_epc_mem_alloc_addr().
+
+* pci_epc_unmap_addr()
+
+  A PCI endpoint function driver should use pci_epc_unmap_addr() to unmap the
+  CPU address of local memory mapped to a RC address with pci_epc_map_addr().
+
+* pci_epc_mem_map()
+
+  A PCI endpoint controller may impose constraints on the RC PCI addresses that
+  can be mapped. The function pci_epc_mem_map() allows endpoint function
+  drivers to allocate and map controller memory while handling such
+  constraints. This function will determine the size of the memory that must be
+  allocated with pci_epc_mem_alloc_addr() for successfully mapping a RC PCI
+  address range. This function will also indicate the size of the PCI address
+  range that was actually mapped, which can be less than the requested size, as
+  well as the offset into the allocated memory to use for accessing the mapped
+  RC PCI address range.
+
+* pci_epc_mem_unmap()
+
+  A PCI endpoint function driver can use pci_epc_mem_unmap() to unmap and free
+  controller memory that was allocated and mapped using pci_epc_mem_map().
+
+
 Other EPC APIs
 ~~~~~~~~~~~~~~

--- a/Documentation/PCI/endpoint/pci-test-howto.rst
+++ b/Documentation/PCI/endpoint/pci-test-howto.rst
@ -81,8 +81,8 @@ device, the following commands can be used::

 	# echo 0x104c > functions/pci_epf_test/func1/vendorid
 	# echo 0xb500 > functions/pci_epf_test/func1/deviceid
-	# echo 16 > functions/pci_epf_test/func1/msi_interrupts
-	# echo 8 > functions/pci_epf_test/func1/msix_interrupts
+	# echo 32 > functions/pci_epf_test/func1/msi_interrupts
+	# echo 2048 > functions/pci_epf_test/func1/msix_interrupts


 Binding pci-epf-test Device to EP Controller
@ -123,113 +123,83 @@ above::
 Using Endpoint Test function Device
 -----------------------------------

-pcitest.sh added in tools/pci/ can be used to run all the default PCI endpoint
-tests. To compile this tool the following commands should be used::
+Kselftest added in tools/testing/selftests/pci_endpoint can be used to run all
+the default PCI endpoint tests. To build the Kselftest for PCI endpoint
+subsystem, the following commands should be used::

 	# cd <kernel-dir>
-	# make -C tools/pci
+	# make -C tools/testing/selftests/pci_endpoint

 or if you desire to compile and install in your system::

 	# cd <kernel-dir>
-	# make -C tools/pci install
+	# make -C tools/testing/selftests/pci_endpoint INSTALL_PATH=/usr/bin install

-The tool and script will be located in <rootfs>/usr/bin/
+The test will be located in <rootfs>/usr/bin/

-
-pcitest.sh Output
-~~~~~~~~~~~~~~~~~
+Kselftest Output
+~~~~~~~~~~~~~~~~
 ::

-	# pcitest.sh
-	BAR tests
+	# pci_endpoint_test
+	TAP version 13
+	1..16
+	# Starting 16 tests from 9 test cases.
+	#  RUN           pci_ep_bar.BAR0.BAR_TEST ...
+	#            OK  pci_ep_bar.BAR0.BAR_TEST
+	ok 1 pci_ep_bar.BAR0.BAR_TEST
+	#  RUN           pci_ep_bar.BAR1.BAR_TEST ...
+	#            OK  pci_ep_bar.BAR1.BAR_TEST
+	ok 2 pci_ep_bar.BAR1.BAR_TEST
+	#  RUN           pci_ep_bar.BAR2.BAR_TEST ...
+	#            OK  pci_ep_bar.BAR2.BAR_TEST
+	ok 3 pci_ep_bar.BAR2.BAR_TEST
+	#  RUN           pci_ep_bar.BAR3.BAR_TEST ...
+	#            OK  pci_ep_bar.BAR3.BAR_TEST
+	ok 4 pci_ep_bar.BAR3.BAR_TEST
+	#  RUN           pci_ep_bar.BAR4.BAR_TEST ...
+	#            OK  pci_ep_bar.BAR4.BAR_TEST
+	ok 5 pci_ep_bar.BAR4.BAR_TEST
+	#  RUN           pci_ep_bar.BAR5.BAR_TEST ...
+	#            OK  pci_ep_bar.BAR5.BAR_TEST
+	ok 6 pci_ep_bar.BAR5.BAR_TEST
+	#  RUN           pci_ep_basic.CONSECUTIVE_BAR_TEST ...
+	#            OK  pci_ep_basic.CONSECUTIVE_BAR_TEST
+	ok 7 pci_ep_basic.CONSECUTIVE_BAR_TEST
+	#  RUN           pci_ep_basic.LEGACY_IRQ_TEST ...
+	#            OK  pci_ep_basic.LEGACY_IRQ_TEST
+	ok 8 pci_ep_basic.LEGACY_IRQ_TEST
+	#  RUN           pci_ep_basic.MSI_TEST ...
+	#            OK  pci_ep_basic.MSI_TEST
+	ok 9 pci_ep_basic.MSI_TEST
+	#  RUN           pci_ep_basic.MSIX_TEST ...
+	#            OK  pci_ep_basic.MSIX_TEST
+	ok 10 pci_ep_basic.MSIX_TEST
+	#  RUN           pci_ep_data_transfer.memcpy.READ_TEST ...
+	#            OK  pci_ep_data_transfer.memcpy.READ_TEST
+	ok 11 pci_ep_data_transfer.memcpy.READ_TEST
+	#  RUN           pci_ep_data_transfer.memcpy.WRITE_TEST ...
+	#            OK  pci_ep_data_transfer.memcpy.WRITE_TEST
+	ok 12 pci_ep_data_transfer.memcpy.WRITE_TEST
+	#  RUN           pci_ep_data_transfer.memcpy.COPY_TEST ...
+	#            OK  pci_ep_data_transfer.memcpy.COPY_TEST
+	ok 13 pci_ep_data_transfer.memcpy.COPY_TEST
+	#  RUN           pci_ep_data_transfer.dma.READ_TEST ...
+	#            OK  pci_ep_data_transfer.dma.READ_TEST
+	ok 14 pci_ep_data_transfer.dma.READ_TEST
+	#  RUN           pci_ep_data_transfer.dma.WRITE_TEST ...
+	#            OK  pci_ep_data_transfer.dma.WRITE_TEST
+	ok 15 pci_ep_data_transfer.dma.WRITE_TEST
+	#  RUN           pci_ep_data_transfer.dma.COPY_TEST ...
+	#            OK  pci_ep_data_transfer.dma.COPY_TEST
+	ok 16 pci_ep_data_transfer.dma.COPY_TEST
+	# PASSED: 16 / 16 tests passed.
+	# Totals: pass:16 fail:0 xfail:0 xpass:0 skip:0 error:0

-	BAR0:           OKAY
-	BAR1:           OKAY
-	BAR2:           OKAY
-	BAR3:           OKAY
-	BAR4:           NOT OKAY
-	BAR5:           NOT OKAY

-	Interrupt tests
+Testcase 16 (pci_ep_data_transfer.dma.COPY_TEST) will fail for most of the DMA
+capable endpoint controllers due to the absence of the MEMCPY over DMA. For such
+controllers, it is advisable to skip this testcase using this
+command::

-	SET IRQ TYPE TO LEGACY:         OKAY
-	LEGACY IRQ:     NOT OKAY
-	SET IRQ TYPE TO MSI:            OKAY
-	MSI1:           OKAY
-	MSI2:           OKAY
-	MSI3:           OKAY
-	MSI4:           OKAY
-	MSI5:           OKAY
-	MSI6:           OKAY
-	MSI7:           OKAY
-	MSI8:           OKAY
-	MSI9:           OKAY
-	MSI10:          OKAY
-	MSI11:          OKAY
-	MSI12:          OKAY
-	MSI13:          OKAY
-	MSI14:          OKAY
-	MSI15:          OKAY
-	MSI16:          OKAY
-	MSI17:          NOT OKAY
-	MSI18:          NOT OKAY
-	MSI19:          NOT OKAY
-	MSI20:          NOT OKAY
-	MSI21:          NOT OKAY
-	MSI22:          NOT OKAY
-	MSI23:          NOT OKAY
-	MSI24:          NOT OKAY
-	MSI25:          NOT OKAY
-	MSI26:          NOT OKAY
-	MSI27:          NOT OKAY
-	MSI28:          NOT OKAY
-	MSI29:          NOT OKAY
-	MSI30:          NOT OKAY
-	MSI31:          NOT OKAY
-	MSI32:          NOT OKAY
-	SET IRQ TYPE TO MSI-X:          OKAY
-	MSI-X1:         OKAY
-	MSI-X2:         OKAY
-	MSI-X3:         OKAY
-	MSI-X4:         OKAY
-	MSI-X5:         OKAY
-	MSI-X6:         OKAY
-	MSI-X7:         OKAY
-	MSI-X8:         OKAY
-	MSI-X9:         NOT OKAY
-	MSI-X10:        NOT OKAY
-	MSI-X11:        NOT OKAY
-	MSI-X12:        NOT OKAY
-	MSI-X13:        NOT OKAY
-	MSI-X14:        NOT OKAY
-	MSI-X15:        NOT OKAY
-	MSI-X16:        NOT OKAY
-	[...]
-	MSI-X2047:      NOT OKAY
-	MSI-X2048:      NOT OKAY
-
-	Read Tests
-
-	SET IRQ TYPE TO MSI:            OKAY
-	READ (      1 bytes):           OKAY
-	READ (   1024 bytes):           OKAY
-	READ (   1025 bytes):           OKAY
-	READ (1024000 bytes):           OKAY
-	READ (1024001 bytes):           OKAY
-
-	Write Tests
-
-	WRITE (      1 bytes):          OKAY
-	WRITE (   1024 bytes):          OKAY
-	WRITE (   1025 bytes):          OKAY
-	WRITE (1024000 bytes):          OKAY
-	WRITE (1024001 bytes):          OKAY
-
-	Copy Tests
-
-	COPY (      1 bytes):           OKAY
-	COPY (   1024 bytes):           OKAY
-	COPY (   1025 bytes):           OKAY
-	COPY (1024000 bytes):           OKAY
-	COPY (1024001 bytes):           OKAY
+	# pci_endpoint_test -f pci_ep_bar -f pci_ep_basic -v memcpy -T COPY_TEST -v dma
--- a/Documentation/PCI/index.rst
+++ b/Documentation/PCI/index.rst
@ -18,3 +18,4 @@ PCI Bus Subsystem
   pcieaer-howto
   endpoint/index
   boot-interrupts
+   tph
--- a/Documentation/PCI/pciebus-howto.rst
+++ b/Documentation/PCI/pciebus-howto.rst
@ -217,8 +217,12 @@ capability structure except the PCI Express capability structure,
 that is shared between many drivers including the service drivers.
 RMW Capability accessors (pcie_capability_clear_and_set_word(),
 pcie_capability_set_word(), and pcie_capability_clear_word()) protect
-a selected set of PCI Express Capability Registers (Link Control
-Register and Root Control Register). Any change to those registers
-should be performed using RMW accessors to avoid problems due to
-concurrent updates. For the up-to-date list of protected registers,
-see pcie_capability_clear_and_set_word().
+a selected set of PCI Express Capability Registers:
+
+* Link Control Register
+* Root Control Register
+* Link Control 2 Register
+
+Any change to those registers should be performed using RMW accessors to
+avoid problems due to concurrent updates. For the up-to-date list of
+protected registers, see pcie_capability_clear_and_set_word().
--- a/Documentation/PCI/tph.rst
+++ b/Documentation/PCI/tph.rst
@ -0,0 +1,132 @@
+.. SPDX-License-Identifier: GPL-2.0
+
+
+===========
+TPH Support
+===========
+
+:Copyright: 2024 Advanced Micro Devices, Inc.
+:Authors: - Eric van Tassell <eric.vantassell@amd.com>
+          - Wei Huang <wei.huang2@amd.com>
+
+
+Overview
+========
+
+TPH (TLP Processing Hints) is a PCIe feature that allows endpoint devices
+to provide optimization hints for requests that target memory space.
+These hints, in a format called Steering Tags (STs), are embedded in the
+requester's TLP headers, enabling the system hardware, such as the Root
+Complex, to better manage platform resources for these requests.
+
+For example, on platforms with TPH-based direct data cache injection
+support, an endpoint device can include appropriate STs in its DMA
+traffic to specify which cache the data should be written to. This allows
+the CPU core to have a higher probability of getting data from cache,
+potentially improving performance and reducing latency in data
+processing.
+
+
+How to Use TPH
+==============
+
+TPH is presented as an optional extended capability in PCIe. The Linux
+kernel handles TPH discovery during boot, but it is up to the device
+driver to request TPH enablement if it is to be utilized. Once enabled,
+the driver uses the provided API to obtain the Steering Tag for the
+target memory and to program the ST into the device's ST table.
+
+Enable TPH support in Linux
+---------------------------
+
+To support TPH, the kernel must be built with the CONFIG_PCIE_TPH option
+enabled.
+
+Manage TPH
+----------
+
+To enable TPH for a device, use the following function::
+
+  int pcie_enable_tph(struct pci_dev *pdev, int mode);
+
+This function enables TPH support for device with a specific ST mode.
+Current supported modes include:
+
+  * PCI_TPH_ST_NS_MODE - NO ST Mode
+  * PCI_TPH_ST_IV_MODE - Interrupt Vector Mode
+  * PCI_TPH_ST_DS_MODE - Device Specific Mode
+
+`pcie_enable_tph()` checks whether the requested mode is actually
+supported by the device before enabling. The device driver can figure out
+which TPH mode is supported and can be properly enabled based on the
+return value of `pcie_enable_tph()`.
+
+To disable TPH, use the following function::
+
+  void pcie_disable_tph(struct pci_dev *pdev);
+
+Manage ST
+---------
+
+Steering Tags are platform specific. PCIe spec does not specify where STs
+are from. Instead PCI Firmware Specification defines an ACPI _DSM method
+(see the `Revised _DSM for Cache Locality TPH Features ECN
+<https://members.pcisig.com/wg/PCI-SIG/document/15470>`_) for retrieving
+STs for a target memory of various properties. This method is what is
+supported in this implementation.
+
+To retrieve a Steering Tag for a target memory associated with a specific
+CPU, use the following function::
+
+  int pcie_tph_get_cpu_st(struct pci_dev *pdev, enum tph_mem_type type,
+                          unsigned int cpu_uid, u16 *tag);
+
+The `type` argument is used to specify the memory type, either volatile
+or persistent, of the target memory. The `cpu_uid` argument specifies the
+CPU where the memory is associated to.
+
+After the ST value is retrieved, the device driver can use the following
+function to write the ST into the device::
+
+  int pcie_tph_set_st_entry(struct pci_dev *pdev, unsigned int index,
+                            u16 tag);
+
+The `index` argument is the ST table entry index the ST tag will be
+written into. `pcie_tph_set_st_entry()` will figure out the proper
+location of ST table, either in the MSI-X table or in the TPH Extended
+Capability space, and write the Steering Tag into the ST entry pointed by
+the `index` argument.
+
+It is completely up to the driver to decide how to use these TPH
+functions. For example a network device driver can use the TPH APIs above
+to update the Steering Tag when interrupt affinity of a RX/TX queue has
+been changed. Here is a sample code for IRQ affinity notifier:
+
+.. code-block:: c
+
+    static void irq_affinity_notified(struct irq_affinity_notify *notify,
+                                      const cpumask_t *mask)
+    {
+         struct drv_irq *irq;
+         unsigned int cpu_id;
+         u16 tag;
+
+         irq = container_of(notify, struct drv_irq, affinity_notify);
+         cpumask_copy(irq->cpu_mask, mask);
+
+         /* Pick a right CPU as the target - here is just an example */
+         cpu_id = cpumask_first(irq->cpu_mask);
+
+         if (pcie_tph_get_cpu_st(irq->pdev, TPH_MEM_TYPE_VM, cpu_id,
+                                 &tag))
+             return;
+
+         if (pcie_tph_set_st_entry(irq->pdev, irq->msix_nr, tag))
+             return;
+    }
+
+Disable TPH system-wide
+-----------------------
+
+There is a kernel command line option available to control TPH feature:
+    * "notph": TPH will be disabled for all endpoint devices.
--- a/Documentation/accounting/delay-accounting.rst
+++ b/Documentation/accounting/delay-accounting.rst
@ -100,29 +100,29 @@ Get delays, since system boot, for pid 10::
 	# ./getdelays -d -p 10
 	(output similar to next case)

-Get sum of delays, since system boot, for all pids with tgid 5::
+Get sum and peak of delays, since system boot, for all pids with tgid 242::

-	# ./getdelays -d -t 5
+	bash-4.4# ./getdelays -d -t 242
 	print delayacct stats ON
-	TGID	5
+	TGID    242


-	CPU             count     real total  virtual total    delay total  delay average
-	                    8        7000000        6872122        3382277          0.423ms
-	IO              count    delay total  delay average
-                   0              0          0.000ms
-	SWAP            count    delay total  delay average
-                       0              0          0.000ms
-	RECLAIM         count    delay total  delay average
-                   0              0          0.000ms
-	THRASHING       count    delay total  delay average
-                       0              0          0.000ms
-	COMPACT         count    delay total  delay average
-                       0              0          0.000ms
-	WPCOPY          count    delay total  delay average
-                       0              0          0.000ms
-	IRQ             count    delay total  delay average
-                       0              0          0.000ms
+	CPU         count     real total  virtual total    delay total  delay average      delay max      delay min
+	               39      156000000      156576579        2111069          0.054ms     0.212296ms     0.031307ms
+	IO          count    delay total  delay average      delay max      delay min
+	                0              0          0.000ms     0.000000ms     0.000000ms
+	SWAP        count    delay total  delay average      delay max      delay min
+	                0              0          0.000ms     0.000000ms     0.000000ms
+	RECLAIM     count    delay total  delay average      delay max      delay min
+	                0              0          0.000ms     0.000000ms     0.000000ms
+	THRASHING   count    delay total  delay average      delay max      delay min
+	                0              0          0.000ms     0.000000ms     0.000000ms
+	COMPACT     count    delay total  delay average      delay max      delay min
+	                0              0          0.000ms     0.000000ms     0.000000ms
+	WPCOPY      count    delay total  delay average      delay max      delay min
+	              156       11215873          0.072ms     0.207403ms     0.033913ms
+	IRQ         count    delay total  delay average      delay max      delay min
+	                0              0          0.000ms     0.000000ms     0.000000ms

 Get IO accounting for pid 1, it works only with -p::

--- a/Documentation/admin-guide/cgroup-v1/memory.rst
+++ b/Documentation/admin-guide/cgroup-v1/memory.rst
@ -90,9 +90,7 @@ Brief summary of control files.
                                     used.
 memory.swappiness		     set/show swappiness parameter of vmscan
 				     (See sysctl's vm.swappiness)
- memory.move_charge_at_immigrate     set/show controls of moving charges
-                                     This knob is deprecated and shouldn't be
-                                     used.
+ memory.move_charge_at_immigrate     This knob is deprecated.
 memory.oom_control		     set/show oom controls.
                                     This knob is deprecated and shouldn't be
                                     used.
@ -243,10 +241,6 @@ behind this approach is that a cgroup that aggressively uses a shared
 page will eventually get charged for it (once it is uncharged from
 the cgroup that brought it in -- this will happen on memory pressure).

-But see :ref:`section 8.2 <cgroup-v1-memory-movable-charges>` when moving a
-task to another cgroup, its pages may be recharged to the new cgroup, if
-move_charge_at_immigrate has been chosen.
-
 2.4 Swap Extension
 --------------------------------------

@ -756,78 +750,8 @@ If we want to change this to 1G, we can at any time use::

 THIS IS DEPRECATED!

-It's expensive and unreliable! It's better practice to launch workload
-tasks directly from inside their target cgroup. Use dedicated workload
-cgroups to allow fine-grained policy adjustments without having to
-move physical pages between control domains.
-
-Users can move charges associated with a task along with task migration, that
-is, uncharge task's pages from the old cgroup and charge them to the new cgroup.
-This feature is not supported in !CONFIG_MMU environments because of lack of
-page tables.
-
-8.1 Interface
-------------
-
-This feature is disabled by default. It can be enabled (and disabled again) by
-writing to memory.move_charge_at_immigrate of the destination cgroup.
-
-If you want to enable it::
-
-	# echo (some positive value) > memory.move_charge_at_immigrate
-
-.. note::
-      Each bits of move_charge_at_immigrate has its own meaning about what type
-      of charges should be moved. See :ref:`section 8.2
-      <cgroup-v1-memory-movable-charges>` for details.
-
-.. note::
-      Charges are moved only when you move mm->owner, in other words,
-      a leader of a thread group.
-
-.. note::
-      If we cannot find enough space for the task in the destination cgroup, we
-      try to make space by reclaiming memory. Task migration may fail if we
-      cannot make enough space.
-
-.. note::
-      It can take several seconds if you move charges much.
-
-And if you want disable it again::
-
-	# echo 0 > memory.move_charge_at_immigrate
-
-.. _cgroup-v1-memory-movable-charges:
-
-8.2 Type of charges which can be moved
--------------------------------------
-
-Each bit in move_charge_at_immigrate has its own meaning about what type of
-charges should be moved. But in any case, it must be noted that an account of
-a page or a swap can be moved only when it is charged to the task's current
-(old) memory cgroup.
-
-+---+--------------------------------------------------------------------------+
-|bit| what type of charges would be moved ?                                    |
-+===+==========================================================================+
-| 0 | A charge of an anonymous page (or swap of it) used by the target task.   |
-|   | You must enable Swap Extension (see 2.4) to enable move of swap charges. |
-+---+--------------------------------------------------------------------------+
-| 1 | A charge of file pages (normal file, tmpfs file (e.g. ipc shared memory) |
-|   | and swaps of tmpfs file) mmapped by the target task. Unlike the case of  |
-|   | anonymous pages, file pages (and swaps) in the range mmapped by the task |
-|   | will be moved even if the task hasn't done page fault, i.e. they might   |
-|   | not be the task's "RSS", but other task's "RSS" that maps the same file. |
-|   | The mapcount of the page is ignored (the page can be moved independent   |
-|   | of the mapcount). You must enable Swap Extension (see 2.4) to            |
-|   | enable move of swap charges.                                             |
-+---+--------------------------------------------------------------------------+
-
-8.3 TODO
--------
-
- All of moving charge operations are done under cgroup_mutex. It's not good
-  behavior to hold the mutex too long, so we may need some trick.
+Reading memory.move_charge_at_immigrate will always return 0 and writing
+to it will always return -EINVAL.

 9. Memory thresholds
 ====================
--- a/Documentation/admin-guide/cgroup-v2.rst
+++ b/Documentation/admin-guide/cgroup-v2.rst
@ -64,13 +64,14 @@ v1 is available under :ref:`Documentation/admin-guide/cgroup-v1/index.rst <cgrou
     5-6. Device
     5-7. RDMA
       5-7-1. RDMA Interface Files
-     5-8. HugeTLB
-       5.8-1. HugeTLB Interface Files
-     5-9. Misc
-       5.9-1 Miscellaneous cgroup Interface Files
-       5.9-2 Migration and Ownership
-     5-10. Others
-       5-10-1. perf_event
+     5-8. DMEM
+     5-9. HugeTLB
+       5.9-1. HugeTLB Interface Files
+     5-10. Misc
+       5.10-1 Miscellaneous cgroup Interface Files
+       5.10-2 Migration and Ownership
+     5-11. Others
+       5-11-1. perf_event
     5-N. Non-normative information
       5-N-1. CPU controller root cgroup process behaviour
       5-N-2. IO controller root cgroup process behaviour
@ -1655,6 +1656,11 @@ The following nested keys are defined.
 	  pgdemote_khugepaged
 		Number of pages demoted by khugepaged.

+	  hugetlb
+		Amount of memory used by hugetlb pages. This metric only shows
+		up if hugetlb usage is accounted for in memory.current (i.e.
+		cgroup is mounted with the memory_hugetlb_accounting option).
+
  memory.numa_stat
 	A read-only nested-keyed file which exists on non-root cgroups.

@ -2621,6 +2627,49 @@ RDMA Interface Files
 	  mlx4_0 hca_handle=1 hca_object=20
 	  ocrdma1 hca_handle=1 hca_object=23

+DMEM
+----
+
+The "dmem" controller regulates the distribution and accounting of
+device memory regions. Because each memory region may have its own page size,
+which does not have to be equal to the system page size, the units are always bytes.
+
+DMEM Interface Files
+~~~~~~~~~~~~~~~~~~~~
+
+  dmem.max, dmem.min, dmem.low
+	A readwrite nested-keyed file that exists for all the cgroups
+	except root that describes current configured resource limit
+	for a region.
+
+	An example for xe follows::
+
+	  drm/0000:03:00.0/vram0 1073741824
+	  drm/0000:03:00.0/stolen max
+
+	The semantics are the same as for the memory cgroup controller, and are
+	calculated in the same way.
+
+  dmem.capacity
+	A read-only file that describes maximum region capacity.
+	It only exists on the root cgroup. Not all memory can be
+	allocated by cgroups, as the kernel reserves some for
+	internal use.
+
+	An example for xe follows::
+
+	  drm/0000:03:00.0/vram0 8514437120
+	  drm/0000:03:00.0/stolen 67108864
+
+  dmem.current
+	A read-only file that describes current resource usage.
+	It exists for all the cgroup except root.
+
+	An example for xe follows::
+
+	  drm/0000:03:00.0/vram0 12550144
+	  drm/0000:03:00.0/stolen 8650752
+
 HugeTLB
 -------

--- a/Documentation/admin-guide/cifs/usage.rst
+++ b/Documentation/admin-guide/cifs/usage.rst
@ -270,6 +270,8 @@ configured for Unix Extensions (and the client has not disabled
 illegal Windows/NTFS/SMB characters to a remap range (this mount parameter
 is the default for SMB3). This remap (``mapposix``) range is also
 compatible with Mac (and "Services for Mac" on some older Windows).
+When POSIX Extensions for SMB 3.1.1 are negotiated, remapping is automatically
+disabled.

 CIFS VFS Mount Options
 ======================
--- a/Documentation/admin-guide/hw-vuln/processor_mmio_stale_data.rst
+++ b/Documentation/admin-guide/hw-vuln/processor_mmio_stale_data.rst
@ -157,9 +157,7 @@ This is achieved by using the otherwise unused and obsolete VERW instruction in
 combination with a microcode update. The microcode clears the affected CPU
 buffers when the VERW instruction is executed.

-Kernel reuses the MDS function to invoke the buffer clearing:
-
-	mds_clear_cpu_buffers()
+Kernel does the buffer clearing with x86_clear_cpu_buffers().

 On MDS affected CPUs, the kernel already invokes CPU buffer clear on
 kernel/userspace, hypervisor/guest and C-state (idle) transitions. No
--- a/Documentation/admin-guide/kernel-parameters.rst
+++ b/Documentation/admin-guide/kernel-parameters.rst
@ -159,6 +159,7 @@ is applicable::
 	SCSI	Appropriate SCSI support is enabled.
 			A lot of drivers have their options described inside
 			the Documentation/scsi/ sub-directory.
+        SDW     SoundWire support is enabled.
 	SECURITY Different security models are enabled.
 	SELINUX SELinux support is enabled.
 	SERIAL	Serial support is enabled.
--- a/Documentation/admin-guide/kernel-parameters.txt
+++ b/Documentation/admin-guide/kernel-parameters.txt
@ -446,9 +446,15 @@
 	arm64.nobti	[ARM64] Unconditionally disable Branch Target
 			Identification support

+	arm64.nogcs	[ARM64] Unconditionally disable Guarded Control Stack
+			support
+
 	arm64.nomops	[ARM64] Unconditionally disable Memory Copy and Memory
 			Set instructions support

+	arm64.nompam	[ARM64] Unconditionally disable Memory Partitioning And
+			Monitoring support
+
 	arm64.nomte	[ARM64] Unconditionally disable Memory Tagging Extension
 			support

@ -2304,6 +2310,9 @@
 			per_cpu_perf_limits
 			  Allow per-logical-CPU P-State performance control limits using
 			  cpufreq sysfs interface
+			no_cas
+			  Do not enable capacity-aware scheduling (CAS) on
+			  hybrid systems

 	intremap=	[X86-64,Intel-IOMMU,EARLY]
 			on	enable Interrupt Remapping (default)
@ -2441,7 +2450,9 @@
 			specified in the flag list (default: domain):

 			nohz
-			  Disable the tick when a single task runs.
+			  Disable the tick when a single task runs as well as
+			  disabling other kernel noises like having RCU callbacks
+			  offloaded. This is equivalent to the nohz_full parameter.

 			  A residual 1Hz tick is offloaded to workqueues, which you
 			  need to affine to housekeeping through the global
@ -2757,17 +2768,21 @@
 			nvhe: Standard nVHE-based mode, without support for
 			      protected guests.

-			protected: nVHE-based mode with support for guests whose
-				   state is kept private from the host.
+			protected: Mode with support for guests whose state is
+				   kept private from the host, using VHE or
+				   nVHE depending on HW support.

 			nested: VHE-based mode with support for nested
-				virtualization. Requires at least ARMv8.3
-				hardware.
+				virtualization. Requires at least ARMv8.4
+				hardware (with FEAT_NV2).

 			Defaults to VHE/nVHE based on hardware support. Setting
 			mode to "protected" will disable kexec and hibernation
-			for the host. "nested" is experimental and should be
-			used with extreme caution.
+			for the host. To force nVHE on VHE hardware, add
+			"arm64_sw.hvhe=0 id_aa64mmfr1.vh=0" to the
+			command-line.
+			"nested" is experimental and should be used with
+			extreme caution.

 	kvm-arm.vgic_v3_group0_trap=
 			[KVM,ARM,EARLY] Trap guest accesses to GICv3 group-0
@ -3045,6 +3060,8 @@
 			* max_sec_lba48: Set or clear transfer size limit to
 			  65535 sectors.

+			* external: Mark port as external (hotplug-capable).
+
 			* [no]lpm: Enable or disable link power management.

 			* [no]setxfer: Indicate if transfer speed mode setting
@ -4696,6 +4713,10 @@
 		nomio		[S390] Do not use MIO instructions.
 		norid		[S390] ignore the RID field and force use of
 				one PCI domain per PCI function
+		notph		[PCIE] If the PCIE_TPH kernel config parameter
+				is enabled, this kernel boot option can be used
+				to disable PCIe TLP Processing Hints support
+				system-wide.

 	pcie_aspm=	[PCIE] Forcibly enable or ignore PCIe Active State Power
 			Management.
@ -4828,6 +4849,11 @@
 			       can be preempted anytime.  Tasks will also yield
 			       contended spinlocks (if the critical section isn't
 			       explicitly preempt disabled beyond the lock itself).
+			lazy - Scheduler controlled. Similar to full but instead
+			       of preempting the task immediately, the task gets
+			       one HZ tick time to yield itself before the
+			       preemption will be forced. One preemption is when the
+			       task returns to user space.

 	print-fatal-signals=
 			[KNL] debug: print fatal signals
@ -6089,6 +6115,10 @@
 			non-zero "wait" parameter.  See weight_single
 			and weight_many.

+	sdw_mclk_divider=[SDW]
+			Specify the MCLK divider for Intel SoundWire buses in
+			case the BIOS does not provide the clock rate properly.
+
 	skew_tick=	[KNL,EARLY] Offset the periodic timer tick per cpu to mitigate
 			xtime_lock contention on larger systems, and/or RCU lock
 			contention on all systems with CONFIG_MAXSMP set.
@ -6176,6 +6206,16 @@
 			For more information see Documentation/mm/slub.rst.
 			(slub_nomerge legacy name also accepted for now)

+	slab_strict_numa	[MM]
+			Support memory policies on a per object level
+			in the slab allocator. The default is for memory
+			policies to be applied at the folio level when
+			a new folio is needed or a partial folio is
+			retrieved from the lists. Increases overhead
+			in the slab fastpaths but gains more accurate
+			NUMA kernel object placement which helps with slow
+			interconnects in NUMA systems.
+
 	slram=		[HW,MTD]

 	smart2=		[HW]
@ -6731,6 +6771,16 @@
 			Force threading of all interrupt handlers except those
 			marked explicitly IRQF_NO_THREAD.

+	thp_shmem=	[KNL]
+			Format: <size>[KMG],<size>[KMG]:<policy>;<size>[KMG]-<size>[KMG]:<policy>
+			Control the default policy of each hugepage size for the
+			internal shmem mount. <policy> is one of policies available
+			for the shmem mount ("always", "inherit", "never", "within_size",
+			and "advise").
+			It can be used multiple times for multiple shmem THP sizes.
+			See Documentation/admin-guide/mm/transhuge.rst for more
+			details.
+
 	topology=	[S390,EARLY]
 			Format: {off | on}
 			Specify if the kernel should make use of the cpu
@ -6966,6 +7016,13 @@
 			See Documentation/admin-guide/mm/transhuge.rst
 			for more details.

+	transparent_hugepage_shmem= [KNL]
+			Format: [always|within_size|advise|never|deny|force]
+			Can be used to control the hugepage allocation policy for
+			the internal shmem mount.
+			See Documentation/admin-guide/mm/transhuge.rst
+			for more details.
+
 	trusted.source=	[KEYS]
 			Format: <string>
 			This parameter identifies the trust source as a backend
@ -7002,6 +7059,19 @@
 			having this key zero'ed is acceptable. E.g. in testing
 			scenarios.

+	tsa=		[X86] Control mitigation for Transient Scheduler
+			Attacks on AMD CPUs. Search the following in your
+			favourite search engine for more details:
+
+			"Technical guidance for mitigating transient scheduler
+			attacks".
+
+			off		- disable the mitigation
+			on		- enable the mitigation (default)
+			user		- mitigate only user/kernel transitions
+			vm		- mitigate only guest/host transitions
+
+
 	tsc=		Disable clocksource stability checks for TSC.
 			Format: <string>
 			[x86] reliable: mark tsc clocksource as reliable, this
--- a/Documentation/admin-guide/media/building.rst
+++ b/Documentation/admin-guide/media/building.rst
@ -15,7 +15,7 @@ Please notice, however, that, if:

 you should use the main media development tree ``master`` branch:

-    https://git.linuxtv.org/media_tree.git/
+    https://git.linuxtv.org/media.git/

 In this case, you may find some useful information at the
 `LinuxTv wiki pages <https://linuxtv.org/wiki>`_:
--- a/Documentation/admin-guide/media/saa7134.rst
+++ b/Documentation/admin-guide/media/saa7134.rst
@ -67,7 +67,7 @@ Changes / Fixes
 Please mail to linux-media AT vger.kernel.org unified diffs against
 the linux media git tree:

-    https://git.linuxtv.org/media_tree.git/
+    https://git.linuxtv.org/media.git/

 This is done by committing a patch at a clone of the git tree and
 submitting the patch using ``git send-email``. Don't forget to
--- a/Documentation/admin-guide/mm/transhuge.rst
+++ b/Documentation/admin-guide/mm/transhuge.rst
@ -326,6 +326,29 @@ PMD_ORDER THP policy will be overridden. If the policy for PMD_ORDER
 is not defined within a valid ``thp_anon``, its policy will default to
 ``never``.

+Similarly to ``transparent_hugepage``, you can control the hugepage
+allocation policy for the internal shmem mount by using the kernel parameter
+``transparent_hugepage_shmem=<policy>``, where ``<policy>`` is one of the
+seven valid policies for shmem (``always``, ``within_size``, ``advise``,
+``never``, ``deny``, and ``force``).
+
+In the same manner as ``thp_anon`` controls each supported anonymous THP
+size, ``thp_shmem`` controls each supported shmem THP size. ``thp_shmem``
+has the same format as ``thp_anon``, but also supports the policy
+``within_size``.
+
+``thp_shmem=`` may be specified multiple times to configure all THP sizes
+as required. If ``thp_shmem=`` is specified at least once, any shmem THP
+sizes not explicitly configured on the command line are implicitly set to
+``never``.
+
+``transparent_hugepage_shmem`` setting only affects the global toggle. If
+``thp_shmem`` is not specified, PMD_ORDER hugepage will default to
+``inherit``. However, if a valid ``thp_shmem`` setting is provided by the
+user, the PMD_ORDER hugepage policy will be overridden. If the policy for
+PMD_ORDER is not defined within a valid ``thp_shmem``, its policy will
+default to ``never``.
+
 Hugepages in tmpfs/shmem
 ========================

@ -530,10 +553,18 @@ anon_fault_fallback_charge
 	instead falls back to using huge pages with lower orders or
 	small pages even though the allocation was successful.

-swpout
-	is incremented every time a huge page is swapped out in one
+zswpout
+	is incremented every time a huge page is swapped out to zswap in one
 	piece without splitting.

+swpin
+	is incremented every time a huge page is swapped in from a non-zswap
+	swap device in one piece.
+
+swpout
+	is incremented every time a huge page is swapped out to a non-zswap
+	swap device in one piece without splitting.
+
 swpout_fallback
 	is incremented if a huge page has to be split before swapout.
 	Usually because failed to allocate some continuous swap space
--- a/Documentation/admin-guide/perf/nvidia-pmu.rst
+++ b/Documentation/admin-guide/perf/nvidia-pmu.rst
@ -34,7 +34,7 @@ strongly-ordered (SO) PCIE write traffic to local/remote memory. Please see
 traffic coverage.

 The events and configuration options of this PMU device are described in sysfs,
-see /sys/bus/event_sources/devices/nvidia_scf_pmu_<socket-id>.
+see /sys/bus/event_source/devices/nvidia_scf_pmu_<socket-id>.

 Example usage:

@ -66,7 +66,7 @@ Please see :ref:`NVIDIA_Uncore_PMU_Traffic_Coverage_Section` for more info about
 the PMU traffic coverage.

 The events and configuration options of this PMU device are described in sysfs,
-see /sys/bus/event_sources/devices/nvidia_nvlink_c2c0_pmu_<socket-id>.
+see /sys/bus/event_source/devices/nvidia_nvlink_c2c0_pmu_<socket-id>.

 Example usage:

@ -86,6 +86,22 @@ Example usage:

   perf stat -a -e nvidia_nvlink_c2c0_pmu_3/event=0x0/

+The NVLink-C2C has two ports that can be connected to one GPU (occupying both
+ports) or to two GPUs (one GPU per port). The user can use "port" bitmap
+parameter to select the port(s) to monitor. Each bit represents the port number,
+e.g. "port=0x1" corresponds to port 0 and "port=0x3" is for port 0 and 1. The
+PMU will monitor both ports by default if not specified.
+
+Example for port filtering:
+
+* Count event id 0x0 from the GPU connected with socket 0 on port 0::
+
+   perf stat -a -e nvidia_nvlink_c2c0_pmu_0/event=0x0,port=0x1/
+
+* Count event id 0x0 from the GPUs connected with socket 0 on port 0 and port 1::
+
+   perf stat -a -e nvidia_nvlink_c2c0_pmu_0/event=0x0,port=0x3/
+
 NVLink-C2C1 PMU
 -------------------

@ -96,7 +112,7 @@ Please see :ref:`NVIDIA_Uncore_PMU_Traffic_Coverage_Section` for more info about
 the PMU traffic coverage.

 The events and configuration options of this PMU device are described in sysfs,
-see /sys/bus/event_sources/devices/nvidia_nvlink_c2c1_pmu_<socket-id>.
+see /sys/bus/event_source/devices/nvidia_nvlink_c2c1_pmu_<socket-id>.

 Example usage:

@ -116,6 +132,22 @@ Example usage:

   perf stat -a -e nvidia_nvlink_c2c1_pmu_3/event=0x0/

+The NVLink-C2C has two ports that can be connected to one GPU (occupying both
+ports) or to two GPUs (one GPU per port). The user can use "port" bitmap
+parameter to select the port(s) to monitor. Each bit represents the port number,
+e.g. "port=0x1" corresponds to port 0 and "port=0x3" is for port 0 and 1. The
+PMU will monitor both ports by default if not specified.
+
+Example for port filtering:
+
+* Count event id 0x0 from the GPU connected with socket 0 on port 0::
+
+   perf stat -a -e nvidia_nvlink_c2c1_pmu_0/event=0x0,port=0x1/
+
+* Count event id 0x0 from the GPUs connected with socket 0 on port 0 and port 1::
+
+   perf stat -a -e nvidia_nvlink_c2c1_pmu_0/event=0x0,port=0x3/
+
 CNVLink PMU
 ---------------

@ -125,13 +157,14 @@ to local memory. For PCIE traffic, this PMU captures read and relaxed ordered
 for more info about the PMU traffic coverage.

 The events and configuration options of this PMU device are described in sysfs,
-see /sys/bus/event_sources/devices/nvidia_cnvlink_pmu_<socket-id>.
+see /sys/bus/event_source/devices/nvidia_cnvlink_pmu_<socket-id>.

 Each SoC socket can be connected to one or more sockets via CNVLink. The user can
 use "rem_socket" bitmap parameter to select the remote socket(s) to monitor.
 Each bit represents the socket number, e.g. "rem_socket=0xE" corresponds to
-socket 1 to 3.
-/sys/bus/event_sources/devices/nvidia_cnvlink_pmu_<socket-id>/format/rem_socket
+socket 1 to 3. The PMU will monitor all remote sockets by default if not
+specified.
+/sys/bus/event_source/devices/nvidia_cnvlink_pmu_<socket-id>/format/rem_socket
 shows the valid bits that can be set in the "rem_socket" parameter.

 The PMU can not distinguish the remote traffic initiator, therefore it does not
@ -165,12 +198,13 @@ local/remote memory. Please see :ref:`NVIDIA_Uncore_PMU_Traffic_Coverage_Section
 for more info about the PMU traffic coverage.

 The events and configuration options of this PMU device are described in sysfs,
-see /sys/bus/event_sources/devices/nvidia_pcie_pmu_<socket-id>.
+see /sys/bus/event_source/devices/nvidia_pcie_pmu_<socket-id>.

 Each SoC socket can support multiple root ports. The user can use
 "root_port" bitmap parameter to select the port(s) to monitor, i.e.
-"root_port=0xF" corresponds to root port 0 to 3.
-/sys/bus/event_sources/devices/nvidia_pcie_pmu_<socket-id>/format/root_port
+"root_port=0xF" corresponds to root port 0 to 3. The PMU will monitor all root
+ports by default if not specified.
+/sys/bus/event_source/devices/nvidia_pcie_pmu_<socket-id>/format/root_port
 shows the valid bits that can be set in the "root_port" parameter.

 Example usage:
--- a/Documentation/admin-guide/pm/amd-pstate.rst
+++ b/Documentation/admin-guide/pm/amd-pstate.rst
@ -251,9 +251,7 @@ performance supported in `AMD CPPC Performance Capability <perf_cap_>`_).
 In some ASICs, the highest CPPC performance is not the one in the ``_CPC``
 table, so we need to expose it to sysfs. If boost is not active, but
 still supported, this maximum frequency will be larger than the one in
-``cpuinfo``. On systems that support preferred core, the driver will have
-different values for some cores than others and this will reflect the values
-advertised by the platform at bootup.
+``cpuinfo``.
 This attribute is read-only.

 ``amd_pstate_lowest_nonlinear_freq``
--- a/Documentation/admin-guide/pm/cpufreq.rst
+++ b/Documentation/admin-guide/pm/cpufreq.rst
@ -248,6 +248,20 @@ are the following:
 	If that frequency cannot be determined, this attribute should not
 	be present.

+``cpuinfo_avg_freq``
+        An average frequency (in KHz) of all CPUs belonging to a given policy,
+        derived from a hardware provided feedback and reported on a time frame
+        spanning at most few milliseconds.
+
+        This is expected to be based on the frequency the hardware actually runs
+        at and, as such, might require specialised hardware support (such as AMU
+        extension on ARM). If one cannot be determined, this attribute should
+        not be present.
+
+        Note, that failed attempt to retrieve current frequency for a given
+        CPU(s) will result in an appropriate error, i.e: EAGAIN for CPU that
+        remains idle (raised on ARM).
+
 ``cpuinfo_max_freq``
 	Maximum possible operating frequency the CPUs belonging to this policy
 	can run at (in kHz).
@ -293,7 +307,8 @@ are the following:
 	Some architectures (e.g. ``x86``) may attempt to provide information
 	more precisely reflecting the current CPU frequency through this
 	attribute, but that still may not be the exact current CPU frequency as
-	seen by the hardware at the moment.
+	seen by the hardware at the moment. This behavior though, is only
+	available via c:macro:``CPUFREQ_ARCH_CUR_FREQ`` option.

 ``scaling_driver``
 	The scaling driver currently in use.
--- a/Documentation/admin-guide/pm/cpuidle.rst
+++ b/Documentation/admin-guide/pm/cpuidle.rst
@ -269,61 +269,56 @@ Namely, when invoked to select an idle state for a CPU (i.e. an idle state that
 the CPU will ask the processor hardware to enter), it attempts to predict the
 idle duration and uses the predicted value for idle state selection.

-It first obtains the time until the closest timer event with the assumption
-that the scheduler tick will be stopped.  That time, referred to as the *sleep
-length* in what follows, is the upper bound on the time before the next CPU
-wakeup.  It is used to determine the sleep length range, which in turn is needed
-to get the sleep length correction factor.
+It first uses a simple pattern recognition algorithm to obtain a preliminary
+idle duration prediction.  Namely, it saves the last 8 observed idle duration
+values and, when predicting the idle duration next time, it computes the average
+and variance of them.  If the variance is small (smaller than 400 square
+milliseconds) or it is small relative to the average (the average is greater
+that 6 times the standard deviation), the average is regarded as the "typical
+interval" value.  Otherwise, either the longest or the shortest (depending on
+which one is farther from the average) of the saved observed idle duration
+values is discarded and the computation is repeated for the remaining ones.

-The ``menu`` governor maintains two arrays of sleep length correction factors.
-One of them is used when tasks previously running on the given CPU are waiting
-for some I/O operations to complete and the other one is used when that is not
-the case.  Each array contains several correction factor values that correspond
-to different sleep length ranges organized so that each range represented in the
-array is approximately 10 times wider than the previous one.
+Again, if the variance of them is small (in the above sense), the average is
+taken as the "typical interval" value and so on, until either the "typical
+interval" is determined or too many data points are disregarded.  In the latter
+case, if the size of the set of data points still under consideration is
+sufficiently large, the next idle duration is not likely to be above the largest
+idle duration value still in that set, so that value is taken as the predicted
+next idle duration.  Finally, if the set of data points still under
+consideration is too small, no prediction is made.
+
+If the preliminary prediction of the next idle duration computed this way is
+long enough, the governor obtains the time until the closest timer event with
+the assumption that the scheduler tick will be stopped.  That time, referred to
+as the *sleep length* in what follows, is the upper bound on the time before the
+next CPU wakeup.  It is used to determine the sleep length range, which in turn
+is needed to get the sleep length correction factor.
+
+The ``menu`` governor maintains an array containing several correction factor
+values that correspond to different sleep length ranges organized so that each
+range represented in the array is approximately 10 times wider than the previous
+one.

 The correction factor for the given sleep length range (determined before
 selecting the idle state for the CPU) is updated after the CPU has been woken
 up and the closer the sleep length is to the observed idle duration, the closer
 to 1 the correction factor becomes (it must fall between 0 and 1 inclusive).
 The sleep length is multiplied by the correction factor for the range that it
-falls into to obtain the first approximation of the predicted idle duration.
+falls into to obtain an approximation of the predicted idle duration that is
+compared to the "typical interval" determined previously and the minimum of
+the two is taken as the final idle duration prediction.

-Next, the governor uses a simple pattern recognition algorithm to refine its
-idle duration prediction.  Namely, it saves the last 8 observed idle duration
-values and, when predicting the idle duration next time, it computes the average
-and variance of them.  If the variance is small (smaller than 400 square
-milliseconds) or it is small relative to the average (the average is greater
-that 6 times the standard deviation), the average is regarded as the "typical
-interval" value.  Otherwise, the longest of the saved observed idle duration
-values is discarded and the computation is repeated for the remaining ones.
-Again, if the variance of them is small (in the above sense), the average is
-taken as the "typical interval" value and so on, until either the "typical
-interval" is determined or too many data points are disregarded, in which case
-the "typical interval" is assumed to equal "infinity" (the maximum unsigned
-integer value).  The "typical interval" computed this way is compared with the
-sleep length multiplied by the correction factor and the minimum of the two is
-taken as the predicted idle duration.
-
-Then, the governor computes an extra latency limit to help "interactive"
-workloads.  It uses the observation that if the exit latency of the selected
-idle state is comparable with the predicted idle duration, the total time spent
-in that state probably will be very short and the amount of energy to save by
-entering it will be relatively small, so likely it is better to avoid the
-overhead related to entering that state and exiting it.  Thus selecting a
-shallower state is likely to be a better option then.   The first approximation
-of the extra latency limit is the predicted idle duration itself which
-additionally is divided by a value depending on the number of tasks that
-previously ran on the given CPU and now they are waiting for I/O operations to
-complete.  The result of that division is compared with the latency limit coming
-from the power management quality of service, or `PM QoS <cpu-pm-qos_>`_,
-framework and the minimum of the two is taken as the limit for the idle states'
-exit latency.
+If the "typical interval" value is small, which means that the CPU is likely
+to be woken up soon enough, the sleep length computation is skipped as it may
+be costly and the idle duration is simply predicted to equal the "typical
+interval" value.

 Now, the governor is ready to walk the list of idle states and choose one of
 them.  For this purpose, it compares the target residency of each state with
-the predicted idle duration and the exit latency of it with the computed latency
-limit.  It selects the state with the target residency closest to the predicted
+the predicted idle duration and the exit latency of it with the with the latency
+limit coming from the power management quality of service, or `PM QoS <cpu-pm-qos_>`_,
+framework.  It selects the state with the target residency closest to the predicted
 idle duration, but still below it, and exit latency that does not exceed the
 limit.

--- a/Documentation/admin-guide/pm/intel_pstate.rst
+++ b/Documentation/admin-guide/pm/intel_pstate.rst
@ -696,6 +696,9 @@ of them have to be prepended with the ``intel_pstate=`` prefix.
 	Use per-logical-CPU P-State limits (see `Coordination of P-state
 	Limits`_ for details).

+``no_cas``
+	Do not enable capacity-aware scheduling (CAS) which is enabled by
+	default on hybrid systems.

 Diagnostics and Tuning
 ======================
--- a/Documentation/admin-guide/sysctl/kernel.rst
+++ b/Documentation/admin-guide/sysctl/kernel.rst
@ -212,17 +212,6 @@ pid>/``).
 This value defaults to 0.


-core_sort_vma
-=============
-
-The default coredump writes VMAs in address order. By setting
-``core_sort_vma`` to 1, VMAs will be written from smallest size
-to largest size. This is known to break at least elfutils, but
-can be handy when dealing with very large (and truncated)
-coredumps where the more useful debugging details are included
-in the smaller VMAs.
-
-
 core_uses_pid
 =============

@ -1546,6 +1535,13 @@ constant ``FUTEX_TID_MASK`` (0x3fffffff).
 If a value outside of this range is written to ``threads-max`` an
 ``EINVAL`` error occurs.

+timer_migration
+===============
+
+When set to a non-zero value, attempt to migrate timers away from idle cpus to
+allow them to remain in low power states longer.
+
+Default is set (1).

 traceoff_on_warning
 ===================
--- a/Documentation/admin-guide/tainted-kernels.rst
+++ b/Documentation/admin-guide/tainted-kernels.rst
@ -101,6 +101,7 @@ Bit  Log  Number  Reason that got the kernel tainted
 16  _/X   65536  auxiliary taint, defined for and used by distros
 17  _/T  131072  kernel was built with the struct randomization plugin
 18  _/N  262144  an in-kernel test has been run
+ 19  _/J  524288  userspace used a mutating debug operation in fwctl
 ===  ===  ======  ========================================================

 Note: The character ``_`` is representing a blank in this table to make reading
@ -184,3 +185,7 @@ More detailed explanation for tainting
     build time.

 18) ``N`` if an in-kernel test, such as a KUnit test, has been run.
+
+ 19) ``J`` if userpace opened /dev/fwctl/* and performed a FWTCL_RPC_DEBUG_WRITE
+     to use the devices debugging features. Device debugging features could
+     cause the device to malfunction in undefined ways.
--- a/Documentation/admin-guide/thunderbolt.rst
+++ b/Documentation/admin-guide/thunderbolt.rst
@ -28,7 +28,7 @@ should be a userspace tool that handles all the low-level details, keeps
 a database of the authorized devices and prompts users for new connections.

 More details about the sysfs interface for Thunderbolt devices can be
-found in ``Documentation/ABI/testing/sysfs-bus-thunderbolt``.
+found in Documentation/ABI/testing/sysfs-bus-thunderbolt.

 Those users who just want to connect any device without any sort of
 manual work can add following line to
--- a/Documentation/admin-guide/workload-tracing.rst
+++ b/Documentation/admin-guide/workload-tracing.rst
@ -83,7 +83,7 @@ scripts/ver_linux is a good way to check if your system already has
 the necessary tools::

  sudo apt-get build-essentials flex bison yacc
-  sudo apt install libelf-dev systemtap-sdt-dev libaudit-dev libslang2-dev libperl-dev libdw-dev
+  sudo apt install libelf-dev systemtap-sdt-dev libslang2-dev libperl-dev libdw-dev

 cscope is a good tool to browse kernel sources. Let's install it now::

--- a/Documentation/arch/arm64/arm-cca.rst
+++ b/Documentation/arch/arm64/arm-cca.rst
@ -0,0 +1,69 @@
+.. SPDX-License-Identifier: GPL-2.0
+
+=====================================
+Arm Confidential Compute Architecture
+=====================================
+
+Arm systems that support the Realm Management Extension (RME) contain
+hardware to allow a VM guest to be run in a way which protects the code
+and data of the guest from the hypervisor. It extends the older "two
+world" model (Normal and Secure World) into four worlds: Normal, Secure,
+Root and Realm. Linux can then also be run as a guest to a monitor
+running in the Realm world.
+
+The monitor running in the Realm world is known as the Realm Management
+Monitor (RMM) and implements the Realm Management Monitor
+specification[1]. The monitor acts a bit like a hypervisor (e.g. it runs
+in EL2 and manages the stage 2 page tables etc of the guests running in
+Realm world), however much of the control is handled by a hypervisor
+running in the Normal World. The Normal World hypervisor uses the Realm
+Management Interface (RMI) defined by the RMM specification to request
+the RMM to perform operations (e.g. mapping memory or executing a vCPU).
+
+The RMM defines an environment for guests where the address space (IPA)
+is split into two. The lower half is protected - any memory that is
+mapped in this half cannot be seen by the Normal World and the RMM
+restricts what operations the Normal World can perform on this memory
+(e.g. the Normal World cannot replace pages in this region without the
+guest's cooperation). The upper half is shared, the Normal World is free
+to make changes to the pages in this region, and is able to emulate MMIO
+devices in this region too.
+
+A guest running in a Realm may also communicate with the RMM using the
+Realm Services Interface (RSI) to request changes in its environment or
+to perform attestation about its environment. In particular it may
+request that areas of the protected address space are transitioned
+between 'RAM' and 'EMPTY' (in either direction). This allows a Realm
+guest to give up memory to be returned to the Normal World, or to
+request new memory from the Normal World.  Without an explicit request
+from the Realm guest the RMM will otherwise prevent the Normal World
+from making these changes.
+
+Linux as a Realm Guest
+----------------------
+
+To run Linux as a guest within a Realm, the following must be provided
+either by the VMM or by a `boot loader` run in the Realm before Linux:
+
+ * All protected RAM described to Linux (by DT or ACPI) must be marked
+   RIPAS RAM before handing control over to Linux.
+
+ * MMIO devices must be either unprotected (e.g. emulated by the Normal
+   World) or marked RIPAS DEV.
+
+ * MMIO devices emulated by the Normal World and used very early in boot
+   (specifically earlycon) must be specified in the upper half of IPA.
+   For earlycon this can be done by specifying the address on the
+   command line, e.g. with an IPA size of 33 bits and the base address
+   of the emulated UART at 0x1000000: ``earlycon=uart,mmio,0x101000000``
+
+ * Linux will use bounce buffers for communicating with unprotected
+   devices. It will transition some protected memory to RIPAS EMPTY and
+   expect to be able to access unprotected pages at the same IPA address
+   but with the highest valid IPA bit set. The expectation is that the
+   VMM will remove the physical pages from the protected mapping and
+   provide those pages as unprotected pages.
+
+References
+----------
+[1] https://developer.arm.com/documentation/den0137/
--- a/Documentation/arch/arm64/asymmetric-32bit.rst
+++ b/Documentation/arch/arm64/asymmetric-32bit.rst
@ -153,3 +153,11 @@ asymmetric system, a broken guest at EL1 could still attempt to execute
 mode will return to host userspace with an ``exit_reason`` of
 ``KVM_EXIT_FAIL_ENTRY`` and will remain non-runnable until successfully
 re-initialised by a subsequent ``KVM_ARM_VCPU_INIT`` operation.
+
+NOHZ FULL
+---------
+
+To avoid perturbing an adaptive-ticks CPU (specified using
+``nohz_full=``) when a 32-bit task is forcefully migrated, these CPUs
+are treated as 64-bit-only when support for asymmetric 32-bit systems
+is enabled.
--- a/Documentation/arch/arm64/booting.rst
+++ b/Documentation/arch/arm64/booting.rst
@ -41,6 +41,9 @@ to automatically locate and size all RAM, or it may use knowledge of
 the RAM in the machine, or any other method the boot loader designer
 sees fit.)

+For Arm Confidential Compute Realms this includes ensuring that all
+protected RAM has a Realm IPA state (RIPAS) of "RAM".
+

 2. Setup the device tree
 -------------------------
@ -285,6 +288,12 @@ Before jumping into the kernel, the following conditions must be met:

    - SCR_EL3.FGTEn (bit 27) must be initialised to 0b1.

+  For CPUs with the Fine Grained Traps 2 (FEAT_FGT2) extension present:
+
+  - If EL3 is present and the kernel is entered at EL2:
+
+    - SCR_EL3.FGTEn2 (bit 59) must be initialised to 0b1.
+
  For CPUs with support for HCRX_EL2 (FEAT_HCX) present:

  - If EL3 is present and the kernel is entered at EL2:
@ -379,12 +388,31 @@ Before jumping into the kernel, the following conditions must be met:

    - SMCR_EL2.EZT0 (bit 30) must be initialised to 0b1.

+  For CPUs with the Performance Monitors Extension (FEAT_PMUv3p9):
+
+ - If EL3 is present:
+
+    - MDCR_EL3.EnPM2 (bit 7) must be initialised to 0b1.
+
+ - If the kernel is entered at EL1 and EL2 is present:
+
+    - HDFGRTR2_EL2.nPMICNTR_EL0 (bit 2) must be initialised to 0b1.
+    - HDFGRTR2_EL2.nPMICFILTR_EL0 (bit 3) must be initialised to 0b1.
+    - HDFGRTR2_EL2.nPMUACR_EL1 (bit 4) must be initialised to 0b1.
+
+    - HDFGWTR2_EL2.nPMICNTR_EL0 (bit 2) must be initialised to 0b1.
+    - HDFGWTR2_EL2.nPMICFILTR_EL0 (bit 3) must be initialised to 0b1.
+    - HDFGWTR2_EL2.nPMUACR_EL1 (bit 4) must be initialised to 0b1.
+
  For CPUs with Memory Copy and Memory Set instructions (FEAT_MOPS):

  - If the kernel is entered at EL1 and EL2 is present:

    - HCRX_EL2.MSCEn (bit 11) must be initialised to 0b1.

+    - HCRX_EL2.MCE2 (bit 10) must be initialised to 0b1 and the hypervisor
+      must handle MOPS exceptions as described in :ref:`arm64_mops_hyp`.
+
  For CPUs with the Extended Translation Control Register feature (FEAT_TCR2):

  - If EL3 is present:
@ -411,6 +439,38 @@ Before jumping into the kernel, the following conditions must be met:

    - HFGRWR_EL2.nPIRE0_EL1 (bit 57) must be initialised to 0b1.

+ - For CPUs with Guarded Control Stacks (FEAT_GCS):
+
+  - GCSCR_EL1 must be initialised to 0.
+
+  - GCSCRE0_EL1 must be initialised to 0.
+
+  - If EL3 is present:
+
+    - SCR_EL3.GCSEn (bit 39) must be initialised to 0b1.
+
+  - If EL2 is present:
+
+    - GCSCR_EL2 must be initialised to 0.
+
+ - If the kernel is entered at EL1 and EL2 is present:
+
+    - HCRX_EL2.GCSEn must be initialised to 0b1.
+
+    - HFGITR_EL2.nGCSEPP (bit 59) must be initialised to 0b1.
+
+    - HFGITR_EL2.nGCSSTR_EL1 (bit 58) must be initialised to 0b1.
+
+    - HFGITR_EL2.nGCSPUSHM_EL1 (bit 57) must be initialised to 0b1.
+
+    - HFGRTR_EL2.nGCS_EL1 (bit 53) must be initialised to 0b1.
+
+    - HFGRTR_EL2.nGCS_EL0 (bit 52) must be initialised to 0b1.
+
+    - HFGWTR_EL2.nGCS_EL1 (bit 53) must be initialised to 0b1.
+
+    - HFGWTR_EL2.nGCS_EL0 (bit 52) must be initialised to 0b1.
+
 The requirements described above for CPU mode, caches, MMUs, architected
 timers, coherency and system registers apply to all CPUs.  All CPUs must
 enter the kernel in the same exception level.  Where the values documented
--- a/Documentation/arch/arm64/cpu-feature-registers.rst
+++ b/Documentation/arch/arm64/cpu-feature-registers.rst
@ -152,6 +152,8 @@ infrastructure:
     +------------------------------+---------+---------+
     | DIT                          | [51-48] |    y    |
     +------------------------------+---------+---------+
+     | MPAM                         | [43-40] |    n    |
+     +------------------------------+---------+---------+
     | SVE                          | [35-32] |    y    |
     +------------------------------+---------+---------+
     | GIC                          | [27-24] |    n    |
--- a/Documentation/arch/arm64/elf_hwcaps.rst
+++ b/Documentation/arch/arm64/elf_hwcaps.rst
@ -16,9 +16,9 @@ architected discovery mechanism available to userspace code at EL0. The
 kernel exposes the presence of these features to userspace through a set
 of flags called hwcaps, exposed in the auxiliary vector.

-Userspace software can test for features by acquiring the AT_HWCAP or
-AT_HWCAP2 entry of the auxiliary vector, and testing whether the relevant
-flags are set, e.g.::
+Userspace software can test for features by acquiring the AT_HWCAP,
+AT_HWCAP2 or AT_HWCAP3 entry of the auxiliary vector, and testing
+whether the relevant flags are set, e.g.::

 	bool floating_point_is_present(void)
 	{
@ -170,26 +170,86 @@ HWCAP_PACG
    ID_AA64ISAR1_EL1.GPI == 0b0001, as described by
    Documentation/arch/arm64/pointer-authentication.rst.

+HWCAP_GCS
+    Functionality implied by ID_AA64PFR1_EL1.GCS == 0b1, as
+    described by Documentation/arch/arm64/gcs.rst.
+
+HWCAP_CMPBR
+    Functionality implied by ID_AA64ISAR2_EL1.CSSC == 0b0010.
+
+HWCAP_FPRCVT
+    Functionality implied by ID_AA64ISAR3_EL1.FPRCVT == 0b0001.
+
+HWCAP_F8MM8
+    Functionality implied by ID_AA64FPFR0_EL1.F8MM8 == 0b0001.
+
+HWCAP_F8MM4
+    Functionality implied by ID_AA64FPFR0_EL1.F8MM4 == 0b0001.
+
+HWCAP_SVE_F16MM
+    Functionality implied by ID_AA64PFR0_EL1.SVE == 0b0001 and
+    ID_AA64ZFR0_EL1.F16MM == 0b0001.
+
+HWCAP_SVE_ELTPERM
+    Functionality implied by ID_AA64PFR0_EL1.SVE == 0b0001 and
+    ID_AA64ZFR0_EL1.ELTPERM == 0b0001.
+
+HWCAP_SVE_AES2
+    Functionality implied by ID_AA64PFR0_EL1.SVE == 0b0001 and
+    ID_AA64ZFR0_EL1.AES == 0b0011.
+
+HWCAP_SVE_BFSCALE
+    Functionality implied by ID_AA64PFR0_EL1.SVE == 0b0001 and
+    ID_AA64ZFR0_EL1.B16B16 == 0b0010.
+
+HWCAP_SVE2P2
+    Functionality implied by ID_AA64PFR0_EL1.SVE == 0b0001 and
+    ID_AA64ZFR0_EL1.SVEver == 0b0011.
+
+HWCAP_SME2P2
+    Functionality implied by ID_AA64SMFR0_EL1.SMEver == 0b0011.
+
+HWCAP_SME_SBITPERM
+    Functionality implied by ID_AA64SMFR0_EL1.SBitPerm == 0b1.
+
+HWCAP_SME_AES
+    Functionality implied by ID_AA64SMFR0_EL1.AES == 0b1.
+
+HWCAP_SME_SFEXPA
+    Functionality implied by ID_AA64SMFR0_EL1.SFEXPA == 0b1.
+
+HWCAP_SME_STMOP
+    Functionality implied by ID_AA64SMFR0_EL1.STMOP == 0b1.
+
+HWCAP_SME_SMOP4
+    Functionality implied by ID_AA64SMFR0_EL1.SMOP4 == 0b1.
+
 HWCAP2_DCPODP
    Functionality implied by ID_AA64ISAR1_EL1.DPB == 0b0010.

 HWCAP2_SVE2
-    Functionality implied by ID_AA64ZFR0_EL1.SVEver == 0b0001.
+    Functionality implied by ID_AA64PFR0_EL1.SVE == 0b0001 and
+    ID_AA64ZFR0_EL1.SVEver == 0b0001.

 HWCAP2_SVEAES
-    Functionality implied by ID_AA64ZFR0_EL1.AES == 0b0001.
+    Functionality implied by ID_AA64PFR0_EL1.SVE == 0b0001 and
+    ID_AA64ZFR0_EL1.AES == 0b0001.

 HWCAP2_SVEPMULL
-    Functionality implied by ID_AA64ZFR0_EL1.AES == 0b0010.
+    Functionality implied by ID_AA64PFR0_EL1.SVE == 0b0001 and
+    ID_AA64ZFR0_EL1.AES == 0b0010.

 HWCAP2_SVEBITPERM
-    Functionality implied by ID_AA64ZFR0_EL1.BitPerm == 0b0001.
+    Functionality implied by ID_AA64PFR0_EL1.SVE == 0b0001 and
+    ID_AA64ZFR0_EL1.BitPerm == 0b0001.

 HWCAP2_SVESHA3
-    Functionality implied by ID_AA64ZFR0_EL1.SHA3 == 0b0001.
+    Functionality implied by ID_AA64PFR0_EL1.SVE == 0b0001 and
+    ID_AA64ZFR0_EL1.SHA3 == 0b0001.

 HWCAP2_SVESM4
-    Functionality implied by ID_AA64ZFR0_EL1.SM4 == 0b0001.
+    Functionality implied by ID_AA64PFR0_EL1.SVE == 0b0001 and
+    ID_AA64ZFR0_EL1.SM4 == 0b0001.

 HWCAP2_FLAGM2
    Functionality implied by ID_AA64ISAR0_EL1.TS == 0b0010.
@ -198,16 +258,20 @@ HWCAP2_FRINT
    Functionality implied by ID_AA64ISAR1_EL1.FRINTTS == 0b0001.

 HWCAP2_SVEI8MM
-    Functionality implied by ID_AA64ZFR0_EL1.I8MM == 0b0001.
+    Functionality implied by ID_AA64PFR0_EL1.SVE == 0b0001 and
+    ID_AA64ZFR0_EL1.I8MM == 0b0001.

 HWCAP2_SVEF32MM
-    Functionality implied by ID_AA64ZFR0_EL1.F32MM == 0b0001.
+    Functionality implied by ID_AA64PFR0_EL1.SVE == 0b0001 and
+    ID_AA64ZFR0_EL1.F32MM == 0b0001.

 HWCAP2_SVEF64MM
-    Functionality implied by ID_AA64ZFR0_EL1.F64MM == 0b0001.
+    Functionality implied by ID_AA64PFR0_EL1.SVE == 0b0001 and
+    ID_AA64ZFR0_EL1.F64MM == 0b0001.

 HWCAP2_SVEBF16
-    Functionality implied by ID_AA64ZFR0_EL1.BF16 == 0b0001.
+    Functionality implied by ID_AA64PFR0_EL1.SVE == 0b0001 and
+    ID_AA64ZFR0_EL1.BF16 == 0b0001.

 HWCAP2_I8MM
    Functionality implied by ID_AA64ISAR1_EL1.I8MM == 0b0001.
@ -273,7 +337,8 @@ HWCAP2_EBF16
    Functionality implied by ID_AA64ISAR1_EL1.BF16 == 0b0010.

 HWCAP2_SVE_EBF16
-    Functionality implied by ID_AA64ZFR0_EL1.BF16 == 0b0010.
+    Functionality implied by ID_AA64PFR0_EL1.SVE == 0b0001 and
+    ID_AA64ZFR0_EL1.BF16 == 0b0010.

 HWCAP2_CSSC
    Functionality implied by ID_AA64ISAR2_EL1.CSSC == 0b0001.
@ -282,7 +347,8 @@ HWCAP2_RPRFM
    Functionality implied by ID_AA64ISAR2_EL1.RPRFM == 0b0001.

 HWCAP2_SVE2P1
-    Functionality implied by ID_AA64ZFR0_EL1.SVEver == 0b0010.
+    Functionality implied by ID_AA64PFR0_EL1.SVE == 0b0001 and
+    ID_AA64ZFR0_EL1.SVEver == 0b0010.

 HWCAP2_SME2
    Functionality implied by ID_AA64SMFR0_EL1.SMEver == 0b0001.
@ -309,7 +375,8 @@ HWCAP2_HBC
    Functionality implied by ID_AA64ISAR2_EL1.BC == 0b0001.

 HWCAP2_SVE_B16B16
-    Functionality implied by ID_AA64ZFR0_EL1.B16B16 == 0b0001.
+    Functionality implied by ID_AA64PFR0_EL1.SVE == 0b0001 and
+    ID_AA64ZFR0_EL1.B16B16 == 0b0001.

 HWCAP2_LRCPC3
    Functionality implied by ID_AA64ISAR1_EL1.LRCPC == 0b0011.
--- a/Documentation/arch/arm64/gcs.rst
+++ b/Documentation/arch/arm64/gcs.rst
@ -0,0 +1,227 @@
+===============================================
+Guarded Control Stack support for AArch64 Linux
+===============================================
+
+This document outlines briefly the interface provided to userspace by Linux in
+order to support use of the ARM Guarded Control Stack (GCS) feature.
+
+This is an outline of the most important features and issues only and not
+intended to be exhaustive.
+
+
+
+1.  General
+-----------
+
+* GCS is an architecture feature intended to provide greater protection
+  against return oriented programming (ROP) attacks and to simplify the
+  implementation of features that need to collect stack traces such as
+  profiling.
+
+* When GCS is enabled a separate guarded control stack is maintained by the
+  PE which is writeable only through specific GCS operations.  This
+  stores the call stack only, when a procedure call instruction is
+  performed the current PC is pushed onto the GCS and on RET the
+  address in the LR is verified against that on the top of the GCS.
+
+* When active the current GCS pointer is stored in the system register
+  GCSPR_EL0.  This is readable by userspace but can only be updated
+  via specific GCS instructions.
+
+* The architecture provides instructions for switching between guarded
+  control stacks with checks to ensure that the new stack is a valid
+  target for switching.
+
+* The functionality of GCS is similar to that provided by the x86 Shadow
+  Stack feature, due to sharing of userspace interfaces the ABI refers to
+  shadow stacks rather than GCS.
+
+* Support for GCS is reported to userspace via HWCAP_GCS in the aux vector
+  AT_HWCAP entry.
+
+* GCS is enabled per thread.  While there is support for disabling GCS
+  at runtime this should be done with great care.
+
+* GCS memory access faults are reported as normal memory access faults.
+
+* GCS specific errors (those reported with EC 0x2d) will be reported as
+  SIGSEGV with a si_code of SEGV_CPERR (control protection error).
+
+* GCS is supported only for AArch64.
+
+* On systems where GCS is supported GCSPR_EL0 is always readable by EL0
+  regardless of the GCS configuration for the thread.
+
+* The architecture supports enabling GCS without verifying that return values
+  in LR match those in the GCS, the LR will be ignored.  This is not supported
+  by Linux.
+
+
+
+2.  Enabling and disabling Guarded Control Stacks
+-------------------------------------------------
+
+* GCS is enabled and disabled for a thread via the PR_SET_SHADOW_STACK_STATUS
+  prctl(), this takes a single flags argument specifying which GCS features
+  should be used.
+
+* When set PR_SHADOW_STACK_ENABLE flag allocates a Guarded Control Stack
+  and enables GCS for the thread, enabling the functionality controlled by
+  GCSCRE0_EL1.{nTR, RVCHKEN, PCRSEL}.
+
+* When set the PR_SHADOW_STACK_PUSH flag enables the functionality controlled
+  by GCSCRE0_EL1.PUSHMEn, allowing explicit GCS pushes.
+
+* When set the PR_SHADOW_STACK_WRITE flag enables the functionality controlled
+  by GCSCRE0_EL1.STREn, allowing explicit stores to the Guarded Control Stack.
+
+* Any unknown flags will cause PR_SET_SHADOW_STACK_STATUS to return -EINVAL.
+
+* PR_LOCK_SHADOW_STACK_STATUS is passed a bitmask of features with the same
+  values as used for PR_SET_SHADOW_STACK_STATUS.  Any future changes to the
+  status of the specified GCS mode bits will be rejected.
+
+* PR_LOCK_SHADOW_STACK_STATUS allows any bit to be locked, this allows
+  userspace to prevent changes to any future features.
+
+* There is no support for a process to remove a lock that has been set for
+  it.
+
+* PR_SET_SHADOW_STACK_STATUS and PR_LOCK_SHADOW_STACK_STATUS affect only the
+  thread that called them, any other running threads will be unaffected.
+
+* New threads inherit the GCS configuration of the thread that created them.
+
+* GCS is disabled on exec().
+
+* The current GCS configuration for a thread may be read with the
+  PR_GET_SHADOW_STACK_STATUS prctl(), this returns the same flags that
+  are passed to PR_SET_SHADOW_STACK_STATUS.
+
+* If GCS is disabled for a thread after having previously been enabled then
+  the stack will remain allocated for the lifetime of the thread.  At present
+  any attempt to reenable GCS for the thread will be rejected, this may be
+  revisited in future.
+
+* It should be noted that since enabling GCS will result in GCS becoming
+  active immediately it is not normally possible to return from the function
+  that invoked the prctl() that enabled GCS.  It is expected that the normal
+  usage will be that GCS is enabled very early in execution of a program.
+
+
+
+3.  Allocation of Guarded Control Stacks
+----------------------------------------
+
+* When GCS is enabled for a thread a new Guarded Control Stack will be
+  allocated for it of half the standard stack size or 2 gigabytes,
+  whichever is smaller.
+
+* When a new thread is created by a thread which has GCS enabled then a
+  new Guarded Control Stack will be allocated for the new thread with
+  half the size of the standard stack.
+
+* When a stack is allocated by enabling GCS or during thread creation then
+  the top 8 bytes of the stack will be initialised to 0 and GCSPR_EL0 will
+  be set to point to the address of this 0 value, this can be used to
+  detect the top of the stack.
+
+* Additional Guarded Control Stacks can be allocated using the
+  map_shadow_stack() system call.
+
+* Stacks allocated using map_shadow_stack() can optionally have an end of
+  stack marker and cap placed at the top of the stack.  If the flag
+  SHADOW_STACK_SET_TOKEN is specified a cap will be placed on the stack,
+  if SHADOW_STACK_SET_MARKER is not specified the cap will be the top 8
+  bytes of the stack and if it is specified then the cap will be the next
+  8 bytes.  While specifying just SHADOW_STACK_SET_MARKER by itself is
+  valid since the marker is all bits 0 it has no observable effect.
+
+* Stacks allocated using map_shadow_stack() must have a size which is a
+  multiple of 8 bytes larger than 8 bytes and must be 8 bytes aligned.
+
+* An address can be specified to map_shadow_stack(), if one is provided then
+  it must be aligned to a page boundary.
+
+* When a thread is freed the Guarded Control Stack initially allocated for
+  that thread will be freed.  Note carefully that if the stack has been
+  switched this may not be the stack currently in use by the thread.
+
+
+4.  Signal handling
+--------------------
+
+* A new signal frame record gcs_context encodes the current GCS mode and
+  pointer for the interrupted context on signal delivery.  This will always
+  be present on systems that support GCS.
+
+* The record contains a flag field which reports the current GCS configuration
+  for the interrupted context as PR_GET_SHADOW_STACK_STATUS would.
+
+* The signal handler is run with the same GCS configuration as the interrupted
+  context.
+
+* When GCS is enabled for the interrupted thread a signal handling specific
+  GCS cap token will be written to the GCS, this is an architectural GCS cap
+  with the token type (bits 0..11) all clear.  The GCSPR_EL0 reported in the
+  signal frame will point to this cap token.
+
+* The signal handler will use the same GCS as the interrupted context.
+
+* When GCS is enabled on signal entry a frame with the address of the signal
+  return handler will be pushed onto the GCS, allowing return from the signal
+  handler via RET as normal.  This will not be reported in the gcs_context in
+  the signal frame.
+
+
+5.  Signal return
+-----------------
+
+When returning from a signal handler:
+
+* If there is a gcs_context record in the signal frame then the GCS flags
+  and GCSPR_EL0 will be restored from that context prior to further
+  validation.
+
+* If there is no gcs_context record in the signal frame then the GCS
+  configuration will be unchanged.
+
+* If GCS is enabled on return from a signal handler then GCSPR_EL0 must
+  point to a valid GCS signal cap record, this will be popped from the
+  GCS prior to signal return.
+
+* If the GCS configuration is locked when returning from a signal then any
+  attempt to change the GCS configuration will be treated as an error.  This
+  is true even if GCS was not enabled prior to signal entry.
+
+* GCS may be disabled via signal return but any attempt to enable GCS via
+  signal return will be rejected.
+
+
+6.  ptrace extensions
+---------------------
+
+* A new regset NT_ARM_GCS is defined for use with PTRACE_GETREGSET and
+  PTRACE_SETREGSET.
+
+* The GCS mode, including enable and disable, may be configured via ptrace.
+  If GCS is enabled via ptrace no new GCS will be allocated for the thread.
+
+* Configuration via ptrace ignores locking of GCS mode bits.
+
+
+7.  ELF coredump extensions
+---------------------------
+
+* NT_ARM_GCS notes will be added to each coredump for each thread of the
+  dumped process.  The contents will be equivalent to the data that would
+  have been read if a PTRACE_GETREGSET of the corresponding type were
+  executed for each thread when the coredump was generated.
+
+
+
+8.  /proc extensions
+--------------------
+
+* Guarded Control Stack pages will include "ss" in their VmFlags in
+  /proc/<pid>/smaps.
--- a/Documentation/arch/arm64/index.rst
+++ b/Documentation/arch/arm64/index.rst
@ -10,16 +10,19 @@ ARM64 Architecture
    acpi_object_usage
    amu
    arm-acpi
+    arm-cca
    asymmetric-32bit
    booting
    cpu-feature-registers
    cpu-hotplug
    elf_hwcaps
+    gcs
    hugetlbpage
    kdump
    legacy_instructions
    memory
    memory-tagging-extension
+    mops
    perf
    pointer-authentication
    ptdump
--- a/Documentation/arch/arm64/mops.rst
+++ b/Documentation/arch/arm64/mops.rst
@ -0,0 +1,44 @@
+.. SPDX-License-Identifier: GPL-2.0
+
+===================================
+Memory copy/set instructions (MOPS)
+===================================
+
+A MOPS memory copy/set operation consists of three consecutive CPY* or SET*
+instructions: a prologue, main and epilogue (for example: CPYP, CPYM, CPYE).
+
+A main or epilogue instruction can take a MOPS exception for various reasons,
+for example when a task is migrated to a CPU with a different MOPS
+implementation, or when the instruction's alignment and size requirements are
+not met. The software exception handler is then expected to reset the registers
+and restart execution from the prologue instruction. Normally this is handled
+by the kernel.
+
+For more details refer to "D1.3.5.7 Memory Copy and Memory Set exceptions" in
+the Arm Architecture Reference Manual DDI 0487K.a (Arm ARM).
+
+.. _arm64_mops_hyp:
+
+Hypervisor requirements
+-----------------------
+
+A hypervisor running a Linux guest must handle all MOPS exceptions from the
+guest kernel, as Linux may not be able to handle the exception at all times.
+For example, a MOPS exception can be taken when the hypervisor migrates a vCPU
+to another physical CPU with a different MOPS implementation.
+
+To do this, the hypervisor must:
+
+  - Set HCRX_EL2.MCE2 to 1 so that the exception is taken to the hypervisor.
+
+  - Have an exception handler that implements the algorithm from the Arm ARM
+    rules CNTMJ and MWFQH.
+
+  - Set the guest's PSTATE.SS to 0 in the exception handler, to handle a
+    potential step of the current instruction.
+
+    Note: Clearing PSTATE.SS is needed so that a single step exception is taken
+    on the next instruction (the prologue instruction). Otherwise prologue
+    would get silently stepped over and the single step exception taken on the
+    main instruction. Note that if the guest instruction is not being stepped
+    then clearing PSTATE.SS has no effect.
--- a/Documentation/arch/arm64/silicon-errata.rst
+++ b/Documentation/arch/arm64/silicon-errata.rst
@ -57,6 +57,8 @@ stable kernels.
 +----------------+-----------------+-----------------+-----------------------------+
 | Ampere         | AmpereOne AC04  | AC04_CPU_10     | AMPERE_ERRATUM_AC03_CPU_38  |
 +----------------+-----------------+-----------------+-----------------------------+
+| Ampere         | AmpereOne AC04  | AC04_CPU_23     | AMPERE_ERRATUM_AC04_CPU_23  |
+----------------+-----------------+-----------------+-----------------------------+
 +----------------+-----------------+-----------------+-----------------------------+
 | ARM            | Cortex-A510     | #2457168        | ARM64_ERRATUM_2457168       |
 +----------------+-----------------+-----------------+-----------------------------+
@ -198,7 +200,8 @@ stable kernels.
 +----------------+-----------------+-----------------+-----------------------------+
 | ARM            | Neoverse-V3     | #3312417        | ARM64_ERRATUM_3194386       |
 +----------------+-----------------+-----------------+-----------------------------+
-| ARM            | MMU-500         | #841119,826419  | N/A                         |
+| ARM            | MMU-500         | #841119,826419  | ARM_SMMU_MMU_500_CPRE_ERRATA|
+|                |                 | #562869,1047329 |                             |
 +----------------+-----------------+-----------------+-----------------------------+
 | ARM            | MMU-600         | #1076982,1209401| N/A                         |
 +----------------+-----------------+-----------------+-----------------------------+
@ -255,8 +258,11 @@ stable kernels.
 +----------------+-----------------+-----------------+-----------------------------+
 | Hisilicon      | Hip08 SMMU PMCG | #162001800      | N/A                         |
 +----------------+-----------------+-----------------+-----------------------------+
-| Hisilicon      | Hip{08,09,10,10C| #162001900      | N/A                         |
-|                | ,11} SMMU PMCG  |                 |                             |
+| Hisilicon      | Hip{08,09,09A,10| #162001900      | N/A                         |
+|                | ,10C,11}        |                 |                             |
+|                | SMMU PMCG       |                 |                             |
+----------------+-----------------+-----------------+-----------------------------+
+| Hisilicon      | Hip09           | #162100801      | HISILICON_ERRATUM_162100801 |
 +----------------+-----------------+-----------------+-----------------------------+
 +----------------+-----------------+-----------------+-----------------------------+
 | Qualcomm Tech. | Kryo/Falkor v1  | E1003           | QCOM_FALKOR_ERRATUM_1003    |
--- a/Documentation/arch/arm64/sme.rst
+++ b/Documentation/arch/arm64/sme.rst
@ -346,6 +346,10 @@ The regset data starts with struct user_za_header, containing:

 * Writes to NT_ARM_ZT will set PSTATE.ZA to 1.

+* If any register data is provided along with SME_PT_VL_ONEXEC then the
+  registers data will be interpreted with the current vector length, not
+  the vector length configured for use on exec.
+

 8.  ELF coredump extensions
 ---------------------------
--- a/Documentation/arch/arm64/sve.rst
+++ b/Documentation/arch/arm64/sve.rst
@ -402,6 +402,10 @@ The regset data starts with struct user_sve_header, containing:
  streaming mode and any SETREGSET of NT_ARM_SSVE will enter streaming mode
  if the target was not in streaming mode.

+* If any register data is provided along with SVE_PT_VL_ONEXEC then the
+  registers data will be interpreted with the current vector length, not
+  the vector length configured for use on exec.
+
 * The effect of writing a partial, incomplete payload is unspecified.


--- a/Documentation/arch/s390/driver-model.rst
+++ b/Documentation/arch/s390/driver-model.rst
@ -244,7 +244,7 @@ information about the interrupt from the irb parameter.
 --------------------

 The ccwgroup mechanism is designed to handle devices consisting of multiple ccw
-devices, like lcs or ctc.
+devices, like qeth or ctc.

 The ccw driver provides a 'group' attribute. Piping bus ids of ccw devices to
 this attributes creates a ccwgroup device consisting of these ccw devices (if
--- a/Documentation/arch/x86/amd-debugging.rst
+++ b/Documentation/arch/x86/amd-debugging.rst
@ -0,0 +1,368 @@
+.. SPDX-License-Identifier: GPL-2.0
+
+Debugging AMD Zen systems
+++++++++++++++++++++++++
+
+Introduction
+============
+
+This document describes techniques that are useful for debugging issues with
+AMD Zen systems.  It is intended for use by developers and technical users
+to help identify and resolve issues.
+
+S3 vs s2idle
+============
+
+On AMD systems, it's not possible to simultaneously support suspend-to-RAM (S3)
+and suspend-to-idle (s2idle).  To confirm which mode your system supports you
+can look at ``cat /sys/power/mem_sleep``.  If it shows ``s2idle [deep]`` then
+*S3* is supported.  If it shows ``[s2idle]`` then *s2idle* is
+supported.
+
+On systems that support *S3*, the firmware will be utilized to put all hardware into
+the appropriate low power state.
+
+On systems that support *s2idle*, the kernel will be responsible for transitioning devices
+into the appropriate low power state. When all devices are in the appropriate low
+power state, the hardware will transition into a hardware sleep state.
+
+After a suspend cycle you can tell how much time was spent in a hardware sleep
+state by looking at ``cat /sys/power/suspend_stats/last_hw_sleep``.
+
+This flowchart explains how the AMD s2idle suspend flow works.
+
+.. kernel-figure:: suspend.svg
+
+This flowchart explains how the amd s2idle resume flow works.
+
+.. kernel-figure:: resume.svg
+
+s2idle debugging tool
+=====================
+
+As there are a lot of places that problems can occur, a debugging tool has been
+created at
+`amd-debug-tools <https://git.kernel.org/pub/scm/linux/kernel/git/superm1/amd-debug-tools.git/about/>`_
+that can help test for common problems and offer suggestions.
+
+If you have an s2idle issue, it's best to start with this and follow instructions
+from its findings.  If you continue to have an issue, raise a bug with the
+report generated from this script to
+`drm/amd gitlab <https://gitlab.freedesktop.org/drm/amd/-/issues/new?issuable_template=s2idle_BUG_TEMPLATE>`_.
+
+Spurious s2idle wakeups from an IRQ
+===================================
+
+Spurious wakeups will generally have an IRQ set to ``/sys/power/pm_wakeup_irq``.
+This can be matched to ``/proc/interrupts`` to determine what device woke the system.
+
+If this isn't enough to debug the problem, then the following sysfs files
+can be set to add more verbosity to the wakeup process: ::
+
+  # echo 1 | sudo tee /sys/power/pm_debug_messages
+  # echo 1 | sudo tee /sys/power/pm_print_times
+
+After making those changes, the kernel will display messages that can
+be traced back to kernel s2idle loop code as well as display any active
+GPIO sources while waking up.
+
+If the wakeup is caused by the ACPI SCI, additional ACPI debugging may be
+needed.  These commands can enable additional trace data: ::
+
+  # echo enable | sudo tee /sys/module/acpi/parameters/trace_state
+  # echo 1 | sudo tee /sys/module/acpi/parameters/aml_debug_output
+  # echo 0x0800000f | sudo tee /sys/module/acpi/parameters/debug_level
+  # echo 0xffff0000 | sudo tee /sys/module/acpi/parameters/debug_layer
+
+Spurious s2idle wakeups from a GPIO
+===================================
+
+If a GPIO is active when waking up the system ideally you would look at the
+schematic to determine what device it is associated with. If the schematic
+is not available, another tactic is to look at the ACPI _EVT() entry
+to determine what device is notified when that GPIO is active.
+
+For a hypothetical example, say that GPIO 59 woke up the system.  You can
+look at the SSDT to determine what device is notified when GPIO 59 is active.
+
+First convert the GPIO number into hex. ::
+
+  $ python3 -c "print(hex(59))"
+  0x3b
+
+Next determine which ACPI table has the ``_EVT`` entry. For example: ::
+
+  $ sudo grep EVT /sys/firmware/acpi/tables/SSDT*
+  grep: /sys/firmware/acpi/tables/SSDT27: binary file matches
+
+Decode this table::
+
+  $ sudo cp /sys/firmware/acpi/tables/SSDT27 .
+  $ sudo iasl -d SSDT27
+
+Then look at the table and find the matching entry for GPIO 0x3b. ::
+
+  Case (0x3B)
+  {
+      M000 (0x393B)
+      M460 ("    Notify (\\_SB.PCI0.GP17.XHC1, 0x02)\n", Zero, Zero, Zero, Zero, Zero, Zero)
+      Notify (\_SB.PCI0.GP17.XHC1, 0x02) // Device Wake
+  }
+
+You can see in this case that the device ``\_SB.PCI0.GP17.XHC1`` is notified
+when GPIO 59 is active. It's obvious this is an XHCI controller, but to go a
+step further you can figure out which XHCI controller it is by matching it to
+ACPI.::
+
+  $ grep "PCI0.GP17.XHC1" /sys/bus/acpi/devices/*/path
+  /sys/bus/acpi/devices/device:2d/path:\_SB_.PCI0.GP17.XHC1
+  /sys/bus/acpi/devices/device:2e/path:\_SB_.PCI0.GP17.XHC1.RHUB
+  /sys/bus/acpi/devices/device:2f/path:\_SB_.PCI0.GP17.XHC1.RHUB.PRT1
+  /sys/bus/acpi/devices/device:30/path:\_SB_.PCI0.GP17.XHC1.RHUB.PRT1.CAM0
+  /sys/bus/acpi/devices/device:31/path:\_SB_.PCI0.GP17.XHC1.RHUB.PRT1.CAM1
+  /sys/bus/acpi/devices/device:32/path:\_SB_.PCI0.GP17.XHC1.RHUB.PRT2
+  /sys/bus/acpi/devices/LNXPOWER:0d/path:\_SB_.PCI0.GP17.XHC1.PWRS
+
+Here you can see it matches to ``device:2d``. Look at the ``physical_node``
+to determine what PCI device that actually is. ::
+
+  $ ls -l /sys/bus/acpi/devices/device:2d/physical_node
+  lrwxrwxrwx 1 root root 0 Feb 12 13:22 /sys/bus/acpi/devices/device:2d/physical_node -> ../../../../../pci0000:00/0000:00:08.1/0000:c2:00.4
+
+So there you have it: the PCI device associated with this GPIO wakeup was ``0000:c2:00.4``.
+
+The ``amd_s2idle.py`` script will capture most of these artifacts for you.
+
+s2idle PM debug messages
+========================
+
+During the s2idle flow on AMD systems, the ACPI LPS0 driver is responsible
+to check all uPEP constraints.  Failing uPEP constraints does not prevent
+s0i3 entry.  This means that if some constraints are not met, it is possible
+the kernel may attempt to enter s2idle even if there are some known issues.
+
+To activate PM debugging, either specify ``pm_debug_messagess`` kernel
+command-line option at boot or write to ``/sys/power/pm_debug_messages``.
+Unmet constraints will be displayed in the kernel log and can be
+viewed by logging tools that process kernel ring buffer like ``dmesg`` or
+``journalctl``."
+
+If the system freezes on entry/exit before these messages are flushed, a
+useful debugging tactic is to unbind the ``amd_pmc`` driver to prevent
+notification to the platform to start s0i3 entry.  This will stop the
+system from freezing on entry or exit and let you view all the failed
+constraints. ::
+
+  cd /sys/bus/platform/drivers/amd_pmc
+  ls | grep AMD | sudo tee unbind
+
+After doing this, run the suspend cycle and look specifically for errors around: ::
+
+  ACPI: LPI: Constraint not met; min power state:%s current power state:%s
+
+Historical examples of s2idle issues
+====================================
+
+To help understand the types of issues that can occur and how to debug them,
+here are some historical examples of s2idle issues that have been resolved.
+
+Core offlining
+--------------
+An end user had reported that taking a core offline would prevent the system
+from properly entering s0i3.  This was debugged using internal AMD tools
+to capture and display a stream of metrics from the hardware showing what changed
+when a core was offlined.  It was determined that the hardware didn't get
+notification the offline cores were in the deepest state, and so it prevented
+CPU from going into the deepest state. The issue was debugged to a missing
+command to put cores into C3 upon offline.
+
+`commit d6b88ce2eb9d2 ("ACPI: processor idle: Allow playing dead in C3 state") <https://git.kernel.org/torvalds/c/d6b88ce2eb9d2>`_
+
+Corruption after resume
+-----------------------
+A big problem that occurred with Rembrandt was that there was graphical
+corruption after resume.  This happened because of a misalignment of PSP
+and driver responsibility.  The PSP will save and restore DMCUB, but the
+driver assumed it needed to reset DMCUB on resume.
+This actually was a misalignment for earlier silicon as well, but was not
+observed.
+
+`commit 79d6b9351f086 ("drm/amd/display: Don't reinitialize DMCUB on s0ix resume") <https://git.kernel.org/torvalds/c/79d6b9351f086>`_
+
+Back to Back suspends fail
+--------------------------
+When using a wakeup source that triggers the IRQ to wakeup, a bug in the
+pinctrl-amd driver may capture the wrong state of the IRQ and prevent the
+system going back to sleep properly.
+
+`commit b8c824a869f22 ("pinctrl: amd: Don't save/restore interrupt status and wake status bits") <https://git.kernel.org/torvalds/c/b8c824a869f22>`_
+
+Spurious timer based wakeup after 5 minutes
+-------------------------------------------
+The HPET was being used to program the wakeup source for the system, however
+this was causing a spurious wakeup after 5 minutes.  The correct alarm to use
+was the ACPI alarm.
+
+`commit 3d762e21d5637 ("rtc: cmos: Use ACPI alarm for non-Intel x86 systems too") <https://git.kernel.org/torvalds/c/3d762e21d5637>`_
+
+Disk disappears after resume
+----------------------------
+After resuming from s2idle, the NVME disk would disappear.  This was due to the
+BIOS not specifying the _DSD StorageD3Enable property.  This caused the NVME
+driver not to put the disk into the expected state at suspend and to fail
+on resume.
+
+`commit e79a10652bbd3 ("ACPI: x86: Force StorageD3Enable on more products") <https://git.kernel.org/torvalds/c/e79a10652bbd3>`_
+
+Spurious IRQ1
+-------------
+A number of Renoir, Lucienne, Cezanne, & Barcelo platforms have a
+platform firmware bug where IRQ1 is triggered during s0i3 resume.
+
+This was fixed in the platform firmware, but a number of systems didn't
+receive any more platform firmware updates.
+
+`commit 8e60615e89321 ("platform/x86/amd: pmc: Disable IRQ1 wakeup for RN/CZN") <https://git.kernel.org/torvalds/c/8e60615e89321>`_
+
+Hardware timeout
+----------------
+The hardware performs many actions besides accepting the values from
+amd-pmc driver.  As the communication path with the hardware is a mailbox,
+it's possible that it might not respond quickly enough.
+This issue manifested as a failure to suspend: ::
+
+  PM: dpm_run_callback(): acpi_subsys_suspend_noirq+0x0/0x50 returns -110
+  amd_pmc AMDI0005:00: PM: failed to suspend noirq: error -110
+
+The timing problem was identified by comparing the values of the idle mask.
+
+`commit 3c3c8e88c8712 ("platform/x86: amd-pmc: Increase the response register timeout") <https://git.kernel.org/torvalds/c/3c3c8e88c8712>`_
+
+Failed to reach hardware sleep state with panel on
+--------------------------------------------------
+On some Strix systems certain panels were observed to block the system from
+entering a hardware sleep state if the internal panel was on during the sequence.
+
+Even though the panel got turned off during suspend it exposed a timing problem
+where an interrupt caused the display hardware to wake up and block low power
+state entry.
+
+`commit 40b8c14936bd2 ("drm/amd/display: Disable unneeded hpd interrupts during dm_init") <https://git.kernel.org/torvalds/c/40b8c14936bd2>`_
+
+Runtime power consumption issues
+================================
+
+Runtime power consumption is influenced by many factors, including but not
+limited to the configuration of the PCIe Active State Power Management (ASPM),
+the display brightness, the EPP policy of the CPU, and the power management
+of the devices.
+
+ASPM
+----
+For the best runtime power consumption, ASPM should be programmed as intended
+by the BIOS from the hardware vendor.  To accomplish this the Linux kernel
+should be compiled with ``CONFIG_PCIEASPM_DEFAULT`` set to ``y`` and the
+sysfs file ``/sys/module/pcie_aspm/parameters/policy`` should not be modified.
+
+Most notably, if L1.2 is not configured properly for any devices, the SoC
+will not be able to enter the deepest idle state.
+
+EPP Policy
+----------
+The ``energy_performance_preference`` sysfs file can be used to set a bias
+of efficiency or performance for a CPU.  This has a direct relationship on
+the battery life when more heavily biased towards performance.
+
+
+BIOS debug messages
+===================
+
+Most OEM machines don't have a serial UART for outputting kernel or BIOS
+debug messages. However BIOS debug messages are useful for understanding
+both BIOS bugs and bugs with the Linux kernel drivers that call BIOS AML.
+
+As the BIOS on most OEM AMD systems are based off an AMD reference BIOS,
+the infrastructure used for exporting debugging messages is often the same
+as AMD reference BIOS.
+
+Manually Parsing
+----------------
+There is generally an ACPI method ``\M460`` that different paths of the AML
+will call to emit a message to the BIOS serial log. This method takes
+7 arguments, with the first being a string and the rest being optional
+integers::
+
+  Method (M460, 7, Serialized)
+
+Here is an example of a string that BIOS AML may call out using ``\M460``::
+
+  M460 ("  OEM-ASL-PCIe Address (0x%X)._REG (%d %d)  PCSA = %d\n", DADR, Arg0, Arg1, PCSA, Zero, Zero)
+
+Normally when executed, the ``\M460`` method would populate the additional
+arguments into the string.  In order to get these messages from the Linux
+kernel a hook has been added into ACPICA that can capture the *arguments*
+sent to ``\M460`` and print them to the kernel ring buffer.
+For example the following message could be emitted into kernel ring buffer::
+
+  extrace-0174 ex_trace_args         :  "  OEM-ASL-PCIe Address (0x%X)._REG (%d %d)  PCSA = %d\n", ec106000, 2, 1, 1, 0, 0
+
+In order to get these messages, you need to compile with ``CONFIG_ACPI_DEBUG``
+and then turn on the following ACPICA tracing parameters.
+This can be done either on the kernel command line or at runtime:
+
+* ``acpi.trace_method_name=\M460``
+* ``acpi.trace_state=method``
+
+NOTE: These can be very noisy at bootup. If you turn these parameters on
+the kernel command, please also consider turning up ``CONFIG_LOG_BUF_SHIFT``
+to a larger size such as 17 to avoid losing early boot messages.
+
+Tool assisted Parsing
+---------------------
+As mentioned above, parsing by hand can be tedious, especially with a lot of
+messages.  To help with this, a tool has been created at
+`amd-debug-tools <https://git.kernel.org/pub/scm/linux/kernel/git/superm1/amd-debug-tools.git/about/>`_
+to help parse the messages.
+
+Random reboot issues
+====================
+
+When a random reboot occurs, the high-level reason for the reboot is stored
+in a register that will persist onto the next boot.
+
+There are 6 classes of reasons for the reboot:
+ * Software induced
+ * Power state transition
+ * Pin induced
+ * Hardware induced
+ * Remote reset
+ * Internal CPU event
+
+.. csv-table::
+   :header: "Bit", "Type", "Reason"
+   :align: left
+
+   "0",  "Pin",      "thermal pin BP_THERMTRIP_L was tripped"
+   "1",  "Pin",      "power button was pressed for 4 seconds"
+   "2",  "Pin",      "shutdown pin was tripped"
+   "4",  "Remote",   "remote ASF power off command was received"
+   "9",  "Internal", "internal CPU thermal limit was tripped"
+   "16", "Pin",      "system reset pin BP_SYS_RST_L was tripped"
+   "17", "Software", "software issued PCI reset"
+   "18", "Software", "software wrote 0x4 to reset control register 0xCF9"
+   "19", "Software", "software wrote 0x6 to reset control register 0xCF9"
+   "20", "Software", "software wrote 0xE to reset control register 0xCF9"
+   "21", "ACPI-state", "ACPI power state transition occurred"
+   "22", "Pin",      "keyboard reset pin KB_RST_L was tripped"
+   "23", "Internal", "internal CPU shutdown event occurred"
+   "24", "Hardware", "system failed to boot before failed boot timer expired"
+   "25", "Hardware", "hardware watchdog timer expired"
+   "26", "Remote",   "remote ASF reset command was received"
+   "27", "Internal", "an uncorrected error caused a data fabric sync flood event"
+   "29", "Internal", "FCH and MP1 failed warm reset handshake"
+   "30", "Internal", "a parity error occurred"
+   "31", "Internal", "a software sync flood event occurred"
+
+This information is read by the kernel at bootup and printed into
+the syslog. When a random reboot occurs this message can be helpful
+to determine the next component to debug.
--- a/Documentation/arch/x86/index.rst
+++ b/Documentation/arch/x86/index.rst
@ -25,6 +25,7 @@ x86-specific Documentation
   shstk
   iommu
   intel_txt
+   amd-debugging
   amd-memory-encryption
   amd_hsmp
   tdx
--- a/Documentation/arch/x86/mds.rst
+++ b/Documentation/arch/x86/mds.rst
@ -93,7 +93,7 @@ enters a C-state.

 The kernel provides a function to invoke the buffer clearing:

-    mds_clear_cpu_buffers()
+    x86_clear_cpu_buffers()

 Also macro CLEAR_CPU_BUFFERS can be used in ASM late in exit-to-user path.
 Other than CFLAGS.ZF, this macro doesn't clobber any registers.
@ -185,9 +185,9 @@ Mitigation points
   idle clearing would be a window dressing exercise and is therefore not
   activated.

-   The invocation is controlled by the static key mds_idle_clear which is
-   switched depending on the chosen mitigation mode and the SMT state of
-   the system.
+   The invocation is controlled by the static key cpu_buf_idle_clear which is
+   switched depending on the chosen mitigation mode and the SMT state of the
+   system.

   The buffer clear is only invoked before entering the C-State to prevent
   that stale data from the idling CPU from spilling to the Hyper-Thread
--- a/Documentation/arch/x86/resume.svg
+++ b/Documentation/arch/x86/resume.svg
--- a/Documentation/arch/x86/suspend.svg
+++ b/Documentation/arch/x86/suspend.svg
--- a/Documentation/arch/x86/topology.rst
+++ b/Documentation/arch/x86/topology.rst
@ -135,6 +135,10 @@ Thread-related topology information in the kernel:
    The ID of the core to which a thread belongs. It is also printed in /proc/cpuinfo
    "core_id."

+  - topology_logical_core_id();
+
+    The logical core ID to which a thread belongs.
+


 System topology examples
--- a/Documentation/block/cmdline-partition.rst
+++ b/Documentation/block/cmdline-partition.rst
@ -39,13 +39,16 @@ blkdevparts=<blkdev-def>[;<blkdev-def>]
    create a link to block device partition with the name "PARTNAME".
    User space application can access partition by partition name.

+ro
+    read-only. Flag the partition as read-only.
+
 Example:

    eMMC disk names are "mmcblk0" and "mmcblk0boot0".

  bootargs::

-    'blkdevparts=mmcblk0:1G(data0),1G(data1),-;mmcblk0boot0:1m(boot),-(kernel)'
+    'blkdevparts=mmcblk0:1G(data0),1G(data1),-;mmcblk0boot0:1m(boot)ro,-(kernel)'

  dmesg::

--- a/Documentation/block/inline-encryption.rst
+++ b/Documentation/block/inline-encryption.rst
@ -77,10 +77,10 @@ Basic design
 ============

 We introduce ``struct blk_crypto_key`` to represent an inline encryption key and
-how it will be used.  This includes the actual bytes of the key; the size of the
-key; the algorithm and data unit size the key will be used with; and the number
-of bytes needed to represent the maximum data unit number the key will be used
-with.
+how it will be used.  This includes the type of the key (raw or
+hardware-wrapped); the actual bytes of the key; the size of the key; the
+algorithm and data unit size the key will be used with; and the number of bytes
+needed to represent the maximum data unit number the key will be used with.

 We introduce ``struct bio_crypt_ctx`` to represent an encryption context.  It
 contains a data unit number and a pointer to a blk_crypto_key.  We add pointers
@ -301,3 +301,250 @@ kernel will pretend that the device does not support hardware inline encryption
 When the crypto API fallback is enabled, this means that all bios with and
 encryption context will use the fallback, and IO will complete as usual.  When
 the fallback is disabled, a bio with an encryption context will be failed.
+
+.. _hardware_wrapped_keys:
+
+Hardware-wrapped keys
+=====================
+
+Motivation and threat model
+---------------------------
+
+Linux storage encryption (dm-crypt, fscrypt, eCryptfs, etc.) traditionally
+relies on the raw encryption key(s) being present in kernel memory so that the
+encryption can be performed.  This traditionally isn't seen as a problem because
+the key(s) won't be present during an offline attack, which is the main type of
+attack that storage encryption is intended to protect from.
+
+However, there is an increasing desire to also protect users' data from other
+types of attacks (to the extent possible), including:
+
+- Cold boot attacks, where an attacker with physical access to a system suddenly
+  powers it off, then immediately dumps the system memory to extract recently
+  in-use encryption keys, then uses these keys to decrypt user data on-disk.
+
+- Online attacks where the attacker is able to read kernel memory without fully
+  compromising the system, followed by an offline attack where any extracted
+  keys can be used to decrypt user data on-disk.  An example of such an online
+  attack would be if the attacker is able to run some code on the system that
+  exploits a Meltdown-like vulnerability but is unable to escalate privileges.
+
+- Online attacks where the attacker fully compromises the system, but their data
+  exfiltration is significantly time-limited and/or bandwidth-limited, so in
+  order to completely exfiltrate the data they need to extract the encryption
+  keys to use in a later offline attack.
+
+Hardware-wrapped keys are a feature of inline encryption hardware that is
+designed to protect users' data from the above attacks (to the extent possible),
+without introducing limitations such as a maximum number of keys.
+
+Note that it is impossible to **fully** protect users' data from these attacks.
+Even in the attacks where the attacker "just" gets read access to kernel memory,
+they can still extract any user data that is present in memory, including
+plaintext pagecache pages of encrypted files.  The focus here is just on
+protecting the encryption keys, as those instantly give access to **all** user
+data in any following offline attack, rather than just some of it (where which
+data is included in that "some" might not be controlled by the attacker).
+
+Solution overview
+-----------------
+
+Inline encryption hardware typically has "keyslots" into which software can
+program keys for the hardware to use; the contents of keyslots typically can't
+be read back by software.  As such, the above security goals could be achieved
+if the kernel simply erased its copy of the key(s) after programming them into
+keyslot(s) and thereafter only referred to them via keyslot number.
+
+However, that naive approach runs into a couple problems:
+
+- It limits the number of unlocked keys to the number of keyslots, which
+  typically is a small number.  In cases where there is only one encryption key
+  system-wide (e.g., a full-disk encryption key), that can be tolerable.
+  However, in general there can be many logged-in users with many different
+  keys, and/or many running applications with application-specific encrypted
+  storage areas.  This is especially true if file-based encryption (e.g.
+  fscrypt) is being used.
+
+- Inline crypto engines typically lose the contents of their keyslots if the
+  storage controller (usually UFS or eMMC) is reset.  Resetting the storage
+  controller is a standard error recovery procedure that is executed if certain
+  types of storage errors occur, and such errors can occur at any time.
+  Therefore, when inline crypto is being used, the operating system must always
+  be ready to reprogram the keyslots without user intervention.
+
+Thus, it is important for the kernel to still have a way to "remind" the
+hardware about a key, without actually having the raw key itself.
+
+Somewhat less importantly, it is also desirable that the raw keys are never
+visible to software at all, even while being initially unlocked.  This would
+ensure that a read-only compromise of system memory will never allow a key to be
+extracted to be used off-system, even if it occurs when a key is being unlocked.
+
+To solve all these problems, some vendors of inline encryption hardware have
+made their hardware support *hardware-wrapped keys*.  Hardware-wrapped keys
+are encrypted keys that can only be unwrapped (decrypted) and used by hardware
+-- either by the inline encryption hardware itself, or by a dedicated hardware
+block that can directly provision keys to the inline encryption hardware.
+
+(We refer to them as "hardware-wrapped keys" rather than simply "wrapped keys"
+to add some clarity in cases where there could be other types of wrapped keys,
+such as in file-based encryption.  Key wrapping is a commonly used technique.)
+
+The key which wraps (encrypts) hardware-wrapped keys is a hardware-internal key
+that is never exposed to software; it is either a persistent key (a "long-term
+wrapping key") or a per-boot key (an "ephemeral wrapping key").  The long-term
+wrapped form of the key is what is initially unlocked, but it is erased from
+memory as soon as it is converted into an ephemerally-wrapped key.  In-use
+hardware-wrapped keys are always ephemerally-wrapped, not long-term wrapped.
+
+As inline encryption hardware can only be used to encrypt/decrypt data on-disk,
+the hardware also includes a level of indirection; it doesn't use the unwrapped
+key directly for inline encryption, but rather derives both an inline encryption
+key and a "software secret" from it.  Software can use the "software secret" for
+tasks that can't use the inline encryption hardware, such as filenames
+encryption.  The software secret is not protected from memory compromise.
+
+Key hierarchy
+-------------
+
+Here is the key hierarchy for a hardware-wrapped key::
+
+                       Hardware-wrapped key
+                                |
+                                |
+                          <Hardware KDF>
+                                |
+                  -----------------------------
+                  |                           |
+        Inline encryption key           Software secret
+
+The components are:
+
+- *Hardware-wrapped key*: a key for the hardware's KDF (Key Derivation
+  Function), in ephemerally-wrapped form.  The key wrapping algorithm is a
+  hardware implementation detail that doesn't impact kernel operation, but a
+  strong authenticated encryption algorithm such as AES-256-GCM is recommended.
+
+- *Hardware KDF*: a KDF (Key Derivation Function) which the hardware uses to
+  derive subkeys after unwrapping the wrapped key.  The hardware's choice of KDF
+  doesn't impact kernel operation, but it does need to be known for testing
+  purposes, and it's also assumed to have at least a 256-bit security strength.
+  All known hardware uses the SP800-108 KDF in Counter Mode with AES-256-CMAC,
+  with a particular choice of labels and contexts; new hardware should use this
+  already-vetted KDF.
+
+- *Inline encryption key*: a derived key which the hardware directly provisions
+  to a keyslot of the inline encryption hardware, without exposing it to
+  software.  In all known hardware, this will always be an AES-256-XTS key.
+  However, in principle other encryption algorithms could be supported too.
+  Hardware must derive distinct subkeys for each supported encryption algorithm.
+
+- *Software secret*: a derived key which the hardware returns to software so
+  that software can use it for cryptographic tasks that can't use inline
+  encryption.  This value is cryptographically isolated from the inline
+  encryption key, i.e. knowing one doesn't reveal the other.  (The KDF ensures
+  this.)  Currently, the software secret is always 32 bytes and thus is suitable
+  for cryptographic applications that require up to a 256-bit security strength.
+  Some use cases (e.g. full-disk encryption) won't require the software secret.
+
+Example: in the case of fscrypt, the fscrypt master key (the key that protects a
+particular set of encrypted directories) is made hardware-wrapped.  The inline
+encryption key is used as the file contents encryption key, while the software
+secret (rather than the master key directly) is used to key fscrypt's KDF
+(HKDF-SHA512) to derive other subkeys such as filenames encryption keys.
+
+Note that currently this design assumes a single inline encryption key per
+hardware-wrapped key, without any further key derivation.  Thus, in the case of
+fscrypt, currently hardware-wrapped keys are only compatible with the "inline
+encryption optimized" settings, which use one file contents encryption key per
+encryption policy rather than one per file.  This design could be extended to
+make the hardware derive per-file keys using per-file nonces passed down the
+storage stack, and in fact some hardware already supports this; future work is
+planned to remove this limitation by adding the corresponding kernel support.
+
+Kernel support
+--------------
+
+The inline encryption support of the kernel's block layer ("blk-crypto") has
+been extended to support hardware-wrapped keys as an alternative to raw keys,
+when hardware support is available.  This works in the following way:
+
+- A ``key_types_supported`` field is added to the crypto capabilities in
+  ``struct blk_crypto_profile``.  This allows device drivers to declare that
+  they support raw keys, hardware-wrapped keys, or both.
+
+- ``struct blk_crypto_key`` can now contain a hardware-wrapped key as an
+  alternative to a raw key; a ``key_type`` field is added to
+  ``struct blk_crypto_config`` to distinguish between the different key types.
+  This allows users of blk-crypto to en/decrypt data using a hardware-wrapped
+  key in a way very similar to using a raw key.
+
+- A new method ``blk_crypto_ll_ops::derive_sw_secret`` is added.  Device drivers
+  that support hardware-wrapped keys must implement this method.  Users of
+  blk-crypto can call ``blk_crypto_derive_sw_secret()`` to access this method.
+
+- The programming and eviction of hardware-wrapped keys happens via
+  ``blk_crypto_ll_ops::keyslot_program`` and
+  ``blk_crypto_ll_ops::keyslot_evict``, just like it does for raw keys.  If a
+  driver supports hardware-wrapped keys, then it must handle hardware-wrapped
+  keys being passed to these methods.
+
+blk-crypto-fallback doesn't support hardware-wrapped keys.  Therefore,
+hardware-wrapped keys can only be used with actual inline encryption hardware.
+
+All the above deals with hardware-wrapped keys in ephemerally-wrapped form only.
+To get such keys in the first place, new block device ioctls have been added to
+provide a generic interface to creating and preparing such keys:
+
+- ``BLKCRYPTOIMPORTKEY`` converts a raw key to long-term wrapped form.  It takes
+  in a pointer to a ``struct blk_crypto_import_key_arg``.  The caller must set
+  ``raw_key_ptr`` and ``raw_key_size`` to the pointer and size (in bytes) of the
+  raw key to import.  On success, ``BLKCRYPTOIMPORTKEY`` returns 0 and writes
+  the resulting long-term wrapped key blob to the buffer pointed to by
+  ``lt_key_ptr``, which is of maximum size ``lt_key_size``.  It also updates
+  ``lt_key_size`` to be the actual size of the key.  On failure, it returns -1
+  and sets errno.  An errno of ``EOPNOTSUPP`` indicates that the block device
+  does not support hardware-wrapped keys.  An errno of ``EOVERFLOW`` indicates
+  that the output buffer did not have enough space for the key blob.
+
+- ``BLKCRYPTOGENERATEKEY`` is like ``BLKCRYPTOIMPORTKEY``, but it has the
+  hardware generate the key instead of importing one.  It takes in a pointer to
+  a ``struct blk_crypto_generate_key_arg``.
+
+- ``BLKCRYPTOPREPAREKEY`` converts a key from long-term wrapped form to
+  ephemerally-wrapped form.  It takes in a pointer to a ``struct
+  blk_crypto_prepare_key_arg``.  The caller must set ``lt_key_ptr`` and
+  ``lt_key_size`` to the pointer and size (in bytes) of the long-term wrapped
+  key blob to convert.  On success, ``BLKCRYPTOPREPAREKEY`` returns 0 and writes
+  the resulting ephemerally-wrapped key blob to the buffer pointed to by
+  ``eph_key_ptr``, which is of maximum size ``eph_key_size``.  It also updates
+  ``eph_key_size`` to be the actual size of the key.  On failure, it returns -1
+  and sets errno.  Errno values of ``EOPNOTSUPP`` and ``EOVERFLOW`` mean the
+  same as they do for ``BLKCRYPTOIMPORTKEY``.  An errno of ``EBADMSG`` indicates
+  that the long-term wrapped key is invalid.
+
+Userspace needs to use either ``BLKCRYPTOIMPORTKEY`` or ``BLKCRYPTOGENERATEKEY``
+once to create a key, and then ``BLKCRYPTOPREPAREKEY`` each time the key is
+unlocked and added to the kernel.  Note that these ioctls have no relevance for
+raw keys; they are only for hardware-wrapped keys.
+
+Testability
+-----------
+
+Both the hardware KDF and the inline encryption itself are well-defined
+algorithms that don't depend on any secrets other than the unwrapped key.
+Therefore, if the unwrapped key is known to software, these algorithms can be
+reproduced in software in order to verify the ciphertext that is written to disk
+by the inline encryption hardware.
+
+However, the unwrapped key will only be known to software for testing if the
+"import" functionality is used.  Proper testing is not possible in the
+"generate" case where the hardware generates the key itself.  The correct
+operation of the "generate" mode thus relies on the security and correctness of
+the hardware RNG and its use to generate the key, as well as the testing of the
+"import" mode as that should cover all parts other than the key generation.
+
+For an example of a test that verifies the ciphertext written to disk in the
+"import" mode, see the fscrypt hardware-wrapped key tests in xfstests, or
+`Android's vts_kernel_encryption_test
+<https://android.googlesource.com/platform/test/vts-testcase/kernel/+/refs/heads/main/encryption/>`_.
--- a/Documentation/block/ublk.rst
+++ b/Documentation/block/ublk.rst
@ -199,24 +199,36 @@ managing and controlling ublk devices with help of several control commands:

 - user recovery feature description

-  Two new features are added for user recovery: ``UBLK_F_USER_RECOVERY`` and
-  ``UBLK_F_USER_RECOVERY_REISSUE``.
+  Three new features are added for user recovery: ``UBLK_F_USER_RECOVERY``,
+  ``UBLK_F_USER_RECOVERY_REISSUE``, and ``UBLK_F_USER_RECOVERY_FAIL_IO``. To
+  enable recovery of ublk devices after the ublk server exits, the ublk server
+  should specify the ``UBLK_F_USER_RECOVERY`` flag when creating the device. The
+  ublk server may additionally specify at most one of
+  ``UBLK_F_USER_RECOVERY_REISSUE`` and ``UBLK_F_USER_RECOVERY_FAIL_IO`` to
+  modify how I/O is handled while the ublk server is dying/dead (this is called
+  the ``nosrv`` case in the driver code).

-  With ``UBLK_F_USER_RECOVERY`` set, after one ubq_daemon(ublk server's io
+  With just ``UBLK_F_USER_RECOVERY`` set, after one ubq_daemon(ublk server's io
  handler) is dying, ublk does not delete ``/dev/ublkb*`` during the whole
  recovery stage and ublk device ID is kept. It is ublk server's
  responsibility to recover the device context by its own knowledge.
  Requests which have not been issued to userspace are requeued. Requests
  which have been issued to userspace are aborted.

-  With ``UBLK_F_USER_RECOVERY_REISSUE`` set, after one ubq_daemon(ublk
-  server's io handler) is dying, contrary to ``UBLK_F_USER_RECOVERY``,
+  With ``UBLK_F_USER_RECOVERY_REISSUE`` additionally set, after one ubq_daemon
+  (ublk server's io handler) is dying, contrary to ``UBLK_F_USER_RECOVERY``,
  requests which have been issued to userspace are requeued and will be
  re-issued to the new process after handling ``UBLK_CMD_END_USER_RECOVERY``.
  ``UBLK_F_USER_RECOVERY_REISSUE`` is designed for backends who tolerate
  double-write since the driver may issue the same I/O request twice. It
  might be useful to a read-only FS or a VM backend.

+  With ``UBLK_F_USER_RECOVERY_FAIL_IO`` additionally set, after the ublk server
+  exits, requests which have issued to userspace are failed, as are any
+  subsequently issued requests. Applications continuously issuing I/O against
+  devices with this flag set will see a stream of I/O errors until a new ublk
+  server recovers the device.
+
 Unprivileged ublk device is supported by passing ``UBLK_F_UNPRIVILEGED_DEV``.
 Once the flag is set, all control commands can be sent by unprivileged
 user. Except for command of ``UBLK_CMD_ADD_DEV``, permission check on
--- a/Documentation/bpf/btf.rst
+++ b/Documentation/bpf/btf.rst
@ -835,7 +835,7 @@ section named by ``btf_ext_info_sec->sec_name_off``.
 See :ref:`Documentation/bpf/llvm_reloc.rst <btf-co-re-relocations>`
 for more information on CO-RE relocations.

-4.2 .BTF_ids section
+4.3 .BTF_ids section
 --------------------

 The .BTF_ids section encodes BTF ID values that are used within the kernel.
@ -896,6 +896,81 @@ and is used as a filter when resolving the BTF ID value.
 All the BTF ID lists and sets are compiled in the .BTF_ids section and
 resolved during the linking phase of kernel build by ``resolve_btfids`` tool.

+4.4 .BTF.base section
+---------------------
+Split BTF - where the .BTF section only contains types not in the associated
+base .BTF section - is an extremely efficient way to encode type information
+for kernel modules, since they generally consist of a few module-specific
+types along with a large set of shared kernel types. The former are encoded
+in split BTF, while the latter are encoded in base BTF, resulting in more
+compact representations. A type in split BTF that refers to a type in
+base BTF refers to it using its base BTF ID, and split BTF IDs start
+at last_base_BTF_ID + 1.
+
+The downside of this approach however is that this makes the split BTF
+somewhat brittle - when the base BTF changes, base BTF ID references are
+no longer valid and the split BTF itself becomes useless. The role of the
+.BTF.base section is to make split BTF more resilient for cases where
+the base BTF may change, as is the case for kernel modules not built every
+time the kernel is for example. .BTF.base contains named base types; INTs,
+FLOATs, STRUCTs, UNIONs, ENUM[64]s and FWDs. INTs and FLOATs are fully
+described in .BTF.base sections, while composite types like structs
+and unions are not fully defined - the .BTF.base type simply serves as
+a description of the type the split BTF referred to, so structs/unions
+have 0 members in the .BTF.base section. ENUM[64]s are similarly recorded
+with 0 members. Any other types are added to the split BTF. This
+distillation process then leaves us with a .BTF.base section with
+such minimal descriptions of base types and .BTF split section which refers
+to those base types. Later, we can relocate the split BTF using both the
+information stored in the .BTF.base section and the new .BTF base; the type
+information in the .BTF.base section allows us to update the split BTF
+references to point at the corresponding new base BTF IDs.
+
+BTF relocation happens on kernel module load when a kernel module has a
+.BTF.base section, and libbpf also provides a btf__relocate() API to
+accomplish this.
+
+As an example consider the following base BTF::
+
+      [1] INT 'int' size=4 bits_offset=0 nr_bits=32 encoding=SIGNED
+      [2] STRUCT 'foo' size=8 vlen=2
+              'f1' type_id=1 bits_offset=0
+              'f2' type_id=1 bits_offset=32
+
+...and associated split BTF::
+
+      [3] PTR '(anon)' type_id=2
+
+i.e. split BTF describes a pointer to struct foo { int f1; int f2 };
+
+.BTF.base will consist of::
+
+      [1] INT 'int' size=4 bits_offset=0 nr_bits=32 encoding=SIGNED
+      [2] STRUCT 'foo' size=8 vlen=0
+
+If we relocate the split BTF later using the following new base BTF::
+
+      [1] INT 'long unsigned int' size=8 bits_offset=0 nr_bits=64 encoding=(none)
+      [2] INT 'int' size=4 bits_offset=0 nr_bits=32 encoding=SIGNED
+      [3] STRUCT 'foo' size=8 vlen=2
+              'f1' type_id=2 bits_offset=0
+              'f2' type_id=2 bits_offset=32
+
+...we can use our .BTF.base description to know that the split BTF reference
+is to struct foo, and relocation results in new split BTF::
+
+      [4] PTR '(anon)' type_id=3
+
+Note that we had to update BTF ID and start BTF ID for the split BTF.
+
+So we see how .BTF.base plays the role of facilitating later relocation,
+leading to more resilient split BTF.
+
+.BTF.base sections will be generated automatically for out-of-tree kernel module
+builds - i.e. where KBUILD_EXTMOD is set (as it would be for "make M=path/2/mod"
+cases). .BTF.base generation requires pahole support for the "distilled_base"
+BTF feature; this is available in pahole v1.28 and later.
+
 5. Using BTF
 ============

--- a/Documentation/core-api/cgroup.rst
+++ b/Documentation/core-api/cgroup.rst
@ -0,0 +1,9 @@
+==================
+Cgroup Kernel APIs
+==================
+
+Device Memory Cgroup API (dmemcg)
+=================================
+.. kernel-doc:: kernel/cgroup/dmem.c
+   :export:
+
--- a/Documentation/core-api/folio_queue.rst
+++ b/Documentation/core-api/folio_queue.rst
@ -151,19 +151,16 @@ The marks can be set by::

 	void folioq_mark(struct folio_queue *folioq, unsigned int slot);
 	void folioq_mark2(struct folio_queue *folioq, unsigned int slot);
-	void folioq_mark3(struct folio_queue *folioq, unsigned int slot);

 Cleared by::

 	void folioq_unmark(struct folio_queue *folioq, unsigned int slot);
 	void folioq_unmark2(struct folio_queue *folioq, unsigned int slot);
-	void folioq_unmark3(struct folio_queue *folioq, unsigned int slot);

 And the marks can be queried by::

 	bool folioq_is_marked(const struct folio_queue *folioq, unsigned int slot);
 	bool folioq_is_marked2(const struct folio_queue *folioq, unsigned int slot);
-	bool folioq_is_marked3(const struct folio_queue *folioq, unsigned int slot);

 The marks can be used for any purpose and are not interpreted by this API.

--- a/Documentation/core-api/index.rst
+++ b/Documentation/core-api/index.rst
@ -108,6 +108,7 @@ more memory-management documentation in Documentation/mm/index.rst.
   dma-isa-lpc
   swiotlb
   mm-api
+   cgroup
   genalloc
   pin_user_pages
   boot-time-mm
--- a/Documentation/core-api/swiotlb.rst
+++ b/Documentation/core-api/swiotlb.rst
@ -295,9 +295,9 @@ slot set.

 Fourth, the io_tlb_slot array keeps track of any "padding slots" allocated to
 meet alloc_align_mask requirements described above. When
-swiotlb_tlb_map_single() allocates bounce buffer space to meet alloc_align_mask
+swiotlb_tbl_map_single() allocates bounce buffer space to meet alloc_align_mask
 requirements, it may allocate pre-padding space across zero or more slots. But
-when swiotbl_tlb_unmap_single() is called with the bounce buffer address, the
+when swiotlb_tbl_unmap_single() is called with the bounce buffer address, the
 alloc_align_mask value that governed the allocation, and therefore the
 allocation of any padding slots, is not known. The "pad_slots" field records
 the number of padding slots so that swiotlb_tbl_unmap_single() can free them.
--- a/Documentation/core-api/symbol-namespaces.rst
+++ b/Documentation/core-api/symbol-namespaces.rst
@ -41,12 +41,12 @@ entries.
 In addition to the macros EXPORT_SYMBOL() and EXPORT_SYMBOL_GPL(), that allow
 exporting of kernel symbols to the kernel symbol table, variants of these are
 available to export symbols into a certain namespace: EXPORT_SYMBOL_NS() and
-EXPORT_SYMBOL_NS_GPL(). They take one additional argument: the namespace.
-Please note that due to macro expansion that argument needs to be a
-preprocessor symbol. E.g. to export the symbol ``usb_stor_suspend`` into the
+EXPORT_SYMBOL_NS_GPL(). They take one additional argument: the namespace as a
+string constant. Note that this string must not contain whitespaces.
+E.g. to export the symbol ``usb_stor_suspend`` into the
 namespace ``USB_STORAGE``, use::

-	EXPORT_SYMBOL_NS(usb_stor_suspend, USB_STORAGE);
+	EXPORT_SYMBOL_NS(usb_stor_suspend, "USB_STORAGE");

 The corresponding ksymtab entry struct ``kernel_symbol`` will have the member
 ``namespace`` set accordingly. A symbol that is exported without a namespace will
@ -68,7 +68,7 @@ is to define the default namespace in the ``Makefile`` of the subsystem. E.g. to
 export all symbols defined in usb-common into the namespace USB_COMMON, add a
 line like this to drivers/usb/common/Makefile::

-	ccflags-y += -DDEFAULT_SYMBOL_NAMESPACE=USB_COMMON
+	ccflags-y += -DDEFAULT_SYMBOL_NAMESPACE='"USB_COMMON"'

 That will affect all EXPORT_SYMBOL() and EXPORT_SYMBOL_GPL() statements. A
 symbol exported with EXPORT_SYMBOL_NS() while this definition is present, will
@ -79,7 +79,7 @@ A second option to define the default namespace is directly in the compilation
 unit as preprocessor statement. The above example would then read::

 	#undef  DEFAULT_SYMBOL_NAMESPACE
-	#define DEFAULT_SYMBOL_NAMESPACE USB_COMMON
+	#define DEFAULT_SYMBOL_NAMESPACE "USB_COMMON"

 within the corresponding compilation unit before any EXPORT_SYMBOL macro is
 used.
@ -94,7 +94,7 @@ for the namespaces it uses symbols from. E.g. a module using the
 usb_stor_suspend symbol from above, needs to import the namespace USB_STORAGE
 using a statement like::

-	MODULE_IMPORT_NS(USB_STORAGE);
+	MODULE_IMPORT_NS("USB_STORAGE");

 This will create a ``modinfo`` tag in the module for each imported namespace.
 This has the side effect, that the imported namespaces of a module can be
--- a/Documentation/dev-tools/autofdo.rst
+++ b/Documentation/dev-tools/autofdo.rst
@ -0,0 +1,168 @@
+.. SPDX-License-Identifier: GPL-2.0
+
+===================================
+Using AutoFDO with the Linux kernel
+===================================
+
+This enables AutoFDO build support for the kernel when using
+the Clang compiler. AutoFDO (Auto-Feedback-Directed Optimization)
+is a type of profile-guided optimization (PGO) used to enhance the
+performance of binary executables. It gathers information about the
+frequency of execution of various code paths within a binary using
+hardware sampling. This data is then used to guide the compiler's
+optimization decisions, resulting in a more efficient binary. AutoFDO
+is a powerful optimization technique, and data indicates that it can
+significantly improve kernel performance. It's especially beneficial
+for workloads affected by front-end stalls.
+
+For AutoFDO builds, unlike non-FDO builds, the user must supply a
+profile. Acquiring an AutoFDO profile can be done in several ways.
+AutoFDO profiles are created by converting hardware sampling using
+the "perf" tool. It is crucial that the workload used to create these
+perf files is representative; they must exhibit runtime
+characteristics similar to the workloads that are intended to be
+optimized. Failure to do so will result in the compiler optimizing
+for the wrong objective.
+
+The AutoFDO profile often encapsulates the program's behavior. If the
+performance-critical codes are architecture-independent, the profile
+can be applied across platforms to achieve performance gains. For
+instance, using the profile generated on Intel architecture to build
+a kernel for AMD architecture can also yield performance improvements.
+
+There are two methods for acquiring a representative profile:
+(1) Sample real workloads using a production environment.
+(2) Generate the profile using a representative load test.
+When enabling the AutoFDO build configuration without providing an
+AutoFDO profile, the compiler only modifies the dwarf information in
+the kernel without impacting runtime performance. It's advisable to
+use a kernel binary built with the same AutoFDO configuration to
+collect the perf profile. While it's possible to use a kernel built
+with different options, it may result in inferior performance.
+
+One can collect profiles using AutoFDO build for the previous kernel.
+AutoFDO employs relative line numbers to match the profiles, offering
+some tolerance for source changes. This mode is commonly used in a
+production environment for profile collection.
+
+In a profile collection based on a load test, the AutoFDO collection
+process consists of the following steps:
+
+#. Initial build: The kernel is built with AutoFDO options
+   without a profile.
+
+#. Profiling: The above kernel is then run with a representative
+   workload to gather execution frequency data. This data is
+   collected using hardware sampling, via perf. AutoFDO is most
+   effective on platforms supporting advanced PMU features like
+   LBR on Intel machines.
+
+#. AutoFDO profile generation: Perf output file is converted to
+   the AutoFDO profile via offline tools.
+
+The support requires a Clang compiler LLVM 17 or later.
+
+Preparation
+===========
+
+Configure the kernel with::
+
+   CONFIG_AUTOFDO_CLANG=y
+
+Customization
+=============
+
+The default CONFIG_AUTOFDO_CLANG setting covers kernel space objects for
+AutoFDO builds. One can, however, enable or disable AutoFDO build for
+individual files and directories by adding a line similar to the following
+to the respective kernel Makefile:
+
+- For enabling a single file (e.g. foo.o) ::
+
+   AUTOFDO_PROFILE_foo.o := y
+
+- For enabling all files in one directory ::
+
+   AUTOFDO_PROFILE := y
+
+- For disabling one file ::
+
+   AUTOFDO_PROFILE_foo.o := n
+
+- For disabling all files in one directory ::
+
+   AUTOFDO_PROFILE := n
+
+Workflow
+========
+
+Here is an example workflow for AutoFDO kernel:
+
+1)  Build the kernel on the host machine with LLVM enabled,
+    for example, ::
+
+      $ make menuconfig LLVM=1
+
+    Turn on AutoFDO build config::
+
+      CONFIG_AUTOFDO_CLANG=y
+
+    With a configuration that with LLVM enabled, use the following command::
+
+      $ scripts/config -e AUTOFDO_CLANG
+
+    After getting the config, build with ::
+
+      $ make LLVM=1
+
+2) Install the kernel on the test machine.
+
+3) Run the load tests. The '-c' option in perf specifies the sample
+   event period. We suggest using a suitable prime number, like 500009,
+   for this purpose.
+
+   - For Intel platforms::
+
+      $ perf record -e BR_INST_RETIRED.NEAR_TAKEN:k -a -N -b -c <count> -o <perf_file> -- <loadtest>
+
+   - For AMD platforms:
+
+     The supported systems are: Zen3 with BRS, or Zen4 with amd_lbr_v2. To check,
+
+     For Zen3::
+
+      $ cat proc/cpuinfo | grep " brs"
+
+     For Zen4::
+
+      $ cat proc/cpuinfo | grep amd_lbr_v2
+
+     The following command generated the perf data file::
+
+      $ perf record --pfm-events RETIRED_TAKEN_BRANCH_INSTRUCTIONS:k -a -N -b -c <count> -o <perf_file> -- <loadtest>
+
+4) (Optional) Download the raw perf file to the host machine.
+
+5) To generate an AutoFDO profile, two offline tools are available:
+   create_llvm_prof and llvm_profgen. The create_llvm_prof tool is part
+   of the AutoFDO project and can be found on GitHub
+   (https://github.com/google/autofdo), version v0.30.1 or later.
+   The llvm_profgen tool is included in the LLVM compiler itself. It's
+   important to note that the version of llvm_profgen doesn't need to match
+   the version of Clang. It needs to be the LLVM 19 release of Clang
+   or later, or just from the LLVM trunk. ::
+
+      $ llvm-profgen --kernel --binary=<vmlinux> --perfdata=<perf_file> -o <profile_file>
+
+   or ::
+
+      $ create_llvm_prof --binary=<vmlinux> --profile=<perf_file> --format=extbinary --out=<profile_file>
+
+   Note that multiple AutoFDO profile files can be merged into one via::
+
+      $ llvm-profdata merge -o <profile_file> <profile_1> <profile_2> ... <profile_n>
+
+6) Rebuild the kernel using the AutoFDO profile file with the same config as step 1,
+   (Note CONFIG_AUTOFDO_CLANG needs to be enabled)::
+
+      $ make LLVM=1 CLANG_AUTOFDO_PROFILE=<profile_file>
--- a/Documentation/dev-tools/index.rst
+++ b/Documentation/dev-tools/index.rst
@ -34,6 +34,8 @@ Documentation/dev-tools/testing-overview.rst
   ktap
   checkuapi
   gpio-sloppy-logic-analyzer
+   autofdo
+   propeller


 .. only::  subproject and html
--- a/Documentation/dev-tools/kasan.rst
+++ b/Documentation/dev-tools/kasan.rst
@ -511,19 +511,14 @@ Tests
 ~~~~~

 There are KASAN tests that allow verifying that KASAN works and can detect
-certain types of memory corruptions. The tests consist of two parts:
+certain types of memory corruptions.

-1. Tests that are integrated with the KUnit Test Framework. Enabled with
-``CONFIG_KASAN_KUNIT_TEST``. These tests can be run and partially verified
+All KASAN tests are integrated with the KUnit Test Framework and can be enabled
+via ``CONFIG_KASAN_KUNIT_TEST``. The tests can be run and partially verified
 automatically in a few different ways; see the instructions below.

-2. Tests that are currently incompatible with KUnit. Enabled with
-``CONFIG_KASAN_MODULE_TEST`` and can only be run as a module. These tests can
-only be verified manually by loading the kernel module and inspecting the
-kernel log for KASAN reports.
-
-Each KUnit-compatible KASAN test prints one of multiple KASAN reports if an
-error is detected. Then the test prints its number and status.
+Each KASAN test prints one of multiple KASAN reports if an error is detected.
+Then the test prints its number and status.

 When a test passes::

@ -550,16 +545,16 @@ Or, if one of the tests failed::

        not ok 1 - kasan

-There are a few ways to run KUnit-compatible KASAN tests.
+There are a few ways to run the KASAN tests.

 1. Loadable module

-   With ``CONFIG_KUNIT`` enabled, KASAN-KUnit tests can be built as a loadable
-   module and run by loading ``kasan_test.ko`` with ``insmod`` or ``modprobe``.
+   With ``CONFIG_KUNIT`` enabled, the tests can be built as a loadable module
+   and run by loading ``kasan_test.ko`` with ``insmod`` or ``modprobe``.

 2. Built-In

-   With ``CONFIG_KUNIT`` built-in, KASAN-KUnit tests can be built-in as well.
+   With ``CONFIG_KUNIT`` built-in, the tests can be built-in as well.
   In this case, the tests will run at boot as a late-init call.

 3. Using kunit_tool
--- a/Documentation/dev-tools/kmemleak.rst
+++ b/Documentation/dev-tools/kmemleak.rst
@ -161,6 +161,7 @@ See the include/linux/kmemleak.h header for the functions prototype.
 - ``kmemleak_free_percpu``	 - notify of a percpu memory block freeing
 - ``kmemleak_update_trace``	 - update object allocation stack trace
 - ``kmemleak_not_leak``	 - mark an object as not a leak
+- ``kmemleak_transient_leak``	 - mark an object as a transient leak
 - ``kmemleak_ignore``		 - do not scan or report an object as leak
 - ``kmemleak_scan_area``	 - add scan areas inside a memory block
 - ``kmemleak_no_scan``	 - do not scan a memory block
--- a/Documentation/dev-tools/propeller.rst
+++ b/Documentation/dev-tools/propeller.rst
@ -0,0 +1,162 @@
+.. SPDX-License-Identifier: GPL-2.0
+
+=====================================
+Using Propeller with the Linux kernel
+=====================================
+
+This enables Propeller build support for the kernel when using Clang
+compiler. Propeller is a profile-guided optimization (PGO) method used
+to optimize binary executables. Like AutoFDO, it utilizes hardware
+sampling to gather information about the frequency of execution of
+different code paths within a binary. Unlike AutoFDO, this information
+is then used right before linking phase to optimize (among others)
+block layout within and across functions.
+
+A few important notes about adopting Propeller optimization:
+
+#. Although it can be used as a standalone optimization step, it is
+   strongly recommended to apply Propeller on top of AutoFDO,
+   AutoFDO+ThinLTO or Instrument FDO. The rest of this document
+   assumes this paradigm.
+
+#. Propeller uses another round of profiling on top of
+   AutoFDO/AutoFDO+ThinLTO/iFDO. The whole build process involves
+   "build-afdo - train-afdo - build-propeller - train-propeller -
+   build-optimized".
+
+#. Propeller requires LLVM 19 release or later for Clang/Clang++
+   and the linker(ld.lld).
+
+#. In addition to LLVM toolchain, Propeller requires a profiling
+   conversion tool: https://github.com/google/autofdo with a release
+   after v0.30.1: https://github.com/google/autofdo/releases/tag/v0.30.1.
+
+The Propeller optimization process involves the following steps:
+
+#. Initial building: Build the AutoFDO or AutoFDO+ThinLTO binary as
+   you would normally do, but with a set of compile-time / link-time
+   flags, so that a special metadata section is created within the
+   kernel binary. The special section is only intend to be used by the
+   profiling tool, it is not part of the runtime image, nor does it
+   change kernel run time text sections.
+
+#. Profiling: The above kernel is then run with a representative
+   workload to gather execution frequency data. This data is collected
+   using hardware sampling, via perf. Propeller is most effective on
+   platforms supporting advanced PMU features like LBR on Intel
+   machines. This step is the same as profiling the kernel for AutoFDO
+   (the exact perf parameters can be different).
+
+#. Propeller profile generation: Perf output file is converted to a
+   pair of Propeller profiles via an offline tool.
+
+#. Optimized build: Build the AutoFDO or AutoFDO+ThinLTO optimized
+   binary as you would normally do, but with a compile-time /
+   link-time flag to pick up the Propeller compile time and link time
+   profiles. This build step uses 3 profiles - the AutoFDO profile,
+   the Propeller compile-time profile and the Propeller link-time
+   profile.
+
+#. Deployment: The optimized kernel binary is deployed and used
+   in production environments, providing improved performance
+   and reduced latency.
+
+Preparation
+===========
+
+Configure the kernel with::
+
+   CONFIG_AUTOFDO_CLANG=y
+   CONFIG_PROPELLER_CLANG=y
+
+Customization
+=============
+
+The default CONFIG_PROPELLER_CLANG setting covers kernel space objects
+for Propeller builds. One can, however, enable or disable Propeller build
+for individual files and directories by adding a line similar to the
+following to the respective kernel Makefile:
+
+- For enabling a single file (e.g. foo.o)::
+
+   PROPELLER_PROFILE_foo.o := y
+
+- For enabling all files in one directory::
+
+   PROPELLER_PROFILE := y
+
+- For disabling one file::
+
+   PROPELLER_PROFILE_foo.o := n
+
+- For disabling all files in one directory::
+
+   PROPELLER__PROFILE := n
+
+
+Workflow
+========
+
+Here is an example workflow for building an AutoFDO+Propeller kernel:
+
+1) Assuming an AutoFDO profile is already collected following
+   instructions in the AutoFDO document, build the kernel on the host
+   machine, with AutoFDO and Propeller build configs ::
+
+      CONFIG_AUTOFDO_CLANG=y
+      CONFIG_PROPELLER_CLANG=y
+
+   and ::
+
+      $ make LLVM=1 CLANG_AUTOFDO_PROFILE=<autofdo-profile-name>
+
+2) Install the kernel on the test machine.
+
+3) Run the load tests. The '-c' option in perf specifies the sample
+   event period. We suggest using a suitable prime number, like 500009,
+   for this purpose.
+
+   - For Intel platforms::
+
+      $ perf record -e BR_INST_RETIRED.NEAR_TAKEN:k -a -N -b -c <count> -o <perf_file> -- <loadtest>
+
+   - For AMD platforms::
+
+      $ perf record --pfm-event RETIRED_TAKEN_BRANCH_INSTRUCTIONS:k -a -N -b -c <count> -o <perf_file> -- <loadtest>
+
+   Note you can repeat the above steps to collect multiple <perf_file>s.
+
+4) (Optional) Download the raw perf file(s) to the host machine.
+
+5) Use the create_llvm_prof tool (https://github.com/google/autofdo) to
+   generate Propeller profile. ::
+
+      $ create_llvm_prof --binary=<vmlinux> --profile=<perf_file>
+                         --format=propeller --propeller_output_module_name
+                         --out=<propeller_profile_prefix>_cc_profile.txt
+                         --propeller_symorder=<propeller_profile_prefix>_ld_profile.txt
+
+   "<propeller_profile_prefix>" can be something like "/home/user/dir/any_string".
+
+   This command generates a pair of Propeller profiles:
+   "<propeller_profile_prefix>_cc_profile.txt" and
+   "<propeller_profile_prefix>_ld_profile.txt".
+
+   If there are more than 1 perf_file collected in the previous step,
+   you can create a temp list file "<perf_file_list>" with each line
+   containing one perf file name and run::
+
+      $ create_llvm_prof --binary=<vmlinux> --profile=@<perf_file_list>
+                         --format=propeller --propeller_output_module_name
+                         --out=<propeller_profile_prefix>_cc_profile.txt
+                         --propeller_symorder=<propeller_profile_prefix>_ld_profile.txt
+
+6) Rebuild the kernel using the AutoFDO and Propeller
+   profiles. ::
+
+      CONFIG_AUTOFDO_CLANG=y
+      CONFIG_PROPELLER_CLANG=y
+
+   and ::
+
+      $ make LLVM=1 CLANG_AUTOFDO_PROFILE=<profile_file> CLANG_PROPELLER_PROFILE_PREFIX=<propeller_profile_prefix>
--- a/Documentation/devicetree/bindings/ata/ceva,ahci-1v84.yaml
+++ b/Documentation/devicetree/bindings/ata/ceva,ahci-1v84.yaml
@ -7,7 +7,6 @@ $schema: http://devicetree.org/meta-schemas/core.yaml#
 title: Ceva AHCI SATA Controller

 maintainers:
-  - Mubin Sayyed <mubin.sayyed@amd.com>
  - Radhey Shyam Pandey <radhey.shyam.pandey@amd.com>

 description: |
--- a/Documentation/devicetree/bindings/connector/usb-connector.yaml
+++ b/Documentation/devicetree/bindings/connector/usb-connector.yaml
@ -253,6 +253,53 @@ properties:

    additionalProperties: false

+  sink-wait-cap-time-ms:
+    description: Represents the max time in ms that USB Type-C port (in sink
+      role) should wait for the port partner (source role) to send source caps.
+      SinkWaitCap timer starts when port in sink role attaches to the source.
+      This timer will stop when sink receives PD source cap advertisement before
+      timeout in which case it'll move to capability negotiation stage. A
+      timeout leads to a hard reset message by the port.
+    minimum: 310
+    maximum: 620
+    default: 310
+
+  ps-source-off-time-ms:
+    description: Represents the max time in ms that a DRP in source role should
+      take to turn off power after the PsSourceOff timer starts. PsSourceOff
+      timer starts when a sink's PHY layer receives EOP of the GoodCRC message
+      (corresponding to an Accept message sent in response to a PR_Swap or a
+      FR_Swap request). This timer stops when last bit of GoodCRC EOP
+      corresponding to the received PS_RDY message is transmitted by the PHY
+      layer. A timeout shall lead to error recovery in the type-c port.
+    minimum: 750
+    maximum: 920
+    default: 920
+
+  cc-debounce-time-ms:
+    description: Represents the max time in ms that a port shall wait to
+      determine if it's attached to a partner.
+    minimum: 100
+    maximum: 200
+    default: 200
+
+  sink-bc12-completion-time-ms:
+    description: Represents the max time in ms that a port in sink role takes
+      to complete Battery Charger (BC1.2) Detection. BC1.2 detection is a
+      hardware mechanism, which in some TCPC implementations, can run in
+      parallel once the Type-C connection state machine reaches the "potential
+      connect as sink" state. In TCPCs where this causes delays to respond to
+      the incoming PD messages, sink-bc12-completion-time-ms is used to delay
+      PD negotiation till BC1.2 detection completes.
+    default: 0
+
+  pd-revision:
+    description: Specifies the maximum USB PD revision and version supported by
+      the connector. This property is specified in the following order;
+      <revision_major, revision_minor, version_major, version_minor>.
+    $ref: /schemas/types.yaml#/definitions/uint8-array
+    maxItems: 4
+
 dependencies:
  sink-vdos-v1: [ sink-vdos ]
  sink-vdos: [ sink-vdos-v1 ]
@ -380,7 +427,7 @@ examples:
    };

  # USB-C connector attached to a typec port controller(ptn5110), which has
-  # power delivery support and enables drp.
+  # power delivery support, explicitly defines time properties and enables drp.
  - |
    #include <dt-bindings/usb/pd.h>
    typec: ptn5110 {
@ -393,6 +440,10 @@ examples:
            sink-pdos = <PDO_FIXED(5000, 2000, PDO_FIXED_USB_COMM)
                         PDO_VAR(5000, 12000, 2000)>;
            op-sink-microwatt = <10000000>;
+            sink-wait-cap-time-ms = <465>;
+            ps-source-off-time-ms = <835>;
+            cc-debounce-time-ms = <101>;
+            sink-bc12-completion-time-ms = <500>;
        };
    };

--- a/Documentation/devicetree/bindings/gpio/xlnx,zynqmp-gpio-modepin.yaml
+++ b/Documentation/devicetree/bindings/gpio/xlnx,zynqmp-gpio-modepin.yaml
@ -12,7 +12,6 @@ description:
  PS_MODE). Every pin can be configured as input/output.

 maintainers:
-  - Mubin Sayyed <mubin.sayyed@amd.com>
  - Radhey Shyam Pandey <radhey.shyam.pandey@amd.com>

 properties:
--- a/Documentation/devicetree/bindings/iommu/arm,smmu.yaml
+++ b/Documentation/devicetree/bindings/iommu/arm,smmu.yaml
@ -36,10 +36,12 @@ properties:
        items:
          - enum:
              - qcom,qcm2290-smmu-500
+              - qcom,qcs615-smmu-500
              - qcom,qcs8300-smmu-500
              - qcom,qdu1000-smmu-500
              - qcom,sa8255p-smmu-500
              - qcom,sa8775p-smmu-500
+              - qcom,sar2130p-smmu-500
              - qcom,sc7180-smmu-500
              - qcom,sc7280-smmu-500
              - qcom,sc8180x-smmu-500
@ -88,6 +90,7 @@ properties:
              - qcom,qcm2290-smmu-500
              - qcom,sa8255p-smmu-500
              - qcom,sa8775p-smmu-500
+              - qcom,sar2130p-smmu-500
              - qcom,sc7280-smmu-500
              - qcom,sc8180x-smmu-500
              - qcom,sc8280xp-smmu-500
@ -524,6 +527,7 @@ allOf:
        compatible:
          items:
            - enum:
+                - qcom,sar2130p-smmu-500
                - qcom,sm8550-smmu-500
                - qcom,sm8650-smmu-500
                - qcom,x1e80100-smmu-500
@ -555,6 +559,7 @@ allOf:
              - cavium,smmu-v2
              - marvell,ap806-smmu-500
              - nvidia,smmu-500
+              - qcom,qcs615-smmu-500
              - qcom,qcs8300-smmu-500
              - qcom,qdu1000-smmu-500
              - qcom,sa8255p-smmu-500
--- a/Documentation/devicetree/bindings/media/video-interfaces.yaml
+++ b/Documentation/devicetree/bindings/media/video-interfaces.yaml
@ -210,6 +210,27 @@ properties:
      lane-polarities property is omitted, the value must be interpreted as 0
      (normal). This property is valid for serial busses only.

+  line-orders:
+    $ref: /schemas/types.yaml#/definitions/uint32-array
+    minItems: 1
+    maxItems: 8
+    items:
+      enum:
+        - 0 # ABC
+        - 1 # ACB
+        - 2 # BAC
+        - 3 # BCA
+        - 4 # CAB
+        - 5 # CBA
+    description:
+      An array of line orders of the CSI-2 C-PHY data lanes. The order of the
+      lanes are the same as in data-lanes property. Valid values are 0-5 as
+      defined in the MIPI Discovery and Configuration (DisCo) Specification for
+      Imaging. The length of the array must be the same length as the
+      data-lanes property. If the line-orders property is omitted, the value
+      shall be interpreted as 0 (ABC). This property is valid for CSI-2 C-PHY
+      busses only.
+
  strobe:
    $ref: /schemas/types.yaml#/definitions/uint32
    enum: [ 0, 1 ]
--- a/Documentation/devicetree/bindings/mfd/motorola-cpcap.txt
+++ b/Documentation/devicetree/bindings/mfd/motorola-cpcap.txt
@ -31,6 +31,10 @@ node must be named "audio-codec".
 Required properties for the audio-codec subnode:

 - #sound-dai-cells = <1>;
+- interrupts		: should contain jack detection interrupts, with headset
+			  detect interrupt matching "hs" and microphone bias 2
+			  detect interrupt matching "mb2" in interrupt-names.
+- interrupt-names	: Contains "hs", "mb2"

 The audio-codec provides two DAIs. The first one is connected to the
 Stereo HiFi DAC and the second one is connected to the Voice DAC.
@ -52,6 +56,8 @@ Example:

 		audio-codec {
 			#sound-dai-cells = <1>;
+			interrupts-extended = <&cpcap 9 0>, <&cpcap 10 0>;
+			interrupt-names = "hs", "mb2";

 			/* HiFi */
 			port@0 {
--- a/Documentation/devicetree/bindings/mmc/mmc-card.yaml
+++ b/Documentation/devicetree/bindings/mmc/mmc-card.yaml
@ -13,6 +13,10 @@ description: |
  This documents describes the devicetree bindings for a mmc-host controller
  child node describing a mmc-card / an eMMC.

+  It's possible to define a fixed partition table for an eMMC for the user
+  partition, the 2 BOOT partition (boot1/2) and the 4 GP (gp1/2/3/4) if supported
+  by the eMMC.
+
 properties:
  compatible:
    const: mmc-card
@ -26,6 +30,24 @@ properties:
      Use this to indicate that the mmc-card has a broken hpi
      implementation, and that hpi should not be used.

+patternProperties:
+  "^partitions(-boot[12]|-gp[14])?$":
+    $ref: /schemas/mtd/partitions/partitions.yaml
+
+    patternProperties:
+      "^partition@[0-9a-f]+$":
+        $ref: /schemas/mtd/partitions/partition.yaml
+
+        properties:
+          reg:
+            description: Must be multiple of 512 as it's converted
+              internally from bytes to SECTOR_SIZE (512 bytes)
+
+        required:
+          - reg
+
+        unevaluatedProperties: false
+
 required:
  - compatible
  - reg
@ -42,6 +64,36 @@ examples:
            compatible = "mmc-card";
            reg = <0>;
            broken-hpi;
+
+            partitions {
+                compatible = "fixed-partitions";
+
+                #address-cells = <1>;
+                #size-cells = <1>;
+
+                partition@0 {
+                    label = "kernel"; /* Kernel */
+                    reg = <0x0 0x2000000>; /* 32 MB */
+                };
+
+                partition@2000000 {
+                    label = "rootfs";
+                    reg = <0x2000000 0x40000000>; /* 1GB */
+                };
+            };
+
+            partitions-boot1 {
+                compatible = "fixed-partitions";
+
+                #address-cells = <1>;
+                #size-cells = <1>;
+
+                partition@0 {
+                    label = "bl";
+                    reg = <0x0 0x2000000>; /* 32MB */
+                    read-only;
+                };
+            };
        };
    };

--- a/Documentation/devicetree/bindings/net/aeonsemi,as21xxx.yaml
+++ b/Documentation/devicetree/bindings/net/aeonsemi,as21xxx.yaml
@ -0,0 +1,122 @@
+# SPDX-License-Identifier: (GPL-2.0-only OR BSD-2-Clause)
+%YAML 1.2
+---
+$id: http://devicetree.org/schemas/net/aeonsemi,as21xxx.yaml#
+$schema: http://devicetree.org/meta-schemas/core.yaml#
+
+title: Aeonsemi AS21XXX Ethernet PHY
+
+maintainers:
+  - Christian Marangi <ansuelsmth@gmail.com>
+
+description: |
+  Aeonsemi AS21xxx Ethernet PHYs requires a firmware to be loaded to actually
+  work. The same firmware is compatible with various PHYs of the same family.
+
+  A PHY with not firmware loaded will be exposed on the MDIO bus with ID
+  0x7500 0x7500 or 0x7500 0x9410 on C45 registers.
+
+  This can be done and is implemented by OEM in 2 different way:
+    - Attached SPI flash directly to the PHY with the firmware. The PHY
+      will self load the firmware in the presence of this configuration.
+    - Manually provided firmware loaded from a file in the filesystem.
+
+  Each PHY can support up to 5 LEDs.
+
+  AS2xxx PHY Name logic:
+
+  AS21x1xxB1
+      ^ ^^
+      | |J: Supports SyncE/PTP
+      | |P: No SyncE/PTP support
+      | 1: Supports 2nd Serdes
+      | 2: Not 2nd Serdes support
+      0: 10G, 5G, 2.5G
+      5: 5G, 2.5G
+      2: 2.5G
+
+allOf:
+  - $ref: ethernet-phy.yaml#
+
+select:
+  properties:
+    compatible:
+      contains:
+        enum:
+          - ethernet-phy-id7500.9410
+          - ethernet-phy-id7500.9402
+          - ethernet-phy-id7500.9412
+          - ethernet-phy-id7500.9422
+          - ethernet-phy-id7500.9432
+          - ethernet-phy-id7500.9442
+          - ethernet-phy-id7500.9452
+          - ethernet-phy-id7500.9462
+          - ethernet-phy-id7500.9472
+          - ethernet-phy-id7500.9482
+          - ethernet-phy-id7500.9492
+  required:
+    - compatible
+
+properties:
+  reg:
+    maxItems: 1
+
+  firmware-name:
+    description: specify the name of PHY firmware to load
+    maxItems: 1
+
+required:
+  - compatible
+  - reg
+
+if:
+  properties:
+    compatible:
+      contains:
+        const: ethernet-phy-id7500.9410
+then:
+  required:
+    - firmware-name
+else:
+  properties:
+    firmware-name: false
+
+unevaluatedProperties: false
+
+examples:
+  - |
+    #include <dt-bindings/leds/common.h>
+
+    mdio {
+        #address-cells = <1>;
+        #size-cells = <0>;
+
+        ethernet-phy@1f {
+            compatible = "ethernet-phy-id7500.9410",
+                         "ethernet-phy-ieee802.3-c45";
+
+            reg = <31>;
+            firmware-name = "as21x1x_fw.bin";
+
+            leds {
+                #address-cells = <1>;
+                #size-cells = <0>;
+
+                led@0 {
+                    reg = <0>;
+                    color = <LED_COLOR_ID_GREEN>;
+                    function = LED_FUNCTION_LAN;
+                    function-enumerator = <0>;
+                    default-state = "keep";
+                };
+
+                led@1 {
+                    reg = <1>;
+                    color = <LED_COLOR_ID_GREEN>;
+                    function = LED_FUNCTION_LAN;
+                    function-enumerator = <1>;
+                    default-state = "keep";
+                };
+            };
+        };
+    };
--- a/Documentation/devicetree/bindings/net/bluetooth/qualcomm-bluetooth.yaml
+++ b/Documentation/devicetree/bindings/net/bluetooth/qualcomm-bluetooth.yaml
@ -19,6 +19,7 @@ properties:
      - qcom,qca2066-bt
      - qcom,qca6174-bt
      - qcom,qca9377-bt
+      - qcom,wcn3950-bt
      - qcom,wcn3988-bt
      - qcom,wcn3990-bt
      - qcom,wcn3991-bt
@ -101,7 +102,10 @@ properties:
  max-speed: true

  firmware-name:
-    description: specify the name of nvm firmware to load
+    minItems: 1
+    items:
+      - description: specify the name of nvm firmware to load
+      - description: specify the name of rampatch firmware to load

  local-bd-address: true

@ -135,6 +139,7 @@ allOf:
        compatible:
          contains:
            enum:
+              - qcom,wcn3950-bt
              - qcom,wcn3988-bt
              - qcom,wcn3990-bt
              - qcom,wcn3991-bt
@ -154,16 +159,11 @@ allOf:
              - qcom,wcn6750-bt
    then:
      required:
-        - enable-gpios
-        - swctrl-gpios
-        - vddio-supply
        - vddaon-supply
-        - vddbtcxmx-supply
        - vddrfacmn-supply
        - vddrfa0p8-supply
        - vddrfa1p7-supply
        - vddrfa1p2-supply
-        - vddasd-supply
  - if:
      properties:
        compatible:
--- a/Documentation/devicetree/bindings/net/ethernet-phy.yaml
+++ b/Documentation/devicetree/bindings/net/ethernet-phy.yaml
@ -158,6 +158,27 @@ properties:
      Mark the corresponding energy efficient ethernet mode as
      broken and request the ethernet to stop advertising it.

+  timing-role:
+    $ref: /schemas/types.yaml#/definitions/string
+    enum:
+      - forced-master
+      - forced-slave
+      - preferred-master
+      - preferred-slave
+    description: |
+      Specifies the timing role of the PHY in the network link. This property is
+      required for setups where the role must be explicitly assigned via the
+      device tree due to limitations in hardware strapping or incorrect strap
+      configurations.
+      It is applicable to Single Pair Ethernet (1000/100/10Base-T1) and other
+      PHY types, including 1000Base-T, where it controls whether the PHY should
+      be a master (clock source) or a slave (clock receiver).
+
+      - 'forced-master': The PHY is forced to operate as a master.
+      - 'forced-slave': The PHY is forced to operate as a slave.
+      - 'preferred-master': Prefer the PHY to be master but allow negotiation.
+      - 'preferred-slave': Prefer the PHY to be slave but allow negotiation.
+
  pses:
    $ref: /schemas/types.yaml#/definitions/phandle-array
    maxItems: 1
@ -211,6 +232,22 @@ properties:
      PHY's that have configurable TX internal delays. If this property is
      present then the PHY applies the TX delay.

+  tx-amplitude-100base-tx-percent:
+    description:
+      Transmit amplitude gain applied for 100BASE-TX. 100% matches 2V
+      peak-to-peak specified in ANSI X3.263. When omitted, the PHYs default
+      will be left as is.
+
+  mac-termination-ohms:
+    maximum: 200
+    description:
+      The xMII signals need series termination on the driver side to match both
+      the output driver impedance and the line characteristic impedance, to
+      prevent reflections and EMI problems. Select a resistance value which is
+      supported by the builtin resistors of the PHY, otherwise the resistors may
+      have to be placed on board. When omitted, the PHYs default will be left as
+      is.
+
  leds:
    type: object

--- a/Documentation/devicetree/bindings/net/marvell,aquantia.yaml
+++ b/Documentation/devicetree/bindings/net/marvell,aquantia.yaml
@ -48,6 +48,12 @@ properties:
  firmware-name:
    description: specify the name of PHY firmware to load

+  marvell,mdi-cfg-order:
+    $ref: /schemas/types.yaml#/definitions/uint32
+    enum: [0, 1]
+    description:
+      force normal (0) or reverse (1) order of MDI pairs, overriding MDI_CFG bootstrap pin.
+
  nvmem-cells:
    description: phandle to the firmware nvmem cell
    maxItems: 1
--- a/Documentation/devicetree/bindings/net/nxp,tja11xx.yaml
+++ b/Documentation/devicetree/bindings/net/nxp,tja11xx.yaml
@ -62,6 +62,22 @@ allOf:
            reference clock output when RMII mode enabled.
            Only supported on TJA1100 and TJA1101.

+  - if:
+      properties:
+        compatible:
+          contains:
+            enum:
+              - ethernet-phy-id001b.b010
+              - ethernet-phy-id001b.b013
+              - ethernet-phy-id001b.b030
+              - ethernet-phy-id001b.b031
+
+    then:
+      properties:
+        nxp,rmii-refclk-out:
+          type: boolean
+          description: Enable 50MHz RMII reference clock output on REF_CLK pin.
+
 patternProperties:
  "^ethernet-phy@[0-9a-f]+$":
    type: object
--- a/Documentation/devicetree/bindings/net/rfkill-gpio.yaml
+++ b/Documentation/devicetree/bindings/net/rfkill-gpio.yaml
@ -32,6 +32,10 @@ properties:
  shutdown-gpios:
    maxItems: 1

+  default-blocked:
+    $ref: /schemas/types.yaml#/definitions/flag
+    description: configure rfkill state as blocked at boot
+
 required:
  - compatible
  - radio-type
@ -48,4 +52,5 @@ examples:
        label = "rfkill-pcie-wlan";
        radio-type = "wlan";
        shutdown-gpios = <&gpio2 25 GPIO_ACTIVE_HIGH>;
+        default-blocked;
    };
--- a/Documentation/devicetree/bindings/net/ti,dp83822.yaml
+++ b/Documentation/devicetree/bindings/net/ti,dp83822.yaml
@ -96,6 +96,35 @@ properties:
      - master
      - slave

+  ti,gpio2-clk-out:
+    description: |
+       DP83822 PHY only.
+       The GPIO2 pin on the DP83822 can be configured as clock output. When
+       omitted, the PHY's default will be left as is.
+
+       - 'mac-if': In MII mode the clock frequency is 25-MHz, in RMII Mode the
+         clock frequency is 50-MHz and in RGMII Mode the clock frequency is
+         25-MHz.
+       - 'xi': XI clock(pass-through clock from XI pin).
+       - 'int-ref': Internal reference clock 25-MHz.
+       - 'rmii-master-mode-ref': RMII master mode reference clock 50-MHz. RMII
+         master mode reference clock is identical to MAC IF clock in RMII master
+         mode.
+       - 'free-running': Free running clock 125-MHz.
+       - 'recovered': Recovered clock is a 125-MHz recovered clock from a
+         connected link partner.
+    $ref: /schemas/types.yaml#/definitions/string
+    enum:
+      - mac-if
+      - xi
+      - int-ref
+      - rmii-master-mode-ref
+      - free-running
+      - recovered
+
+  mac-termination-ohms:
+    enum: [43, 44, 46, 48, 50, 53, 55, 58, 61, 65, 69, 73, 78, 84, 91, 99]
+
 required:
  - reg

@ -110,6 +139,8 @@ examples:
        reg = <0>;
        rx-internal-delay-ps = <1>;
        tx-internal-delay-ps = <1>;
+        ti,gpio2-clk-out = "xi";
+        mac-termination-ohms = <43>;
      };
    };

--- a/Documentation/devicetree/bindings/net/wireless/brcm,bcm4329-fmac.yaml
+++ b/Documentation/devicetree/bindings/net/wireless/brcm,bcm4329-fmac.yaml
@ -53,6 +53,7 @@ properties:
          - pci14e4,4488  # BCM4377
          - pci14e4,4425  # BCM4378
          - pci14e4,4433  # BCM4387
+          - pci14e4,449d  # BCM43752

  reg:
    description: SDIO function number for the device (for most cases
@ -121,6 +122,14 @@ properties:
      NVRAM. This would normally be filled in by the bootloader from platform
      configuration data.

+  clocks:
+    items:
+      - description: External Low Power Clock input (32.768KHz)
+
+  clock-names:
+    items:
+      - const: lpo
+
 required:
  - compatible
  - reg
--- a/Documentation/devicetree/bindings/net/wireless/qcom,ath10k.yaml
+++ b/Documentation/devicetree/bindings/net/wireless/qcom,ath10k.yaml
@ -93,20 +93,41 @@ properties:

  ieee80211-freq-limit: true

-  qcom,ath10k-calibration-data:
+  qcom,calibration-data:
    $ref: /schemas/types.yaml#/definitions/uint8-array
    description:
      Calibration data + board-specific data as a byte array. The length
      can vary between hardware versions.

-  qcom,ath10k-calibration-variant:
+  qcom,ath10k-calibration-data:
+    $ref: /schemas/types.yaml#/definitions/uint8-array
+    deprecated: true
+    description:
+      Calibration data + board-specific data as a byte array. The length
+      can vary between hardware versions.
+
+  qcom,calibration-variant:
    $ref: /schemas/types.yaml#/definitions/string
    description:
      Unique variant identifier of the calibration data in board-2.bin
      for designs with colliding bus and device specific ids

+  qcom,ath10k-calibration-variant:
+    $ref: /schemas/types.yaml#/definitions/string
+    deprecated: true
+    description:
+      Unique variant identifier of the calibration data in board-2.bin
+      for designs with colliding bus and device specific ids
+
+  qcom,pre-calibration-data:
+    $ref: /schemas/types.yaml#/definitions/uint8-array
+    description:
+      Pre-calibration data as a byte array. The length can vary between
+      hardware versions.
+
  qcom,ath10k-pre-calibration-data:
    $ref: /schemas/types.yaml#/definitions/uint8-array
+    deprecated: true
    description:
      Pre-calibration data as a byte array. The length can vary between
      hardware versions.
--- a/Documentation/devicetree/bindings/net/wireless/qcom,ath11k-pci.yaml
+++ b/Documentation/devicetree/bindings/net/wireless/qcom,ath11k-pci.yaml
@ -23,8 +23,15 @@ properties:
  reg:
    maxItems: 1

+  qcom,calibration-variant:
+    $ref: /schemas/types.yaml#/definitions/string
+    description: |
+      string to uniquely identify variant of the calibration data for designs
+      with colliding bus and device ids
+
  qcom,ath11k-calibration-variant:
    $ref: /schemas/types.yaml#/definitions/string
+    deprecated: true
    description: |
      string to uniquely identify variant of the calibration data for designs
      with colliding bus and device ids
@ -50,6 +57,9 @@ properties:
  vddrfa1p7-supply:
    description: VDD_RFA_1P7 supply regulator handle

+  vddrfa1p8-supply:
+    description: VDD_RFA_1P8 supply regulator handle
+
  vddpcie0p9-supply:
    description: VDD_PCIE_0P9 supply regulator handle

@ -77,6 +87,22 @@ allOf:
        - vddrfa1p7-supply
        - vddpcie0p9-supply
        - vddpcie1p8-supply
+  - if:
+      properties:
+        compatible:
+          contains:
+            const: pci17cb,1103
+    then:
+      required:
+        - vddrfacmn-supply
+        - vddaon-supply
+        - vddwlcx-supply
+        - vddwlmx-supply
+        - vddrfa0p8-supply
+        - vddrfa1p2-supply
+        - vddrfa1p8-supply
+        - vddpcie0p9-supply
+        - vddpcie1p8-supply

 additionalProperties: false

@ -99,7 +125,17 @@ examples:
                compatible = "pci17cb,1103";
                reg = <0x10000 0x0 0x0 0x0 0x0>;

-                qcom,ath11k-calibration-variant = "LE_X13S";
+                vddrfacmn-supply = <&vreg_pmu_rfa_cmn_0p8>;
+                vddaon-supply = <&vreg_pmu_aon_0p8>;
+                vddwlcx-supply = <&vreg_pmu_wlcx_0p8>;
+                vddwlmx-supply = <&vreg_pmu_wlmx_0p8>;
+                vddpcie1p8-supply = <&vreg_pmu_pcie_1p8>;
+                vddpcie0p9-supply = <&vreg_pmu_pcie_0p9>;
+                vddrfa0p8-supply = <&vreg_pmu_rfa_0p8>;
+                vddrfa1p2-supply = <&vreg_pmu_rfa_1p2>;
+                vddrfa1p8-supply = <&vreg_pmu_rfa_1p7>;
+
+                qcom,calibration-variant = "LE_X13S";
            };
        };
    };
--- a/Documentation/devicetree/bindings/net/wireless/qcom,ath11k.yaml
+++ b/Documentation/devicetree/bindings/net/wireless/qcom,ath11k.yaml
@ -42,8 +42,15 @@ properties:
        * reg
        * reg-names

+  qcom,calibration-variant:
+    $ref: /schemas/types.yaml#/definitions/string
+    description:
+      string to uniquely identify variant of the calibration data in the
+      board-2.bin for designs with colliding bus and device specific ids
+
  qcom,ath11k-calibration-variant:
    $ref: /schemas/types.yaml#/definitions/string
+    deprecated: true
    description:
      string to uniquely identify variant of the calibration data in the
      board-2.bin for designs with colliding bus and device specific ids
--- a/Documentation/devicetree/bindings/net/wireless/qcom,ath12k-wsi.yaml
+++ b/Documentation/devicetree/bindings/net/wireless/qcom,ath12k-wsi.yaml
@ -0,0 +1,211 @@
+# SPDX-License-Identifier: (GPL-2.0-only OR BSD-2-Clause)
+# Copyright (c) 2024 Qualcomm Innovation Center, Inc. All rights reserved.
+%YAML 1.2
+---
+$id: http://devicetree.org/schemas/net/wireless/qcom,ath12k-wsi.yaml#
+$schema: http://devicetree.org/meta-schemas/core.yaml#
+
+title: Qualcomm Technologies ath12k wireless devices (PCIe) with WSI interface
+
+maintainers:
+  - Jeff Johnson <jjohnson@kernel.org>
+  - Kalle Valo <kvalo@kernel.org>
+
+description: |
+  Qualcomm Technologies IEEE 802.11be PCIe devices with WSI interface.
+
+  The ath12k devices (QCN9274) feature WSI support. WSI stands for
+  WLAN Serial Interface. It is used for the exchange of specific
+  control information across radios based on the doorbell mechanism.
+  This WSI connection is essential to exchange control information
+  among these devices.
+
+  The WSI interface includes TX and RX ports, which are used to connect
+  multiple WSI-supported devices together, forming a WSI group.
+
+  Diagram to represent one WSI connection (one WSI group) among
+  three devices.
+
+           +-------+        +-------+        +-------+
+           | pcie1 |        | pcie2 |        | pcie3 |
+           |       |        |       |        |       |
+    +----->|  wsi  |------->|  wsi  |------->|  wsi  |-----+
+    |      | grp 0 |        | grp 0 |        | grp 0 |     |
+    |      +-------+        +-------+        +-------+     |
+    +------------------------------------------------------+
+
+  Diagram to represent two WSI connections (two separate WSI groups)
+  among four devices.
+
+       +-------+    +-------+          +-------+    +-------+
+       | pcie0 |    | pcie1 |          | pcie2 |    | pcie3 |
+       |       |    |       |          |       |    |       |
+   +-->|  wsi  |--->|  wsi  |--+   +-->|  wsi  |--->|  wsi  |--+
+   |   | grp 0 |    | grp 0 |  |   |   | grp 1 |    | grp 1 |  |
+   |   +-------+    +-------+  |   |   +-------+    +-------+  |
+   +---------------------------+   +---------------------------+
+
+properties:
+  compatible:
+    enum:
+      - pci17cb,1109  # QCN9274
+
+  reg:
+    maxItems: 1
+
+  qcom,calibration-variant:
+    $ref: /schemas/types.yaml#/definitions/string
+    description:
+      String to uniquely identify variant of the calibration data for designs
+      with colliding bus and device ids
+
+  qcom,ath12k-calibration-variant:
+    $ref: /schemas/types.yaml#/definitions/string
+    deprecated: true
+    description:
+      String to uniquely identify variant of the calibration data for designs
+      with colliding bus and device ids
+
+  qcom,wsi-controller:
+    $ref: /schemas/types.yaml#/definitions/flag
+    description:
+      The WSI controller device in the WSI group aids (is capable) to
+      synchronize the Timing Synchronization Function (TSF) clock across
+      all devices in the WSI group.
+
+  ports:
+    $ref: /schemas/graph.yaml#/properties/ports
+    properties:
+      port@0:
+        $ref: /schemas/graph.yaml#/properties/port
+        description:
+          This is the TX port of WSI interface. It is attached to the RX
+          port of the next device in the WSI connection.
+
+      port@1:
+        $ref: /schemas/graph.yaml#/properties/port
+        description:
+          This is the RX port of WSI interface. It is attached to the TX
+          port of the previous device in the WSI connection.
+
+required:
+  - compatible
+  - reg
+
+additionalProperties: false
+
+examples:
+  - |
+    pcie {
+        #address-cells = <3>;
+        #size-cells = <2>;
+
+        pcie@0 {
+            device_type = "pci";
+            reg = <0x0 0x0 0x0 0x0 0x0>;
+            #address-cells = <3>;
+            #size-cells = <2>;
+            ranges;
+
+            wifi@0 {
+                compatible = "pci17cb,1109";
+                reg = <0x0 0x0 0x0 0x0 0x0>;
+
+                qcom,calibration-variant = "RDP433_1";
+
+                ports {
+                    #address-cells = <1>;
+                    #size-cells = <0>;
+
+                    port@0 {
+                        reg = <0>;
+
+                        wifi1_wsi_tx: endpoint {
+                            remote-endpoint = <&wifi2_wsi_rx>;
+                        };
+                    };
+
+                    port@1 {
+                        reg = <1>;
+
+                        wifi1_wsi_rx: endpoint {
+                            remote-endpoint = <&wifi3_wsi_tx>;
+                        };
+                    };
+                };
+            };
+        };
+
+        pcie@1 {
+            device_type = "pci";
+            reg = <0x0 0x0 0x1 0x0 0x0>;
+            #address-cells = <3>;
+            #size-cells = <2>;
+            ranges;
+
+            wifi@0 {
+                compatible = "pci17cb,1109";
+                reg = <0x0 0x0 0x0 0x0 0x0>;
+
+                qcom,calibration-variant = "RDP433_2";
+                qcom,wsi-controller;
+
+                ports {
+                    #address-cells = <1>;
+                    #size-cells = <0>;
+
+                    port@0 {
+                        reg = <0>;
+
+                        wifi2_wsi_tx: endpoint {
+                            remote-endpoint = <&wifi3_wsi_rx>;
+                        };
+                    };
+
+                    port@1 {
+                        reg = <1>;
+
+                        wifi2_wsi_rx: endpoint {
+                            remote-endpoint = <&wifi1_wsi_tx>;
+                        };
+                    };
+                };
+            };
+        };
+
+        pcie@2 {
+            device_type = "pci";
+            reg = <0x0 0x0 0x2 0x0 0x0>;
+            #address-cells = <3>;
+            #size-cells = <2>;
+            ranges;
+
+            wifi@0 {
+                compatible = "pci17cb,1109";
+                reg = <0x0 0x0 0x0 0x0 0x0>;
+
+                qcom,calibration-variant = "RDP433_3";
+
+                ports {
+                    #address-cells = <1>;
+                    #size-cells = <0>;
+
+                    port@0 {
+                        reg = <0>;
+
+                        wifi3_wsi_tx: endpoint {
+                            remote-endpoint = <&wifi1_wsi_rx>;
+                        };
+                    };
+
+                    port@1 {
+                        reg = <1>;
+
+                        wifi3_wsi_rx: endpoint {
+                            remote-endpoint = <&wifi2_wsi_tx>;
+                        };
+                    };
+                };
+            };
+        };
+    };
--- a/Documentation/devicetree/bindings/pci/fsl,imx6q-pcie-common.yaml
+++ b/Documentation/devicetree/bindings/pci/fsl,imx6q-pcie-common.yaml
@ -17,11 +17,11 @@ description:
 properties:
  clocks:
    minItems: 3
-    maxItems: 4
+    maxItems: 5

  clock-names:
    minItems: 3
-    maxItems: 4
+    maxItems: 5

  num-lanes:
    const: 1
--- a/Documentation/devicetree/bindings/pci/fsl,imx6q-pcie-ep.yaml
+++ b/Documentation/devicetree/bindings/pci/fsl,imx6q-pcie-ep.yaml
@ -22,6 +22,7 @@ properties:
      - fsl,imx8mm-pcie-ep
      - fsl,imx8mq-pcie-ep
      - fsl,imx8mp-pcie-ep
+      - fsl,imx8q-pcie-ep
      - fsl,imx95-pcie-ep

  clocks:
@ -74,6 +75,20 @@ allOf:
            - const: dbi2
            - const: atu

+  - if:
+      properties:
+        compatible:
+          enum:
+            - fsl,imx8q-pcie-ep
+    then:
+      properties:
+        reg:
+          maxItems: 2
+        reg-names:
+          items:
+            - const: dbi
+            - const: addr_space
+
  - if:
      properties:
        compatible:
@ -103,13 +118,21 @@ allOf:
      properties:
        clocks:
          minItems: 4
+          maxItems: 4
        clock-names:
          items:
            - const: pcie
            - const: pcie_bus
            - const: pcie_phy
            - const: pcie_aux
-    else:
+
+  - if:
+      properties:
+        compatible:
+          enum:
+            - fsl,imx8mm-pcie-ep
+            - fsl,imx8mp-pcie-ep
+    then:
      properties:
        clocks:
          maxItems: 3
@ -119,6 +142,20 @@ allOf:
            - const: pcie_bus
            - const: pcie_aux

+  - if:
+      properties:
+        compatible:
+          enum:
+            - fsl,imxq-pcie-ep
+    then:
+      properties:
+        clocks:
+          maxItems: 3
+        clock-names:
+          items:
+            - const: dbi
+            - const: mstr
+            - const: slv

 unevaluatedProperties: false

--- a/Documentation/devicetree/bindings/pci/fsl,imx6q-pcie.yaml
+++ b/Documentation/devicetree/bindings/pci/fsl,imx6q-pcie.yaml
@ -40,10 +40,11 @@ properties:
      - description: PCIe PHY clock.
      - description: Additional required clock entry for imx6sx-pcie,
           imx6sx-pcie-ep, imx8mq-pcie, imx8mq-pcie-ep.
+      - description: PCIe reference clock.

  clock-names:
    minItems: 3
-    maxItems: 4
+    maxItems: 5

  interrupts:
    items:
@ -127,7 +128,7 @@ allOf:
    then:
      properties:
        clocks:
-          minItems: 4
+          maxItems: 4
        clock-names:
          items:
            - const: pcie
@ -140,11 +141,10 @@ allOf:
        compatible:
          enum:
            - fsl,imx8mq-pcie
-            - fsl,imx95-pcie
    then:
      properties:
        clocks:
-          minItems: 4
+          maxItems: 4
        clock-names:
          items:
            - const: pcie
@ -200,6 +200,23 @@ allOf:
            - const: mstr
            - const: slv

+  - if:
+      properties:
+        compatible:
+          enum:
+            - fsl,imx95-pcie
+    then:
+      properties:
+        clocks:
+          maxItems: 5
+        clock-names:
+          items:
+            - const: pcie
+            - const: pcie_bus
+            - const: pcie_phy
+            - const: pcie_aux
+            - const: ref
+
 unevaluatedProperties: false

 examples:
--- a/Documentation/devicetree/bindings/pci/layerscape-pcie-gen4.txt
+++ b/Documentation/devicetree/bindings/pci/layerscape-pcie-gen4.txt
@ -1,52 +0,0 @@
-NXP Layerscape PCIe Gen4 controller
-
-This PCIe controller is based on the Mobiveil PCIe IP and thus inherits all
-the common properties defined in mobiveil-pcie.txt.
-
-Required properties:
- compatible: should contain the platform identifier such as:
-  "fsl,lx2160a-pcie"
- reg: base addresses and lengths of the PCIe controller register blocks.
-  "csr_axi_slave": Bridge config registers
-  "config_axi_slave": PCIe controller registers
- interrupts: A list of interrupt outputs of the controller. Must contain an
-  entry for each entry in the interrupt-names property.
- interrupt-names: It could include the following entries:
-  "intr": The interrupt that is asserted for controller interrupts
-  "aer": Asserted for aer interrupt when chip support the aer interrupt with
-	 none MSI/MSI-X/INTx mode,but there is interrupt line for aer.
-  "pme": Asserted for pme interrupt when chip support the pme interrupt with
-	 none MSI/MSI-X/INTx mode,but there is interrupt line for pme.
- dma-coherent: Indicates that the hardware IP block can ensure the coherency
-  of the data transferred from/to the IP block. This can avoid the software
-  cache flush/invalid actions, and improve the performance significantly.
- msi-parent : See the generic MSI binding described in
-  Documentation/devicetree/bindings/interrupt-controller/msi.txt.
-
-Example:
-
-	pcie@3400000 {
-		compatible = "fsl,lx2160a-pcie";
-		reg = <0x00 0x03400000 0x0 0x00100000   /* controller registers */
-		       0x80 0x00000000 0x0 0x00001000>; /* configuration space */
-		reg-names = "csr_axi_slave", "config_axi_slave";
-		interrupts = <GIC_SPI 108 IRQ_TYPE_LEVEL_HIGH>, /* AER interrupt */
-			     <GIC_SPI 108 IRQ_TYPE_LEVEL_HIGH>, /* PME interrupt */
-			     <GIC_SPI 108 IRQ_TYPE_LEVEL_HIGH>; /* controller interrupt */
-		interrupt-names = "aer", "pme", "intr";
-		#address-cells = <3>;
-		#size-cells = <2>;
-		device_type = "pci";
-		apio-wins = <8>;
-		ppio-wins = <8>;
-		dma-coherent;
-		bus-range = <0x0 0xff>;
-		msi-parent = <&its>;
-		ranges = <0x82000000 0x0 0x40000000 0x80 0x40000000 0x0 0x40000000>;
-		#interrupt-cells = <1>;
-		interrupt-map-mask = <0 0 0 7>;
-		interrupt-map = <0000 0 0 1 &gic 0 0 GIC_SPI 109 IRQ_TYPE_LEVEL_HIGH>,
-				<0000 0 0 2 &gic 0 0 GIC_SPI 110 IRQ_TYPE_LEVEL_HIGH>,
-				<0000 0 0 3 &gic 0 0 GIC_SPI 111 IRQ_TYPE_LEVEL_HIGH>,
-				<0000 0 0 4 &gic 0 0 GIC_SPI 112 IRQ_TYPE_LEVEL_HIGH>;
-	};
--- a/Documentation/devicetree/bindings/pci/mbvl,gpex40-pcie.yaml
+++ b/Documentation/devicetree/bindings/pci/mbvl,gpex40-pcie.yaml
@ -0,0 +1,173 @@
+# SPDX-License-Identifier: (GPL-2.0-only OR BSD-2-Clause)
+%YAML 1.2
+---
+$id: http://devicetree.org/schemas/pci/mbvl,gpex40-pcie.yaml#
+$schema: http://devicetree.org/meta-schemas/core.yaml#
+
+title: Mobiveil AXI PCIe Host Bridge
+
+maintainers:
+  - Frank Li <Frank Li@nxp.com>
+
+description:
+  Mobiveil's GPEX 4.0 is a PCIe Gen4 host bridge IP. This configurable IP
+  has up to 8 outbound and inbound windows for address translation.
+
+  NXP Layerscape PCIe Gen4 controller (Deprecated) base on Mobiveil's GPEX 4.0.
+
+properties:
+  compatible:
+    enum:
+      - fsl,lx2160a-pcie
+      - mbvl,gpex40-pcie
+
+  reg:
+    items:
+      - description: PCIe controller registers
+      - description: Bridge config registers
+      - description: GPIO registers to control slot power
+      - description: MSI registers
+    minItems: 2
+
+  reg-names:
+    items:
+      - const: csr_axi_slave
+      - const: config_axi_slave
+      - const: gpio_slave
+      - const: apb_csr
+    minItems: 2
+
+  apio-wins:
+    $ref: /schemas/types.yaml#/definitions/uint32
+    description: |
+      number of requested APIO outbound windows
+        1. Config window
+        2. Memory window
+    default: 2
+    maximum: 256
+
+  ppio-wins:
+    $ref: /schemas/types.yaml#/definitions/uint32
+    description: number of requested PPIO inbound windows
+    default: 1
+    maximum: 256
+
+  interrupt-controller: true
+
+  "#interrupt-cells":
+    const: 1
+
+  interrupts:
+    minItems: 1
+    maxItems: 3
+
+  interrupt-names:
+    minItems: 1
+    maxItems: 3
+
+  dma-coherent: true
+
+  msi-parent: true
+
+required:
+  - compatible
+  - reg
+  - reg-names
+
+allOf:
+  - $ref: /schemas/pci/pci-host-bridge.yaml#
+  - if:
+      properties:
+        compatible:
+          enum:
+            - fsl,lx2160a-pcie
+    then:
+      properties:
+        reg:
+          maxItems: 2
+
+        reg-names:
+          maxItems: 2
+
+        interrupts:
+          minItems: 3
+
+        interrupt-names:
+          items:
+            - const: aer
+            - const: pme
+            - const: intr
+    else:
+      properties:
+        dma-coherent: false
+        msi-parent: false
+        interrupts:
+          maxItems: 1
+        interrupt-names: false
+
+unevaluatedProperties: false
+
+examples:
+  - |
+    #include <dt-bindings/interrupt-controller/arm-gic.h>
+
+    pcie@b0000000 {
+        compatible = "mbvl,gpex40-pcie";
+        reg = <0xb0000000 0x00010000>,
+              <0xa0000000 0x00001000>,
+              <0xff000000 0x00200000>,
+              <0xb0010000 0x00001000>;
+        reg-names = "csr_axi_slave",
+                    "config_axi_slave",
+                    "gpio_slave",
+                    "apb_csr";
+        ranges = <0x83000000 0 0x00000000 0xa8000000 0 0x8000000>;
+        #address-cells = <3>;
+        #size-cells = <2>;
+        device_type = "pci";
+        apio-wins = <2>;
+        ppio-wins = <1>;
+        bus-range = <0x00 0xff>;
+        interrupt-controller;
+        #interrupt-cells = <1>;
+        interrupt-parent = <&gic>;
+        interrupts = <GIC_SPI 89 IRQ_TYPE_LEVEL_HIGH>;
+        interrupt-map-mask = <0 0 0 7>;
+        interrupt-map = <0 0 0 0 &pci_express 0>,
+                        <0 0 0 1 &pci_express 1>,
+                        <0 0 0 2 &pci_express 2>,
+                        <0 0 0 3 &pci_express 3>;
+    };
+
+  - |
+    #include <dt-bindings/interrupt-controller/arm-gic.h>
+
+    soc {
+        #address-cells = <2>;
+        #size-cells = <2>;
+        pcie@3400000 {
+            compatible = "fsl,lx2160a-pcie";
+            reg = <0x00 0x03400000 0x0 0x00100000   /* controller registers */
+                   0x80 0x00000000 0x0 0x00001000>; /* configuration space */
+            reg-names = "csr_axi_slave", "config_axi_slave";
+            ranges = <0x82000000 0x0 0x40000000 0x80 0x40000000 0x0 0x40000000>;
+            interrupts = <GIC_SPI 108 IRQ_TYPE_LEVEL_HIGH>, /* AER interrupt */
+                         <GIC_SPI 108 IRQ_TYPE_LEVEL_HIGH>, /* PME interrupt */
+                         <GIC_SPI 108 IRQ_TYPE_LEVEL_HIGH>; /* controller interrupt */
+            interrupt-names = "aer", "pme", "intr";
+            #address-cells = <3>;
+            #size-cells = <2>;
+            device_type = "pci";
+            apio-wins = <8>;
+            ppio-wins = <8>;
+            dma-coherent;
+            bus-range = <0x00 0xff>;
+            msi-parent = <&its>;
+            #interrupt-cells = <1>;
+            interrupt-map-mask = <0 0 0 7>;
+            interrupt-map = <0000 0 0 1 &gic 0 0 GIC_SPI 109 IRQ_TYPE_LEVEL_HIGH>,
+                            <0000 0 0 2 &gic 0 0 GIC_SPI 110 IRQ_TYPE_LEVEL_HIGH>,
+                            <0000 0 0 3 &gic 0 0 GIC_SPI 111 IRQ_TYPE_LEVEL_HIGH>,
+                            <0000 0 0 4 &gic 0 0 GIC_SPI 112 IRQ_TYPE_LEVEL_HIGH>;
+        };
+    };
--- a/Documentation/devicetree/bindings/pci/mediatek-pcie-gen3.yaml
+++ b/Documentation/devicetree/bindings/pci/mediatek-pcie-gen3.yaml
@ -149,7 +149,7 @@ allOf:
    then:
      properties:
        clocks:
-          minItems: 4
+          minItems: 6

        clock-names:
          items:
@ -178,7 +178,7 @@ allOf:
    then:
      properties:
        clocks:
-          minItems: 4
+          minItems: 6

        clock-names:
          items:
@ -207,6 +207,7 @@ allOf:
      properties:
        clocks:
          minItems: 4
+          maxItems: 4

        clock-names:
          items:
--- a/Documentation/devicetree/bindings/pci/microchip,pcie-host.yaml
+++ b/Documentation/devicetree/bindings/pci/microchip,pcie-host.yaml
@ -17,6 +17,12 @@ properties:
  compatible:
    const: microchip,pcie-host-1.0 # PolarFire

+  reg:
+    minItems: 3
+
+  reg-names:
+    minItems: 3
+
  clocks:
    description:
      Fabric Interface Controllers, FICs, are the interface between the FPGA
@ -44,6 +50,8 @@ properties:
    items:
      pattern: '^fic[0-3]$'

+  dma-coherent: true
+
  ranges:
    minItems: 1
    maxItems: 3
@ -62,8 +70,9 @@ examples:
            pcie0: pcie@2030000000 {
                    compatible = "microchip,pcie-host-1.0";
                    reg = <0x0 0x70000000 0x0 0x08000000>,
-                          <0x0 0x43000000 0x0 0x00010000>;
-                    reg-names = "cfg", "apb";
+                          <0x0 0x43008000 0x0 0x00002000>,
+                          <0x0 0x4300a000 0x0 0x00002000>;
+                    reg-names = "cfg", "bridge", "ctrl";
                    device_type = "pci";
                    #address-cells = <3>;
                    #size-cells = <2>;
--- a/Documentation/devicetree/bindings/pci/mobiveil-pcie.txt
+++ b/Documentation/devicetree/bindings/pci/mobiveil-pcie.txt
@ -1,72 +0,0 @@
-* Mobiveil AXI PCIe Root Port Bridge DT description
-
-Mobiveil's GPEX 4.0 is a PCIe Gen4 root port bridge IP. This configurable IP
-has up to 8 outbound and inbound windows for the address translation.
-
-Required properties:
- #address-cells: Address representation for root ports, set to <3>
- #size-cells: Size representation for root ports, set to <2>
- #interrupt-cells: specifies the number of cells needed to encode an
-	interrupt source. The value must be 1.
- compatible: Should contain "mbvl,gpex40-pcie"
- reg: Should contain PCIe registers location and length
-	Mandatory:
-	"config_axi_slave": PCIe controller registers
-	"csr_axi_slave"	  : Bridge config registers
-	Optional:
-	"gpio_slave"	  : GPIO registers to control slot power
-	"apb_csr"	  : MSI registers
-
- device_type: must be "pci"
- apio-wins : number of requested apio outbound windows
-		default 2 outbound windows are configured -
-		1. Config window
-		2. Memory window
- ppio-wins : number of requested ppio inbound windows
-		default 1 inbound memory window is configured.
- bus-range: PCI bus numbers covered
- interrupt-controller: identifies the node as an interrupt controller
- #interrupt-cells: specifies the number of cells needed to encode an
-	interrupt source. The value must be 1.
- interrupts: The interrupt line of the PCIe controller
-		last cell of this field is set to 4 to
-		denote it as IRQ_TYPE_LEVEL_HIGH type interrupt.
- interrupt-map-mask,
-	interrupt-map: standard PCI properties to define the mapping of the
-	PCI interface to interrupt numbers.
- ranges: ranges for the PCI memory regions (I/O space region is not
-	supported by hardware)
-	Please refer to the standard PCI bus binding document for a more
-	detailed explanation
-
-
-Example:
-++++++++
-	pcie0: pcie@a0000000 {
-		#address-cells = <3>;
-		#size-cells = <2>;
-		compatible = "mbvl,gpex40-pcie";
-		reg =	<0xa0000000 0x00001000>,
-			<0xb0000000 0x00010000>,
-			<0xff000000 0x00200000>,
-			<0xb0010000 0x00001000>;
-		reg-names =	"config_axi_slave",
-				"csr_axi_slave",
-				"gpio_slave",
-				"apb_csr";
-		device_type = "pci";
-		apio-wins = <2>;
-		ppio-wins = <1>;
-		bus-range = <0x00000000 0x000000ff>;
-		interrupt-controller;
-		interrupt-parent = <&gic>;
-		#interrupt-cells = <1>;
-		interrupts = < 0 89 4 >;
-		interrupt-map-mask = <0 0 0 7>;
-		interrupt-map = <0 0 0 0 &pci_express 0>,
-				<0 0 0 1 &pci_express 1>,
-				<0 0 0 2 &pci_express 2>,
-				<0 0 0 3 &pci_express 3>;
-		ranges = < 0x83000000 0 0x00000000 0xa8000000 0 0x8000000>;
-
-	};
--- a/Documentation/devicetree/bindings/pci/plda,xpressrich3-axi-common.yaml
+++ b/Documentation/devicetree/bindings/pci/plda,xpressrich3-axi-common.yaml
@ -18,12 +18,18 @@ allOf:

 properties:
  reg:
-    maxItems: 2
+    maxItems: 3
+    minItems: 2

  reg-names:
-    items:
+    oneOf:
+      - items:
          - const: cfg
          - const: apb
+      - items:
+          - const: cfg
+          - const: bridge
+          - const: ctrl

  interrupts:
    minItems: 1
--- a/Documentation/devicetree/bindings/pci/qcom,pcie-common.yaml
+++ b/Documentation/devicetree/bindings/pci/qcom,pcie-common.yaml
@ -81,6 +81,10 @@ properties:
  vddpe-3v3-supply:
    description: PCIe endpoint power supply

+  operating-points-v2: true
+  opp-table:
+    type: object
+
 required:
  - reg
  - reg-names
--- a/Documentation/devicetree/bindings/pci/qcom,pcie-sm8450.yaml
+++ b/Documentation/devicetree/bindings/pci/qcom,pcie-sm8450.yaml
@ -70,10 +70,6 @@ properties:
      - const: msi7
      - const: global

-  operating-points-v2: true
-  opp-table:
-    type: object
-
  resets:
    maxItems: 1

--- a/Show More
+++ b/Show More