Import of kernel-6.12.0-124.8.1.el10_1
This commit is contained in:
parent
9c95bdb733
commit
8e33fb9082
@ -109,6 +109,10 @@ Contact: Martin K. Petersen <martin.petersen@oracle.com>
|
||||
Description:
|
||||
Indicates whether a storage device is capable of storing
|
||||
integrity metadata. Set if the device is T10 PI-capable.
|
||||
This flag is set to 1 if the storage media is formatted
|
||||
with T10 Protection Information. If the storage media is
|
||||
not formatted with T10 Protection Information, this flag
|
||||
is set to 0.
|
||||
|
||||
|
||||
What: /sys/block/<disk>/integrity/format
|
||||
@ -117,6 +121,13 @@ Contact: Martin K. Petersen <martin.petersen@oracle.com>
|
||||
Description:
|
||||
Metadata format for integrity capable block device.
|
||||
E.g. T10-DIF-TYPE1-CRC.
|
||||
This field describes the type of T10 Protection Information
|
||||
that the block device can send and receive.
|
||||
If the device can store application integrity metadata but
|
||||
no T10 Protection Information profile is used, this field
|
||||
contains "nop".
|
||||
If the device does not support integrity metadata, this
|
||||
field contains "none".
|
||||
|
||||
|
||||
What: /sys/block/<disk>/integrity/protection_interval_bytes
|
||||
@ -142,7 +153,17 @@ Date: June 2008
|
||||
Contact: Martin K. Petersen <martin.petersen@oracle.com>
|
||||
Description:
|
||||
Number of bytes of integrity tag space available per
|
||||
512 bytes of data.
|
||||
protection_interval_bytes, which is typically
|
||||
the device's logical block size.
|
||||
This field describes the size of the application tag
|
||||
if the storage device is formatted with T10 Protection
|
||||
Information and permits use of the application tag.
|
||||
The tag_size is reported in bytes and indicates the
|
||||
space available for adding an opaque tag to each block
|
||||
(protection_interval_bytes).
|
||||
If the device does not support T10 Protection Information
|
||||
(even if the device provides application integrity
|
||||
metadata space), this field is set to 0.
|
||||
|
||||
|
||||
What: /sys/block/<disk>/integrity/write_generate
|
||||
@ -229,6 +250,17 @@ Description:
|
||||
encryption, refer to Documentation/block/inline-encryption.rst.
|
||||
|
||||
|
||||
What: /sys/block/<disk>/queue/crypto/hw_wrapped_keys
|
||||
Date: February 2025
|
||||
Contact: linux-block@vger.kernel.org
|
||||
Description:
|
||||
[RO] The presence of this file indicates that the device
|
||||
supports hardware-wrapped inline encryption keys, i.e. key blobs
|
||||
that can only be unwrapped and used by dedicated hardware. For
|
||||
more information about hardware-wrapped inline encryption keys,
|
||||
see Documentation/block/inline-encryption.rst.
|
||||
|
||||
|
||||
What: /sys/block/<disk>/queue/crypto/max_dun_bits
|
||||
Date: February 2022
|
||||
Contact: linux-block@vger.kernel.org
|
||||
@ -267,6 +299,15 @@ Description:
|
||||
use with inline encryption.
|
||||
|
||||
|
||||
What: /sys/block/<disk>/queue/crypto/raw_keys
|
||||
Date: February 2025
|
||||
Contact: linux-block@vger.kernel.org
|
||||
Description:
|
||||
[RO] The presence of this file indicates that the device
|
||||
supports raw inline encryption keys, i.e. keys that are managed
|
||||
in raw, plaintext form in software.
|
||||
|
||||
|
||||
What: /sys/block/<disk>/queue/dax
|
||||
Date: June 2016
|
||||
Contact: linux-block@vger.kernel.org
|
||||
@ -424,6 +465,13 @@ Description:
|
||||
[RW] This file is used to control (on/off) the iostats
|
||||
accounting of the disk.
|
||||
|
||||
What: /sys/block/<disk>/queue/iostats_passthrough
|
||||
Date: October 2024
|
||||
Contact: linux-block@vger.kernel.org
|
||||
Description:
|
||||
[RW] This file is used to control (on/off) the iostats
|
||||
accounting of the disk for passthrough commands.
|
||||
|
||||
|
||||
What: /sys/block/<disk>/queue/logical_block_size
|
||||
Date: May 2009
|
||||
|
||||
9
Documentation/ABI/stable/sysfs-class-bluetooth
Normal file
9
Documentation/ABI/stable/sysfs-class-bluetooth
Normal file
@ -0,0 +1,9 @@
|
||||
What: /sys/class/bluetooth/hci<index>/reset
|
||||
Date: 14-Jan-2025
|
||||
KernelVersion: 6.13
|
||||
Contact: linux-bluetooth@vger.kernel.org
|
||||
Description: This write-only attribute allows users to trigger the vendor reset
|
||||
method on the Bluetooth device when arbitrary data is written.
|
||||
The reset may or may not be done through the device transport
|
||||
(e.g., UART/USB), and can also be done through an out-of-band
|
||||
approach such as GPIO.
|
||||
@ -177,6 +177,12 @@ Description:
|
||||
The cache write policy: 0 for write-back, 1 for write-through,
|
||||
other or unknown.
|
||||
|
||||
What: /sys/devices/system/node/nodeX/memory_side_cache/indexY/address_mode
|
||||
Date: March 2025
|
||||
Contact: Dave Jiang <dave.jiang@intel.com>
|
||||
Description:
|
||||
The address mode: 0 for reserved, 1 for extended-linear.
|
||||
|
||||
What: /sys/devices/system/node/nodeX/x86/sgx_total_bytes
|
||||
Date: November 2021
|
||||
Contact: Jarkko Sakkinen <jarkko@kernel.org>
|
||||
|
||||
@ -342,6 +342,70 @@ Description: Specific uncompressed frame descriptors
|
||||
support
|
||||
========================= =====================================
|
||||
|
||||
What: /config/usb-gadget/gadget/functions/uvc.name/streaming/framebased
|
||||
Date: Sept 2024
|
||||
KernelVersion: 5.15
|
||||
Description: Framebased format descriptors
|
||||
|
||||
What: /config/usb-gadget/gadget/functions/uvc.name/streaming/framebased/name
|
||||
Date: Sept 2024
|
||||
KernelVersion: 5.15
|
||||
Description: Specific framebased format descriptors
|
||||
|
||||
================== =======================================
|
||||
bFormatIndex unique id for this format descriptor;
|
||||
only defined after parent header is
|
||||
linked into the streaming class;
|
||||
read-only
|
||||
bmaControls this format's data for bmaControls in
|
||||
the streaming header
|
||||
bmInterlaceFlags specifies interlace information,
|
||||
read-only
|
||||
bAspectRatioY the Y dimension of the picture aspect
|
||||
ratio, read-only
|
||||
bAspectRatioX the X dimension of the picture aspect
|
||||
ratio, read-only
|
||||
bDefaultFrameIndex optimum frame index for this stream
|
||||
bBitsPerPixel number of bits per pixel used to
|
||||
specify color in the decoded video
|
||||
frame
|
||||
guidFormat globally unique id used to identify
|
||||
stream-encoding format
|
||||
================== =======================================
|
||||
|
||||
What: /config/usb-gadget/gadget/functions/uvc.name/streaming/framebased/name/name
|
||||
Date: Sept 2024
|
||||
KernelVersion: 5.15
|
||||
Description: Specific framebased frame descriptors
|
||||
|
||||
========================= =====================================
|
||||
bFrameIndex unique id for this frame descriptor;
|
||||
only defined after parent format is
|
||||
linked into the streaming header;
|
||||
read-only
|
||||
dwFrameInterval indicates how frame interval can be
|
||||
programmed; a number of values
|
||||
separated by newline can be specified
|
||||
dwDefaultFrameInterval the frame interval the device would
|
||||
like to use as default
|
||||
dwBytesPerLine Specifies the number of bytes per line
|
||||
of video for packed fixed frame size
|
||||
formats, allowing the receiver to
|
||||
perform stride alignment of the video.
|
||||
If the bVariableSize value (above) is
|
||||
TRUE (1), or if the format does not
|
||||
permit such alignment, this value shall
|
||||
be set to zero (0).
|
||||
dwMaxBitRate the maximum bit rate at the shortest
|
||||
frame interval in bps
|
||||
dwMinBitRate the minimum bit rate at the longest
|
||||
frame interval in bps
|
||||
wHeight height of decoded bitmap frame in px
|
||||
wWidth width of decoded bitmap frame in px
|
||||
bmCapabilities still image support, fixed frame-rate
|
||||
support
|
||||
========================= =====================================
|
||||
|
||||
What: /config/usb-gadget/gadget/functions/uvc.name/streaming/header
|
||||
Date: Dec 2014
|
||||
KernelVersion: 4.0
|
||||
|
||||
25
Documentation/ABI/testing/debugfs-hisi-migration
Normal file
25
Documentation/ABI/testing/debugfs-hisi-migration
Normal file
@ -0,0 +1,25 @@
|
||||
What: /sys/kernel/debug/vfio/<device>/migration/hisi_acc/dev_data
|
||||
Date: Jan 2025
|
||||
KernelVersion: 6.13
|
||||
Contact: Longfang Liu <liulongfang@huawei.com>
|
||||
Description: Read the configuration data and some status data
|
||||
required for device live migration. These data include device
|
||||
status data, queue configuration data, some task configuration
|
||||
data and device attribute data. The output format of the data
|
||||
is defined by the live migration driver.
|
||||
|
||||
What: /sys/kernel/debug/vfio/<device>/migration/hisi_acc/migf_data
|
||||
Date: Jan 2025
|
||||
KernelVersion: 6.13
|
||||
Contact: Longfang Liu <liulongfang@huawei.com>
|
||||
Description: Read the data from the last completed live migration.
|
||||
This data includes the same device status data as in "dev_data".
|
||||
The migf_data is the dev_data that is migrated.
|
||||
|
||||
What: /sys/kernel/debug/vfio/<device>/migration/hisi_acc/cmd_state
|
||||
Date: Jan 2025
|
||||
KernelVersion: 6.13
|
||||
Contact: Longfang Liu <liulongfang@huawei.com>
|
||||
Description: Used to obtain the device command sending and receiving
|
||||
channel status. Returns failure or success logs based on the
|
||||
results.
|
||||
@ -0,0 +1,25 @@
|
||||
What: /sys/bus/event_source/devices/vpa_pmu/format
|
||||
Date: November 2024
|
||||
Contact: Linux on PowerPC Developer List <linuxppc-dev@lists.ozlabs.org>
|
||||
Description: Read-only. Attribute group to describe the magic bits
|
||||
that go into perf_event_attr.config for a particular pmu.
|
||||
(See ABI/testing/sysfs-bus-event_source-devices-format).
|
||||
|
||||
Each attribute under this group defines a bit range of the
|
||||
perf_event_attr.config. Supported attributes are listed
|
||||
below::
|
||||
|
||||
event = "config:0-31" - event ID
|
||||
|
||||
For example::
|
||||
|
||||
l1_to_l2_lat = "event=0x1"
|
||||
|
||||
What: /sys/bus/event_source/devices/vpa_pmu/events
|
||||
Date: November 2024
|
||||
Contact: Linux on PowerPC Developer List <linuxppc-dev@lists.ozlabs.org>
|
||||
Description: Read-only. Attribute group to describe performance monitoring
|
||||
events for the Virtual Processor Area events. Each attribute
|
||||
in this group describes a single performance monitoring event
|
||||
supported by vpa_pmu. The name of the file is the name of
|
||||
the event (See ABI/testing/sysfs-bus-event_source-devices-events).
|
||||
@ -163,6 +163,17 @@ Description:
|
||||
will be present in sysfs. Writing 1 to this file
|
||||
will perform reset.
|
||||
|
||||
What: /sys/bus/pci/devices/.../reset_subordinate
|
||||
Date: October 2024
|
||||
Contact: linux-pci@vger.kernel.org
|
||||
Description:
|
||||
This is visible only for bridge devices. If you want to reset
|
||||
all devices attached through the subordinate bus of a specific
|
||||
bridge device, writing 1 to this will try to do it. This will
|
||||
affect all devices attached to the system through this bridge
|
||||
similar to writing 1 to their individual "reset" file, so use
|
||||
with caution.
|
||||
|
||||
What: /sys/bus/pci/devices/.../vpd
|
||||
Date: February 2008
|
||||
Contact: Ben Hutchings <bwh@kernel.org>
|
||||
|
||||
@ -149,6 +149,19 @@ Description:
|
||||
advertise to the partner. The currently used capabilities are in
|
||||
brackets. Selection happens by writing to the file.
|
||||
|
||||
What: /sys/class/typec/<port>/usb_capability
|
||||
Date: November 2024
|
||||
Contact: Heikki Krogerus <heikki.krogerus@linux.intel.com>
|
||||
Description: Lists the supported USB Modes. The default USB mode that is used
|
||||
next time with the Enter_USB Message is in brackets. The default
|
||||
mode can be changed by writing to the file when supported by the
|
||||
driver.
|
||||
|
||||
Valid values:
|
||||
- usb2 (USB 2.0)
|
||||
- usb3 (USB 3.2)
|
||||
- usb4 (USB4)
|
||||
|
||||
USB Type-C partner devices (eg. /sys/class/typec/port0-partner/)
|
||||
|
||||
What: /sys/class/typec/<port>-partner/accessory_mode
|
||||
@ -220,6 +233,20 @@ Description:
|
||||
directory exists, it will have an attribute file for every VDO
|
||||
in Discover Identity command result.
|
||||
|
||||
What: /sys/class/typec/<port>-partner/usb_mode
|
||||
Date: November 2024
|
||||
Contact: Heikki Krogerus <heikki.krogerus@linux.intel.com>
|
||||
Description: The USB Modes that the partner device supports. The active mode
|
||||
is displayed in brackets. The active USB mode can be changed by
|
||||
writing to this file when the port driver is able to send Data
|
||||
Reset Message to the partner. That requires USB Power Delivery
|
||||
contract between the partner and the port.
|
||||
|
||||
Valid values:
|
||||
- usb2 (USB 2.0)
|
||||
- usb3 (USB 3.2)
|
||||
- usb4 (USB4)
|
||||
|
||||
USB Type-C cable devices (eg. /sys/class/typec/port0-cable/)
|
||||
|
||||
Note: Electronically Marked Cables will have a device also for one cable plug
|
||||
|
||||
@ -523,6 +523,7 @@ What: /sys/devices/system/cpu/vulnerabilities
|
||||
/sys/devices/system/cpu/vulnerabilities/spectre_v1
|
||||
/sys/devices/system/cpu/vulnerabilities/spectre_v2
|
||||
/sys/devices/system/cpu/vulnerabilities/srbds
|
||||
/sys/devices/system/cpu/vulnerabilities/tsa
|
||||
/sys/devices/system/cpu/vulnerabilities/tsx_async_abort
|
||||
Date: January 2018
|
||||
Contact: Linux kernel mailing list <linux-kernel@vger.kernel.org>
|
||||
|
||||
13
Documentation/ABI/testing/sysfs-driver-amd-sfh
Normal file
13
Documentation/ABI/testing/sysfs-driver-amd-sfh
Normal file
@ -0,0 +1,13 @@
|
||||
What: /sys/bus/pci/drivers/pcie_mp2_amd/*/hpd
|
||||
Date: April 2025
|
||||
Contact: mario.limonciello@amd.com
|
||||
Description:
|
||||
Human presence detection (HPD) enable/disable.
|
||||
When HPD is enabled, the device will be able to detect the
|
||||
presence of a human and will send an interrupt that can be
|
||||
used to wake the system from a low power state.
|
||||
When HPD is disabled, the device will not be able to detect
|
||||
the presence of a human.
|
||||
|
||||
Access: Read/Write
|
||||
Valid values: enabled/disabled
|
||||
13
Documentation/ABI/testing/sysfs-driver-hid-appletb-kbd
Normal file
13
Documentation/ABI/testing/sysfs-driver-hid-appletb-kbd
Normal file
@ -0,0 +1,13 @@
|
||||
What: /sys/bus/hid/drivers/hid-appletb-kbd/<dev>/mode
|
||||
Date: September, 2023
|
||||
KernelVersion: 6.5
|
||||
Contact: linux-input@vger.kernel.org
|
||||
Description:
|
||||
The set of keys displayed on the Touch Bar.
|
||||
Valid values are:
|
||||
== =================
|
||||
0 Escape key only
|
||||
1 Function keys
|
||||
2 Media/brightness keys
|
||||
3 None
|
||||
== =================
|
||||
38
Documentation/ABI/testing/sysfs-driver-hid-corsair-void
Normal file
38
Documentation/ABI/testing/sysfs-driver-hid-corsair-void
Normal file
@ -0,0 +1,38 @@
|
||||
What: /sys/bus/hid/drivers/hid-corsair-void/<dev>/fw_version_headset
|
||||
Date: January 2024
|
||||
KernelVersion: 6.13
|
||||
Contact: Stuart Hayhurst <stuart.a.hayhurst@gmail.com>
|
||||
Description: (R) The firmware version of the headset
|
||||
* Returns -ENODATA if no version was reported
|
||||
|
||||
What: /sys/bus/hid/drivers/hid-corsair-void/<dev>/fw_version_receiver
|
||||
Date: January 2024
|
||||
KernelVersion: 6.13
|
||||
Contact: Stuart Hayhurst <stuart.a.hayhurst@gmail.com>
|
||||
Description: (R) The firmware version of the receiver
|
||||
|
||||
What: /sys/bus/hid/drivers/hid-corsair-void/<dev>/microphone_up
|
||||
Date: July 2023
|
||||
KernelVersion: 6.13
|
||||
Contact: Stuart Hayhurst <stuart.a.hayhurst@gmail.com>
|
||||
Description: (R) Get the physical position of the microphone
|
||||
* 1 -> Microphone up
|
||||
* 0 -> Microphone down
|
||||
|
||||
What: /sys/bus/hid/drivers/hid-corsair-void/<dev>/send_alert
|
||||
Date: July 2023
|
||||
KernelVersion: 6.13
|
||||
Contact: Stuart Hayhurst <stuart.a.hayhurst@gmail.com>
|
||||
Description: (W) Play a built-in notification from the headset (0 / 1)
|
||||
|
||||
What: /sys/bus/hid/drivers/hid-corsair-void/<dev>/set_sidetone
|
||||
Date: December 2023
|
||||
KernelVersion: 6.13
|
||||
Contact: Stuart Hayhurst <stuart.a.hayhurst@gmail.com>
|
||||
Description: (W) Set the sidetone volume (0 - sidetone_max)
|
||||
|
||||
What: /sys/bus/hid/drivers/hid-corsair-void/<dev>/sidetone_max
|
||||
Date: July 2024
|
||||
KernelVersion: 6.13
|
||||
Contact: Stuart Hayhurst <stuart.a.hayhurst@gmail.com>
|
||||
Description: (R) Report the maximum sidetone volume
|
||||
@ -55,6 +55,15 @@ Description:
|
||||
An attribute which indicates whether the patch supports
|
||||
atomic-replace.
|
||||
|
||||
What: /sys/kernel/livepatch/<patch>/stack_order
|
||||
Date: Jan 2025
|
||||
KernelVersion: 6.14.0
|
||||
Description:
|
||||
This attribute specifies the sequence in which live patch modules
|
||||
are applied to the system. If multiple live patches modify the same
|
||||
function, the implementation with the biggest 'stack_order' number
|
||||
is used, unless a transition is currently in progress.
|
||||
|
||||
What: /sys/kernel/livepatch/<patch>/<object>
|
||||
Date: Nov 2014
|
||||
KernelVersion: 3.19.0
|
||||
|
||||
@ -117,6 +117,35 @@ by the PCI endpoint function driver.
|
||||
The PCI endpoint function driver should use pci_epc_mem_free_addr() to
|
||||
free the memory space allocated using pci_epc_mem_alloc_addr().
|
||||
|
||||
* pci_epc_map_addr()
|
||||
|
||||
A PCI endpoint function driver should use pci_epc_map_addr() to map to a RC
|
||||
PCI address the CPU address of local memory obtained with
|
||||
pci_epc_mem_alloc_addr().
|
||||
|
||||
* pci_epc_unmap_addr()
|
||||
|
||||
A PCI endpoint function driver should use pci_epc_unmap_addr() to unmap the
|
||||
CPU address of local memory mapped to a RC address with pci_epc_map_addr().
|
||||
|
||||
* pci_epc_mem_map()
|
||||
|
||||
A PCI endpoint controller may impose constraints on the RC PCI addresses that
|
||||
can be mapped. The function pci_epc_mem_map() allows endpoint function
|
||||
drivers to allocate and map controller memory while handling such
|
||||
constraints. This function will determine the size of the memory that must be
|
||||
allocated with pci_epc_mem_alloc_addr() for successfully mapping a RC PCI
|
||||
address range. This function will also indicate the size of the PCI address
|
||||
range that was actually mapped, which can be less than the requested size, as
|
||||
well as the offset into the allocated memory to use for accessing the mapped
|
||||
RC PCI address range.
|
||||
|
||||
* pci_epc_mem_unmap()
|
||||
|
||||
A PCI endpoint function driver can use pci_epc_mem_unmap() to unmap and free
|
||||
controller memory that was allocated and mapped using pci_epc_mem_map().
|
||||
|
||||
|
||||
Other EPC APIs
|
||||
~~~~~~~~~~~~~~
|
||||
|
||||
|
||||
@ -81,8 +81,8 @@ device, the following commands can be used::
|
||||
|
||||
# echo 0x104c > functions/pci_epf_test/func1/vendorid
|
||||
# echo 0xb500 > functions/pci_epf_test/func1/deviceid
|
||||
# echo 16 > functions/pci_epf_test/func1/msi_interrupts
|
||||
# echo 8 > functions/pci_epf_test/func1/msix_interrupts
|
||||
# echo 32 > functions/pci_epf_test/func1/msi_interrupts
|
||||
# echo 2048 > functions/pci_epf_test/func1/msix_interrupts
|
||||
|
||||
|
||||
Binding pci-epf-test Device to EP Controller
|
||||
@ -123,113 +123,83 @@ above::
|
||||
Using Endpoint Test function Device
|
||||
-----------------------------------
|
||||
|
||||
pcitest.sh added in tools/pci/ can be used to run all the default PCI endpoint
|
||||
tests. To compile this tool the following commands should be used::
|
||||
Kselftest added in tools/testing/selftests/pci_endpoint can be used to run all
|
||||
the default PCI endpoint tests. To build the Kselftest for PCI endpoint
|
||||
subsystem, the following commands should be used::
|
||||
|
||||
# cd <kernel-dir>
|
||||
# make -C tools/pci
|
||||
# make -C tools/testing/selftests/pci_endpoint
|
||||
|
||||
or if you desire to compile and install in your system::
|
||||
|
||||
# cd <kernel-dir>
|
||||
# make -C tools/pci install
|
||||
# make -C tools/testing/selftests/pci_endpoint INSTALL_PATH=/usr/bin install
|
||||
|
||||
The tool and script will be located in <rootfs>/usr/bin/
|
||||
The test will be located in <rootfs>/usr/bin/
|
||||
|
||||
|
||||
pcitest.sh Output
|
||||
~~~~~~~~~~~~~~~~~
|
||||
Kselftest Output
|
||||
~~~~~~~~~~~~~~~~
|
||||
::
|
||||
|
||||
# pcitest.sh
|
||||
BAR tests
|
||||
# pci_endpoint_test
|
||||
TAP version 13
|
||||
1..16
|
||||
# Starting 16 tests from 9 test cases.
|
||||
# RUN pci_ep_bar.BAR0.BAR_TEST ...
|
||||
# OK pci_ep_bar.BAR0.BAR_TEST
|
||||
ok 1 pci_ep_bar.BAR0.BAR_TEST
|
||||
# RUN pci_ep_bar.BAR1.BAR_TEST ...
|
||||
# OK pci_ep_bar.BAR1.BAR_TEST
|
||||
ok 2 pci_ep_bar.BAR1.BAR_TEST
|
||||
# RUN pci_ep_bar.BAR2.BAR_TEST ...
|
||||
# OK pci_ep_bar.BAR2.BAR_TEST
|
||||
ok 3 pci_ep_bar.BAR2.BAR_TEST
|
||||
# RUN pci_ep_bar.BAR3.BAR_TEST ...
|
||||
# OK pci_ep_bar.BAR3.BAR_TEST
|
||||
ok 4 pci_ep_bar.BAR3.BAR_TEST
|
||||
# RUN pci_ep_bar.BAR4.BAR_TEST ...
|
||||
# OK pci_ep_bar.BAR4.BAR_TEST
|
||||
ok 5 pci_ep_bar.BAR4.BAR_TEST
|
||||
# RUN pci_ep_bar.BAR5.BAR_TEST ...
|
||||
# OK pci_ep_bar.BAR5.BAR_TEST
|
||||
ok 6 pci_ep_bar.BAR5.BAR_TEST
|
||||
# RUN pci_ep_basic.CONSECUTIVE_BAR_TEST ...
|
||||
# OK pci_ep_basic.CONSECUTIVE_BAR_TEST
|
||||
ok 7 pci_ep_basic.CONSECUTIVE_BAR_TEST
|
||||
# RUN pci_ep_basic.LEGACY_IRQ_TEST ...
|
||||
# OK pci_ep_basic.LEGACY_IRQ_TEST
|
||||
ok 8 pci_ep_basic.LEGACY_IRQ_TEST
|
||||
# RUN pci_ep_basic.MSI_TEST ...
|
||||
# OK pci_ep_basic.MSI_TEST
|
||||
ok 9 pci_ep_basic.MSI_TEST
|
||||
# RUN pci_ep_basic.MSIX_TEST ...
|
||||
# OK pci_ep_basic.MSIX_TEST
|
||||
ok 10 pci_ep_basic.MSIX_TEST
|
||||
# RUN pci_ep_data_transfer.memcpy.READ_TEST ...
|
||||
# OK pci_ep_data_transfer.memcpy.READ_TEST
|
||||
ok 11 pci_ep_data_transfer.memcpy.READ_TEST
|
||||
# RUN pci_ep_data_transfer.memcpy.WRITE_TEST ...
|
||||
# OK pci_ep_data_transfer.memcpy.WRITE_TEST
|
||||
ok 12 pci_ep_data_transfer.memcpy.WRITE_TEST
|
||||
# RUN pci_ep_data_transfer.memcpy.COPY_TEST ...
|
||||
# OK pci_ep_data_transfer.memcpy.COPY_TEST
|
||||
ok 13 pci_ep_data_transfer.memcpy.COPY_TEST
|
||||
# RUN pci_ep_data_transfer.dma.READ_TEST ...
|
||||
# OK pci_ep_data_transfer.dma.READ_TEST
|
||||
ok 14 pci_ep_data_transfer.dma.READ_TEST
|
||||
# RUN pci_ep_data_transfer.dma.WRITE_TEST ...
|
||||
# OK pci_ep_data_transfer.dma.WRITE_TEST
|
||||
ok 15 pci_ep_data_transfer.dma.WRITE_TEST
|
||||
# RUN pci_ep_data_transfer.dma.COPY_TEST ...
|
||||
# OK pci_ep_data_transfer.dma.COPY_TEST
|
||||
ok 16 pci_ep_data_transfer.dma.COPY_TEST
|
||||
# PASSED: 16 / 16 tests passed.
|
||||
# Totals: pass:16 fail:0 xfail:0 xpass:0 skip:0 error:0
|
||||
|
||||
BAR0: OKAY
|
||||
BAR1: OKAY
|
||||
BAR2: OKAY
|
||||
BAR3: OKAY
|
||||
BAR4: NOT OKAY
|
||||
BAR5: NOT OKAY
|
||||
|
||||
Interrupt tests
|
||||
Testcase 16 (pci_ep_data_transfer.dma.COPY_TEST) will fail for most of the DMA
|
||||
capable endpoint controllers due to the absence of the MEMCPY over DMA. For such
|
||||
controllers, it is advisable to skip this testcase using this
|
||||
command::
|
||||
|
||||
SET IRQ TYPE TO LEGACY: OKAY
|
||||
LEGACY IRQ: NOT OKAY
|
||||
SET IRQ TYPE TO MSI: OKAY
|
||||
MSI1: OKAY
|
||||
MSI2: OKAY
|
||||
MSI3: OKAY
|
||||
MSI4: OKAY
|
||||
MSI5: OKAY
|
||||
MSI6: OKAY
|
||||
MSI7: OKAY
|
||||
MSI8: OKAY
|
||||
MSI9: OKAY
|
||||
MSI10: OKAY
|
||||
MSI11: OKAY
|
||||
MSI12: OKAY
|
||||
MSI13: OKAY
|
||||
MSI14: OKAY
|
||||
MSI15: OKAY
|
||||
MSI16: OKAY
|
||||
MSI17: NOT OKAY
|
||||
MSI18: NOT OKAY
|
||||
MSI19: NOT OKAY
|
||||
MSI20: NOT OKAY
|
||||
MSI21: NOT OKAY
|
||||
MSI22: NOT OKAY
|
||||
MSI23: NOT OKAY
|
||||
MSI24: NOT OKAY
|
||||
MSI25: NOT OKAY
|
||||
MSI26: NOT OKAY
|
||||
MSI27: NOT OKAY
|
||||
MSI28: NOT OKAY
|
||||
MSI29: NOT OKAY
|
||||
MSI30: NOT OKAY
|
||||
MSI31: NOT OKAY
|
||||
MSI32: NOT OKAY
|
||||
SET IRQ TYPE TO MSI-X: OKAY
|
||||
MSI-X1: OKAY
|
||||
MSI-X2: OKAY
|
||||
MSI-X3: OKAY
|
||||
MSI-X4: OKAY
|
||||
MSI-X5: OKAY
|
||||
MSI-X6: OKAY
|
||||
MSI-X7: OKAY
|
||||
MSI-X8: OKAY
|
||||
MSI-X9: NOT OKAY
|
||||
MSI-X10: NOT OKAY
|
||||
MSI-X11: NOT OKAY
|
||||
MSI-X12: NOT OKAY
|
||||
MSI-X13: NOT OKAY
|
||||
MSI-X14: NOT OKAY
|
||||
MSI-X15: NOT OKAY
|
||||
MSI-X16: NOT OKAY
|
||||
[...]
|
||||
MSI-X2047: NOT OKAY
|
||||
MSI-X2048: NOT OKAY
|
||||
|
||||
Read Tests
|
||||
|
||||
SET IRQ TYPE TO MSI: OKAY
|
||||
READ ( 1 bytes): OKAY
|
||||
READ ( 1024 bytes): OKAY
|
||||
READ ( 1025 bytes): OKAY
|
||||
READ (1024000 bytes): OKAY
|
||||
READ (1024001 bytes): OKAY
|
||||
|
||||
Write Tests
|
||||
|
||||
WRITE ( 1 bytes): OKAY
|
||||
WRITE ( 1024 bytes): OKAY
|
||||
WRITE ( 1025 bytes): OKAY
|
||||
WRITE (1024000 bytes): OKAY
|
||||
WRITE (1024001 bytes): OKAY
|
||||
|
||||
Copy Tests
|
||||
|
||||
COPY ( 1 bytes): OKAY
|
||||
COPY ( 1024 bytes): OKAY
|
||||
COPY ( 1025 bytes): OKAY
|
||||
COPY (1024000 bytes): OKAY
|
||||
COPY (1024001 bytes): OKAY
|
||||
# pci_endpoint_test -f pci_ep_bar -f pci_ep_basic -v memcpy -T COPY_TEST -v dma
|
||||
|
||||
@ -18,3 +18,4 @@ PCI Bus Subsystem
|
||||
pcieaer-howto
|
||||
endpoint/index
|
||||
boot-interrupts
|
||||
tph
|
||||
|
||||
@ -217,8 +217,12 @@ capability structure except the PCI Express capability structure,
|
||||
that is shared between many drivers including the service drivers.
|
||||
RMW Capability accessors (pcie_capability_clear_and_set_word(),
|
||||
pcie_capability_set_word(), and pcie_capability_clear_word()) protect
|
||||
a selected set of PCI Express Capability Registers (Link Control
|
||||
Register and Root Control Register). Any change to those registers
|
||||
should be performed using RMW accessors to avoid problems due to
|
||||
concurrent updates. For the up-to-date list of protected registers,
|
||||
see pcie_capability_clear_and_set_word().
|
||||
a selected set of PCI Express Capability Registers:
|
||||
|
||||
* Link Control Register
|
||||
* Root Control Register
|
||||
* Link Control 2 Register
|
||||
|
||||
Any change to those registers should be performed using RMW accessors to
|
||||
avoid problems due to concurrent updates. For the up-to-date list of
|
||||
protected registers, see pcie_capability_clear_and_set_word().
|
||||
|
||||
132
Documentation/PCI/tph.rst
Normal file
132
Documentation/PCI/tph.rst
Normal file
@ -0,0 +1,132 @@
|
||||
.. SPDX-License-Identifier: GPL-2.0
|
||||
|
||||
|
||||
===========
|
||||
TPH Support
|
||||
===========
|
||||
|
||||
:Copyright: 2024 Advanced Micro Devices, Inc.
|
||||
:Authors: - Eric van Tassell <eric.vantassell@amd.com>
|
||||
- Wei Huang <wei.huang2@amd.com>
|
||||
|
||||
|
||||
Overview
|
||||
========
|
||||
|
||||
TPH (TLP Processing Hints) is a PCIe feature that allows endpoint devices
|
||||
to provide optimization hints for requests that target memory space.
|
||||
These hints, in a format called Steering Tags (STs), are embedded in the
|
||||
requester's TLP headers, enabling the system hardware, such as the Root
|
||||
Complex, to better manage platform resources for these requests.
|
||||
|
||||
For example, on platforms with TPH-based direct data cache injection
|
||||
support, an endpoint device can include appropriate STs in its DMA
|
||||
traffic to specify which cache the data should be written to. This allows
|
||||
the CPU core to have a higher probability of getting data from cache,
|
||||
potentially improving performance and reducing latency in data
|
||||
processing.
|
||||
|
||||
|
||||
How to Use TPH
|
||||
==============
|
||||
|
||||
TPH is presented as an optional extended capability in PCIe. The Linux
|
||||
kernel handles TPH discovery during boot, but it is up to the device
|
||||
driver to request TPH enablement if it is to be utilized. Once enabled,
|
||||
the driver uses the provided API to obtain the Steering Tag for the
|
||||
target memory and to program the ST into the device's ST table.
|
||||
|
||||
Enable TPH support in Linux
|
||||
---------------------------
|
||||
|
||||
To support TPH, the kernel must be built with the CONFIG_PCIE_TPH option
|
||||
enabled.
|
||||
|
||||
Manage TPH
|
||||
----------
|
||||
|
||||
To enable TPH for a device, use the following function::
|
||||
|
||||
int pcie_enable_tph(struct pci_dev *pdev, int mode);
|
||||
|
||||
This function enables TPH support for a device with a specific ST mode.
|
||||
Currently supported modes include:
|
||||
|
||||
* PCI_TPH_ST_NS_MODE - No ST Mode
|
||||
* PCI_TPH_ST_IV_MODE - Interrupt Vector Mode
|
||||
* PCI_TPH_ST_DS_MODE - Device Specific Mode
|
||||
|
||||
`pcie_enable_tph()` checks whether the requested mode is actually
|
||||
supported by the device before enabling. The device driver can figure out
|
||||
which TPH mode is supported and can be properly enabled based on the
|
||||
return value of `pcie_enable_tph()`.
|
||||
|
||||
To disable TPH, use the following function::
|
||||
|
||||
void pcie_disable_tph(struct pci_dev *pdev);
|
||||
|
||||
Manage ST
|
||||
---------
|
||||
|
||||
Steering Tags are platform specific. PCIe spec does not specify where STs
|
||||
are from. Instead, the PCI Firmware Specification defines an ACPI _DSM method
|
||||
(see the `Revised _DSM for Cache Locality TPH Features ECN
|
||||
<https://members.pcisig.com/wg/PCI-SIG/document/15470>`_) for retrieving
|
||||
STs for a target memory of various properties. This method is what is
|
||||
supported in this implementation.
|
||||
|
||||
To retrieve a Steering Tag for a target memory associated with a specific
|
||||
CPU, use the following function::
|
||||
|
||||
int pcie_tph_get_cpu_st(struct pci_dev *pdev, enum tph_mem_type type,
|
||||
unsigned int cpu_uid, u16 *tag);
|
||||
|
||||
The `type` argument is used to specify the memory type, either volatile
|
||||
or persistent, of the target memory. The `cpu_uid` argument specifies the
|
||||
CPU with which the memory is associated.
|
||||
|
||||
After the ST value is retrieved, the device driver can use the following
|
||||
function to write the ST into the device::
|
||||
|
||||
int pcie_tph_set_st_entry(struct pci_dev *pdev, unsigned int index,
|
||||
u16 tag);
|
||||
|
||||
The `index` argument is the ST table entry index the ST tag will be
|
||||
written into. `pcie_tph_set_st_entry()` will figure out the proper
|
||||
location of ST table, either in the MSI-X table or in the TPH Extended
|
||||
Capability space, and write the Steering Tag into the ST entry pointed to by
|
||||
the `index` argument.
|
||||
|
||||
It is completely up to the driver to decide how to use these TPH
|
||||
functions. For example a network device driver can use the TPH APIs above
|
||||
to update the Steering Tag when interrupt affinity of a RX/TX queue has
|
||||
been changed. Here is sample code for an IRQ affinity notifier:
|
||||
|
||||
.. code-block:: c
|
||||
|
||||
static void irq_affinity_notified(struct irq_affinity_notify *notify,
|
||||
const cpumask_t *mask)
|
||||
{
|
||||
struct drv_irq *irq;
|
||||
unsigned int cpu_id;
|
||||
u16 tag;
|
||||
|
||||
irq = container_of(notify, struct drv_irq, affinity_notify);
|
||||
cpumask_copy(irq->cpu_mask, mask);
|
||||
|
||||
/* Pick a right CPU as the target - here is just an example */
|
||||
cpu_id = cpumask_first(irq->cpu_mask);
|
||||
|
||||
if (pcie_tph_get_cpu_st(irq->pdev, TPH_MEM_TYPE_VM, cpu_id,
|
||||
&tag))
|
||||
return;
|
||||
|
||||
if (pcie_tph_set_st_entry(irq->pdev, irq->msix_nr, tag))
|
||||
return;
|
||||
}
|
||||
|
||||
Disable TPH system-wide
|
||||
-----------------------
|
||||
|
||||
There is a kernel command line option available to control the TPH feature:
|
||||
* "notph": TPH will be disabled for all endpoint devices.
|
||||
@ -100,29 +100,29 @@ Get delays, since system boot, for pid 10::
|
||||
# ./getdelays -d -p 10
|
||||
(output similar to next case)
|
||||
|
||||
Get sum of delays, since system boot, for all pids with tgid 5::
|
||||
Get sum and peak of delays, since system boot, for all pids with tgid 242::
|
||||
|
||||
# ./getdelays -d -t 5
|
||||
bash-4.4# ./getdelays -d -t 242
|
||||
print delayacct stats ON
|
||||
TGID 5
|
||||
TGID 242
|
||||
|
||||
|
||||
CPU count real total virtual total delay total delay average
|
||||
8 7000000 6872122 3382277 0.423ms
|
||||
IO count delay total delay average
|
||||
0 0 0.000ms
|
||||
SWAP count delay total delay average
|
||||
0 0 0.000ms
|
||||
RECLAIM count delay total delay average
|
||||
0 0 0.000ms
|
||||
THRASHING count delay total delay average
|
||||
0 0 0.000ms
|
||||
COMPACT count delay total delay average
|
||||
0 0 0.000ms
|
||||
WPCOPY count delay total delay average
|
||||
0 0 0.000ms
|
||||
IRQ count delay total delay average
|
||||
0 0 0.000ms
|
||||
CPU count real total virtual total delay total delay average delay max delay min
|
||||
39 156000000 156576579 2111069 0.054ms 0.212296ms 0.031307ms
|
||||
IO count delay total delay average delay max delay min
|
||||
0 0 0.000ms 0.000000ms 0.000000ms
|
||||
SWAP count delay total delay average delay max delay min
|
||||
0 0 0.000ms 0.000000ms 0.000000ms
|
||||
RECLAIM count delay total delay average delay max delay min
|
||||
0 0 0.000ms 0.000000ms 0.000000ms
|
||||
THRASHING count delay total delay average delay max delay min
|
||||
0 0 0.000ms 0.000000ms 0.000000ms
|
||||
COMPACT count delay total delay average delay max delay min
|
||||
0 0 0.000ms 0.000000ms 0.000000ms
|
||||
WPCOPY count delay total delay average delay max delay min
|
||||
156 11215873 0.072ms 0.207403ms 0.033913ms
|
||||
IRQ count delay total delay average delay max delay min
|
||||
0 0 0.000ms 0.000000ms 0.000000ms
|
||||
|
||||
Get IO accounting for pid 1, it works only with -p::
|
||||
|
||||
|
||||
@ -90,9 +90,7 @@ Brief summary of control files.
|
||||
used.
|
||||
memory.swappiness set/show swappiness parameter of vmscan
|
||||
(See sysctl's vm.swappiness)
|
||||
memory.move_charge_at_immigrate set/show controls of moving charges
|
||||
This knob is deprecated and shouldn't be
|
||||
used.
|
||||
memory.move_charge_at_immigrate This knob is deprecated.
|
||||
memory.oom_control set/show oom controls.
|
||||
This knob is deprecated and shouldn't be
|
||||
used.
|
||||
@ -243,10 +241,6 @@ behind this approach is that a cgroup that aggressively uses a shared
|
||||
page will eventually get charged for it (once it is uncharged from
|
||||
the cgroup that brought it in -- this will happen on memory pressure).
|
||||
|
||||
But see :ref:`section 8.2 <cgroup-v1-memory-movable-charges>` when moving a
|
||||
task to another cgroup, its pages may be recharged to the new cgroup, if
|
||||
move_charge_at_immigrate has been chosen.
|
||||
|
||||
2.4 Swap Extension
|
||||
--------------------------------------
|
||||
|
||||
@ -756,78 +750,8 @@ If we want to change this to 1G, we can at any time use::
|
||||
|
||||
THIS IS DEPRECATED!
|
||||
|
||||
It's expensive and unreliable! It's better practice to launch workload
|
||||
tasks directly from inside their target cgroup. Use dedicated workload
|
||||
cgroups to allow fine-grained policy adjustments without having to
|
||||
move physical pages between control domains.
|
||||
|
||||
Users can move charges associated with a task along with task migration, that
|
||||
is, uncharge task's pages from the old cgroup and charge them to the new cgroup.
|
||||
This feature is not supported in !CONFIG_MMU environments because of lack of
|
||||
page tables.
|
||||
|
||||
8.1 Interface
|
||||
-------------
|
||||
|
||||
This feature is disabled by default. It can be enabled (and disabled again) by
|
||||
writing to memory.move_charge_at_immigrate of the destination cgroup.
|
||||
|
||||
If you want to enable it::
|
||||
|
||||
# echo (some positive value) > memory.move_charge_at_immigrate
|
||||
|
||||
.. note::
|
||||
Each bits of move_charge_at_immigrate has its own meaning about what type
|
||||
of charges should be moved. See :ref:`section 8.2
|
||||
<cgroup-v1-memory-movable-charges>` for details.
|
||||
|
||||
.. note::
|
||||
Charges are moved only when you move mm->owner, in other words,
|
||||
a leader of a thread group.
|
||||
|
||||
.. note::
|
||||
If we cannot find enough space for the task in the destination cgroup, we
|
||||
try to make space by reclaiming memory. Task migration may fail if we
|
||||
cannot make enough space.
|
||||
|
||||
.. note::
|
||||
It can take several seconds if you move charges much.
|
||||
|
||||
And if you want disable it again::
|
||||
|
||||
# echo 0 > memory.move_charge_at_immigrate
|
||||
|
||||
.. _cgroup-v1-memory-movable-charges:
|
||||
|
||||
8.2 Type of charges which can be moved
|
||||
--------------------------------------
|
||||
|
||||
Each bit in move_charge_at_immigrate has its own meaning about what type of
|
||||
charges should be moved. But in any case, it must be noted that an account of
|
||||
a page or a swap can be moved only when it is charged to the task's current
|
||||
(old) memory cgroup.
|
||||
|
||||
+---+--------------------------------------------------------------------------+
|
||||
|bit| what type of charges would be moved ? |
|
||||
+===+==========================================================================+
|
||||
| 0 | A charge of an anonymous page (or swap of it) used by the target task. |
|
||||
| | You must enable Swap Extension (see 2.4) to enable move of swap charges. |
|
||||
+---+--------------------------------------------------------------------------+
|
||||
| 1 | A charge of file pages (normal file, tmpfs file (e.g. ipc shared memory) |
|
||||
| | and swaps of tmpfs file) mmapped by the target task. Unlike the case of |
|
||||
| | anonymous pages, file pages (and swaps) in the range mmapped by the task |
|
||||
| | will be moved even if the task hasn't done page fault, i.e. they might |
|
||||
| | not be the task's "RSS", but other task's "RSS" that maps the same file. |
|
||||
| | The mapcount of the page is ignored (the page can be moved independent |
|
||||
| | of the mapcount). You must enable Swap Extension (see 2.4) to |
|
||||
| | enable move of swap charges. |
|
||||
+---+--------------------------------------------------------------------------+
|
||||
|
||||
8.3 TODO
|
||||
--------
|
||||
|
||||
- All of moving charge operations are done under cgroup_mutex. It's not good
|
||||
behavior to hold the mutex too long, so we may need some trick.
|
||||
Reading memory.move_charge_at_immigrate will always return 0 and writing
|
||||
to it will always return -EINVAL.
|
||||
|
||||
9. Memory thresholds
|
||||
====================
|
||||
|
||||
@ -64,13 +64,14 @@ v1 is available under :ref:`Documentation/admin-guide/cgroup-v1/index.rst <cgrou
|
||||
5-6. Device
|
||||
5-7. RDMA
|
||||
5-7-1. RDMA Interface Files
|
||||
5-8. HugeTLB
|
||||
5.8-1. HugeTLB Interface Files
|
||||
5-9. Misc
|
||||
5.9-1 Miscellaneous cgroup Interface Files
|
||||
5.9-2 Migration and Ownership
|
||||
5-10. Others
|
||||
5-10-1. perf_event
|
||||
5-8. DMEM
|
||||
5-9. HugeTLB
|
||||
5-9-1. HugeTLB Interface Files
|
||||
5-10. Misc
|
||||
5-10-1. Miscellaneous cgroup Interface Files
|
||||
5-10-2. Migration and Ownership
|
||||
5-11. Others
|
||||
5-11-1. perf_event
|
||||
5-N. Non-normative information
|
||||
5-N-1. CPU controller root cgroup process behaviour
|
||||
5-N-2. IO controller root cgroup process behaviour
|
||||
@ -1655,6 +1656,11 @@ The following nested keys are defined.
|
||||
pgdemote_khugepaged
|
||||
Number of pages demoted by khugepaged.
|
||||
|
||||
hugetlb
|
||||
Amount of memory used by hugetlb pages. This metric only shows
|
||||
up if hugetlb usage is accounted for in memory.current (i.e.
|
||||
cgroup is mounted with the memory_hugetlb_accounting option).
|
||||
|
||||
memory.numa_stat
|
||||
A read-only nested-keyed file which exists on non-root cgroups.
|
||||
|
||||
@ -2621,6 +2627,49 @@ RDMA Interface Files
|
||||
mlx4_0 hca_handle=1 hca_object=20
|
||||
ocrdma1 hca_handle=1 hca_object=23
|
||||
|
||||
DMEM
|
||||
----
|
||||
|
||||
The "dmem" controller regulates the distribution and accounting of
|
||||
device memory regions. Because each memory region may have its own page size,
|
||||
which does not have to be equal to the system page size, the units are always bytes.
|
||||
|
||||
DMEM Interface Files
|
||||
~~~~~~~~~~~~~~~~~~~~
|
||||
|
||||
dmem.max, dmem.min, dmem.low
|
||||
A read-write nested-keyed file that exists for all the cgroups
|
||||
except root that describes the currently configured resource limit
|
||||
for a region.
|
||||
|
||||
An example for xe follows::
|
||||
|
||||
drm/0000:03:00.0/vram0 1073741824
|
||||
drm/0000:03:00.0/stolen max
|
||||
|
||||
The semantics are the same as for the memory cgroup controller, and are
|
||||
calculated in the same way.
|
||||
|
||||
dmem.capacity
|
||||
A read-only file that describes maximum region capacity.
|
||||
It only exists on the root cgroup. Not all memory can be
|
||||
allocated by cgroups, as the kernel reserves some for
|
||||
internal use.
|
||||
|
||||
An example for xe follows::
|
||||
|
||||
drm/0000:03:00.0/vram0 8514437120
|
||||
drm/0000:03:00.0/stolen 67108864
|
||||
|
||||
dmem.current
|
||||
A read-only file that describes current resource usage.
|
||||
It exists for all the cgroups except root.
|
||||
|
||||
An example for xe follows::
|
||||
|
||||
drm/0000:03:00.0/vram0 12550144
|
||||
drm/0000:03:00.0/stolen 8650752
|
||||
|
||||
HugeTLB
|
||||
-------
|
||||
|
||||
|
||||
@ -270,6 +270,8 @@ configured for Unix Extensions (and the client has not disabled
|
||||
illegal Windows/NTFS/SMB characters to a remap range (this mount parameter
|
||||
is the default for SMB3). This remap (``mapposix``) range is also
|
||||
compatible with Mac (and "Services for Mac" on some older Windows).
|
||||
When POSIX Extensions for SMB 3.1.1 are negotiated, remapping is automatically
|
||||
disabled.
|
||||
|
||||
CIFS VFS Mount Options
|
||||
======================
|
||||
|
||||
@ -157,9 +157,7 @@ This is achieved by using the otherwise unused and obsolete VERW instruction in
|
||||
combination with a microcode update. The microcode clears the affected CPU
|
||||
buffers when the VERW instruction is executed.
|
||||
|
||||
Kernel reuses the MDS function to invoke the buffer clearing:
|
||||
|
||||
mds_clear_cpu_buffers()
|
||||
Kernel does the buffer clearing with x86_clear_cpu_buffers().
|
||||
|
||||
On MDS affected CPUs, the kernel already invokes CPU buffer clear on
|
||||
kernel/userspace, hypervisor/guest and C-state (idle) transitions. No
|
||||
|
||||
@ -159,6 +159,7 @@ is applicable::
|
||||
SCSI Appropriate SCSI support is enabled.
|
||||
A lot of drivers have their options described inside
|
||||
the Documentation/scsi/ sub-directory.
|
||||
SDW SoundWire support is enabled.
|
||||
SECURITY Different security models are enabled.
|
||||
SELINUX SELinux support is enabled.
|
||||
SERIAL Serial support is enabled.
|
||||
|
||||
@ -446,9 +446,15 @@
|
||||
arm64.nobti [ARM64] Unconditionally disable Branch Target
|
||||
Identification support
|
||||
|
||||
arm64.nogcs [ARM64] Unconditionally disable Guarded Control Stack
|
||||
support
|
||||
|
||||
arm64.nomops [ARM64] Unconditionally disable Memory Copy and Memory
|
||||
Set instructions support
|
||||
|
||||
arm64.nompam [ARM64] Unconditionally disable Memory Partitioning And
|
||||
Monitoring support
|
||||
|
||||
arm64.nomte [ARM64] Unconditionally disable Memory Tagging Extension
|
||||
support
|
||||
|
||||
@ -2304,6 +2310,9 @@
|
||||
per_cpu_perf_limits
|
||||
Allow per-logical-CPU P-State performance control limits using
|
||||
cpufreq sysfs interface
|
||||
no_cas
|
||||
Do not enable capacity-aware scheduling (CAS) on
|
||||
hybrid systems
|
||||
|
||||
intremap= [X86-64,Intel-IOMMU,EARLY]
|
||||
on enable Interrupt Remapping (default)
|
||||
@ -2441,7 +2450,9 @@
|
||||
specified in the flag list (default: domain):
|
||||
|
||||
nohz
|
||||
Disable the tick when a single task runs.
|
||||
Disable the tick when a single task runs as well as
|
||||
disabling other kernel noises like having RCU callbacks
|
||||
offloaded. This is equivalent to the nohz_full parameter.
|
||||
|
||||
A residual 1Hz tick is offloaded to workqueues, which you
|
||||
need to affine to housekeeping through the global
|
||||
@ -2757,17 +2768,21 @@
|
||||
nvhe: Standard nVHE-based mode, without support for
|
||||
protected guests.
|
||||
|
||||
protected: nVHE-based mode with support for guests whose
|
||||
state is kept private from the host.
|
||||
protected: Mode with support for guests whose state is
|
||||
kept private from the host, using VHE or
|
||||
nVHE depending on HW support.
|
||||
|
||||
nested: VHE-based mode with support for nested
|
||||
virtualization. Requires at least ARMv8.3
|
||||
hardware.
|
||||
virtualization. Requires at least ARMv8.4
|
||||
hardware (with FEAT_NV2).
|
||||
|
||||
Defaults to VHE/nVHE based on hardware support. Setting
|
||||
mode to "protected" will disable kexec and hibernation
|
||||
for the host. "nested" is experimental and should be
|
||||
used with extreme caution.
|
||||
for the host. To force nVHE on VHE hardware, add
|
||||
"arm64_sw.hvhe=0 id_aa64mmfr1.vh=0" to the
|
||||
command-line.
|
||||
"nested" is experimental and should be used with
|
||||
extreme caution.
|
||||
|
||||
kvm-arm.vgic_v3_group0_trap=
|
||||
[KVM,ARM,EARLY] Trap guest accesses to GICv3 group-0
|
||||
@ -3045,6 +3060,8 @@
|
||||
* max_sec_lba48: Set or clear transfer size limit to
|
||||
65535 sectors.
|
||||
|
||||
* external: Mark port as external (hotplug-capable).
|
||||
|
||||
* [no]lpm: Enable or disable link power management.
|
||||
|
||||
* [no]setxfer: Indicate if transfer speed mode setting
|
||||
@ -4696,6 +4713,10 @@
|
||||
nomio [S390] Do not use MIO instructions.
|
||||
norid [S390] ignore the RID field and force use of
|
||||
one PCI domain per PCI function
|
||||
notph [PCIE] If the PCIE_TPH kernel config parameter
|
||||
is enabled, this kernel boot option can be used
|
||||
to disable PCIe TLP Processing Hints support
|
||||
system-wide.
|
||||
|
||||
pcie_aspm= [PCIE] Forcibly enable or ignore PCIe Active State Power
|
||||
Management.
|
||||
@ -4828,6 +4849,11 @@
|
||||
can be preempted anytime. Tasks will also yield
|
||||
contended spinlocks (if the critical section isn't
|
||||
explicitly preempt disabled beyond the lock itself).
|
||||
lazy - Scheduler controlled. Similar to full but instead
|
||||
of preempting the task immediately, the task gets
|
||||
one HZ tick time to yield itself before the
|
||||
preemption will be forced. One such preemption point is when the
|
||||
task returns to user space.
|
||||
|
||||
print-fatal-signals=
|
||||
[KNL] debug: print fatal signals
|
||||
@ -6089,6 +6115,10 @@
|
||||
non-zero "wait" parameter. See weight_single
|
||||
and weight_many.
|
||||
|
||||
sdw_mclk_divider=[SDW]
|
||||
Specify the MCLK divider for Intel SoundWire buses in
|
||||
case the BIOS does not provide the clock rate properly.
|
||||
|
||||
skew_tick= [KNL,EARLY] Offset the periodic timer tick per cpu to mitigate
|
||||
xtime_lock contention on larger systems, and/or RCU lock
|
||||
contention on all systems with CONFIG_MAXSMP set.
|
||||
@ -6176,6 +6206,16 @@
|
||||
For more information see Documentation/mm/slub.rst.
|
||||
(slub_nomerge legacy name also accepted for now)
|
||||
|
||||
slab_strict_numa [MM]
|
||||
Support memory policies on a per object level
|
||||
in the slab allocator. The default is for memory
|
||||
policies to be applied at the folio level when
|
||||
a new folio is needed or a partial folio is
|
||||
retrieved from the lists. Increases overhead
|
||||
in the slab fastpaths but gains more accurate
|
||||
NUMA kernel object placement which helps with slow
|
||||
interconnects in NUMA systems.
|
||||
|
||||
slram= [HW,MTD]
|
||||
|
||||
smart2= [HW]
|
||||
@ -6731,6 +6771,16 @@
|
||||
Force threading of all interrupt handlers except those
|
||||
marked explicitly IRQF_NO_THREAD.
|
||||
|
||||
thp_shmem= [KNL]
|
||||
Format: <size>[KMG],<size>[KMG]:<policy>;<size>[KMG]-<size>[KMG]:<policy>
|
||||
Control the default policy of each hugepage size for the
|
||||
internal shmem mount. <policy> is one of policies available
|
||||
for the shmem mount ("always", "inherit", "never", "within_size",
|
||||
and "advise").
|
||||
It can be used multiple times for multiple shmem THP sizes.
|
||||
See Documentation/admin-guide/mm/transhuge.rst for more
|
||||
details.
|
||||
|
||||
topology= [S390,EARLY]
|
||||
Format: {off | on}
|
||||
Specify if the kernel should make use of the cpu
|
||||
@ -6966,6 +7016,13 @@
|
||||
See Documentation/admin-guide/mm/transhuge.rst
|
||||
for more details.
|
||||
|
||||
transparent_hugepage_shmem= [KNL]
|
||||
Format: [always|within_size|advise|never|deny|force]
|
||||
Can be used to control the hugepage allocation policy for
|
||||
the internal shmem mount.
|
||||
See Documentation/admin-guide/mm/transhuge.rst
|
||||
for more details.
|
||||
|
||||
trusted.source= [KEYS]
|
||||
Format: <string>
|
||||
This parameter identifies the trust source as a backend
|
||||
@ -7002,6 +7059,19 @@
|
||||
having this key zero'ed is acceptable. E.g. in testing
|
||||
scenarios.
|
||||
|
||||
tsa= [X86] Control mitigation for Transient Scheduler
|
||||
Attacks on AMD CPUs. Search the following in your
|
||||
favourite search engine for more details:
|
||||
|
||||
"Technical guidance for mitigating transient scheduler
|
||||
attacks".
|
||||
|
||||
off - disable the mitigation
|
||||
on - enable the mitigation (default)
|
||||
user - mitigate only user/kernel transitions
|
||||
vm - mitigate only guest/host transitions
|
||||
|
||||
|
||||
tsc= Disable clocksource stability checks for TSC.
|
||||
Format: <string>
|
||||
[x86] reliable: mark tsc clocksource as reliable, this
|
||||
|
||||
@ -15,7 +15,7 @@ Please notice, however, that, if:
|
||||
|
||||
you should use the main media development tree ``master`` branch:
|
||||
|
||||
https://git.linuxtv.org/media_tree.git/
|
||||
https://git.linuxtv.org/media.git/
|
||||
|
||||
In this case, you may find some useful information at the
|
||||
`LinuxTv wiki pages <https://linuxtv.org/wiki>`_:
|
||||
|
||||
@ -67,7 +67,7 @@ Changes / Fixes
|
||||
Please mail to linux-media AT vger.kernel.org unified diffs against
|
||||
the linux media git tree:
|
||||
|
||||
https://git.linuxtv.org/media_tree.git/
|
||||
https://git.linuxtv.org/media.git/
|
||||
|
||||
This is done by committing a patch at a clone of the git tree and
|
||||
submitting the patch using ``git send-email``. Don't forget to
|
||||
|
||||
@ -326,6 +326,29 @@ PMD_ORDER THP policy will be overridden. If the policy for PMD_ORDER
|
||||
is not defined within a valid ``thp_anon``, its policy will default to
|
||||
``never``.
|
||||
|
||||
Similarly to ``transparent_hugepage``, you can control the hugepage
|
||||
allocation policy for the internal shmem mount by using the kernel parameter
|
||||
``transparent_hugepage_shmem=<policy>``, where ``<policy>`` is one of the
|
||||
six valid policies for shmem (``always``, ``within_size``, ``advise``,
|
||||
``never``, ``deny``, and ``force``).
|
||||
|
||||
In the same manner as ``thp_anon`` controls each supported anonymous THP
|
||||
size, ``thp_shmem`` controls each supported shmem THP size. ``thp_shmem``
|
||||
has the same format as ``thp_anon``, but also supports the policy
|
||||
``within_size``.
|
||||
|
||||
``thp_shmem=`` may be specified multiple times to configure all THP sizes
|
||||
as required. If ``thp_shmem=`` is specified at least once, any shmem THP
|
||||
sizes not explicitly configured on the command line are implicitly set to
|
||||
``never``.
|
||||
|
||||
``transparent_hugepage_shmem`` setting only affects the global toggle. If
|
||||
``thp_shmem`` is not specified, PMD_ORDER hugepage will default to
|
||||
``inherit``. However, if a valid ``thp_shmem`` setting is provided by the
|
||||
user, the PMD_ORDER hugepage policy will be overridden. If the policy for
|
||||
PMD_ORDER is not defined within a valid ``thp_shmem``, its policy will
|
||||
default to ``never``.
|
||||
|
||||
Hugepages in tmpfs/shmem
|
||||
========================
|
||||
|
||||
@ -530,10 +553,18 @@ anon_fault_fallback_charge
|
||||
instead falls back to using huge pages with lower orders or
|
||||
small pages even though the allocation was successful.
|
||||
|
||||
swpout
|
||||
is incremented every time a huge page is swapped out in one
|
||||
zswpout
|
||||
is incremented every time a huge page is swapped out to zswap in one
|
||||
piece without splitting.
|
||||
|
||||
swpin
|
||||
is incremented every time a huge page is swapped in from a non-zswap
|
||||
swap device in one piece.
|
||||
|
||||
swpout
|
||||
is incremented every time a huge page is swapped out to a non-zswap
|
||||
swap device in one piece without splitting.
|
||||
|
||||
swpout_fallback
|
||||
is incremented if a huge page has to be split before swapout.
|
||||
Usually because failed to allocate some continuous swap space
|
||||
|
||||
@ -34,7 +34,7 @@ strongly-ordered (SO) PCIE write traffic to local/remote memory. Please see
|
||||
traffic coverage.
|
||||
|
||||
The events and configuration options of this PMU device are described in sysfs,
|
||||
see /sys/bus/event_sources/devices/nvidia_scf_pmu_<socket-id>.
|
||||
see /sys/bus/event_source/devices/nvidia_scf_pmu_<socket-id>.
|
||||
|
||||
Example usage:
|
||||
|
||||
@ -66,7 +66,7 @@ Please see :ref:`NVIDIA_Uncore_PMU_Traffic_Coverage_Section` for more info about
|
||||
the PMU traffic coverage.
|
||||
|
||||
The events and configuration options of this PMU device are described in sysfs,
|
||||
see /sys/bus/event_sources/devices/nvidia_nvlink_c2c0_pmu_<socket-id>.
|
||||
see /sys/bus/event_source/devices/nvidia_nvlink_c2c0_pmu_<socket-id>.
|
||||
|
||||
Example usage:
|
||||
|
||||
@ -86,6 +86,22 @@ Example usage:
|
||||
|
||||
perf stat -a -e nvidia_nvlink_c2c0_pmu_3/event=0x0/
|
||||
|
||||
The NVLink-C2C has two ports that can be connected to one GPU (occupying both
|
||||
ports) or to two GPUs (one GPU per port). The user can use "port" bitmap
|
||||
parameter to select the port(s) to monitor. Each bit represents the port number,
|
||||
e.g. "port=0x1" corresponds to port 0 and "port=0x3" is for port 0 and 1. The
|
||||
PMU will monitor both ports by default if not specified.
|
||||
|
||||
Example for port filtering:
|
||||
|
||||
* Count event id 0x0 from the GPU connected with socket 0 on port 0::
|
||||
|
||||
perf stat -a -e nvidia_nvlink_c2c0_pmu_0/event=0x0,port=0x1/
|
||||
|
||||
* Count event id 0x0 from the GPUs connected with socket 0 on port 0 and port 1::
|
||||
|
||||
perf stat -a -e nvidia_nvlink_c2c0_pmu_0/event=0x0,port=0x3/
|
||||
|
||||
NVLink-C2C1 PMU
|
||||
-------------------
|
||||
|
||||
@ -96,7 +112,7 @@ Please see :ref:`NVIDIA_Uncore_PMU_Traffic_Coverage_Section` for more info about
|
||||
the PMU traffic coverage.
|
||||
|
||||
The events and configuration options of this PMU device are described in sysfs,
|
||||
see /sys/bus/event_sources/devices/nvidia_nvlink_c2c1_pmu_<socket-id>.
|
||||
see /sys/bus/event_source/devices/nvidia_nvlink_c2c1_pmu_<socket-id>.
|
||||
|
||||
Example usage:
|
||||
|
||||
@ -116,6 +132,22 @@ Example usage:
|
||||
|
||||
perf stat -a -e nvidia_nvlink_c2c1_pmu_3/event=0x0/
|
||||
|
||||
The NVLink-C2C has two ports that can be connected to one GPU (occupying both
|
||||
ports) or to two GPUs (one GPU per port). The user can use "port" bitmap
|
||||
parameter to select the port(s) to monitor. Each bit represents the port number,
|
||||
e.g. "port=0x1" corresponds to port 0 and "port=0x3" is for port 0 and 1. The
|
||||
PMU will monitor both ports by default if not specified.
|
||||
|
||||
Example for port filtering:
|
||||
|
||||
* Count event id 0x0 from the GPU connected with socket 0 on port 0::
|
||||
|
||||
perf stat -a -e nvidia_nvlink_c2c1_pmu_0/event=0x0,port=0x1/
|
||||
|
||||
* Count event id 0x0 from the GPUs connected with socket 0 on port 0 and port 1::
|
||||
|
||||
perf stat -a -e nvidia_nvlink_c2c1_pmu_0/event=0x0,port=0x3/
|
||||
|
||||
CNVLink PMU
|
||||
---------------
|
||||
|
||||
@ -125,13 +157,14 @@ to local memory. For PCIE traffic, this PMU captures read and relaxed ordered
|
||||
for more info about the PMU traffic coverage.
|
||||
|
||||
The events and configuration options of this PMU device are described in sysfs,
|
||||
see /sys/bus/event_sources/devices/nvidia_cnvlink_pmu_<socket-id>.
|
||||
see /sys/bus/event_source/devices/nvidia_cnvlink_pmu_<socket-id>.
|
||||
|
||||
Each SoC socket can be connected to one or more sockets via CNVLink. The user can
|
||||
use "rem_socket" bitmap parameter to select the remote socket(s) to monitor.
|
||||
Each bit represents the socket number, e.g. "rem_socket=0xE" corresponds to
|
||||
socket 1 to 3.
|
||||
/sys/bus/event_sources/devices/nvidia_cnvlink_pmu_<socket-id>/format/rem_socket
|
||||
socket 1 to 3. The PMU will monitor all remote sockets by default if not
|
||||
specified.
|
||||
/sys/bus/event_source/devices/nvidia_cnvlink_pmu_<socket-id>/format/rem_socket
|
||||
shows the valid bits that can be set in the "rem_socket" parameter.
|
||||
|
||||
The PMU can not distinguish the remote traffic initiator, therefore it does not
|
||||
@ -165,12 +198,13 @@ local/remote memory. Please see :ref:`NVIDIA_Uncore_PMU_Traffic_Coverage_Section
|
||||
for more info about the PMU traffic coverage.
|
||||
|
||||
The events and configuration options of this PMU device are described in sysfs,
|
||||
see /sys/bus/event_sources/devices/nvidia_pcie_pmu_<socket-id>.
|
||||
see /sys/bus/event_source/devices/nvidia_pcie_pmu_<socket-id>.
|
||||
|
||||
Each SoC socket can support multiple root ports. The user can use
|
||||
"root_port" bitmap parameter to select the port(s) to monitor, i.e.
|
||||
"root_port=0xF" corresponds to root port 0 to 3.
|
||||
/sys/bus/event_sources/devices/nvidia_pcie_pmu_<socket-id>/format/root_port
|
||||
"root_port=0xF" corresponds to root port 0 to 3. The PMU will monitor all root
|
||||
ports by default if not specified.
|
||||
/sys/bus/event_source/devices/nvidia_pcie_pmu_<socket-id>/format/root_port
|
||||
shows the valid bits that can be set in the "root_port" parameter.
|
||||
|
||||
Example usage:
|
||||
|
||||
@ -251,9 +251,7 @@ performance supported in `AMD CPPC Performance Capability <perf_cap_>`_).
|
||||
In some ASICs, the highest CPPC performance is not the one in the ``_CPC``
|
||||
table, so we need to expose it to sysfs. If boost is not active, but
|
||||
still supported, this maximum frequency will be larger than the one in
|
||||
``cpuinfo``. On systems that support preferred core, the driver will have
|
||||
different values for some cores than others and this will reflect the values
|
||||
advertised by the platform at bootup.
|
||||
``cpuinfo``.
|
||||
This attribute is read-only.
|
||||
|
||||
``amd_pstate_lowest_nonlinear_freq``
|
||||
|
||||
@ -248,6 +248,20 @@ are the following:
|
||||
If that frequency cannot be determined, this attribute should not
|
||||
be present.
|
||||
|
||||
``cpuinfo_avg_freq``
|
||||
An average frequency (in kHz) of all CPUs belonging to a given policy,
|
||||
derived from hardware-provided feedback and reported over a time frame
|
||||
spanning at most a few milliseconds.
|
||||
|
||||
This is expected to be based on the frequency the hardware actually runs
|
||||
at and, as such, might require specialised hardware support (such as AMU
|
||||
extension on ARM). If one cannot be determined, this attribute should
|
||||
not be present.
|
||||
|
||||
Note that a failed attempt to retrieve the current frequency for a given
|
||||
CPU(s) will result in an appropriate error, i.e. EAGAIN for a CPU that
|
||||
remains idle (raised on ARM).
|
||||
|
||||
``cpuinfo_max_freq``
|
||||
Maximum possible operating frequency the CPUs belonging to this policy
|
||||
can run at (in kHz).
|
||||
@ -293,7 +307,8 @@ are the following:
|
||||
Some architectures (e.g. ``x86``) may attempt to provide information
|
||||
more precisely reflecting the current CPU frequency through this
|
||||
attribute, but that still may not be the exact current CPU frequency as
|
||||
seen by the hardware at the moment.
|
||||
seen by the hardware at the moment. This behavior though, is only
|
||||
available via the ``CPUFREQ_ARCH_CUR_FREQ`` option.
|
||||
|
||||
``scaling_driver``
|
||||
The scaling driver currently in use.
|
||||
|
||||
@ -269,61 +269,56 @@ Namely, when invoked to select an idle state for a CPU (i.e. an idle state that
|
||||
the CPU will ask the processor hardware to enter), it attempts to predict the
|
||||
idle duration and uses the predicted value for idle state selection.
|
||||
|
||||
It first obtains the time until the closest timer event with the assumption
|
||||
that the scheduler tick will be stopped. That time, referred to as the *sleep
|
||||
length* in what follows, is the upper bound on the time before the next CPU
|
||||
wakeup. It is used to determine the sleep length range, which in turn is needed
|
||||
to get the sleep length correction factor.
|
||||
It first uses a simple pattern recognition algorithm to obtain a preliminary
|
||||
idle duration prediction. Namely, it saves the last 8 observed idle duration
|
||||
values and, when predicting the idle duration next time, it computes the average
|
||||
and variance of them. If the variance is small (smaller than 400 square
|
||||
milliseconds) or it is small relative to the average (the average is greater
|
||||
than 6 times the standard deviation), the average is regarded as the "typical
|
||||
interval" value. Otherwise, either the longest or the shortest (depending on
|
||||
which one is farther from the average) of the saved observed idle duration
|
||||
values is discarded and the computation is repeated for the remaining ones.
|
||||
|
||||
The ``menu`` governor maintains two arrays of sleep length correction factors.
|
||||
One of them is used when tasks previously running on the given CPU are waiting
|
||||
for some I/O operations to complete and the other one is used when that is not
|
||||
the case. Each array contains several correction factor values that correspond
|
||||
to different sleep length ranges organized so that each range represented in the
|
||||
array is approximately 10 times wider than the previous one.
|
||||
Again, if the variance of them is small (in the above sense), the average is
|
||||
taken as the "typical interval" value and so on, until either the "typical
|
||||
interval" is determined or too many data points are disregarded. In the latter
|
||||
case, if the size of the set of data points still under consideration is
|
||||
sufficiently large, the next idle duration is not likely to be above the largest
|
||||
idle duration value still in that set, so that value is taken as the predicted
|
||||
next idle duration. Finally, if the set of data points still under
|
||||
consideration is too small, no prediction is made.
|
||||
|
||||
If the preliminary prediction of the next idle duration computed this way is
|
||||
long enough, the governor obtains the time until the closest timer event with
|
||||
the assumption that the scheduler tick will be stopped. That time, referred to
|
||||
as the *sleep length* in what follows, is the upper bound on the time before the
|
||||
next CPU wakeup. It is used to determine the sleep length range, which in turn
|
||||
is needed to get the sleep length correction factor.
|
||||
|
||||
The ``menu`` governor maintains an array containing several correction factor
|
||||
values that correspond to different sleep length ranges organized so that each
|
||||
range represented in the array is approximately 10 times wider than the previous
|
||||
one.
|
||||
|
||||
The correction factor for the given sleep length range (determined before
|
||||
selecting the idle state for the CPU) is updated after the CPU has been woken
|
||||
up and the closer the sleep length is to the observed idle duration, the closer
|
||||
to 1 the correction factor becomes (it must fall between 0 and 1 inclusive).
|
||||
The sleep length is multiplied by the correction factor for the range that it
|
||||
falls into to obtain the first approximation of the predicted idle duration.
|
||||
falls into to obtain an approximation of the predicted idle duration that is
|
||||
compared to the "typical interval" determined previously and the minimum of
|
||||
the two is taken as the final idle duration prediction.
|
||||
|
||||
Next, the governor uses a simple pattern recognition algorithm to refine its
|
||||
idle duration prediction. Namely, it saves the last 8 observed idle duration
|
||||
values and, when predicting the idle duration next time, it computes the average
|
||||
and variance of them. If the variance is small (smaller than 400 square
|
||||
milliseconds) or it is small relative to the average (the average is greater
|
||||
that 6 times the standard deviation), the average is regarded as the "typical
|
||||
interval" value. Otherwise, the longest of the saved observed idle duration
|
||||
values is discarded and the computation is repeated for the remaining ones.
|
||||
Again, if the variance of them is small (in the above sense), the average is
|
||||
taken as the "typical interval" value and so on, until either the "typical
|
||||
interval" is determined or too many data points are disregarded, in which case
|
||||
the "typical interval" is assumed to equal "infinity" (the maximum unsigned
|
||||
integer value). The "typical interval" computed this way is compared with the
|
||||
sleep length multiplied by the correction factor and the minimum of the two is
|
||||
taken as the predicted idle duration.
|
||||
|
||||
Then, the governor computes an extra latency limit to help "interactive"
|
||||
workloads. It uses the observation that if the exit latency of the selected
|
||||
idle state is comparable with the predicted idle duration, the total time spent
|
||||
in that state probably will be very short and the amount of energy to save by
|
||||
entering it will be relatively small, so likely it is better to avoid the
|
||||
overhead related to entering that state and exiting it. Thus selecting a
|
||||
shallower state is likely to be a better option then. The first approximation
|
||||
of the extra latency limit is the predicted idle duration itself which
|
||||
additionally is divided by a value depending on the number of tasks that
|
||||
previously ran on the given CPU and now they are waiting for I/O operations to
|
||||
complete. The result of that division is compared with the latency limit coming
|
||||
from the power management quality of service, or `PM QoS <cpu-pm-qos_>`_,
|
||||
framework and the minimum of the two is taken as the limit for the idle states'
|
||||
exit latency.
|
||||
If the "typical interval" value is small, which means that the CPU is likely
|
||||
to be woken up soon enough, the sleep length computation is skipped as it may
|
||||
be costly and the idle duration is simply predicted to equal the "typical
|
||||
interval" value.
|
||||
|
||||
Now, the governor is ready to walk the list of idle states and choose one of
|
||||
them. For this purpose, it compares the target residency of each state with
|
||||
the predicted idle duration and the exit latency of it with the computed latency
|
||||
limit. It selects the state with the target residency closest to the predicted
|
||||
the predicted idle duration and the exit latency of it with the latency
|
||||
limit coming from the power management quality of service, or `PM QoS <cpu-pm-qos_>`_,
|
||||
framework. It selects the state with the target residency closest to the predicted
|
||||
idle duration, but still below it, and exit latency that does not exceed the
|
||||
limit.
|
||||
|
||||
|
||||
@ -696,6 +696,9 @@ of them have to be prepended with the ``intel_pstate=`` prefix.
|
||||
Use per-logical-CPU P-State limits (see `Coordination of P-state
|
||||
Limits`_ for details).
|
||||
|
||||
``no_cas``
|
||||
Do not enable capacity-aware scheduling (CAS) which is enabled by
|
||||
default on hybrid systems.
|
||||
|
||||
Diagnostics and Tuning
|
||||
======================
|
||||
|
||||
@ -212,17 +212,6 @@ pid>/``).
|
||||
This value defaults to 0.
|
||||
|
||||
|
||||
core_sort_vma
|
||||
=============
|
||||
|
||||
The default coredump writes VMAs in address order. By setting
|
||||
``core_sort_vma`` to 1, VMAs will be written from smallest size
|
||||
to largest size. This is known to break at least elfutils, but
|
||||
can be handy when dealing with very large (and truncated)
|
||||
coredumps where the more useful debugging details are included
|
||||
in the smaller VMAs.
|
||||
|
||||
|
||||
core_uses_pid
|
||||
=============
|
||||
|
||||
@ -1546,6 +1535,13 @@ constant ``FUTEX_TID_MASK`` (0x3fffffff).
|
||||
If a value outside of this range is written to ``threads-max`` an
|
||||
``EINVAL`` error occurs.
|
||||
|
||||
timer_migration
|
||||
===============
|
||||
|
||||
When set to a non-zero value, attempt to migrate timers away from idle cpus to
|
||||
allow them to remain in low power states longer.
|
||||
|
||||
Default is set (1).
|
||||
|
||||
traceoff_on_warning
|
||||
===================
|
||||
|
||||
@ -101,6 +101,7 @@ Bit Log Number Reason that got the kernel tainted
|
||||
16 _/X 65536 auxiliary taint, defined for and used by distros
|
||||
17 _/T 131072 kernel was built with the struct randomization plugin
|
||||
18 _/N 262144 an in-kernel test has been run
|
||||
19 _/J 524288 userspace used a mutating debug operation in fwctl
|
||||
=== === ====== ========================================================
|
||||
|
||||
Note: The character ``_`` is representing a blank in this table to make reading
|
||||
@ -184,3 +185,7 @@ More detailed explanation for tainting
|
||||
build time.
|
||||
|
||||
18) ``N`` if an in-kernel test, such as a KUnit test, has been run.
|
||||
|
||||
19) ``J`` if userspace opened /dev/fwctl/* and performed a FWCTL_RPC_DEBUG_WRITE
|
||||
to use the device's debugging features. Device debugging features could
|
||||
cause the device to malfunction in undefined ways.
|
||||
|
||||
@ -28,7 +28,7 @@ should be a userspace tool that handles all the low-level details, keeps
|
||||
a database of the authorized devices and prompts users for new connections.
|
||||
|
||||
More details about the sysfs interface for Thunderbolt devices can be
|
||||
found in ``Documentation/ABI/testing/sysfs-bus-thunderbolt``.
|
||||
found in Documentation/ABI/testing/sysfs-bus-thunderbolt.
|
||||
|
||||
Those users who just want to connect any device without any sort of
|
||||
manual work can add following line to
|
||||
|
||||
@ -83,7 +83,7 @@ scripts/ver_linux is a good way to check if your system already has
|
||||
the necessary tools::
|
||||
|
||||
sudo apt-get install build-essential flex bison yacc
|
||||
sudo apt install libelf-dev systemtap-sdt-dev libaudit-dev libslang2-dev libperl-dev libdw-dev
|
||||
sudo apt install libelf-dev systemtap-sdt-dev libslang2-dev libperl-dev libdw-dev
|
||||
|
||||
cscope is a good tool to browse kernel sources. Let's install it now::
|
||||
|
||||
|
||||
69
Documentation/arch/arm64/arm-cca.rst
Normal file
69
Documentation/arch/arm64/arm-cca.rst
Normal file
@ -0,0 +1,69 @@
|
||||
.. SPDX-License-Identifier: GPL-2.0
|
||||
|
||||
=====================================
|
||||
Arm Confidential Compute Architecture
|
||||
=====================================
|
||||
|
||||
Arm systems that support the Realm Management Extension (RME) contain
|
||||
hardware to allow a VM guest to be run in a way which protects the code
|
||||
and data of the guest from the hypervisor. It extends the older "two
|
||||
world" model (Normal and Secure World) into four worlds: Normal, Secure,
|
||||
Root and Realm. Linux can then also be run as a guest to a monitor
|
||||
running in the Realm world.
|
||||
|
||||
The monitor running in the Realm world is known as the Realm Management
|
||||
Monitor (RMM) and implements the Realm Management Monitor
|
||||
specification[1]. The monitor acts a bit like a hypervisor (e.g. it runs
|
||||
in EL2 and manages the stage 2 page tables etc of the guests running in
|
||||
Realm world), however much of the control is handled by a hypervisor
|
||||
running in the Normal World. The Normal World hypervisor uses the Realm
|
||||
Management Interface (RMI) defined by the RMM specification to request
|
||||
the RMM to perform operations (e.g. mapping memory or executing a vCPU).
|
||||
|
||||
The RMM defines an environment for guests where the address space (IPA)
|
||||
is split into two. The lower half is protected - any memory that is
|
||||
mapped in this half cannot be seen by the Normal World and the RMM
|
||||
restricts what operations the Normal World can perform on this memory
|
||||
(e.g. the Normal World cannot replace pages in this region without the
|
||||
guest's cooperation). The upper half is shared, the Normal World is free
|
||||
to make changes to the pages in this region, and is able to emulate MMIO
|
||||
devices in this region too.
|
||||
|
||||
A guest running in a Realm may also communicate with the RMM using the
|
||||
Realm Services Interface (RSI) to request changes in its environment or
|
||||
to perform attestation about its environment. In particular it may
|
||||
request that areas of the protected address space are transitioned
|
||||
between 'RAM' and 'EMPTY' (in either direction). This allows a Realm
|
||||
guest to give up memory to be returned to the Normal World, or to
|
||||
request new memory from the Normal World. Without an explicit request
|
||||
from the Realm guest the RMM will otherwise prevent the Normal World
|
||||
from making these changes.
|
||||
|
||||
Linux as a Realm Guest
|
||||
----------------------
|
||||
|
||||
To run Linux as a guest within a Realm, the following must be provided
|
||||
either by the VMM or by a `boot loader` run in the Realm before Linux:
|
||||
|
||||
* All protected RAM described to Linux (by DT or ACPI) must be marked
|
||||
RIPAS RAM before handing control over to Linux.
|
||||
|
||||
* MMIO devices must be either unprotected (e.g. emulated by the Normal
|
||||
World) or marked RIPAS DEV.
|
||||
|
||||
* MMIO devices emulated by the Normal World and used very early in boot
|
||||
(specifically earlycon) must be specified in the upper half of IPA.
|
||||
For earlycon this can be done by specifying the address on the
|
||||
command line, e.g. with an IPA size of 33 bits and the base address
|
||||
of the emulated UART at 0x1000000: ``earlycon=uart,mmio,0x101000000``
|
||||
|
||||
* Linux will use bounce buffers for communicating with unprotected
|
||||
devices. It will transition some protected memory to RIPAS EMPTY and
|
||||
expect to be able to access unprotected pages at the same IPA address
|
||||
but with the highest valid IPA bit set. The expectation is that the
|
||||
VMM will remove the physical pages from the protected mapping and
|
||||
provide those pages as unprotected pages.
|
||||
|
||||
References
|
||||
----------
|
||||
[1] https://developer.arm.com/documentation/den0137/
|
||||
@ -153,3 +153,11 @@ asymmetric system, a broken guest at EL1 could still attempt to execute
|
||||
mode will return to host userspace with an ``exit_reason`` of
|
||||
``KVM_EXIT_FAIL_ENTRY`` and will remain non-runnable until successfully
|
||||
re-initialised by a subsequent ``KVM_ARM_VCPU_INIT`` operation.
|
||||
|
||||
NOHZ FULL
|
||||
---------
|
||||
|
||||
To avoid perturbing an adaptive-ticks CPU (specified using
|
||||
``nohz_full=``) when a 32-bit task is forcefully migrated, these CPUs
|
||||
are treated as 64-bit-only when support for asymmetric 32-bit systems
|
||||
is enabled.
|
||||
|
||||
@ -41,6 +41,9 @@ to automatically locate and size all RAM, or it may use knowledge of
|
||||
the RAM in the machine, or any other method the boot loader designer
|
||||
sees fit.)
|
||||
|
||||
For Arm Confidential Compute Realms this includes ensuring that all
|
||||
protected RAM has a Realm IPA state (RIPAS) of "RAM".
|
||||
|
||||
|
||||
2. Setup the device tree
|
||||
-------------------------
|
||||
@ -285,6 +288,12 @@ Before jumping into the kernel, the following conditions must be met:
|
||||
|
||||
- SCR_EL3.FGTEn (bit 27) must be initialised to 0b1.
|
||||
|
||||
For CPUs with the Fine Grained Traps 2 (FEAT_FGT2) extension present:
|
||||
|
||||
- If EL3 is present and the kernel is entered at EL2:
|
||||
|
||||
- SCR_EL3.FGTEn2 (bit 59) must be initialised to 0b1.
|
||||
|
||||
For CPUs with support for HCRX_EL2 (FEAT_HCX) present:
|
||||
|
||||
- If EL3 is present and the kernel is entered at EL2:
|
||||
@ -379,12 +388,31 @@ Before jumping into the kernel, the following conditions must be met:
|
||||
|
||||
- SMCR_EL2.EZT0 (bit 30) must be initialised to 0b1.
|
||||
|
||||
For CPUs with the Performance Monitors Extension (FEAT_PMUv3p9):
|
||||
|
||||
- If EL3 is present:
|
||||
|
||||
- MDCR_EL3.EnPM2 (bit 7) must be initialised to 0b1.
|
||||
|
||||
- If the kernel is entered at EL1 and EL2 is present:
|
||||
|
||||
- HDFGRTR2_EL2.nPMICNTR_EL0 (bit 2) must be initialised to 0b1.
|
||||
- HDFGRTR2_EL2.nPMICFILTR_EL0 (bit 3) must be initialised to 0b1.
|
||||
- HDFGRTR2_EL2.nPMUACR_EL1 (bit 4) must be initialised to 0b1.
|
||||
|
||||
- HDFGWTR2_EL2.nPMICNTR_EL0 (bit 2) must be initialised to 0b1.
|
||||
- HDFGWTR2_EL2.nPMICFILTR_EL0 (bit 3) must be initialised to 0b1.
|
||||
- HDFGWTR2_EL2.nPMUACR_EL1 (bit 4) must be initialised to 0b1.
|
||||
|
||||
For CPUs with Memory Copy and Memory Set instructions (FEAT_MOPS):
|
||||
|
||||
- If the kernel is entered at EL1 and EL2 is present:
|
||||
|
||||
- HCRX_EL2.MSCEn (bit 11) must be initialised to 0b1.
|
||||
|
||||
- HCRX_EL2.MCE2 (bit 10) must be initialised to 0b1 and the hypervisor
|
||||
must handle MOPS exceptions as described in :ref:`arm64_mops_hyp`.
|
||||
|
||||
For CPUs with the Extended Translation Control Register feature (FEAT_TCR2):
|
||||
|
||||
- If EL3 is present:
|
||||
@ -411,6 +439,38 @@ Before jumping into the kernel, the following conditions must be met:
|
||||
|
||||
- HFGWTR_EL2.nPIRE0_EL1 (bit 57) must be initialised to 0b1.
|
||||
|
||||
- For CPUs with Guarded Control Stacks (FEAT_GCS):
|
||||
|
||||
- GCSCR_EL1 must be initialised to 0.
|
||||
|
||||
- GCSCRE0_EL1 must be initialised to 0.
|
||||
|
||||
- If EL3 is present:
|
||||
|
||||
- SCR_EL3.GCSEn (bit 39) must be initialised to 0b1.
|
||||
|
||||
- If EL2 is present:
|
||||
|
||||
- GCSCR_EL2 must be initialised to 0.
|
||||
|
||||
- If the kernel is entered at EL1 and EL2 is present:
|
||||
|
||||
- HCRX_EL2.GCSEn must be initialised to 0b1.
|
||||
|
||||
- HFGITR_EL2.nGCSEPP (bit 59) must be initialised to 0b1.
|
||||
|
||||
- HFGITR_EL2.nGCSSTR_EL1 (bit 58) must be initialised to 0b1.
|
||||
|
||||
- HFGITR_EL2.nGCSPUSHM_EL1 (bit 57) must be initialised to 0b1.
|
||||
|
||||
- HFGRTR_EL2.nGCS_EL1 (bit 53) must be initialised to 0b1.
|
||||
|
||||
- HFGRTR_EL2.nGCS_EL0 (bit 52) must be initialised to 0b1.
|
||||
|
||||
- HFGWTR_EL2.nGCS_EL1 (bit 53) must be initialised to 0b1.
|
||||
|
||||
- HFGWTR_EL2.nGCS_EL0 (bit 52) must be initialised to 0b1.
|
||||
|
||||
The requirements described above for CPU mode, caches, MMUs, architected
|
||||
timers, coherency and system registers apply to all CPUs. All CPUs must
|
||||
enter the kernel in the same exception level. Where the values documented
|
||||
|
||||
@ -152,6 +152,8 @@ infrastructure:
|
||||
+------------------------------+---------+---------+
|
||||
| DIT | [51-48] | y |
|
||||
+------------------------------+---------+---------+
|
||||
| MPAM | [43-40] | n |
|
||||
+------------------------------+---------+---------+
|
||||
| SVE | [35-32] | y |
|
||||
+------------------------------+---------+---------+
|
||||
| GIC | [27-24] | n |
|
||||
|
||||
@ -16,9 +16,9 @@ architected discovery mechanism available to userspace code at EL0. The
|
||||
kernel exposes the presence of these features to userspace through a set
|
||||
of flags called hwcaps, exposed in the auxiliary vector.
|
||||
|
||||
Userspace software can test for features by acquiring the AT_HWCAP or
|
||||
AT_HWCAP2 entry of the auxiliary vector, and testing whether the relevant
|
||||
flags are set, e.g.::
|
||||
Userspace software can test for features by acquiring the AT_HWCAP,
|
||||
AT_HWCAP2 or AT_HWCAP3 entry of the auxiliary vector, and testing
|
||||
whether the relevant flags are set, e.g.::
|
||||
|
||||
bool floating_point_is_present(void)
|
||||
{
|
||||
@ -170,26 +170,86 @@ HWCAP_PACG
|
||||
ID_AA64ISAR1_EL1.GPI == 0b0001, as described by
|
||||
Documentation/arch/arm64/pointer-authentication.rst.
|
||||
|
||||
HWCAP_GCS
|
||||
Functionality implied by ID_AA64PFR1_EL1.GCS == 0b1, as
|
||||
described by Documentation/arch/arm64/gcs.rst.
|
||||
|
||||
HWCAP_CMPBR
|
||||
Functionality implied by ID_AA64ISAR2_EL1.CSSC == 0b0010.
|
||||
|
||||
HWCAP_FPRCVT
|
||||
Functionality implied by ID_AA64ISAR3_EL1.FPRCVT == 0b0001.
|
||||
|
||||
HWCAP_F8MM8
|
||||
Functionality implied by ID_AA64FPFR0_EL1.F8MM8 == 0b0001.
|
||||
|
||||
HWCAP_F8MM4
|
||||
Functionality implied by ID_AA64FPFR0_EL1.F8MM4 == 0b0001.
|
||||
|
||||
HWCAP_SVE_F16MM
|
||||
Functionality implied by ID_AA64PFR0_EL1.SVE == 0b0001 and
|
||||
ID_AA64ZFR0_EL1.F16MM == 0b0001.
|
||||
|
||||
HWCAP_SVE_ELTPERM
|
||||
Functionality implied by ID_AA64PFR0_EL1.SVE == 0b0001 and
|
||||
ID_AA64ZFR0_EL1.ELTPERM == 0b0001.
|
||||
|
||||
HWCAP_SVE_AES2
|
||||
Functionality implied by ID_AA64PFR0_EL1.SVE == 0b0001 and
|
||||
ID_AA64ZFR0_EL1.AES == 0b0011.
|
||||
|
||||
HWCAP_SVE_BFSCALE
|
||||
Functionality implied by ID_AA64PFR0_EL1.SVE == 0b0001 and
|
||||
ID_AA64ZFR0_EL1.B16B16 == 0b0010.
|
||||
|
||||
HWCAP_SVE2P2
|
||||
Functionality implied by ID_AA64PFR0_EL1.SVE == 0b0001 and
|
||||
ID_AA64ZFR0_EL1.SVEver == 0b0011.
|
||||
|
||||
HWCAP_SME2P2
|
||||
Functionality implied by ID_AA64SMFR0_EL1.SMEver == 0b0011.
|
||||
|
||||
HWCAP_SME_SBITPERM
|
||||
Functionality implied by ID_AA64SMFR0_EL1.SBitPerm == 0b1.
|
||||
|
||||
HWCAP_SME_AES
|
||||
Functionality implied by ID_AA64SMFR0_EL1.AES == 0b1.
|
||||
|
||||
HWCAP_SME_SFEXPA
|
||||
Functionality implied by ID_AA64SMFR0_EL1.SFEXPA == 0b1.
|
||||
|
||||
HWCAP_SME_STMOP
|
||||
Functionality implied by ID_AA64SMFR0_EL1.STMOP == 0b1.
|
||||
|
||||
HWCAP_SME_SMOP4
|
||||
Functionality implied by ID_AA64SMFR0_EL1.SMOP4 == 0b1.
|
||||
|
||||
HWCAP2_DCPODP
|
||||
Functionality implied by ID_AA64ISAR1_EL1.DPB == 0b0010.
|
||||
|
||||
HWCAP2_SVE2
|
||||
Functionality implied by ID_AA64ZFR0_EL1.SVEver == 0b0001.
|
||||
Functionality implied by ID_AA64PFR0_EL1.SVE == 0b0001 and
|
||||
ID_AA64ZFR0_EL1.SVEver == 0b0001.
|
||||
|
||||
HWCAP2_SVEAES
|
||||
Functionality implied by ID_AA64ZFR0_EL1.AES == 0b0001.
|
||||
Functionality implied by ID_AA64PFR0_EL1.SVE == 0b0001 and
|
||||
ID_AA64ZFR0_EL1.AES == 0b0001.
|
||||
|
||||
HWCAP2_SVEPMULL
|
||||
Functionality implied by ID_AA64ZFR0_EL1.AES == 0b0010.
|
||||
Functionality implied by ID_AA64PFR0_EL1.SVE == 0b0001 and
|
||||
ID_AA64ZFR0_EL1.AES == 0b0010.
|
||||
|
||||
HWCAP2_SVEBITPERM
|
||||
Functionality implied by ID_AA64ZFR0_EL1.BitPerm == 0b0001.
|
||||
Functionality implied by ID_AA64PFR0_EL1.SVE == 0b0001 and
|
||||
ID_AA64ZFR0_EL1.BitPerm == 0b0001.
|
||||
|
||||
HWCAP2_SVESHA3
|
||||
Functionality implied by ID_AA64ZFR0_EL1.SHA3 == 0b0001.
|
||||
Functionality implied by ID_AA64PFR0_EL1.SVE == 0b0001 and
|
||||
ID_AA64ZFR0_EL1.SHA3 == 0b0001.
|
||||
|
||||
HWCAP2_SVESM4
|
||||
Functionality implied by ID_AA64ZFR0_EL1.SM4 == 0b0001.
|
||||
Functionality implied by ID_AA64PFR0_EL1.SVE == 0b0001 and
|
||||
ID_AA64ZFR0_EL1.SM4 == 0b0001.
|
||||
|
||||
HWCAP2_FLAGM2
|
||||
Functionality implied by ID_AA64ISAR0_EL1.TS == 0b0010.
|
||||
@ -198,16 +258,20 @@ HWCAP2_FRINT
|
||||
Functionality implied by ID_AA64ISAR1_EL1.FRINTTS == 0b0001.
|
||||
|
||||
HWCAP2_SVEI8MM
|
||||
Functionality implied by ID_AA64ZFR0_EL1.I8MM == 0b0001.
|
||||
Functionality implied by ID_AA64PFR0_EL1.SVE == 0b0001 and
|
||||
ID_AA64ZFR0_EL1.I8MM == 0b0001.
|
||||
|
||||
HWCAP2_SVEF32MM
|
||||
Functionality implied by ID_AA64ZFR0_EL1.F32MM == 0b0001.
|
||||
Functionality implied by ID_AA64PFR0_EL1.SVE == 0b0001 and
|
||||
ID_AA64ZFR0_EL1.F32MM == 0b0001.
|
||||
|
||||
HWCAP2_SVEF64MM
|
||||
Functionality implied by ID_AA64ZFR0_EL1.F64MM == 0b0001.
|
||||
Functionality implied by ID_AA64PFR0_EL1.SVE == 0b0001 and
|
||||
ID_AA64ZFR0_EL1.F64MM == 0b0001.
|
||||
|
||||
HWCAP2_SVEBF16
|
||||
Functionality implied by ID_AA64ZFR0_EL1.BF16 == 0b0001.
|
||||
Functionality implied by ID_AA64PFR0_EL1.SVE == 0b0001 and
|
||||
ID_AA64ZFR0_EL1.BF16 == 0b0001.
|
||||
|
||||
HWCAP2_I8MM
|
||||
Functionality implied by ID_AA64ISAR1_EL1.I8MM == 0b0001.
|
||||
@ -273,7 +337,8 @@ HWCAP2_EBF16
|
||||
Functionality implied by ID_AA64ISAR1_EL1.BF16 == 0b0010.
|
||||
|
||||
HWCAP2_SVE_EBF16
|
||||
Functionality implied by ID_AA64ZFR0_EL1.BF16 == 0b0010.
|
||||
Functionality implied by ID_AA64PFR0_EL1.SVE == 0b0001 and
|
||||
ID_AA64ZFR0_EL1.BF16 == 0b0010.
|
||||
|
||||
HWCAP2_CSSC
|
||||
Functionality implied by ID_AA64ISAR2_EL1.CSSC == 0b0001.
|
||||
@ -282,7 +347,8 @@ HWCAP2_RPRFM
|
||||
Functionality implied by ID_AA64ISAR2_EL1.RPRFM == 0b0001.
|
||||
|
||||
HWCAP2_SVE2P1
|
||||
Functionality implied by ID_AA64ZFR0_EL1.SVEver == 0b0010.
|
||||
Functionality implied by ID_AA64PFR0_EL1.SVE == 0b0001 and
|
||||
ID_AA64ZFR0_EL1.SVEver == 0b0010.
|
||||
|
||||
HWCAP2_SME2
|
||||
Functionality implied by ID_AA64SMFR0_EL1.SMEver == 0b0001.
|
||||
@ -309,7 +375,8 @@ HWCAP2_HBC
|
||||
Functionality implied by ID_AA64ISAR2_EL1.BC == 0b0001.
|
||||
|
||||
HWCAP2_SVE_B16B16
|
||||
Functionality implied by ID_AA64ZFR0_EL1.B16B16 == 0b0001.
|
||||
Functionality implied by ID_AA64PFR0_EL1.SVE == 0b0001 and
|
||||
ID_AA64ZFR0_EL1.B16B16 == 0b0001.
|
||||
|
||||
HWCAP2_LRCPC3
|
||||
Functionality implied by ID_AA64ISAR1_EL1.LRCPC == 0b0011.
|
||||
|
||||
227
Documentation/arch/arm64/gcs.rst
Normal file
227
Documentation/arch/arm64/gcs.rst
Normal file
@ -0,0 +1,227 @@
|
||||
===============================================
|
||||
Guarded Control Stack support for AArch64 Linux
|
||||
===============================================
|
||||
|
||||
This document outlines briefly the interface provided to userspace by Linux in
|
||||
order to support use of the ARM Guarded Control Stack (GCS) feature.
|
||||
|
||||
This is an outline of the most important features and issues only and not
|
||||
intended to be exhaustive.
|
||||
|
||||
|
||||
|
||||
1. General
|
||||
-----------
|
||||
|
||||
* GCS is an architecture feature intended to provide greater protection
|
||||
against return oriented programming (ROP) attacks and to simplify the
|
||||
implementation of features that need to collect stack traces such as
|
||||
profiling.
|
||||
|
||||
* When GCS is enabled a separate guarded control stack is maintained by the
|
||||
PE which is writeable only through specific GCS operations. This
|
||||
stores the call stack only, when a procedure call instruction is
|
||||
performed the current PC is pushed onto the GCS and on RET the
|
||||
address in the LR is verified against that on the top of the GCS.
|
||||
|
||||
* When active the current GCS pointer is stored in the system register
|
||||
GCSPR_EL0. This is readable by userspace but can only be updated
|
||||
via specific GCS instructions.
|
||||
|
||||
* The architecture provides instructions for switching between guarded
|
||||
control stacks with checks to ensure that the new stack is a valid
|
||||
target for switching.
|
||||
|
||||
* The functionality of GCS is similar to that provided by the x86 Shadow
|
||||
Stack feature, due to sharing of userspace interfaces the ABI refers to
|
||||
shadow stacks rather than GCS.
|
||||
|
||||
* Support for GCS is reported to userspace via HWCAP_GCS in the aux vector
|
||||
AT_HWCAP entry.
|
||||
|
||||
* GCS is enabled per thread. While there is support for disabling GCS
|
||||
at runtime this should be done with great care.
|
||||
|
||||
* GCS memory access faults are reported as normal memory access faults.
|
||||
|
||||
* GCS specific errors (those reported with EC 0x2d) will be reported as
|
||||
SIGSEGV with a si_code of SEGV_CPERR (control protection error).
|
||||
|
||||
* GCS is supported only for AArch64.
|
||||
|
||||
* On systems where GCS is supported GCSPR_EL0 is always readable by EL0
|
||||
regardless of the GCS configuration for the thread.
|
||||
|
||||
* The architecture supports enabling GCS without verifying that return values
|
||||
in LR match those in the GCS, the LR will be ignored. This is not supported
|
||||
by Linux.
|
||||
|
||||
|
||||
|
||||
2. Enabling and disabling Guarded Control Stacks
|
||||
-------------------------------------------------
|
||||
|
||||
* GCS is enabled and disabled for a thread via the PR_SET_SHADOW_STACK_STATUS
|
||||
prctl(), this takes a single flags argument specifying which GCS features
|
||||
should be used.
|
||||
|
||||
* When set the PR_SHADOW_STACK_ENABLE flag allocates a Guarded Control Stack
|
||||
and enables GCS for the thread, enabling the functionality controlled by
|
||||
GCSCRE0_EL1.{nTR, RVCHKEN, PCRSEL}.
|
||||
|
||||
* When set the PR_SHADOW_STACK_PUSH flag enables the functionality controlled
|
||||
by GCSCRE0_EL1.PUSHMEn, allowing explicit GCS pushes.
|
||||
|
||||
* When set the PR_SHADOW_STACK_WRITE flag enables the functionality controlled
|
||||
by GCSCRE0_EL1.STREn, allowing explicit stores to the Guarded Control Stack.
|
||||
|
||||
* Any unknown flags will cause PR_SET_SHADOW_STACK_STATUS to return -EINVAL.
|
||||
|
||||
* PR_LOCK_SHADOW_STACK_STATUS is passed a bitmask of features with the same
|
||||
values as used for PR_SET_SHADOW_STACK_STATUS. Any future changes to the
|
||||
status of the specified GCS mode bits will be rejected.
|
||||
|
||||
* PR_LOCK_SHADOW_STACK_STATUS allows any bit to be locked, this allows
|
||||
userspace to prevent changes to any future features.
|
||||
|
||||
* There is no support for a process to remove a lock that has been set for
|
||||
it.
|
||||
|
||||
* PR_SET_SHADOW_STACK_STATUS and PR_LOCK_SHADOW_STACK_STATUS affect only the
|
||||
thread that called them, any other running threads will be unaffected.
|
||||
|
||||
* New threads inherit the GCS configuration of the thread that created them.
|
||||
|
||||
* GCS is disabled on exec().
|
||||
|
||||
* The current GCS configuration for a thread may be read with the
|
||||
PR_GET_SHADOW_STACK_STATUS prctl(), this returns the same flags that
|
||||
are passed to PR_SET_SHADOW_STACK_STATUS.
|
||||
|
||||
* If GCS is disabled for a thread after having previously been enabled then
|
||||
the stack will remain allocated for the lifetime of the thread. At present
|
||||
any attempt to reenable GCS for the thread will be rejected, this may be
|
||||
revisited in future.
|
||||
|
||||
* It should be noted that since enabling GCS will result in GCS becoming
|
||||
active immediately it is not normally possible to return from the function
|
||||
that invoked the prctl() that enabled GCS. It is expected that the normal
|
||||
usage will be that GCS is enabled very early in execution of a program.
|
||||
|
||||
|
||||
|
||||
3. Allocation of Guarded Control Stacks
|
||||
----------------------------------------
|
||||
|
||||
* When GCS is enabled for a thread a new Guarded Control Stack will be
|
||||
allocated for it of half the standard stack size or 2 gigabytes,
|
||||
whichever is smaller.
|
||||
|
||||
* When a new thread is created by a thread which has GCS enabled then a
|
||||
new Guarded Control Stack will be allocated for the new thread with
|
||||
half the size of the standard stack.
|
||||
|
||||
* When a stack is allocated by enabling GCS or during thread creation then
|
||||
the top 8 bytes of the stack will be initialised to 0 and GCSPR_EL0 will
|
||||
be set to point to the address of this 0 value, this can be used to
|
||||
detect the top of the stack.
|
||||
|
||||
* Additional Guarded Control Stacks can be allocated using the
|
||||
map_shadow_stack() system call.
|
||||
|
||||
* Stacks allocated using map_shadow_stack() can optionally have an end of
|
||||
stack marker and cap placed at the top of the stack. If the flag
|
||||
SHADOW_STACK_SET_TOKEN is specified a cap will be placed on the stack,
|
||||
if SHADOW_STACK_SET_MARKER is not specified the cap will be the top 8
|
||||
bytes of the stack and if it is specified then the cap will be the next
|
||||
8 bytes. While specifying just SHADOW_STACK_SET_MARKER by itself is
|
||||
valid since the marker is all bits 0 it has no observable effect.
|
||||
|
||||
* Stacks allocated using map_shadow_stack() must have a size which is a
|
||||
multiple of 8 bytes larger than 8 bytes and must be 8 bytes aligned.
|
||||
|
||||
* An address can be specified to map_shadow_stack(), if one is provided then
|
||||
it must be aligned to a page boundary.
|
||||
|
||||
* When a thread is freed the Guarded Control Stack initially allocated for
|
||||
that thread will be freed. Note carefully that if the stack has been
|
||||
switched this may not be the stack currently in use by the thread.
|
||||
|
||||
|
||||
4. Signal handling
|
||||
--------------------
|
||||
|
||||
* A new signal frame record gcs_context encodes the current GCS mode and
|
||||
pointer for the interrupted context on signal delivery. This will always
|
||||
be present on systems that support GCS.
|
||||
|
||||
* The record contains a flag field which reports the current GCS configuration
|
||||
for the interrupted context as PR_GET_SHADOW_STACK_STATUS would.
|
||||
|
||||
* The signal handler is run with the same GCS configuration as the interrupted
|
||||
context.
|
||||
|
||||
* When GCS is enabled for the interrupted thread a signal handling specific
|
||||
GCS cap token will be written to the GCS, this is an architectural GCS cap
|
||||
with the token type (bits 0..11) all clear. The GCSPR_EL0 reported in the
|
||||
signal frame will point to this cap token.
|
||||
|
||||
* The signal handler will use the same GCS as the interrupted context.
|
||||
|
||||
* When GCS is enabled on signal entry a frame with the address of the signal
|
||||
return handler will be pushed onto the GCS, allowing return from the signal
|
||||
handler via RET as normal. This will not be reported in the gcs_context in
|
||||
the signal frame.
|
||||
|
||||
|
||||
5. Signal return
|
||||
-----------------
|
||||
|
||||
When returning from a signal handler:
|
||||
|
||||
* If there is a gcs_context record in the signal frame then the GCS flags
|
||||
and GCSPR_EL0 will be restored from that context prior to further
|
||||
validation.
|
||||
|
||||
* If there is no gcs_context record in the signal frame then the GCS
|
||||
configuration will be unchanged.
|
||||
|
||||
* If GCS is enabled on return from a signal handler then GCSPR_EL0 must
|
||||
point to a valid GCS signal cap record, this will be popped from the
|
||||
GCS prior to signal return.
|
||||
|
||||
* If the GCS configuration is locked when returning from a signal then any
|
||||
attempt to change the GCS configuration will be treated as an error. This
|
||||
is true even if GCS was not enabled prior to signal entry.
|
||||
|
||||
* GCS may be disabled via signal return but any attempt to enable GCS via
|
||||
signal return will be rejected.
|
||||
|
||||
|
||||
6. ptrace extensions
|
||||
---------------------
|
||||
|
||||
* A new regset NT_ARM_GCS is defined for use with PTRACE_GETREGSET and
|
||||
PTRACE_SETREGSET.
|
||||
|
||||
* The GCS mode, including enable and disable, may be configured via ptrace.
|
||||
If GCS is enabled via ptrace no new GCS will be allocated for the thread.
|
||||
|
||||
* Configuration via ptrace ignores locking of GCS mode bits.
|
||||
|
||||
|
||||
7. ELF coredump extensions
|
||||
---------------------------
|
||||
|
||||
* NT_ARM_GCS notes will be added to each coredump for each thread of the
|
||||
dumped process. The contents will be equivalent to the data that would
|
||||
have been read if a PTRACE_GETREGSET of the corresponding type were
|
||||
executed for each thread when the coredump was generated.
|
||||
|
||||
|
||||
|
||||
8. /proc extensions
|
||||
--------------------
|
||||
|
||||
* Guarded Control Stack pages will include "ss" in their VmFlags in
|
||||
/proc/<pid>/smaps.
|
||||
@ -10,16 +10,19 @@ ARM64 Architecture
|
||||
acpi_object_usage
|
||||
amu
|
||||
arm-acpi
|
||||
arm-cca
|
||||
asymmetric-32bit
|
||||
booting
|
||||
cpu-feature-registers
|
||||
cpu-hotplug
|
||||
elf_hwcaps
|
||||
gcs
|
||||
hugetlbpage
|
||||
kdump
|
||||
legacy_instructions
|
||||
memory
|
||||
memory-tagging-extension
|
||||
mops
|
||||
perf
|
||||
pointer-authentication
|
||||
ptdump
|
||||
|
||||
44
Documentation/arch/arm64/mops.rst
Normal file
44
Documentation/arch/arm64/mops.rst
Normal file
@ -0,0 +1,44 @@
|
||||
.. SPDX-License-Identifier: GPL-2.0
|
||||
|
||||
===================================
|
||||
Memory copy/set instructions (MOPS)
|
||||
===================================
|
||||
|
||||
A MOPS memory copy/set operation consists of three consecutive CPY* or SET*
|
||||
instructions: a prologue, main and epilogue (for example: CPYP, CPYM, CPYE).
|
||||
|
||||
A main or epilogue instruction can take a MOPS exception for various reasons,
|
||||
for example when a task is migrated to a CPU with a different MOPS
|
||||
implementation, or when the instruction's alignment and size requirements are
|
||||
not met. The software exception handler is then expected to reset the registers
|
||||
and restart execution from the prologue instruction. Normally this is handled
|
||||
by the kernel.
|
||||
|
||||
For more details refer to "D1.3.5.7 Memory Copy and Memory Set exceptions" in
|
||||
the Arm Architecture Reference Manual DDI 0487K.a (Arm ARM).
|
||||
|
||||
.. _arm64_mops_hyp:
|
||||
|
||||
Hypervisor requirements
|
||||
-----------------------
|
||||
|
||||
A hypervisor running a Linux guest must handle all MOPS exceptions from the
|
||||
guest kernel, as Linux may not be able to handle the exception at all times.
|
||||
For example, a MOPS exception can be taken when the hypervisor migrates a vCPU
|
||||
to another physical CPU with a different MOPS implementation.
|
||||
|
||||
To do this, the hypervisor must:
|
||||
|
||||
- Set HCRX_EL2.MCE2 to 1 so that the exception is taken to the hypervisor.
|
||||
|
||||
- Have an exception handler that implements the algorithm from the Arm ARM
|
||||
rules CNTMJ and MWFQH.
|
||||
|
||||
- Set the guest's PSTATE.SS to 0 in the exception handler, to handle a
|
||||
potential step of the current instruction.
|
||||
|
||||
Note: Clearing PSTATE.SS is needed so that a single step exception is taken
|
||||
on the next instruction (the prologue instruction). Otherwise prologue
|
||||
would get silently stepped over and the single step exception taken on the
|
||||
main instruction. Note that if the guest instruction is not being stepped
|
||||
then clearing PSTATE.SS has no effect.
|
||||
@ -57,6 +57,8 @@ stable kernels.
|
||||
+----------------+-----------------+-----------------+-----------------------------+
|
||||
| Ampere | AmpereOne AC04 | AC04_CPU_10 | AMPERE_ERRATUM_AC03_CPU_38 |
|
||||
+----------------+-----------------+-----------------+-----------------------------+
|
||||
| Ampere | AmpereOne AC04 | AC04_CPU_23 | AMPERE_ERRATUM_AC04_CPU_23 |
|
||||
+----------------+-----------------+-----------------+-----------------------------+
|
||||
+----------------+-----------------+-----------------+-----------------------------+
|
||||
| ARM | Cortex-A510 | #2457168 | ARM64_ERRATUM_2457168 |
|
||||
+----------------+-----------------+-----------------+-----------------------------+
|
||||
@ -198,7 +200,8 @@ stable kernels.
|
||||
+----------------+-----------------+-----------------+-----------------------------+
|
||||
| ARM | Neoverse-V3 | #3312417 | ARM64_ERRATUM_3194386 |
|
||||
+----------------+-----------------+-----------------+-----------------------------+
|
||||
| ARM | MMU-500 | #841119,826419 | N/A |
|
||||
| ARM | MMU-500 | #841119,826419 | ARM_SMMU_MMU_500_CPRE_ERRATA|
|
||||
| | | #562869,1047329 | |
|
||||
+----------------+-----------------+-----------------+-----------------------------+
|
||||
| ARM | MMU-600 | #1076982,1209401| N/A |
|
||||
+----------------+-----------------+-----------------+-----------------------------+
|
||||
@ -255,8 +258,11 @@ stable kernels.
|
||||
+----------------+-----------------+-----------------+-----------------------------+
|
||||
| Hisilicon | Hip08 SMMU PMCG | #162001800 | N/A |
|
||||
+----------------+-----------------+-----------------+-----------------------------+
|
||||
| Hisilicon | Hip{08,09,10,10C| #162001900 | N/A |
|
||||
| | ,11} SMMU PMCG | | |
|
||||
| Hisilicon | Hip{08,09,09A,10| #162001900 | N/A |
|
||||
| | ,10C,11} | | |
|
||||
| | SMMU PMCG | | |
|
||||
+----------------+-----------------+-----------------+-----------------------------+
|
||||
| Hisilicon | Hip09 | #162100801 | HISILICON_ERRATUM_162100801 |
|
||||
+----------------+-----------------+-----------------+-----------------------------+
|
||||
+----------------+-----------------+-----------------+-----------------------------+
|
||||
| Qualcomm Tech. | Kryo/Falkor v1 | E1003 | QCOM_FALKOR_ERRATUM_1003 |
|
||||
|
||||
@ -346,6 +346,10 @@ The regset data starts with struct user_za_header, containing:
|
||||
|
||||
* Writes to NT_ARM_ZT will set PSTATE.ZA to 1.
|
||||
|
||||
* If any register data is provided along with SME_PT_VL_ONEXEC then the
|
||||
register data will be interpreted with the current vector length, not
|
||||
the vector length configured for use on exec.
|
||||
|
||||
|
||||
8. ELF coredump extensions
|
||||
---------------------------
|
||||
|
||||
@ -402,6 +402,10 @@ The regset data starts with struct user_sve_header, containing:
|
||||
streaming mode and any SETREGSET of NT_ARM_SSVE will enter streaming mode
|
||||
if the target was not in streaming mode.
|
||||
|
||||
* If any register data is provided along with SVE_PT_VL_ONEXEC then the
|
||||
register data will be interpreted with the current vector length, not
|
||||
the vector length configured for use on exec.
|
||||
|
||||
* The effect of writing a partial, incomplete payload is unspecified.
|
||||
|
||||
|
||||
|
||||
@ -244,7 +244,7 @@ information about the interrupt from the irb parameter.
|
||||
--------------------
|
||||
|
||||
The ccwgroup mechanism is designed to handle devices consisting of multiple ccw
|
||||
devices, like lcs or ctc.
|
||||
devices, like qeth or ctc.
|
||||
|
||||
The ccw driver provides a 'group' attribute. Piping bus ids of ccw devices to
|
||||
this attribute creates a ccwgroup device consisting of these ccw devices (if
|
||||
|
||||
368
Documentation/arch/x86/amd-debugging.rst
Normal file
368
Documentation/arch/x86/amd-debugging.rst
Normal file
@ -0,0 +1,368 @@
|
||||
.. SPDX-License-Identifier: GPL-2.0
|
||||
|
||||
Debugging AMD Zen systems
|
||||
+++++++++++++++++++++++++
|
||||
|
||||
Introduction
|
||||
============
|
||||
|
||||
This document describes techniques that are useful for debugging issues with
|
||||
AMD Zen systems. It is intended for use by developers and technical users
|
||||
to help identify and resolve issues.
|
||||
|
||||
S3 vs s2idle
|
||||
============
|
||||
|
||||
On AMD systems, it's not possible to simultaneously support suspend-to-RAM (S3)
|
||||
and suspend-to-idle (s2idle). To confirm which mode your system supports you
|
||||
can look at ``cat /sys/power/mem_sleep``. If it shows ``s2idle [deep]`` then
|
||||
*S3* is supported. If it shows ``[s2idle]`` then *s2idle* is
|
||||
supported.
|
||||
|
||||
On systems that support *S3*, the firmware will be utilized to put all hardware into
|
||||
the appropriate low power state.
|
||||
|
||||
On systems that support *s2idle*, the kernel will be responsible for transitioning devices
|
||||
into the appropriate low power state. When all devices are in the appropriate low
|
||||
power state, the hardware will transition into a hardware sleep state.
|
||||
|
||||
After a suspend cycle you can tell how much time was spent in a hardware sleep
|
||||
state by looking at ``cat /sys/power/suspend_stats/last_hw_sleep``.
|
||||
|
||||
This flowchart explains how the AMD s2idle suspend flow works.
|
||||
|
||||
.. kernel-figure:: suspend.svg
|
||||
|
||||
This flowchart explains how the AMD s2idle resume flow works.
|
||||
|
||||
.. kernel-figure:: resume.svg
|
||||
|
||||
s2idle debugging tool
|
||||
=====================
|
||||
|
||||
As there are a lot of places that problems can occur, a debugging tool has been
|
||||
created at
|
||||
`amd-debug-tools <https://git.kernel.org/pub/scm/linux/kernel/git/superm1/amd-debug-tools.git/about/>`_
|
||||
that can help test for common problems and offer suggestions.
|
||||
|
||||
If you have an s2idle issue, it's best to start with this and follow instructions
|
||||
from its findings. If you continue to have an issue, raise a bug with the
|
||||
report generated from this script to
|
||||
`drm/amd gitlab <https://gitlab.freedesktop.org/drm/amd/-/issues/new?issuable_template=s2idle_BUG_TEMPLATE>`_.
|
||||
|
||||
Spurious s2idle wakeups from an IRQ
|
||||
===================================
|
||||
|
||||
Spurious wakeups will generally have an IRQ set to ``/sys/power/pm_wakeup_irq``.
|
||||
This can be matched to ``/proc/interrupts`` to determine what device woke the system.
|
||||
|
||||
If this isn't enough to debug the problem, then the following sysfs files
|
||||
can be set to add more verbosity to the wakeup process: ::
|
||||
|
||||
# echo 1 | sudo tee /sys/power/pm_debug_messages
|
||||
# echo 1 | sudo tee /sys/power/pm_print_times
|
||||
|
||||
After making those changes, the kernel will display messages that can
|
||||
be traced back to kernel s2idle loop code as well as display any active
|
||||
GPIO sources while waking up.
|
||||
|
||||
If the wakeup is caused by the ACPI SCI, additional ACPI debugging may be
|
||||
needed. These commands can enable additional trace data: ::
|
||||
|
||||
# echo enable | sudo tee /sys/module/acpi/parameters/trace_state
|
||||
# echo 1 | sudo tee /sys/module/acpi/parameters/aml_debug_output
|
||||
# echo 0x0800000f | sudo tee /sys/module/acpi/parameters/debug_level
|
||||
# echo 0xffff0000 | sudo tee /sys/module/acpi/parameters/debug_layer
|
||||
|
||||
Spurious s2idle wakeups from a GPIO
|
||||
===================================
|
||||
|
||||
If a GPIO is active when waking up the system ideally you would look at the
|
||||
schematic to determine what device it is associated with. If the schematic
|
||||
is not available, another tactic is to look at the ACPI _EVT() entry
|
||||
to determine what device is notified when that GPIO is active.
|
||||
|
||||
For a hypothetical example, say that GPIO 59 woke up the system. You can
|
||||
look at the SSDT to determine what device is notified when GPIO 59 is active.
|
||||
|
||||
First convert the GPIO number into hex. ::
|
||||
|
||||
$ python3 -c "print(hex(59))"
|
||||
0x3b
|
||||
|
||||
Next determine which ACPI table has the ``_EVT`` entry. For example: ::
|
||||
|
||||
$ sudo grep EVT /sys/firmware/acpi/tables/SSDT*
|
||||
grep: /sys/firmware/acpi/tables/SSDT27: binary file matches
|
||||
|
||||
Decode this table::
|
||||
|
||||
$ sudo cp /sys/firmware/acpi/tables/SSDT27 .
|
||||
$ sudo iasl -d SSDT27
|
||||
|
||||
Then look at the table and find the matching entry for GPIO 0x3b. ::
|
||||
|
||||
Case (0x3B)
|
||||
{
|
||||
M000 (0x393B)
|
||||
M460 (" Notify (\\_SB.PCI0.GP17.XHC1, 0x02)\n", Zero, Zero, Zero, Zero, Zero, Zero)
|
||||
Notify (\_SB.PCI0.GP17.XHC1, 0x02) // Device Wake
|
||||
}
|
||||
|
||||
You can see in this case that the device ``\_SB.PCI0.GP17.XHC1`` is notified
|
||||
when GPIO 59 is active. It's obvious this is an XHCI controller, but to go a
|
||||
step further you can figure out which XHCI controller it is by matching it to
|
||||
ACPI::
|
||||
|
||||
$ grep "PCI0.GP17.XHC1" /sys/bus/acpi/devices/*/path
|
||||
/sys/bus/acpi/devices/device:2d/path:\_SB_.PCI0.GP17.XHC1
|
||||
/sys/bus/acpi/devices/device:2e/path:\_SB_.PCI0.GP17.XHC1.RHUB
|
||||
/sys/bus/acpi/devices/device:2f/path:\_SB_.PCI0.GP17.XHC1.RHUB.PRT1
|
||||
/sys/bus/acpi/devices/device:30/path:\_SB_.PCI0.GP17.XHC1.RHUB.PRT1.CAM0
|
||||
/sys/bus/acpi/devices/device:31/path:\_SB_.PCI0.GP17.XHC1.RHUB.PRT1.CAM1
|
||||
/sys/bus/acpi/devices/device:32/path:\_SB_.PCI0.GP17.XHC1.RHUB.PRT2
|
||||
/sys/bus/acpi/devices/LNXPOWER:0d/path:\_SB_.PCI0.GP17.XHC1.PWRS
|
||||
|
||||
Here you can see it matches to ``device:2d``. Look at the ``physical_node``
|
||||
to determine what PCI device that actually is. ::
|
||||
|
||||
$ ls -l /sys/bus/acpi/devices/device:2d/physical_node
|
||||
lrwxrwxrwx 1 root root 0 Feb 12 13:22 /sys/bus/acpi/devices/device:2d/physical_node -> ../../../../../pci0000:00/0000:00:08.1/0000:c2:00.4
|
||||
|
||||
So there you have it: the PCI device associated with this GPIO wakeup was ``0000:c2:00.4``.
|
||||
|
||||
The ``amd_s2idle.py`` script will capture most of these artifacts for you.
|
||||
|
||||
s2idle PM debug messages
|
||||
========================
|
||||
|
||||
During the s2idle flow on AMD systems, the ACPI LPS0 driver is responsible
|
||||
for checking all uPEP constraints. Failing uPEP constraints does not prevent
|
||||
s0i3 entry. This means that if some constraints are not met, it is possible
|
||||
the kernel may attempt to enter s2idle even if there are some known issues.
|
||||
|
||||
To activate PM debugging, either specify ``pm_debug_messages`` kernel
|
||||
command-line option at boot or write to ``/sys/power/pm_debug_messages``.
|
||||
Unmet constraints will be displayed in the kernel log and can be
|
||||
viewed by logging tools that process kernel ring buffer like ``dmesg`` or
|
||||
``journalctl``.
|
||||
|
||||
If the system freezes on entry/exit before these messages are flushed, a
|
||||
useful debugging tactic is to unbind the ``amd_pmc`` driver to prevent
|
||||
notification to the platform to start s0i3 entry. This will stop the
|
||||
system from freezing on entry or exit and let you view all the failed
|
||||
constraints. ::
|
||||
|
||||
cd /sys/bus/platform/drivers/amd_pmc
|
||||
ls | grep AMD | sudo tee unbind
|
||||
|
||||
After doing this, run the suspend cycle and look specifically for errors around: ::
|
||||
|
||||
ACPI: LPI: Constraint not met; min power state:%s current power state:%s
|
||||
|
||||
Historical examples of s2idle issues
|
||||
====================================
|
||||
|
||||
To help understand the types of issues that can occur and how to debug them,
|
||||
here are some historical examples of s2idle issues that have been resolved.
|
||||
|
||||
Core offlining
|
||||
--------------
|
||||
An end user had reported that taking a core offline would prevent the system
|
||||
from properly entering s0i3. This was debugged using internal AMD tools
|
||||
to capture and display a stream of metrics from the hardware showing what changed
|
||||
when a core was offlined. It was determined that the hardware didn't get
|
||||
notification the offline cores were in the deepest state, and so it prevented
|
||||
the CPU from going into the deepest state. The issue was debugged to a missing
|
||||
command to put cores into C3 upon offline.
|
||||
|
||||
`commit d6b88ce2eb9d2 ("ACPI: processor idle: Allow playing dead in C3 state") <https://git.kernel.org/torvalds/c/d6b88ce2eb9d2>`_
|
||||
|
||||
Corruption after resume
|
||||
-----------------------
|
||||
A big problem that occurred with Rembrandt was that there was graphical
|
||||
corruption after resume. This happened because of a misalignment of PSP
|
||||
and driver responsibility. The PSP will save and restore DMCUB, but the
|
||||
driver assumed it needed to reset DMCUB on resume.
|
||||
This actually was a misalignment for earlier silicon as well, but was not
|
||||
observed.
|
||||
|
||||
`commit 79d6b9351f086 ("drm/amd/display: Don't reinitialize DMCUB on s0ix resume") <https://git.kernel.org/torvalds/c/79d6b9351f086>`_
|
||||
|
||||
Back to Back suspends fail
|
||||
--------------------------
|
||||
When using a wakeup source that triggers the IRQ to wake up, a bug in the
|
||||
pinctrl-amd driver may capture the wrong state of the IRQ and prevent the
|
||||
system going back to sleep properly.
|
||||
|
||||
`commit b8c824a869f22 ("pinctrl: amd: Don't save/restore interrupt status and wake status bits") <https://git.kernel.org/torvalds/c/b8c824a869f22>`_
|
||||
|
||||
Spurious timer based wakeup after 5 minutes
|
||||
-------------------------------------------
|
||||
The HPET was being used to program the wakeup source for the system, however
|
||||
this was causing a spurious wakeup after 5 minutes. The correct alarm to use
|
||||
was the ACPI alarm.
|
||||
|
||||
`commit 3d762e21d5637 ("rtc: cmos: Use ACPI alarm for non-Intel x86 systems too") <https://git.kernel.org/torvalds/c/3d762e21d5637>`_
|
||||
|
||||
Disk disappears after resume
|
||||
----------------------------
|
||||
After resuming from s2idle, the NVME disk would disappear. This was due to the
|
||||
BIOS not specifying the _DSD StorageD3Enable property. This caused the NVME
|
||||
driver not to put the disk into the expected state at suspend and to fail
|
||||
on resume.
|
||||
|
||||
`commit e79a10652bbd3 ("ACPI: x86: Force StorageD3Enable on more products") <https://git.kernel.org/torvalds/c/e79a10652bbd3>`_
|
||||
|
||||
Spurious IRQ1
|
||||
-------------
|
||||
A number of Renoir, Lucienne, Cezanne, & Barcelo platforms have a
|
||||
platform firmware bug where IRQ1 is triggered during s0i3 resume.
|
||||
|
||||
This was fixed in the platform firmware, but a number of systems didn't
|
||||
receive any more platform firmware updates.
|
||||
|
||||
`commit 8e60615e89321 ("platform/x86/amd: pmc: Disable IRQ1 wakeup for RN/CZN") <https://git.kernel.org/torvalds/c/8e60615e89321>`_
|
||||
|
||||
Hardware timeout
|
||||
----------------
|
||||
The hardware performs many actions besides accepting the values from
|
||||
amd-pmc driver. As the communication path with the hardware is a mailbox,
|
||||
it's possible that it might not respond quickly enough.
|
||||
This issue manifested as a failure to suspend: ::
|
||||
|
||||
PM: dpm_run_callback(): acpi_subsys_suspend_noirq+0x0/0x50 returns -110
|
||||
amd_pmc AMDI0005:00: PM: failed to suspend noirq: error -110
|
||||
|
||||
The timing problem was identified by comparing the values of the idle mask.
|
||||
|
||||
`commit 3c3c8e88c8712 ("platform/x86: amd-pmc: Increase the response register timeout") <https://git.kernel.org/torvalds/c/3c3c8e88c8712>`_
|
||||
|
||||
Failed to reach hardware sleep state with panel on
|
||||
--------------------------------------------------
|
||||
On some Strix systems certain panels were observed to block the system from
|
||||
entering a hardware sleep state if the internal panel was on during the sequence.
|
||||
|
||||
Even though the panel got turned off during suspend it exposed a timing problem
|
||||
where an interrupt caused the display hardware to wake up and block low power
|
||||
state entry.
|
||||
|
||||
`commit 40b8c14936bd2 ("drm/amd/display: Disable unneeded hpd interrupts during dm_init") <https://git.kernel.org/torvalds/c/40b8c14936bd2>`_
|
||||
|
||||
Runtime power consumption issues
|
||||
================================
|
||||
|
||||
Runtime power consumption is influenced by many factors, including but not
|
||||
limited to the configuration of the PCIe Active State Power Management (ASPM),
|
||||
the display brightness, the EPP policy of the CPU, and the power management
|
||||
of the devices.
|
||||
|
||||
ASPM
|
||||
----
|
||||
For the best runtime power consumption, ASPM should be programmed as intended
|
||||
by the BIOS from the hardware vendor. To accomplish this the Linux kernel
|
||||
should be compiled with ``CONFIG_PCIEASPM_DEFAULT`` set to ``y`` and the
|
||||
sysfs file ``/sys/module/pcie_aspm/parameters/policy`` should not be modified.
|
||||
|
||||
Most notably, if L1.2 is not configured properly for any devices, the SoC
|
||||
will not be able to enter the deepest idle state.
|
||||
|
||||
EPP Policy
|
||||
----------
|
||||
The ``energy_performance_preference`` sysfs file can be used to set a bias
|
||||
of efficiency or performance for a CPU. This has a direct effect on
|
||||
the battery life when more heavily biased towards performance.
|
||||
|
||||
|
||||
BIOS debug messages
|
||||
===================
|
||||
|
||||
Most OEM machines don't have a serial UART for outputting kernel or BIOS
|
||||
debug messages. However BIOS debug messages are useful for understanding
|
||||
both BIOS bugs and bugs with the Linux kernel drivers that call BIOS AML.
|
||||
|
||||
As the BIOS on most OEM AMD systems is based on an AMD reference BIOS,
|
||||
the infrastructure used for exporting debugging messages is often the same
|
||||
as AMD reference BIOS.
|
||||
|
||||
Manually Parsing
|
||||
----------------
|
||||
There is generally an ACPI method ``\M460`` that different paths of the AML
|
||||
will call to emit a message to the BIOS serial log. This method takes
|
||||
7 arguments, with the first being a string and the rest being optional
|
||||
integers::
|
||||
|
||||
Method (M460, 7, Serialized)
|
||||
|
||||
Here is an example of a string that BIOS AML may call out using ``\M460``::
|
||||
|
||||
M460 (" OEM-ASL-PCIe Address (0x%X)._REG (%d %d) PCSA = %d\n", DADR, Arg0, Arg1, PCSA, Zero, Zero)
|
||||
|
||||
Normally when executed, the ``\M460`` method would populate the additional
|
||||
arguments into the string. In order to get these messages from the Linux
|
||||
kernel a hook has been added into ACPICA that can capture the *arguments*
|
||||
sent to ``\M460`` and print them to the kernel ring buffer.
|
||||
For example the following message could be emitted into kernel ring buffer::
|
||||
|
||||
extrace-0174 ex_trace_args : " OEM-ASL-PCIe Address (0x%X)._REG (%d %d) PCSA = %d\n", ec106000, 2, 1, 1, 0, 0
|
||||
|
||||
In order to get these messages, you need to compile with ``CONFIG_ACPI_DEBUG``
|
||||
and then turn on the following ACPICA tracing parameters.
|
||||
This can be done either on the kernel command line or at runtime:
|
||||
|
||||
* ``acpi.trace_method_name=\M460``
|
||||
* ``acpi.trace_state=method``
|
||||
|
||||
NOTE: These can be very noisy at bootup. If you turn these parameters on
|
||||
the kernel command line, please also consider turning up ``CONFIG_LOG_BUF_SHIFT``
|
||||
to a larger size such as 17 to avoid losing early boot messages.
|
||||
|
||||
Tool assisted Parsing
|
||||
---------------------
|
||||
As mentioned above, parsing by hand can be tedious, especially with a lot of
|
||||
messages. To help with this, a tool has been created at
|
||||
`amd-debug-tools <https://git.kernel.org/pub/scm/linux/kernel/git/superm1/amd-debug-tools.git/about/>`_
|
||||
to help parse the messages.
|
||||
|
||||
Random reboot issues
|
||||
====================
|
||||
|
||||
When a random reboot occurs, the high-level reason for the reboot is stored
|
||||
in a register that will persist onto the next boot.
|
||||
|
||||
There are 6 classes of reasons for the reboot:
|
||||
* Software induced
|
||||
* Power state transition
|
||||
* Pin induced
|
||||
* Hardware induced
|
||||
* Remote reset
|
||||
* Internal CPU event
|
||||
|
||||
.. csv-table::
|
||||
:header: "Bit", "Type", "Reason"
|
||||
:align: left
|
||||
|
||||
"0", "Pin", "thermal pin BP_THERMTRIP_L was tripped"
|
||||
"1", "Pin", "power button was pressed for 4 seconds"
|
||||
"2", "Pin", "shutdown pin was tripped"
|
||||
"4", "Remote", "remote ASF power off command was received"
|
||||
"9", "Internal", "internal CPU thermal limit was tripped"
|
||||
"16", "Pin", "system reset pin BP_SYS_RST_L was tripped"
|
||||
"17", "Software", "software issued PCI reset"
|
||||
"18", "Software", "software wrote 0x4 to reset control register 0xCF9"
|
||||
"19", "Software", "software wrote 0x6 to reset control register 0xCF9"
|
||||
"20", "Software", "software wrote 0xE to reset control register 0xCF9"
|
||||
"21", "ACPI-state", "ACPI power state transition occurred"
|
||||
"22", "Pin", "keyboard reset pin KB_RST_L was tripped"
|
||||
"23", "Internal", "internal CPU shutdown event occurred"
|
||||
"24", "Hardware", "system failed to boot before failed boot timer expired"
|
||||
"25", "Hardware", "hardware watchdog timer expired"
|
||||
"26", "Remote", "remote ASF reset command was received"
|
||||
"27", "Internal", "an uncorrected error caused a data fabric sync flood event"
|
||||
"29", "Internal", "FCH and MP1 failed warm reset handshake"
|
||||
"30", "Internal", "a parity error occurred"
|
||||
"31", "Internal", "a software sync flood event occurred"
|
||||
|
||||
This information is read by the kernel at bootup and printed into
|
||||
the syslog. When a random reboot occurs this message can be helpful
|
||||
to determine the next component to debug.
|
||||
@ -25,6 +25,7 @@ x86-specific Documentation
|
||||
shstk
|
||||
iommu
|
||||
intel_txt
|
||||
amd-debugging
|
||||
amd-memory-encryption
|
||||
amd_hsmp
|
||||
tdx
|
||||
|
||||
@ -93,7 +93,7 @@ enters a C-state.
|
||||
|
||||
The kernel provides a function to invoke the buffer clearing:
|
||||
|
||||
mds_clear_cpu_buffers()
|
||||
x86_clear_cpu_buffers()
|
||||
|
||||
Also macro CLEAR_CPU_BUFFERS can be used in ASM late in exit-to-user path.
|
||||
Other than EFLAGS.ZF, this macro doesn't clobber any registers.
|
||||
@ -185,9 +185,9 @@ Mitigation points
|
||||
idle clearing would be a window dressing exercise and is therefore not
|
||||
activated.
|
||||
|
||||
The invocation is controlled by the static key mds_idle_clear which is
|
||||
switched depending on the chosen mitigation mode and the SMT state of
|
||||
the system.
|
||||
The invocation is controlled by the static key cpu_buf_idle_clear which is
|
||||
switched depending on the chosen mitigation mode and the SMT state of the
|
||||
system.
|
||||
|
||||
The buffer clear is only invoked before entering the C-State to prevent
|
||||
that stale data from the idling CPU from spilling to the Hyper-Thread
|
||||
|
||||
4
Documentation/arch/x86/resume.svg
Normal file
4
Documentation/arch/x86/resume.svg
Normal file
File diff suppressed because one or more lines are too long
|
After Width: | Height: | Size: 350 KiB |
4
Documentation/arch/x86/suspend.svg
Normal file
4
Documentation/arch/x86/suspend.svg
Normal file
File diff suppressed because one or more lines are too long
|
After Width: | Height: | Size: 318 KiB |
@ -135,6 +135,10 @@ Thread-related topology information in the kernel:
|
||||
The ID of the core to which a thread belongs. It is also printed in /proc/cpuinfo
|
||||
"core_id."
|
||||
|
||||
- topology_logical_core_id();
|
||||
|
||||
The logical core ID to which a thread belongs.
|
||||
|
||||
|
||||
|
||||
System topology examples
|
||||
|
||||
@ -39,13 +39,16 @@ blkdevparts=<blkdev-def>[;<blkdev-def>]
|
||||
create a link to block device partition with the name "PARTNAME".
|
||||
User space application can access partition by partition name.
|
||||
|
||||
ro
|
||||
read-only. Flag the partition as read-only.
|
||||
|
||||
Example:
|
||||
|
||||
eMMC disk names are "mmcblk0" and "mmcblk0boot0".
|
||||
|
||||
bootargs::
|
||||
|
||||
'blkdevparts=mmcblk0:1G(data0),1G(data1),-;mmcblk0boot0:1m(boot),-(kernel)'
|
||||
'blkdevparts=mmcblk0:1G(data0),1G(data1),-;mmcblk0boot0:1m(boot)ro,-(kernel)'
|
||||
|
||||
dmesg::
|
||||
|
||||
|
||||
@ -77,10 +77,10 @@ Basic design
|
||||
============
|
||||
|
||||
We introduce ``struct blk_crypto_key`` to represent an inline encryption key and
|
||||
how it will be used. This includes the actual bytes of the key; the size of the
|
||||
key; the algorithm and data unit size the key will be used with; and the number
|
||||
of bytes needed to represent the maximum data unit number the key will be used
|
||||
with.
|
||||
how it will be used. This includes the type of the key (raw or
|
||||
hardware-wrapped); the actual bytes of the key; the size of the key; the
|
||||
algorithm and data unit size the key will be used with; and the number of bytes
|
||||
needed to represent the maximum data unit number the key will be used with.
|
||||
|
||||
We introduce ``struct bio_crypt_ctx`` to represent an encryption context. It
|
||||
contains a data unit number and a pointer to a blk_crypto_key. We add pointers
|
||||
@ -301,3 +301,250 @@ kernel will pretend that the device does not support hardware inline encryption
|
||||
When the crypto API fallback is enabled, this means that all bios with an
|
||||
encryption context will use the fallback, and IO will complete as usual. When
|
||||
the fallback is disabled, a bio with an encryption context will be failed.
|
||||
|
||||
.. _hardware_wrapped_keys:
|
||||
|
||||
Hardware-wrapped keys
|
||||
=====================
|
||||
|
||||
Motivation and threat model
|
||||
---------------------------
|
||||
|
||||
Linux storage encryption (dm-crypt, fscrypt, eCryptfs, etc.) traditionally
|
||||
relies on the raw encryption key(s) being present in kernel memory so that the
|
||||
encryption can be performed. This traditionally isn't seen as a problem because
|
||||
the key(s) won't be present during an offline attack, which is the main type of
|
||||
attack that storage encryption is intended to protect from.
|
||||
|
||||
However, there is an increasing desire to also protect users' data from other
|
||||
types of attacks (to the extent possible), including:
|
||||
|
||||
- Cold boot attacks, where an attacker with physical access to a system suddenly
|
||||
powers it off, then immediately dumps the system memory to extract recently
|
||||
in-use encryption keys, then uses these keys to decrypt user data on-disk.
|
||||
|
||||
- Online attacks where the attacker is able to read kernel memory without fully
|
||||
compromising the system, followed by an offline attack where any extracted
|
||||
keys can be used to decrypt user data on-disk. An example of such an online
|
||||
attack would be if the attacker is able to run some code on the system that
|
||||
exploits a Meltdown-like vulnerability but is unable to escalate privileges.
|
||||
|
||||
- Online attacks where the attacker fully compromises the system, but their data
|
||||
exfiltration is significantly time-limited and/or bandwidth-limited, so in
|
||||
order to completely exfiltrate the data they need to extract the encryption
|
||||
keys to use in a later offline attack.
|
||||
|
||||
Hardware-wrapped keys are a feature of inline encryption hardware that is
|
||||
designed to protect users' data from the above attacks (to the extent possible),
|
||||
without introducing limitations such as a maximum number of keys.
|
||||
|
||||
Note that it is impossible to **fully** protect users' data from these attacks.
|
||||
Even in the attacks where the attacker "just" gets read access to kernel memory,
|
||||
they can still extract any user data that is present in memory, including
|
||||
plaintext pagecache pages of encrypted files. The focus here is just on
|
||||
protecting the encryption keys, as those instantly give access to **all** user
|
||||
data in any following offline attack, rather than just some of it (where which
|
||||
data is included in that "some" might not be controlled by the attacker).
|
||||
|
||||
Solution overview
|
||||
-----------------
|
||||
|
||||
Inline encryption hardware typically has "keyslots" into which software can
|
||||
program keys for the hardware to use; the contents of keyslots typically can't
|
||||
be read back by software. As such, the above security goals could be achieved
|
||||
if the kernel simply erased its copy of the key(s) after programming them into
|
||||
keyslot(s) and thereafter only referred to them via keyslot number.
|
||||
|
||||
However, that naive approach runs into a couple of problems:
|
||||
|
||||
- It limits the number of unlocked keys to the number of keyslots, which
|
||||
typically is a small number. In cases where there is only one encryption key
|
||||
system-wide (e.g., a full-disk encryption key), that can be tolerable.
|
||||
However, in general there can be many logged-in users with many different
|
||||
keys, and/or many running applications with application-specific encrypted
|
||||
storage areas. This is especially true if file-based encryption (e.g.
|
||||
fscrypt) is being used.
|
||||
|
||||
- Inline crypto engines typically lose the contents of their keyslots if the
|
||||
storage controller (usually UFS or eMMC) is reset. Resetting the storage
|
||||
controller is a standard error recovery procedure that is executed if certain
|
||||
types of storage errors occur, and such errors can occur at any time.
|
||||
Therefore, when inline crypto is being used, the operating system must always
|
||||
be ready to reprogram the keyslots without user intervention.
|
||||
|
||||
Thus, it is important for the kernel to still have a way to "remind" the
|
||||
hardware about a key, without actually having the raw key itself.
|
||||
|
||||
Somewhat less importantly, it is also desirable that the raw keys are never
|
||||
visible to software at all, even while being initially unlocked. This would
|
||||
ensure that a read-only compromise of system memory will never allow a key to be
|
||||
extracted to be used off-system, even if it occurs when a key is being unlocked.
|
||||
|
||||
To solve all these problems, some vendors of inline encryption hardware have
|
||||
made their hardware support *hardware-wrapped keys*. Hardware-wrapped keys
|
||||
are encrypted keys that can only be unwrapped (decrypted) and used by hardware
|
||||
-- either by the inline encryption hardware itself, or by a dedicated hardware
|
||||
block that can directly provision keys to the inline encryption hardware.
|
||||
|
||||
(We refer to them as "hardware-wrapped keys" rather than simply "wrapped keys"
|
||||
to add some clarity in cases where there could be other types of wrapped keys,
|
||||
such as in file-based encryption. Key wrapping is a commonly used technique.)
|
||||
|
||||
The key which wraps (encrypts) hardware-wrapped keys is a hardware-internal key
|
||||
that is never exposed to software; it is either a persistent key (a "long-term
|
||||
wrapping key") or a per-boot key (an "ephemeral wrapping key"). The long-term
|
||||
wrapped form of the key is what is initially unlocked, but it is erased from
|
||||
memory as soon as it is converted into an ephemerally-wrapped key. In-use
|
||||
hardware-wrapped keys are always ephemerally-wrapped, not long-term wrapped.
|
||||
|
||||
As inline encryption hardware can only be used to encrypt/decrypt data on-disk,
|
||||
the hardware also includes a level of indirection; it doesn't use the unwrapped
|
||||
key directly for inline encryption, but rather derives both an inline encryption
|
||||
key and a "software secret" from it. Software can use the "software secret" for
|
||||
tasks that can't use the inline encryption hardware, such as filenames
|
||||
encryption. The software secret is not protected from memory compromise.
|
||||
|
||||
Key hierarchy
|
||||
-------------
|
||||
|
||||
Here is the key hierarchy for a hardware-wrapped key::
|
||||
|
||||
Hardware-wrapped key
|
||||
|
|
||||
|
|
||||
<Hardware KDF>
|
||||
|
|
||||
-----------------------------
|
||||
| |
|
||||
Inline encryption key Software secret
|
||||
|
||||
The components are:
|
||||
|
||||
- *Hardware-wrapped key*: a key for the hardware's KDF (Key Derivation
|
||||
Function), in ephemerally-wrapped form. The key wrapping algorithm is a
|
||||
hardware implementation detail that doesn't impact kernel operation, but a
|
||||
strong authenticated encryption algorithm such as AES-256-GCM is recommended.
|
||||
|
||||
- *Hardware KDF*: a KDF (Key Derivation Function) which the hardware uses to
|
||||
derive subkeys after unwrapping the wrapped key. The hardware's choice of KDF
|
||||
doesn't impact kernel operation, but it does need to be known for testing
|
||||
purposes, and it's also assumed to have at least a 256-bit security strength.
|
||||
All known hardware uses the SP800-108 KDF in Counter Mode with AES-256-CMAC,
|
||||
with a particular choice of labels and contexts; new hardware should use this
|
||||
already-vetted KDF.
|
||||
|
||||
- *Inline encryption key*: a derived key which the hardware directly provisions
|
||||
to a keyslot of the inline encryption hardware, without exposing it to
|
||||
software. In all known hardware, this will always be an AES-256-XTS key.
|
||||
However, in principle other encryption algorithms could be supported too.
|
||||
Hardware must derive distinct subkeys for each supported encryption algorithm.
|
||||
|
||||
- *Software secret*: a derived key which the hardware returns to software so
|
||||
that software can use it for cryptographic tasks that can't use inline
|
||||
encryption. This value is cryptographically isolated from the inline
|
||||
encryption key, i.e. knowing one doesn't reveal the other. (The KDF ensures
|
||||
this.) Currently, the software secret is always 32 bytes and thus is suitable
|
||||
for cryptographic applications that require up to a 256-bit security strength.
|
||||
Some use cases (e.g. full-disk encryption) won't require the software secret.
|
||||
|
||||
Example: in the case of fscrypt, the fscrypt master key (the key that protects a
|
||||
particular set of encrypted directories) is made hardware-wrapped. The inline
|
||||
encryption key is used as the file contents encryption key, while the software
|
||||
secret (rather than the master key directly) is used to key fscrypt's KDF
|
||||
(HKDF-SHA512) to derive other subkeys such as filenames encryption keys.
|
||||
|
||||
Note that currently this design assumes a single inline encryption key per
|
||||
hardware-wrapped key, without any further key derivation. Thus, in the case of
|
||||
fscrypt, currently hardware-wrapped keys are only compatible with the "inline
|
||||
encryption optimized" settings, which use one file contents encryption key per
|
||||
encryption policy rather than one per file. This design could be extended to
|
||||
make the hardware derive per-file keys using per-file nonces passed down the
|
||||
storage stack, and in fact some hardware already supports this; future work is
|
||||
planned to remove this limitation by adding the corresponding kernel support.
|
||||
|
||||
Kernel support
|
||||
--------------
|
||||
|
||||
The inline encryption support of the kernel's block layer ("blk-crypto") has
|
||||
been extended to support hardware-wrapped keys as an alternative to raw keys,
|
||||
when hardware support is available. This works in the following way:
|
||||
|
||||
- A ``key_types_supported`` field is added to the crypto capabilities in
|
||||
``struct blk_crypto_profile``. This allows device drivers to declare that
|
||||
they support raw keys, hardware-wrapped keys, or both.
|
||||
|
||||
- ``struct blk_crypto_key`` can now contain a hardware-wrapped key as an
|
||||
alternative to a raw key; a ``key_type`` field is added to
|
||||
``struct blk_crypto_config`` to distinguish between the different key types.
|
||||
This allows users of blk-crypto to en/decrypt data using a hardware-wrapped
|
||||
key in a way very similar to using a raw key.
|
||||
|
||||
- A new method ``blk_crypto_ll_ops::derive_sw_secret`` is added. Device drivers
|
||||
that support hardware-wrapped keys must implement this method. Users of
|
||||
blk-crypto can call ``blk_crypto_derive_sw_secret()`` to access this method.
|
||||
|
||||
- The programming and eviction of hardware-wrapped keys happens via
|
||||
``blk_crypto_ll_ops::keyslot_program`` and
|
||||
``blk_crypto_ll_ops::keyslot_evict``, just like it does for raw keys. If a
|
||||
driver supports hardware-wrapped keys, then it must handle hardware-wrapped
|
||||
keys being passed to these methods.
|
||||
|
||||
blk-crypto-fallback doesn't support hardware-wrapped keys. Therefore,
|
||||
hardware-wrapped keys can only be used with actual inline encryption hardware.
|
||||
|
||||
All the above deals with hardware-wrapped keys in ephemerally-wrapped form only.
|
||||
To get such keys in the first place, new block device ioctls have been added to
|
||||
provide a generic interface to creating and preparing such keys:
|
||||
|
||||
- ``BLKCRYPTOIMPORTKEY`` converts a raw key to long-term wrapped form. It takes
|
||||
in a pointer to a ``struct blk_crypto_import_key_arg``. The caller must set
|
||||
``raw_key_ptr`` and ``raw_key_size`` to the pointer and size (in bytes) of the
|
||||
raw key to import. On success, ``BLKCRYPTOIMPORTKEY`` returns 0 and writes
|
||||
the resulting long-term wrapped key blob to the buffer pointed to by
|
||||
``lt_key_ptr``, which is of maximum size ``lt_key_size``. It also updates
|
||||
``lt_key_size`` to be the actual size of the key. On failure, it returns -1
|
||||
and sets errno. An errno of ``EOPNOTSUPP`` indicates that the block device
|
||||
does not support hardware-wrapped keys. An errno of ``EOVERFLOW`` indicates
|
||||
that the output buffer did not have enough space for the key blob.
|
||||
|
||||
- ``BLKCRYPTOGENERATEKEY`` is like ``BLKCRYPTOIMPORTKEY``, but it has the
|
||||
hardware generate the key instead of importing one. It takes in a pointer to
|
||||
a ``struct blk_crypto_generate_key_arg``.
|
||||
|
||||
- ``BLKCRYPTOPREPAREKEY`` converts a key from long-term wrapped form to
|
||||
ephemerally-wrapped form. It takes in a pointer to a ``struct
|
||||
blk_crypto_prepare_key_arg``. The caller must set ``lt_key_ptr`` and
|
||||
``lt_key_size`` to the pointer and size (in bytes) of the long-term wrapped
|
||||
key blob to convert. On success, ``BLKCRYPTOPREPAREKEY`` returns 0 and writes
|
||||
the resulting ephemerally-wrapped key blob to the buffer pointed to by
|
||||
``eph_key_ptr``, which is of maximum size ``eph_key_size``. It also updates
|
||||
``eph_key_size`` to be the actual size of the key. On failure, it returns -1
|
||||
and sets errno. Errno values of ``EOPNOTSUPP`` and ``EOVERFLOW`` mean the
|
||||
same as they do for ``BLKCRYPTOIMPORTKEY``. An errno of ``EBADMSG`` indicates
|
||||
that the long-term wrapped key is invalid.
|
||||
|
||||
Userspace needs to use either ``BLKCRYPTOIMPORTKEY`` or ``BLKCRYPTOGENERATEKEY``
|
||||
once to create a key, and then ``BLKCRYPTOPREPAREKEY`` each time the key is
|
||||
unlocked and added to the kernel. Note that these ioctls have no relevance for
|
||||
raw keys; they are only for hardware-wrapped keys.
|
||||
|
||||
Testability
|
||||
-----------
|
||||
|
||||
Both the hardware KDF and the inline encryption itself are well-defined
|
||||
algorithms that don't depend on any secrets other than the unwrapped key.
|
||||
Therefore, if the unwrapped key is known to software, these algorithms can be
|
||||
reproduced in software in order to verify the ciphertext that is written to disk
|
||||
by the inline encryption hardware.
|
||||
|
||||
However, the unwrapped key will only be known to software for testing if the
|
||||
"import" functionality is used. Proper testing is not possible in the
|
||||
"generate" case where the hardware generates the key itself. The correct
|
||||
operation of the "generate" mode thus relies on the security and correctness of
|
||||
the hardware RNG and its use to generate the key, as well as the testing of the
|
||||
"import" mode as that should cover all parts other than the key generation.
|
||||
|
||||
For an example of a test that verifies the ciphertext written to disk in the
|
||||
"import" mode, see the fscrypt hardware-wrapped key tests in xfstests, or
|
||||
`Android's vts_kernel_encryption_test
|
||||
<https://android.googlesource.com/platform/test/vts-testcase/kernel/+/refs/heads/main/encryption/>`_.
|
||||
|
||||
@ -199,24 +199,36 @@ managing and controlling ublk devices with help of several control commands:
|
||||
|
||||
- user recovery feature description
|
||||
|
||||
Two new features are added for user recovery: ``UBLK_F_USER_RECOVERY`` and
|
||||
``UBLK_F_USER_RECOVERY_REISSUE``.
|
||||
Three new features are added for user recovery: ``UBLK_F_USER_RECOVERY``,
|
||||
``UBLK_F_USER_RECOVERY_REISSUE``, and ``UBLK_F_USER_RECOVERY_FAIL_IO``. To
|
||||
enable recovery of ublk devices after the ublk server exits, the ublk server
|
||||
should specify the ``UBLK_F_USER_RECOVERY`` flag when creating the device. The
|
||||
ublk server may additionally specify at most one of
|
||||
``UBLK_F_USER_RECOVERY_REISSUE`` and ``UBLK_F_USER_RECOVERY_FAIL_IO`` to
|
||||
modify how I/O is handled while the ublk server is dying/dead (this is called
|
||||
the ``nosrv`` case in the driver code).
|
||||
|
||||
With ``UBLK_F_USER_RECOVERY`` set, after one ubq_daemon(ublk server's io
|
||||
With just ``UBLK_F_USER_RECOVERY`` set, after one ubq_daemon (ublk server's io
|
||||
handler) is dying, ublk does not delete ``/dev/ublkb*`` during the whole
|
||||
recovery stage and ublk device ID is kept. It is ublk server's
|
||||
responsibility to recover the device context by its own knowledge.
|
||||
Requests which have not been issued to userspace are requeued. Requests
|
||||
which have been issued to userspace are aborted.
|
||||
|
||||
With ``UBLK_F_USER_RECOVERY_REISSUE`` set, after one ubq_daemon(ublk
|
||||
server's io handler) is dying, contrary to ``UBLK_F_USER_RECOVERY``,
|
||||
With ``UBLK_F_USER_RECOVERY_REISSUE`` additionally set, after one ubq_daemon
|
||||
(ublk server's io handler) is dying, contrary to ``UBLK_F_USER_RECOVERY``,
|
||||
requests which have been issued to userspace are requeued and will be
|
||||
re-issued to the new process after handling ``UBLK_CMD_END_USER_RECOVERY``.
|
||||
``UBLK_F_USER_RECOVERY_REISSUE`` is designed for backends who tolerate
|
||||
double-write since the driver may issue the same I/O request twice. It
|
||||
might be useful to a read-only FS or a VM backend.
|
||||
|
||||
With ``UBLK_F_USER_RECOVERY_FAIL_IO`` additionally set, after the ublk server
|
||||
exits, requests which have been issued to userspace are failed, as are any
|
||||
subsequently issued requests. Applications continuously issuing I/O against
|
||||
devices with this flag set will see a stream of I/O errors until a new ublk
|
||||
server recovers the device.
|
||||
|
||||
Unprivileged ublk device is supported by passing ``UBLK_F_UNPRIVILEGED_DEV``.
|
||||
Once the flag is set, all control commands can be sent by unprivileged
|
||||
user. Except for command of ``UBLK_CMD_ADD_DEV``, permission check on
|
||||
|
||||
@ -835,7 +835,7 @@ section named by ``btf_ext_info_sec->sec_name_off``.
|
||||
See :ref:`Documentation/bpf/llvm_reloc.rst <btf-co-re-relocations>`
|
||||
for more information on CO-RE relocations.
|
||||
|
||||
4.2 .BTF_ids section
|
||||
4.3 .BTF_ids section
|
||||
--------------------
|
||||
|
||||
The .BTF_ids section encodes BTF ID values that are used within the kernel.
|
||||
@ -896,6 +896,81 @@ and is used as a filter when resolving the BTF ID value.
|
||||
All the BTF ID lists and sets are compiled in the .BTF_ids section and
|
||||
resolved during the linking phase of kernel build by ``resolve_btfids`` tool.
|
||||
|
||||
4.4 .BTF.base section
|
||||
---------------------
|
||||
Split BTF - where the .BTF section only contains types not in the associated
|
||||
base .BTF section - is an extremely efficient way to encode type information
|
||||
for kernel modules, since they generally consist of a few module-specific
|
||||
types along with a large set of shared kernel types. The former are encoded
|
||||
in split BTF, while the latter are encoded in base BTF, resulting in more
|
||||
compact representations. A type in split BTF that refers to a type in
|
||||
base BTF refers to it using its base BTF ID, and split BTF IDs start
|
||||
at last_base_BTF_ID + 1.
|
||||
|
||||
The downside of this approach however is that this makes the split BTF
|
||||
somewhat brittle - when the base BTF changes, base BTF ID references are
|
||||
no longer valid and the split BTF itself becomes useless. The role of the
|
||||
.BTF.base section is to make split BTF more resilient for cases where
|
||||
the base BTF may change, as is the case for kernel modules not built every
|
||||
time the kernel is, for example. .BTF.base contains named base types: INTs,
|
||||
FLOATs, STRUCTs, UNIONs, ENUM[64]s and FWDs. INTs and FLOATs are fully
|
||||
described in .BTF.base sections, while composite types like structs
|
||||
and unions are not fully defined - the .BTF.base type simply serves as
|
||||
a description of the type the split BTF referred to, so structs/unions
|
||||
have 0 members in the .BTF.base section. ENUM[64]s are similarly recorded
|
||||
with 0 members. Any other types are added to the split BTF. This
|
||||
distillation process then leaves us with a .BTF.base section with
|
||||
such minimal descriptions of base types and a split .BTF section which refers
|
||||
to those base types. Later, we can relocate the split BTF using both the
|
||||
information stored in the .BTF.base section and the new .BTF base; the type
|
||||
information in the .BTF.base section allows us to update the split BTF
|
||||
references to point at the corresponding new base BTF IDs.
|
||||
|
||||
BTF relocation happens on kernel module load when a kernel module has a
|
||||
.BTF.base section, and libbpf also provides a btf__relocate() API to
|
||||
accomplish this.
|
||||
|
||||
As an example consider the following base BTF::
|
||||
|
||||
[1] INT 'int' size=4 bits_offset=0 nr_bits=32 encoding=SIGNED
|
||||
[2] STRUCT 'foo' size=8 vlen=2
|
||||
'f1' type_id=1 bits_offset=0
|
||||
'f2' type_id=1 bits_offset=32
|
||||
|
||||
...and associated split BTF::
|
||||
|
||||
[3] PTR '(anon)' type_id=2
|
||||
|
||||
i.e. split BTF describes a pointer to struct foo { int f1; int f2 };
|
||||
|
||||
.BTF.base will consist of::
|
||||
|
||||
[1] INT 'int' size=4 bits_offset=0 nr_bits=32 encoding=SIGNED
|
||||
[2] STRUCT 'foo' size=8 vlen=0
|
||||
|
||||
If we relocate the split BTF later using the following new base BTF::
|
||||
|
||||
[1] INT 'long unsigned int' size=8 bits_offset=0 nr_bits=64 encoding=(none)
|
||||
[2] INT 'int' size=4 bits_offset=0 nr_bits=32 encoding=SIGNED
|
||||
[3] STRUCT 'foo' size=8 vlen=2
|
||||
'f1' type_id=2 bits_offset=0
|
||||
'f2' type_id=2 bits_offset=32
|
||||
|
||||
...we can use our .BTF.base description to know that the split BTF reference
|
||||
is to struct foo, and relocation results in new split BTF::
|
||||
|
||||
[4] PTR '(anon)' type_id=3
|
||||
|
||||
Note that we had to update BTF ID and start BTF ID for the split BTF.
|
||||
|
||||
So we see how .BTF.base plays the role of facilitating later relocation,
|
||||
leading to more resilient split BTF.
|
||||
|
||||
.BTF.base sections will be generated automatically for out-of-tree kernel module
|
||||
builds - i.e. where KBUILD_EXTMOD is set (as it would be for "make M=path/2/mod"
|
||||
cases). .BTF.base generation requires pahole support for the "distilled_base"
|
||||
BTF feature; this is available in pahole v1.28 and later.
|
||||
|
||||
5. Using BTF
|
||||
============
|
||||
|
||||
|
||||
9
Documentation/core-api/cgroup.rst
Normal file
9
Documentation/core-api/cgroup.rst
Normal file
@ -0,0 +1,9 @@
|
||||
==================
|
||||
Cgroup Kernel APIs
|
||||
==================
|
||||
|
||||
Device Memory Cgroup API (dmemcg)
|
||||
=================================
|
||||
.. kernel-doc:: kernel/cgroup/dmem.c
|
||||
:export:
|
||||
|
||||
@ -151,19 +151,16 @@ The marks can be set by::
|
||||
|
||||
void folioq_mark(struct folio_queue *folioq, unsigned int slot);
|
||||
void folioq_mark2(struct folio_queue *folioq, unsigned int slot);
|
||||
void folioq_mark3(struct folio_queue *folioq, unsigned int slot);
|
||||
|
||||
Cleared by::
|
||||
|
||||
void folioq_unmark(struct folio_queue *folioq, unsigned int slot);
|
||||
void folioq_unmark2(struct folio_queue *folioq, unsigned int slot);
|
||||
void folioq_unmark3(struct folio_queue *folioq, unsigned int slot);
|
||||
|
||||
And the marks can be queried by::
|
||||
|
||||
bool folioq_is_marked(const struct folio_queue *folioq, unsigned int slot);
|
||||
bool folioq_is_marked2(const struct folio_queue *folioq, unsigned int slot);
|
||||
bool folioq_is_marked3(const struct folio_queue *folioq, unsigned int slot);
|
||||
|
||||
The marks can be used for any purpose and are not interpreted by this API.
|
||||
|
||||
|
||||
@ -108,6 +108,7 @@ more memory-management documentation in Documentation/mm/index.rst.
|
||||
dma-isa-lpc
|
||||
swiotlb
|
||||
mm-api
|
||||
cgroup
|
||||
genalloc
|
||||
pin_user_pages
|
||||
boot-time-mm
|
||||
|
||||
@ -295,9 +295,9 @@ slot set.
|
||||
|
||||
Fourth, the io_tlb_slot array keeps track of any "padding slots" allocated to
|
||||
meet alloc_align_mask requirements described above. When
|
||||
swiotlb_tlb_map_single() allocates bounce buffer space to meet alloc_align_mask
|
||||
swiotlb_tbl_map_single() allocates bounce buffer space to meet alloc_align_mask
|
||||
requirements, it may allocate pre-padding space across zero or more slots. But
|
||||
when swiotbl_tlb_unmap_single() is called with the bounce buffer address, the
|
||||
when swiotlb_tbl_unmap_single() is called with the bounce buffer address, the
|
||||
alloc_align_mask value that governed the allocation, and therefore the
|
||||
allocation of any padding slots, is not known. The "pad_slots" field records
|
||||
the number of padding slots so that swiotlb_tbl_unmap_single() can free them.
|
||||
|
||||
@ -41,12 +41,12 @@ entries.
|
||||
In addition to the macros EXPORT_SYMBOL() and EXPORT_SYMBOL_GPL(), that allow
|
||||
exporting of kernel symbols to the kernel symbol table, variants of these are
|
||||
available to export symbols into a certain namespace: EXPORT_SYMBOL_NS() and
|
||||
EXPORT_SYMBOL_NS_GPL(). They take one additional argument: the namespace.
|
||||
Please note that due to macro expansion that argument needs to be a
|
||||
preprocessor symbol. E.g. to export the symbol ``usb_stor_suspend`` into the
|
||||
EXPORT_SYMBOL_NS_GPL(). They take one additional argument: the namespace as a
|
||||
string constant. Note that this string must not contain whitespace.
|
||||
E.g. to export the symbol ``usb_stor_suspend`` into the
|
||||
namespace ``USB_STORAGE``, use::
|
||||
|
||||
EXPORT_SYMBOL_NS(usb_stor_suspend, USB_STORAGE);
|
||||
EXPORT_SYMBOL_NS(usb_stor_suspend, "USB_STORAGE");
|
||||
|
||||
The corresponding ksymtab entry struct ``kernel_symbol`` will have the member
|
||||
``namespace`` set accordingly. A symbol that is exported without a namespace will
|
||||
@ -68,7 +68,7 @@ is to define the default namespace in the ``Makefile`` of the subsystem. E.g. to
|
||||
export all symbols defined in usb-common into the namespace USB_COMMON, add a
|
||||
line like this to drivers/usb/common/Makefile::
|
||||
|
||||
ccflags-y += -DDEFAULT_SYMBOL_NAMESPACE=USB_COMMON
|
||||
ccflags-y += -DDEFAULT_SYMBOL_NAMESPACE='"USB_COMMON"'
|
||||
|
||||
That will affect all EXPORT_SYMBOL() and EXPORT_SYMBOL_GPL() statements. A
|
||||
symbol exported with EXPORT_SYMBOL_NS() while this definition is present, will
|
||||
@ -79,7 +79,7 @@ A second option to define the default namespace is directly in the compilation
|
||||
unit as preprocessor statement. The above example would then read::
|
||||
|
||||
#undef DEFAULT_SYMBOL_NAMESPACE
|
||||
#define DEFAULT_SYMBOL_NAMESPACE USB_COMMON
|
||||
#define DEFAULT_SYMBOL_NAMESPACE "USB_COMMON"
|
||||
|
||||
within the corresponding compilation unit before any EXPORT_SYMBOL macro is
|
||||
used.
|
||||
@ -94,7 +94,7 @@ for the namespaces it uses symbols from. E.g. a module using the
|
||||
usb_stor_suspend symbol from above, needs to import the namespace USB_STORAGE
|
||||
using a statement like::
|
||||
|
||||
MODULE_IMPORT_NS(USB_STORAGE);
|
||||
MODULE_IMPORT_NS("USB_STORAGE");
|
||||
|
||||
This will create a ``modinfo`` tag in the module for each imported namespace.
|
||||
This has the side effect, that the imported namespaces of a module can be
|
||||
|
||||
168
Documentation/dev-tools/autofdo.rst
Normal file
168
Documentation/dev-tools/autofdo.rst
Normal file
@ -0,0 +1,168 @@
|
||||
.. SPDX-License-Identifier: GPL-2.0
|
||||
|
||||
===================================
|
||||
Using AutoFDO with the Linux kernel
|
||||
===================================
|
||||
|
||||
This enables AutoFDO build support for the kernel when using
|
||||
the Clang compiler. AutoFDO (Auto-Feedback-Directed Optimization)
|
||||
is a type of profile-guided optimization (PGO) used to enhance the
|
||||
performance of binary executables. It gathers information about the
|
||||
frequency of execution of various code paths within a binary using
|
||||
hardware sampling. This data is then used to guide the compiler's
|
||||
optimization decisions, resulting in a more efficient binary. AutoFDO
|
||||
is a powerful optimization technique, and data indicates that it can
|
||||
significantly improve kernel performance. It's especially beneficial
|
||||
for workloads affected by front-end stalls.
|
||||
|
||||
For AutoFDO builds, unlike non-FDO builds, the user must supply a
|
||||
profile. Acquiring an AutoFDO profile can be done in several ways.
|
||||
AutoFDO profiles are created by converting hardware sampling using
|
||||
the "perf" tool. It is crucial that the workload used to create these
|
||||
perf files is representative; they must exhibit runtime
|
||||
characteristics similar to the workloads that are intended to be
|
||||
optimized. Failure to do so will result in the compiler optimizing
|
||||
for the wrong objective.
|
||||
|
||||
The AutoFDO profile often encapsulates the program's behavior. If the
|
||||
performance-critical code is architecture-independent, the profile
|
||||
can be applied across platforms to achieve performance gains. For
|
||||
instance, using the profile generated on Intel architecture to build
|
||||
a kernel for AMD architecture can also yield performance improvements.
|
||||
|
||||
There are two methods for acquiring a representative profile:
|
||||
(1) Sample real workloads using a production environment.
|
||||
(2) Generate the profile using a representative load test.
|
||||
When enabling the AutoFDO build configuration without providing an
|
||||
AutoFDO profile, the compiler only modifies the dwarf information in
|
||||
the kernel without impacting runtime performance. It's advisable to
|
||||
use a kernel binary built with the same AutoFDO configuration to
|
||||
collect the perf profile. While it's possible to use a kernel built
|
||||
with different options, it may result in inferior performance.
|
||||
|
||||
One can collect profiles using an AutoFDO build of the previous kernel.
|
||||
AutoFDO employs relative line numbers to match the profiles, offering
|
||||
some tolerance for source changes. This mode is commonly used in a
|
||||
production environment for profile collection.
|
||||
|
||||
In a profile collection based on a load test, the AutoFDO collection
|
||||
process consists of the following steps:
|
||||
|
||||
#. Initial build: The kernel is built with AutoFDO options
|
||||
without a profile.
|
||||
|
||||
#. Profiling: The above kernel is then run with a representative
|
||||
workload to gather execution frequency data. This data is
|
||||
collected using hardware sampling, via perf. AutoFDO is most
|
||||
effective on platforms supporting advanced PMU features like
|
||||
LBR on Intel machines.
|
||||
|
||||
#. AutoFDO profile generation: Perf output file is converted to
|
||||
the AutoFDO profile via offline tools.
|
||||
|
||||
AutoFDO support requires the Clang compiler from LLVM 17 or later.
|
||||
|
||||
Preparation
|
||||
===========
|
||||
|
||||
Configure the kernel with::
|
||||
|
||||
CONFIG_AUTOFDO_CLANG=y
|
||||
|
||||
Customization
|
||||
=============
|
||||
|
||||
The default CONFIG_AUTOFDO_CLANG setting covers kernel space objects for
|
||||
AutoFDO builds. One can, however, enable or disable AutoFDO build for
|
||||
individual files and directories by adding a line similar to the following
|
||||
to the respective kernel Makefile:
|
||||
|
||||
- For enabling a single file (e.g. foo.o) ::
|
||||
|
||||
AUTOFDO_PROFILE_foo.o := y
|
||||
|
||||
- For enabling all files in one directory ::
|
||||
|
||||
AUTOFDO_PROFILE := y
|
||||
|
||||
- For disabling one file ::
|
||||
|
||||
AUTOFDO_PROFILE_foo.o := n
|
||||
|
||||
- For disabling all files in one directory ::
|
||||
|
||||
AUTOFDO_PROFILE := n
|
||||
|
||||
Workflow
|
||||
========
|
||||
|
||||
Here is an example workflow for AutoFDO kernel:
|
||||
|
||||
1) Build the kernel on the host machine with LLVM enabled,
|
||||
for example, ::
|
||||
|
||||
$ make menuconfig LLVM=1
|
||||
|
||||
Turn on AutoFDO build config::
|
||||
|
||||
CONFIG_AUTOFDO_CLANG=y
|
||||
|
||||
With a configuration that has LLVM enabled, use the following command::
|
||||
|
||||
$ scripts/config -e AUTOFDO_CLANG
|
||||
|
||||
After getting the config, build with ::
|
||||
|
||||
$ make LLVM=1
|
||||
|
||||
2) Install the kernel on the test machine.
|
||||
|
||||
3) Run the load tests. The '-c' option in perf specifies the sample
|
||||
event period. We suggest using a suitable prime number, like 500009,
|
||||
for this purpose.
|
||||
|
||||
- For Intel platforms::
|
||||
|
||||
$ perf record -e BR_INST_RETIRED.NEAR_TAKEN:k -a -N -b -c <count> -o <perf_file> -- <loadtest>
|
||||
|
||||
- For AMD platforms:
|
||||
|
||||
The supported systems are: Zen3 with BRS, or Zen4 with amd_lbr_v2. To check,
|
||||
|
||||
For Zen3::
|
||||
|
||||
$ cat /proc/cpuinfo | grep " brs"
|
||||
|
||||
For Zen4::
|
||||
|
||||
$ cat /proc/cpuinfo | grep amd_lbr_v2
|
||||
|
||||
The following command generates the perf data file::
|
||||
|
||||
$ perf record --pfm-events RETIRED_TAKEN_BRANCH_INSTRUCTIONS:k -a -N -b -c <count> -o <perf_file> -- <loadtest>
|
||||
|
||||
4) (Optional) Download the raw perf file to the host machine.
|
||||
|
||||
5) To generate an AutoFDO profile, two offline tools are available:
|
||||
create_llvm_prof and llvm-profgen. The create_llvm_prof tool is part
|
||||
of the AutoFDO project and can be found on GitHub
|
||||
(https://github.com/google/autofdo), version v0.30.1 or later.
|
||||
The llvm-profgen tool is included in the LLVM compiler itself. It's
|
||||
important to note that the version of llvm-profgen doesn't need to match
|
||||
the version of Clang. It needs to be the LLVM 19 release of Clang
|
||||
or later, or just from the LLVM trunk. ::
|
||||
|
||||
$ llvm-profgen --kernel --binary=<vmlinux> --perfdata=<perf_file> -o <profile_file>
|
||||
|
||||
or ::
|
||||
|
||||
$ create_llvm_prof --binary=<vmlinux> --profile=<perf_file> --format=extbinary --out=<profile_file>
|
||||
|
||||
Note that multiple AutoFDO profile files can be merged into one via::
|
||||
|
||||
$ llvm-profdata merge -o <profile_file> <profile_1> <profile_2> ... <profile_n>
|
||||
|
||||
6) Rebuild the kernel using the AutoFDO profile file with the same config as step 1,
|
||||
(Note CONFIG_AUTOFDO_CLANG needs to be enabled)::
|
||||
|
||||
$ make LLVM=1 CLANG_AUTOFDO_PROFILE=<profile_file>
|
||||
@ -34,6 +34,8 @@ Documentation/dev-tools/testing-overview.rst
|
||||
ktap
|
||||
checkuapi
|
||||
gpio-sloppy-logic-analyzer
|
||||
autofdo
|
||||
propeller
|
||||
|
||||
|
||||
.. only:: subproject and html
|
||||
|
||||
@ -511,19 +511,14 @@ Tests
|
||||
~~~~~
|
||||
|
||||
There are KASAN tests that allow verifying that KASAN works and can detect
|
||||
certain types of memory corruptions. The tests consist of two parts:
|
||||
certain types of memory corruptions.
|
||||
|
||||
1. Tests that are integrated with the KUnit Test Framework. Enabled with
|
||||
``CONFIG_KASAN_KUNIT_TEST``. These tests can be run and partially verified
|
||||
All KASAN tests are integrated with the KUnit Test Framework and can be enabled
|
||||
via ``CONFIG_KASAN_KUNIT_TEST``. The tests can be run and partially verified
|
||||
automatically in a few different ways; see the instructions below.
|
||||
|
||||
2. Tests that are currently incompatible with KUnit. Enabled with
|
||||
``CONFIG_KASAN_MODULE_TEST`` and can only be run as a module. These tests can
|
||||
only be verified manually by loading the kernel module and inspecting the
|
||||
kernel log for KASAN reports.
|
||||
|
||||
Each KUnit-compatible KASAN test prints one of multiple KASAN reports if an
|
||||
error is detected. Then the test prints its number and status.
|
||||
Each KASAN test prints one of multiple KASAN reports if an error is detected.
|
||||
Then the test prints its number and status.
|
||||
|
||||
When a test passes::
|
||||
|
||||
@ -550,16 +545,16 @@ Or, if one of the tests failed::
|
||||
|
||||
not ok 1 - kasan
|
||||
|
||||
There are a few ways to run KUnit-compatible KASAN tests.
|
||||
There are a few ways to run the KASAN tests.
|
||||
|
||||
1. Loadable module
|
||||
|
||||
With ``CONFIG_KUNIT`` enabled, KASAN-KUnit tests can be built as a loadable
|
||||
module and run by loading ``kasan_test.ko`` with ``insmod`` or ``modprobe``.
|
||||
With ``CONFIG_KUNIT`` enabled, the tests can be built as a loadable module
|
||||
and run by loading ``kasan_test.ko`` with ``insmod`` or ``modprobe``.
|
||||
|
||||
2. Built-In
|
||||
|
||||
With ``CONFIG_KUNIT`` built-in, KASAN-KUnit tests can be built-in as well.
|
||||
With ``CONFIG_KUNIT`` built-in, the tests can be built-in as well.
|
||||
In this case, the tests will run at boot as a late-init call.
|
||||
|
||||
3. Using kunit_tool
|
||||
|
||||
@ -161,6 +161,7 @@ See the include/linux/kmemleak.h header for the functions prototype.
|
||||
- ``kmemleak_free_percpu`` - notify of a percpu memory block freeing
|
||||
- ``kmemleak_update_trace`` - update object allocation stack trace
|
||||
- ``kmemleak_not_leak`` - mark an object as not a leak
|
||||
- ``kmemleak_transient_leak`` - mark an object as a transient leak
|
||||
- ``kmemleak_ignore`` - do not scan or report an object as leak
|
||||
- ``kmemleak_scan_area`` - add scan areas inside a memory block
|
||||
- ``kmemleak_no_scan`` - do not scan a memory block
|
||||
|
||||
162
Documentation/dev-tools/propeller.rst
Normal file
162
Documentation/dev-tools/propeller.rst
Normal file
@ -0,0 +1,162 @@
|
||||
.. SPDX-License-Identifier: GPL-2.0
|
||||
|
||||
=====================================
|
||||
Using Propeller with the Linux kernel
|
||||
=====================================
|
||||
|
||||
This enables Propeller build support for the kernel when using Clang
|
||||
compiler. Propeller is a profile-guided optimization (PGO) method used
|
||||
to optimize binary executables. Like AutoFDO, it utilizes hardware
|
||||
sampling to gather information about the frequency of execution of
|
||||
different code paths within a binary. Unlike AutoFDO, this information
|
||||
is then used right before the linking phase to optimize (among others)
|
||||
block layout within and across functions.
|
||||
|
||||
A few important notes about adopting Propeller optimization:
|
||||
|
||||
#. Although it can be used as a standalone optimization step, it is
|
||||
strongly recommended to apply Propeller on top of AutoFDO,
|
||||
AutoFDO+ThinLTO or Instrument FDO. The rest of this document
|
||||
assumes this paradigm.
|
||||
|
||||
#. Propeller uses another round of profiling on top of
|
||||
AutoFDO/AutoFDO+ThinLTO/iFDO. The whole build process involves
|
||||
"build-afdo - train-afdo - build-propeller - train-propeller -
|
||||
build-optimized".
|
||||
|
||||
#. Propeller requires LLVM 19 release or later for Clang/Clang++
|
||||
and the linker(ld.lld).
|
||||
|
||||
#. In addition to LLVM toolchain, Propeller requires a profiling
|
||||
conversion tool: https://github.com/google/autofdo with a release
|
||||
after v0.30.1: https://github.com/google/autofdo/releases/tag/v0.30.1.
|
||||
|
||||
The Propeller optimization process involves the following steps:
|
||||
|
||||
#. Initial building: Build the AutoFDO or AutoFDO+ThinLTO binary as
|
||||
you would normally do, but with a set of compile-time / link-time
|
||||
flags, so that a special metadata section is created within the
|
||||
kernel binary. The special section is only intended to be used by the
|
||||
profiling tool, it is not part of the runtime image, nor does it
|
||||
change kernel run time text sections.
|
||||
|
||||
#. Profiling: The above kernel is then run with a representative
|
||||
workload to gather execution frequency data. This data is collected
|
||||
using hardware sampling, via perf. Propeller is most effective on
|
||||
platforms supporting advanced PMU features like LBR on Intel
|
||||
machines. This step is the same as profiling the kernel for AutoFDO
|
||||
(the exact perf parameters can be different).
|
||||
|
||||
#. Propeller profile generation: Perf output file is converted to a
|
||||
pair of Propeller profiles via an offline tool.
|
||||
|
||||
#. Optimized build: Build the AutoFDO or AutoFDO+ThinLTO optimized
|
||||
binary as you would normally do, but with a compile-time /
|
||||
link-time flag to pick up the Propeller compile time and link time
|
||||
profiles. This build step uses 3 profiles - the AutoFDO profile,
|
||||
the Propeller compile-time profile and the Propeller link-time
|
||||
profile.
|
||||
|
||||
#. Deployment: The optimized kernel binary is deployed and used
|
||||
in production environments, providing improved performance
|
||||
and reduced latency.
|
||||
|
||||
Preparation
|
||||
===========
|
||||
|
||||
Configure the kernel with::
|
||||
|
||||
CONFIG_AUTOFDO_CLANG=y
|
||||
CONFIG_PROPELLER_CLANG=y
|
||||
|
||||
Customization
|
||||
=============
|
||||
|
||||
The default CONFIG_PROPELLER_CLANG setting covers kernel space objects
|
||||
for Propeller builds. One can, however, enable or disable Propeller build
|
||||
for individual files and directories by adding a line similar to the
|
||||
following to the respective kernel Makefile:
|
||||
|
||||
- For enabling a single file (e.g. foo.o)::
|
||||
|
||||
PROPELLER_PROFILE_foo.o := y
|
||||
|
||||
- For enabling all files in one directory::
|
||||
|
||||
PROPELLER_PROFILE := y
|
||||
|
||||
- For disabling one file::
|
||||
|
||||
PROPELLER_PROFILE_foo.o := n
|
||||
|
||||
- For disabling all files in one directory::
|
||||
|
||||
PROPELLER_PROFILE := n
|
||||
|
||||
|
||||
Workflow
|
||||
========
|
||||
|
||||
Here is an example workflow for building an AutoFDO+Propeller kernel:
|
||||
|
||||
1) Assuming an AutoFDO profile is already collected following
|
||||
instructions in the AutoFDO document, build the kernel on the host
|
||||
machine, with AutoFDO and Propeller build configs ::
|
||||
|
||||
CONFIG_AUTOFDO_CLANG=y
|
||||
CONFIG_PROPELLER_CLANG=y
|
||||
|
||||
and ::
|
||||
|
||||
$ make LLVM=1 CLANG_AUTOFDO_PROFILE=<autofdo-profile-name>
|
||||
|
||||
2) Install the kernel on the test machine.
|
||||
|
||||
3) Run the load tests. The '-c' option in perf specifies the sample
|
||||
event period. We suggest using a suitable prime number, like 500009,
|
||||
for this purpose.
|
||||
|
||||
- For Intel platforms::
|
||||
|
||||
$ perf record -e BR_INST_RETIRED.NEAR_TAKEN:k -a -N -b -c <count> -o <perf_file> -- <loadtest>
|
||||
|
||||
- For AMD platforms::
|
||||
|
||||
$ perf record --pfm-events RETIRED_TAKEN_BRANCH_INSTRUCTIONS:k -a -N -b -c <count> -o <perf_file> -- <loadtest>
|
||||
|
||||
Note you can repeat the above steps to collect multiple <perf_file>s.
|
||||
|
||||
4) (Optional) Download the raw perf file(s) to the host machine.
|
||||
|
||||
5) Use the create_llvm_prof tool (https://github.com/google/autofdo) to
|
||||
generate Propeller profile. ::
|
||||
|
||||
$ create_llvm_prof --binary=<vmlinux> --profile=<perf_file>
|
||||
--format=propeller --propeller_output_module_name
|
||||
--out=<propeller_profile_prefix>_cc_profile.txt
|
||||
--propeller_symorder=<propeller_profile_prefix>_ld_profile.txt
|
||||
|
||||
"<propeller_profile_prefix>" can be something like "/home/user/dir/any_string".
|
||||
|
||||
This command generates a pair of Propeller profiles:
|
||||
"<propeller_profile_prefix>_cc_profile.txt" and
|
||||
"<propeller_profile_prefix>_ld_profile.txt".
|
||||
|
||||
If more than one perf_file was collected in the previous step,
|
||||
you can create a temp list file "<perf_file_list>" with each line
|
||||
containing one perf file name and run::
|
||||
|
||||
$ create_llvm_prof --binary=<vmlinux> --profile=@<perf_file_list>
|
||||
--format=propeller --propeller_output_module_name
|
||||
--out=<propeller_profile_prefix>_cc_profile.txt
|
||||
--propeller_symorder=<propeller_profile_prefix>_ld_profile.txt
|
||||
|
||||
6) Rebuild the kernel using the AutoFDO and Propeller
|
||||
profiles. ::
|
||||
|
||||
CONFIG_AUTOFDO_CLANG=y
|
||||
CONFIG_PROPELLER_CLANG=y
|
||||
|
||||
and ::
|
||||
|
||||
$ make LLVM=1 CLANG_AUTOFDO_PROFILE=<profile_file> CLANG_PROPELLER_PROFILE_PREFIX=<propeller_profile_prefix>
|
||||
@ -7,7 +7,6 @@ $schema: http://devicetree.org/meta-schemas/core.yaml#
|
||||
title: Ceva AHCI SATA Controller
|
||||
|
||||
maintainers:
|
||||
- Mubin Sayyed <mubin.sayyed@amd.com>
|
||||
- Radhey Shyam Pandey <radhey.shyam.pandey@amd.com>
|
||||
|
||||
description: |
|
||||
|
||||
@ -253,6 +253,53 @@ properties:
|
||||
|
||||
additionalProperties: false
|
||||
|
||||
sink-wait-cap-time-ms:
|
||||
description: Represents the max time in ms that USB Type-C port (in sink
|
||||
role) should wait for the port partner (source role) to send source caps.
|
||||
SinkWaitCap timer starts when port in sink role attaches to the source.
|
||||
This timer will stop when sink receives PD source cap advertisement before
|
||||
timeout in which case it'll move to capability negotiation stage. A
|
||||
timeout leads to a hard reset message by the port.
|
||||
minimum: 310
|
||||
maximum: 620
|
||||
default: 310
|
||||
|
||||
ps-source-off-time-ms:
|
||||
description: Represents the max time in ms that a DRP in source role should
|
||||
take to turn off power after the PsSourceOff timer starts. PsSourceOff
|
||||
timer starts when a sink's PHY layer receives EOP of the GoodCRC message
|
||||
(corresponding to an Accept message sent in response to a PR_Swap or a
|
||||
FR_Swap request). This timer stops when last bit of GoodCRC EOP
|
||||
corresponding to the received PS_RDY message is transmitted by the PHY
|
||||
layer. A timeout shall lead to error recovery in the type-c port.
|
||||
minimum: 750
|
||||
maximum: 920
|
||||
default: 920
|
||||
|
||||
cc-debounce-time-ms:
|
||||
description: Represents the max time in ms that a port shall wait to
|
||||
determine if it's attached to a partner.
|
||||
minimum: 100
|
||||
maximum: 200
|
||||
default: 200
|
||||
|
||||
sink-bc12-completion-time-ms:
|
||||
description: Represents the max time in ms that a port in sink role takes
|
||||
to complete Battery Charger (BC1.2) Detection. BC1.2 detection is a
|
||||
hardware mechanism, which in some TCPC implementations, can run in
|
||||
parallel once the Type-C connection state machine reaches the "potential
|
||||
connect as sink" state. In TCPCs where this causes delays to respond to
|
||||
the incoming PD messages, sink-bc12-completion-time-ms is used to delay
|
||||
PD negotiation till BC1.2 detection completes.
|
||||
default: 0
|
||||
|
||||
pd-revision:
|
||||
description: Specifies the maximum USB PD revision and version supported by
|
||||
the connector. This property is specified in the following order;
|
||||
<revision_major, revision_minor, version_major, version_minor>.
|
||||
$ref: /schemas/types.yaml#/definitions/uint8-array
|
||||
maxItems: 4
|
||||
|
||||
dependencies:
|
||||
sink-vdos-v1: [ sink-vdos ]
|
||||
sink-vdos: [ sink-vdos-v1 ]
|
||||
@ -380,7 +427,7 @@ examples:
|
||||
};
|
||||
|
||||
# USB-C connector attached to a typec port controller(ptn5110), which has
|
||||
# power delivery support and enables drp.
|
||||
# power delivery support, explicitly defines time properties and enables drp.
|
||||
- |
|
||||
#include <dt-bindings/usb/pd.h>
|
||||
typec: ptn5110 {
|
||||
@ -393,6 +440,10 @@ examples:
|
||||
sink-pdos = <PDO_FIXED(5000, 2000, PDO_FIXED_USB_COMM)
|
||||
PDO_VAR(5000, 12000, 2000)>;
|
||||
op-sink-microwatt = <10000000>;
|
||||
sink-wait-cap-time-ms = <465>;
|
||||
ps-source-off-time-ms = <835>;
|
||||
cc-debounce-time-ms = <101>;
|
||||
sink-bc12-completion-time-ms = <500>;
|
||||
};
|
||||
};
|
||||
|
||||
|
||||
@ -12,7 +12,6 @@ description:
|
||||
PS_MODE). Every pin can be configured as input/output.
|
||||
|
||||
maintainers:
|
||||
- Mubin Sayyed <mubin.sayyed@amd.com>
|
||||
- Radhey Shyam Pandey <radhey.shyam.pandey@amd.com>
|
||||
|
||||
properties:
|
||||
|
||||
@ -36,10 +36,12 @@ properties:
|
||||
items:
|
||||
- enum:
|
||||
- qcom,qcm2290-smmu-500
|
||||
- qcom,qcs615-smmu-500
|
||||
- qcom,qcs8300-smmu-500
|
||||
- qcom,qdu1000-smmu-500
|
||||
- qcom,sa8255p-smmu-500
|
||||
- qcom,sa8775p-smmu-500
|
||||
- qcom,sar2130p-smmu-500
|
||||
- qcom,sc7180-smmu-500
|
||||
- qcom,sc7280-smmu-500
|
||||
- qcom,sc8180x-smmu-500
|
||||
@ -88,6 +90,7 @@ properties:
|
||||
- qcom,qcm2290-smmu-500
|
||||
- qcom,sa8255p-smmu-500
|
||||
- qcom,sa8775p-smmu-500
|
||||
- qcom,sar2130p-smmu-500
|
||||
- qcom,sc7280-smmu-500
|
||||
- qcom,sc8180x-smmu-500
|
||||
- qcom,sc8280xp-smmu-500
|
||||
@ -524,6 +527,7 @@ allOf:
|
||||
compatible:
|
||||
items:
|
||||
- enum:
|
||||
- qcom,sar2130p-smmu-500
|
||||
- qcom,sm8550-smmu-500
|
||||
- qcom,sm8650-smmu-500
|
||||
- qcom,x1e80100-smmu-500
|
||||
@ -555,6 +559,7 @@ allOf:
|
||||
- cavium,smmu-v2
|
||||
- marvell,ap806-smmu-500
|
||||
- nvidia,smmu-500
|
||||
- qcom,qcs615-smmu-500
|
||||
- qcom,qcs8300-smmu-500
|
||||
- qcom,qdu1000-smmu-500
|
||||
- qcom,sa8255p-smmu-500
|
||||
|
||||
@ -210,6 +210,27 @@ properties:
|
||||
lane-polarities property is omitted, the value must be interpreted as 0
|
||||
(normal). This property is valid for serial busses only.
|
||||
|
||||
line-orders:
|
||||
$ref: /schemas/types.yaml#/definitions/uint32-array
|
||||
minItems: 1
|
||||
maxItems: 8
|
||||
items:
|
||||
enum:
|
||||
- 0 # ABC
|
||||
- 1 # ACB
|
||||
- 2 # BAC
|
||||
- 3 # BCA
|
||||
- 4 # CAB
|
||||
- 5 # CBA
|
||||
description:
|
||||
An array of line orders of the CSI-2 C-PHY data lanes. The order of the
|
||||
lanes is the same as in the data-lanes property. Valid values are 0-5 as
|
||||
defined in the MIPI Discovery and Configuration (DisCo) Specification for
|
||||
Imaging. The length of the array must be the same length as the
|
||||
data-lanes property. If the line-orders property is omitted, the value
|
||||
shall be interpreted as 0 (ABC). This property is valid for CSI-2 C-PHY
|
||||
busses only.
|
||||
|
||||
strobe:
|
||||
$ref: /schemas/types.yaml#/definitions/uint32
|
||||
enum: [ 0, 1 ]
|
||||
|
||||
@ -31,6 +31,10 @@ node must be named "audio-codec".
|
||||
Required properties for the audio-codec subnode:
|
||||
|
||||
- #sound-dai-cells = <1>;
|
||||
- interrupts : should contain jack detection interrupts, with headset
|
||||
detect interrupt matching "hs" and microphone bias 2
|
||||
detect interrupt matching "mb2" in interrupt-names.
|
||||
- interrupt-names : Contains "hs", "mb2"
|
||||
|
||||
The audio-codec provides two DAIs. The first one is connected to the
|
||||
Stereo HiFi DAC and the second one is connected to the Voice DAC.
|
||||
@ -52,6 +56,8 @@ Example:
|
||||
|
||||
audio-codec {
|
||||
#sound-dai-cells = <1>;
|
||||
interrupts-extended = <&cpcap 9 0>, <&cpcap 10 0>;
|
||||
interrupt-names = "hs", "mb2";
|
||||
|
||||
/* HiFi */
|
||||
port@0 {
|
||||
|
||||
@ -13,6 +13,10 @@ description: |
|
||||
This document describes the devicetree bindings for a mmc-host controller
|
||||
child node describing a mmc-card / an eMMC.
|
||||
|
||||
It's possible to define a fixed partition table for an eMMC for the user
|
||||
partition, the 2 BOOT partitions (boot1/2) and the 4 GP partitions (gp1/2/3/4) if supported
|
||||
by the eMMC.
|
||||
|
||||
properties:
|
||||
compatible:
|
||||
const: mmc-card
|
||||
@ -26,6 +30,24 @@ properties:
|
||||
Use this to indicate that the mmc-card has a broken hpi
|
||||
implementation, and that hpi should not be used.
|
||||
|
||||
patternProperties:
|
||||
"^partitions(-boot[12]|-gp[1-4])?$":
|
||||
$ref: /schemas/mtd/partitions/partitions.yaml
|
||||
|
||||
patternProperties:
|
||||
"^partition@[0-9a-f]+$":
|
||||
$ref: /schemas/mtd/partitions/partition.yaml
|
||||
|
||||
properties:
|
||||
reg:
|
||||
description: Must be multiple of 512 as it's converted
|
||||
internally from bytes to SECTOR_SIZE (512 bytes)
|
||||
|
||||
required:
|
||||
- reg
|
||||
|
||||
unevaluatedProperties: false
|
||||
|
||||
required:
|
||||
- compatible
|
||||
- reg
|
||||
@ -42,6 +64,36 @@ examples:
|
||||
compatible = "mmc-card";
|
||||
reg = <0>;
|
||||
broken-hpi;
|
||||
|
||||
partitions {
|
||||
compatible = "fixed-partitions";
|
||||
|
||||
#address-cells = <1>;
|
||||
#size-cells = <1>;
|
||||
|
||||
partition@0 {
|
||||
label = "kernel"; /* Kernel */
|
||||
reg = <0x0 0x2000000>; /* 32 MB */
|
||||
};
|
||||
|
||||
partition@2000000 {
|
||||
label = "rootfs";
|
||||
reg = <0x2000000 0x40000000>; /* 1GB */
|
||||
};
|
||||
};
|
||||
|
||||
partitions-boot1 {
|
||||
compatible = "fixed-partitions";
|
||||
|
||||
#address-cells = <1>;
|
||||
#size-cells = <1>;
|
||||
|
||||
partition@0 {
|
||||
label = "bl";
|
||||
reg = <0x0 0x2000000>; /* 32MB */
|
||||
read-only;
|
||||
};
|
||||
};
|
||||
};
|
||||
};
|
||||
|
||||
|
||||
122
Documentation/devicetree/bindings/net/aeonsemi,as21xxx.yaml
Normal file
122
Documentation/devicetree/bindings/net/aeonsemi,as21xxx.yaml
Normal file
@ -0,0 +1,122 @@
|
||||
# SPDX-License-Identifier: (GPL-2.0-only OR BSD-2-Clause)
|
||||
%YAML 1.2
|
||||
---
|
||||
$id: http://devicetree.org/schemas/net/aeonsemi,as21xxx.yaml#
|
||||
$schema: http://devicetree.org/meta-schemas/core.yaml#
|
||||
|
||||
title: Aeonsemi AS21XXX Ethernet PHY
|
||||
|
||||
maintainers:
|
||||
- Christian Marangi <ansuelsmth@gmail.com>
|
||||
|
||||
description: |
|
||||
Aeonsemi AS21xxx Ethernet PHYs require firmware to be loaded to actually
|
||||
work. The same firmware is compatible with various PHYs of the same family.
|
||||
|
||||
A PHY with no firmware loaded will be exposed on the MDIO bus with ID
|
||||
0x7500 0x7500 or 0x7500 0x9410 on C45 registers.
|
||||
|
||||
Loading the firmware can be done, and is implemented by OEMs, in 2 different ways:
|
||||
- Attached SPI flash directly to the PHY with the firmware. The PHY
|
||||
will self load the firmware in the presence of this configuration.
|
||||
- Manually provided firmware loaded from a file in the filesystem.
|
||||
|
||||
Each PHY can support up to 5 LEDs.
|
||||
|
||||
AS21xxx PHY Name logic:
|
||||
|
||||
AS21x1xxB1
|
||||
^ ^^
|
||||
| |J: Supports SyncE/PTP
|
||||
| |P: No SyncE/PTP support
|
||||
| 1: Supports 2nd Serdes
|
||||
| 2: No 2nd Serdes support
|
||||
0: 10G, 5G, 2.5G
|
||||
5: 5G, 2.5G
|
||||
2: 2.5G
|
||||
|
||||
allOf:
|
||||
- $ref: ethernet-phy.yaml#
|
||||
|
||||
select:
|
||||
properties:
|
||||
compatible:
|
||||
contains:
|
||||
enum:
|
||||
- ethernet-phy-id7500.9410
|
||||
- ethernet-phy-id7500.9402
|
||||
- ethernet-phy-id7500.9412
|
||||
- ethernet-phy-id7500.9422
|
||||
- ethernet-phy-id7500.9432
|
||||
- ethernet-phy-id7500.9442
|
||||
- ethernet-phy-id7500.9452
|
||||
- ethernet-phy-id7500.9462
|
||||
- ethernet-phy-id7500.9472
|
||||
- ethernet-phy-id7500.9482
|
||||
- ethernet-phy-id7500.9492
|
||||
required:
|
||||
- compatible
|
||||
|
||||
properties:
|
||||
reg:
|
||||
maxItems: 1
|
||||
|
||||
firmware-name:
|
||||
description: specify the name of PHY firmware to load
|
||||
maxItems: 1
|
||||
|
||||
required:
|
||||
- compatible
|
||||
- reg
|
||||
|
||||
if:
|
||||
properties:
|
||||
compatible:
|
||||
contains:
|
||||
const: ethernet-phy-id7500.9410
|
||||
then:
|
||||
required:
|
||||
- firmware-name
|
||||
else:
|
||||
properties:
|
||||
firmware-name: false
|
||||
|
||||
unevaluatedProperties: false
|
||||
|
||||
examples:
|
||||
- |
|
||||
#include <dt-bindings/leds/common.h>
|
||||
|
||||
mdio {
|
||||
#address-cells = <1>;
|
||||
#size-cells = <0>;
|
||||
|
||||
ethernet-phy@1f {
|
||||
compatible = "ethernet-phy-id7500.9410",
|
||||
"ethernet-phy-ieee802.3-c45";
|
||||
|
||||
reg = <31>;
|
||||
firmware-name = "as21x1x_fw.bin";
|
||||
|
||||
leds {
|
||||
#address-cells = <1>;
|
||||
#size-cells = <0>;
|
||||
|
||||
led@0 {
|
||||
reg = <0>;
|
||||
color = <LED_COLOR_ID_GREEN>;
|
||||
function = LED_FUNCTION_LAN;
|
||||
function-enumerator = <0>;
|
||||
default-state = "keep";
|
||||
};
|
||||
|
||||
led@1 {
|
||||
reg = <1>;
|
||||
color = <LED_COLOR_ID_GREEN>;
|
||||
function = LED_FUNCTION_LAN;
|
||||
function-enumerator = <1>;
|
||||
default-state = "keep";
|
||||
};
|
||||
};
|
||||
};
|
||||
};
|
||||
@ -19,6 +19,7 @@ properties:
|
||||
- qcom,qca2066-bt
|
||||
- qcom,qca6174-bt
|
||||
- qcom,qca9377-bt
|
||||
- qcom,wcn3950-bt
|
||||
- qcom,wcn3988-bt
|
||||
- qcom,wcn3990-bt
|
||||
- qcom,wcn3991-bt
|
||||
@ -101,7 +102,10 @@ properties:
|
||||
max-speed: true
|
||||
|
||||
firmware-name:
|
||||
description: specify the name of nvm firmware to load
|
||||
minItems: 1
|
||||
items:
|
||||
- description: specify the name of nvm firmware to load
|
||||
- description: specify the name of rampatch firmware to load
|
||||
|
||||
local-bd-address: true
|
||||
|
||||
@ -135,6 +139,7 @@ allOf:
|
||||
compatible:
|
||||
contains:
|
||||
enum:
|
||||
- qcom,wcn3950-bt
|
||||
- qcom,wcn3988-bt
|
||||
- qcom,wcn3990-bt
|
||||
- qcom,wcn3991-bt
|
||||
@ -154,16 +159,11 @@ allOf:
|
||||
- qcom,wcn6750-bt
|
||||
then:
|
||||
required:
|
||||
- enable-gpios
|
||||
- swctrl-gpios
|
||||
- vddio-supply
|
||||
- vddaon-supply
|
||||
- vddbtcxmx-supply
|
||||
- vddrfacmn-supply
|
||||
- vddrfa0p8-supply
|
||||
- vddrfa1p7-supply
|
||||
- vddrfa1p2-supply
|
||||
- vddasd-supply
|
||||
- if:
|
||||
properties:
|
||||
compatible:
|
||||
|
||||
@ -158,6 +158,27 @@ properties:
|
||||
Mark the corresponding energy efficient ethernet mode as
|
||||
broken and request the ethernet to stop advertising it.
|
||||
|
||||
timing-role:
|
||||
$ref: /schemas/types.yaml#/definitions/string
|
||||
enum:
|
||||
- forced-master
|
||||
- forced-slave
|
||||
- preferred-master
|
||||
- preferred-slave
|
||||
description: |
|
||||
Specifies the timing role of the PHY in the network link. This property is
|
||||
required for setups where the role must be explicitly assigned via the
|
||||
device tree due to limitations in hardware strapping or incorrect strap
|
||||
configurations.
|
||||
It is applicable to Single Pair Ethernet (1000/100/10Base-T1) and other
|
||||
PHY types, including 1000Base-T, where it controls whether the PHY should
|
||||
be a master (clock source) or a slave (clock receiver).
|
||||
|
||||
- 'forced-master': The PHY is forced to operate as a master.
|
||||
- 'forced-slave': The PHY is forced to operate as a slave.
|
||||
- 'preferred-master': Prefer the PHY to be master but allow negotiation.
|
||||
- 'preferred-slave': Prefer the PHY to be slave but allow negotiation.
|
||||
|
||||
pses:
|
||||
$ref: /schemas/types.yaml#/definitions/phandle-array
|
||||
maxItems: 1
|
||||
@ -211,6 +232,22 @@ properties:
|
||||
PHY's that have configurable TX internal delays. If this property is
|
||||
present then the PHY applies the TX delay.
|
||||
|
||||
tx-amplitude-100base-tx-percent:
|
||||
description:
|
||||
Transmit amplitude gain applied for 100BASE-TX. 100% matches 2V
|
||||
peak-to-peak specified in ANSI X3.263. When omitted, the PHY's default
|
||||
will be left as is.
|
||||
|
||||
mac-termination-ohms:
|
||||
maximum: 200
|
||||
description:
|
||||
The xMII signals need series termination on the driver side to match both
|
||||
the output driver impedance and the line characteristic impedance, to
|
||||
prevent reflections and EMI problems. Select a resistance value which is
|
||||
supported by the builtin resistors of the PHY, otherwise the resistors may
|
||||
have to be placed on board. When omitted, the PHY's default will be left as
|
||||
is.
|
||||
|
||||
leds:
|
||||
type: object
|
||||
|
||||
|
||||
@ -48,6 +48,12 @@ properties:
|
||||
firmware-name:
|
||||
description: specify the name of PHY firmware to load
|
||||
|
||||
marvell,mdi-cfg-order:
|
||||
$ref: /schemas/types.yaml#/definitions/uint32
|
||||
enum: [0, 1]
|
||||
description:
|
||||
force normal (0) or reverse (1) order of MDI pairs, overriding MDI_CFG bootstrap pin.
|
||||
|
||||
nvmem-cells:
|
||||
description: phandle to the firmware nvmem cell
|
||||
maxItems: 1
|
||||
|
||||
@ -62,6 +62,22 @@ allOf:
|
||||
reference clock output when RMII mode enabled.
|
||||
Only supported on TJA1100 and TJA1101.
|
||||
|
||||
- if:
|
||||
properties:
|
||||
compatible:
|
||||
contains:
|
||||
enum:
|
||||
- ethernet-phy-id001b.b010
|
||||
- ethernet-phy-id001b.b013
|
||||
- ethernet-phy-id001b.b030
|
||||
- ethernet-phy-id001b.b031
|
||||
|
||||
then:
|
||||
properties:
|
||||
nxp,rmii-refclk-out:
|
||||
type: boolean
|
||||
description: Enable 50MHz RMII reference clock output on REF_CLK pin.
|
||||
|
||||
patternProperties:
|
||||
"^ethernet-phy@[0-9a-f]+$":
|
||||
type: object
|
||||
|
||||
@ -32,6 +32,10 @@ properties:
|
||||
shutdown-gpios:
|
||||
maxItems: 1
|
||||
|
||||
default-blocked:
|
||||
$ref: /schemas/types.yaml#/definitions/flag
|
||||
description: configure rfkill state as blocked at boot
|
||||
|
||||
required:
|
||||
- compatible
|
||||
- radio-type
|
||||
@ -48,4 +52,5 @@ examples:
|
||||
label = "rfkill-pcie-wlan";
|
||||
radio-type = "wlan";
|
||||
shutdown-gpios = <&gpio2 25 GPIO_ACTIVE_HIGH>;
|
||||
default-blocked;
|
||||
};
|
||||
|
||||
@ -96,6 +96,35 @@ properties:
|
||||
- master
|
||||
- slave
|
||||
|
||||
ti,gpio2-clk-out:
|
||||
description: |
|
||||
DP83822 PHY only.
|
||||
The GPIO2 pin on the DP83822 can be configured as clock output. When
|
||||
omitted, the PHY's default will be left as is.
|
||||
|
||||
- 'mac-if': In MII mode the clock frequency is 25-MHz, in RMII Mode the
|
||||
clock frequency is 50-MHz and in RGMII Mode the clock frequency is
|
||||
25-MHz.
|
||||
- 'xi': XI clock(pass-through clock from XI pin).
|
||||
- 'int-ref': Internal reference clock 25-MHz.
|
||||
- 'rmii-master-mode-ref': RMII master mode reference clock 50-MHz. RMII
|
||||
master mode reference clock is identical to MAC IF clock in RMII master
|
||||
mode.
|
||||
- 'free-running': Free running clock 125-MHz.
|
||||
- 'recovered': Recovered clock is a 125-MHz recovered clock from a
|
||||
connected link partner.
|
||||
$ref: /schemas/types.yaml#/definitions/string
|
||||
enum:
|
||||
- mac-if
|
||||
- xi
|
||||
- int-ref
|
||||
- rmii-master-mode-ref
|
||||
- free-running
|
||||
- recovered
|
||||
|
||||
mac-termination-ohms:
|
||||
enum: [43, 44, 46, 48, 50, 53, 55, 58, 61, 65, 69, 73, 78, 84, 91, 99]
|
||||
|
||||
required:
|
||||
- reg
|
||||
|
||||
@ -110,6 +139,8 @@ examples:
|
||||
reg = <0>;
|
||||
rx-internal-delay-ps = <1>;
|
||||
tx-internal-delay-ps = <1>;
|
||||
ti,gpio2-clk-out = "xi";
|
||||
mac-termination-ohms = <43>;
|
||||
};
|
||||
};
|
||||
|
||||
|
||||
@ -53,6 +53,7 @@ properties:
|
||||
- pci14e4,4488 # BCM4377
|
||||
- pci14e4,4425 # BCM4378
|
||||
- pci14e4,4433 # BCM4387
|
||||
- pci14e4,449d # BCM43752
|
||||
|
||||
reg:
|
||||
description: SDIO function number for the device (for most cases
|
||||
@ -121,6 +122,14 @@ properties:
|
||||
NVRAM. This would normally be filled in by the bootloader from platform
|
||||
configuration data.
|
||||
|
||||
clocks:
|
||||
items:
|
||||
- description: External Low Power Clock input (32.768 kHz)
|
||||
|
||||
clock-names:
|
||||
items:
|
||||
- const: lpo
|
||||
|
||||
required:
|
||||
- compatible
|
||||
- reg
|
||||
|
||||
@ -93,20 +93,41 @@ properties:
|
||||
|
||||
ieee80211-freq-limit: true
|
||||
|
||||
qcom,ath10k-calibration-data:
|
||||
qcom,calibration-data:
|
||||
$ref: /schemas/types.yaml#/definitions/uint8-array
|
||||
description:
|
||||
Calibration data + board-specific data as a byte array. The length
|
||||
can vary between hardware versions.
|
||||
|
||||
qcom,ath10k-calibration-variant:
|
||||
qcom,ath10k-calibration-data:
|
||||
$ref: /schemas/types.yaml#/definitions/uint8-array
|
||||
deprecated: true
|
||||
description:
|
||||
Calibration data + board-specific data as a byte array. The length
|
||||
can vary between hardware versions.
|
||||
|
||||
qcom,calibration-variant:
|
||||
$ref: /schemas/types.yaml#/definitions/string
|
||||
description:
|
||||
Unique variant identifier of the calibration data in board-2.bin
|
||||
for designs with colliding bus and device specific ids
|
||||
|
||||
qcom,ath10k-calibration-variant:
|
||||
$ref: /schemas/types.yaml#/definitions/string
|
||||
deprecated: true
|
||||
description:
|
||||
Unique variant identifier of the calibration data in board-2.bin
|
||||
for designs with colliding bus and device specific ids
|
||||
|
||||
qcom,pre-calibration-data:
|
||||
$ref: /schemas/types.yaml#/definitions/uint8-array
|
||||
description:
|
||||
Pre-calibration data as a byte array. The length can vary between
|
||||
hardware versions.
|
||||
|
||||
qcom,ath10k-pre-calibration-data:
|
||||
$ref: /schemas/types.yaml#/definitions/uint8-array
|
||||
deprecated: true
|
||||
description:
|
||||
Pre-calibration data as a byte array. The length can vary between
|
||||
hardware versions.
|
||||
|
||||
@ -23,8 +23,15 @@ properties:
|
||||
reg:
|
||||
maxItems: 1
|
||||
|
||||
qcom,calibration-variant:
|
||||
$ref: /schemas/types.yaml#/definitions/string
|
||||
description: |
|
||||
string to uniquely identify variant of the calibration data for designs
|
||||
with colliding bus and device ids
|
||||
|
||||
qcom,ath11k-calibration-variant:
|
||||
$ref: /schemas/types.yaml#/definitions/string
|
||||
deprecated: true
|
||||
description: |
|
||||
string to uniquely identify variant of the calibration data for designs
|
||||
with colliding bus and device ids
|
||||
@ -50,6 +57,9 @@ properties:
|
||||
vddrfa1p7-supply:
|
||||
description: VDD_RFA_1P7 supply regulator handle
|
||||
|
||||
vddrfa1p8-supply:
|
||||
description: VDD_RFA_1P8 supply regulator handle
|
||||
|
||||
vddpcie0p9-supply:
|
||||
description: VDD_PCIE_0P9 supply regulator handle
|
||||
|
||||
@ -77,6 +87,22 @@ allOf:
|
||||
- vddrfa1p7-supply
|
||||
- vddpcie0p9-supply
|
||||
- vddpcie1p8-supply
|
||||
- if:
|
||||
properties:
|
||||
compatible:
|
||||
contains:
|
||||
const: pci17cb,1103
|
||||
then:
|
||||
required:
|
||||
- vddrfacmn-supply
|
||||
- vddaon-supply
|
||||
- vddwlcx-supply
|
||||
- vddwlmx-supply
|
||||
- vddrfa0p8-supply
|
||||
- vddrfa1p2-supply
|
||||
- vddrfa1p8-supply
|
||||
- vddpcie0p9-supply
|
||||
- vddpcie1p8-supply
|
||||
|
||||
additionalProperties: false
|
||||
|
||||
@ -99,7 +125,17 @@ examples:
|
||||
compatible = "pci17cb,1103";
|
||||
reg = <0x10000 0x0 0x0 0x0 0x0>;
|
||||
|
||||
qcom,ath11k-calibration-variant = "LE_X13S";
|
||||
vddrfacmn-supply = <&vreg_pmu_rfa_cmn_0p8>;
|
||||
vddaon-supply = <&vreg_pmu_aon_0p8>;
|
||||
vddwlcx-supply = <&vreg_pmu_wlcx_0p8>;
|
||||
vddwlmx-supply = <&vreg_pmu_wlmx_0p8>;
|
||||
vddpcie1p8-supply = <&vreg_pmu_pcie_1p8>;
|
||||
vddpcie0p9-supply = <&vreg_pmu_pcie_0p9>;
|
||||
vddrfa0p8-supply = <&vreg_pmu_rfa_0p8>;
|
||||
vddrfa1p2-supply = <&vreg_pmu_rfa_1p2>;
|
||||
vddrfa1p8-supply = <&vreg_pmu_rfa_1p7>;
|
||||
|
||||
qcom,calibration-variant = "LE_X13S";
|
||||
};
|
||||
};
|
||||
};
|
||||
|
||||
@ -42,8 +42,15 @@ properties:
|
||||
* reg
|
||||
* reg-names
|
||||
|
||||
qcom,calibration-variant:
|
||||
$ref: /schemas/types.yaml#/definitions/string
|
||||
description:
|
||||
string to uniquely identify variant of the calibration data in the
|
||||
board-2.bin for designs with colliding bus and device specific ids
|
||||
|
||||
qcom,ath11k-calibration-variant:
|
||||
$ref: /schemas/types.yaml#/definitions/string
|
||||
deprecated: true
|
||||
description:
|
||||
string to uniquely identify variant of the calibration data in the
|
||||
board-2.bin for designs with colliding bus and device specific ids
|
||||
|
||||
@ -0,0 +1,211 @@
|
||||
# SPDX-License-Identifier: (GPL-2.0-only OR BSD-2-Clause)
|
||||
# Copyright (c) 2024 Qualcomm Innovation Center, Inc. All rights reserved.
|
||||
%YAML 1.2
|
||||
---
|
||||
$id: http://devicetree.org/schemas/net/wireless/qcom,ath12k-wsi.yaml#
|
||||
$schema: http://devicetree.org/meta-schemas/core.yaml#
|
||||
|
||||
title: Qualcomm Technologies ath12k wireless devices (PCIe) with WSI interface
|
||||
|
||||
maintainers:
|
||||
- Jeff Johnson <jjohnson@kernel.org>
|
||||
- Kalle Valo <kvalo@kernel.org>
|
||||
|
||||
description: |
|
||||
Qualcomm Technologies IEEE 802.11be PCIe devices with WSI interface.
|
||||
|
||||
The ath12k devices (QCN9274) feature WSI support. WSI stands for
|
||||
WLAN Serial Interface. It is used for the exchange of specific
|
||||
control information across radios based on the doorbell mechanism.
|
||||
This WSI connection is essential to exchange control information
|
||||
among these devices.
|
||||
|
||||
The WSI interface includes TX and RX ports, which are used to connect
|
||||
multiple WSI-supported devices together, forming a WSI group.
|
||||
|
||||
Diagram to represent one WSI connection (one WSI group) among
|
||||
three devices.
|
||||
|
||||
+-------+ +-------+ +-------+
|
||||
| pcie1 | | pcie2 | | pcie3 |
|
||||
| | | | | |
|
||||
+----->| wsi |------->| wsi |------->| wsi |-----+
|
||||
| | grp 0 | | grp 0 | | grp 0 | |
|
||||
| +-------+ +-------+ +-------+ |
|
||||
+------------------------------------------------------+
|
||||
|
||||
Diagram to represent two WSI connections (two separate WSI groups)
|
||||
among four devices.
|
||||
|
||||
+-------+ +-------+ +-------+ +-------+
|
||||
| pcie0 | | pcie1 | | pcie2 | | pcie3 |
|
||||
| | | | | | | |
|
||||
+-->| wsi |--->| wsi |--+ +-->| wsi |--->| wsi |--+
|
||||
| | grp 0 | | grp 0 | | | | grp 1 | | grp 1 | |
|
||||
| +-------+ +-------+ | | +-------+ +-------+ |
|
||||
+---------------------------+ +---------------------------+
|
||||
|
||||
properties:
|
||||
compatible:
|
||||
enum:
|
||||
- pci17cb,1109 # QCN9274
|
||||
|
||||
reg:
|
||||
maxItems: 1
|
||||
|
||||
qcom,calibration-variant:
|
||||
$ref: /schemas/types.yaml#/definitions/string
|
||||
description:
|
||||
String to uniquely identify variant of the calibration data for designs
|
||||
with colliding bus and device ids
|
||||
|
||||
qcom,ath12k-calibration-variant:
|
||||
$ref: /schemas/types.yaml#/definitions/string
|
||||
deprecated: true
|
||||
description:
|
||||
String to uniquely identify variant of the calibration data for designs
|
||||
with colliding bus and device ids
|
||||
|
||||
qcom,wsi-controller:
|
||||
$ref: /schemas/types.yaml#/definitions/flag
|
||||
description:
|
||||
The WSI controller device in the WSI group is able to
|
||||
synchronize the Timing Synchronization Function (TSF) clock across
|
||||
all devices in the WSI group.
|
||||
|
||||
ports:
|
||||
$ref: /schemas/graph.yaml#/properties/ports
|
||||
properties:
|
||||
port@0:
|
||||
$ref: /schemas/graph.yaml#/properties/port
|
||||
description:
|
||||
This is the TX port of WSI interface. It is attached to the RX
|
||||
port of the next device in the WSI connection.
|
||||
|
||||
port@1:
|
||||
$ref: /schemas/graph.yaml#/properties/port
|
||||
description:
|
||||
This is the RX port of WSI interface. It is attached to the TX
|
||||
port of the previous device in the WSI connection.
|
||||
|
||||
required:
|
||||
- compatible
|
||||
- reg
|
||||
|
||||
additionalProperties: false
|
||||
|
||||
examples:
|
||||
- |
|
||||
pcie {
|
||||
#address-cells = <3>;
|
||||
#size-cells = <2>;
|
||||
|
||||
pcie@0 {
|
||||
device_type = "pci";
|
||||
reg = <0x0 0x0 0x0 0x0 0x0>;
|
||||
#address-cells = <3>;
|
||||
#size-cells = <2>;
|
||||
ranges;
|
||||
|
||||
wifi@0 {
|
||||
compatible = "pci17cb,1109";
|
||||
reg = <0x0 0x0 0x0 0x0 0x0>;
|
||||
|
||||
qcom,calibration-variant = "RDP433_1";
|
||||
|
||||
ports {
|
||||
#address-cells = <1>;
|
||||
#size-cells = <0>;
|
||||
|
||||
port@0 {
|
||||
reg = <0>;
|
||||
|
||||
wifi1_wsi_tx: endpoint {
|
||||
remote-endpoint = <&wifi2_wsi_rx>;
|
||||
};
|
||||
};
|
||||
|
||||
port@1 {
|
||||
reg = <1>;
|
||||
|
||||
wifi1_wsi_rx: endpoint {
|
||||
remote-endpoint = <&wifi3_wsi_tx>;
|
||||
};
|
||||
};
|
||||
};
|
||||
};
|
||||
};
|
||||
|
||||
pcie@1 {
|
||||
device_type = "pci";
|
||||
reg = <0x0 0x0 0x1 0x0 0x0>;
|
||||
#address-cells = <3>;
|
||||
#size-cells = <2>;
|
||||
ranges;
|
||||
|
||||
wifi@0 {
|
||||
compatible = "pci17cb,1109";
|
||||
reg = <0x0 0x0 0x0 0x0 0x0>;
|
||||
|
||||
qcom,calibration-variant = "RDP433_2";
|
||||
qcom,wsi-controller;
|
||||
|
||||
ports {
|
||||
#address-cells = <1>;
|
||||
#size-cells = <0>;
|
||||
|
||||
port@0 {
|
||||
reg = <0>;
|
||||
|
||||
wifi2_wsi_tx: endpoint {
|
||||
remote-endpoint = <&wifi3_wsi_rx>;
|
||||
};
|
||||
};
|
||||
|
||||
port@1 {
|
||||
reg = <1>;
|
||||
|
||||
wifi2_wsi_rx: endpoint {
|
||||
remote-endpoint = <&wifi1_wsi_tx>;
|
||||
};
|
||||
};
|
||||
};
|
||||
};
|
||||
};
|
||||
|
||||
pcie@2 {
|
||||
device_type = "pci";
|
||||
reg = <0x0 0x0 0x2 0x0 0x0>;
|
||||
#address-cells = <3>;
|
||||
#size-cells = <2>;
|
||||
ranges;
|
||||
|
||||
wifi@0 {
|
||||
compatible = "pci17cb,1109";
|
||||
reg = <0x0 0x0 0x0 0x0 0x0>;
|
||||
|
||||
qcom,calibration-variant = "RDP433_3";
|
||||
|
||||
ports {
|
||||
#address-cells = <1>;
|
||||
#size-cells = <0>;
|
||||
|
||||
port@0 {
|
||||
reg = <0>;
|
||||
|
||||
wifi3_wsi_tx: endpoint {
|
||||
remote-endpoint = <&wifi1_wsi_rx>;
|
||||
};
|
||||
};
|
||||
|
||||
port@1 {
|
||||
reg = <1>;
|
||||
|
||||
wifi3_wsi_rx: endpoint {
|
||||
remote-endpoint = <&wifi2_wsi_tx>;
|
||||
};
|
||||
};
|
||||
};
|
||||
};
|
||||
};
|
||||
};
|
||||
@ -17,11 +17,11 @@ description:
|
||||
properties:
|
||||
clocks:
|
||||
minItems: 3
|
||||
maxItems: 4
|
||||
maxItems: 5
|
||||
|
||||
clock-names:
|
||||
minItems: 3
|
||||
maxItems: 4
|
||||
maxItems: 5
|
||||
|
||||
num-lanes:
|
||||
const: 1
|
||||
|
||||
@ -22,6 +22,7 @@ properties:
|
||||
- fsl,imx8mm-pcie-ep
|
||||
- fsl,imx8mq-pcie-ep
|
||||
- fsl,imx8mp-pcie-ep
|
||||
- fsl,imx8q-pcie-ep
|
||||
- fsl,imx95-pcie-ep
|
||||
|
||||
clocks:
|
||||
@ -74,6 +75,20 @@ allOf:
|
||||
- const: dbi2
|
||||
- const: atu
|
||||
|
||||
- if:
|
||||
properties:
|
||||
compatible:
|
||||
enum:
|
||||
- fsl,imx8q-pcie-ep
|
||||
then:
|
||||
properties:
|
||||
reg:
|
||||
maxItems: 2
|
||||
reg-names:
|
||||
items:
|
||||
- const: dbi
|
||||
- const: addr_space
|
||||
|
||||
- if:
|
||||
properties:
|
||||
compatible:
|
||||
@ -103,13 +118,21 @@ allOf:
|
||||
properties:
|
||||
clocks:
|
||||
minItems: 4
|
||||
maxItems: 4
|
||||
clock-names:
|
||||
items:
|
||||
- const: pcie
|
||||
- const: pcie_bus
|
||||
- const: pcie_phy
|
||||
- const: pcie_aux
|
||||
else:
|
||||
|
||||
- if:
|
||||
properties:
|
||||
compatible:
|
||||
enum:
|
||||
- fsl,imx8mm-pcie-ep
|
||||
- fsl,imx8mp-pcie-ep
|
||||
then:
|
||||
properties:
|
||||
clocks:
|
||||
maxItems: 3
|
||||
@ -119,6 +142,20 @@ allOf:
|
||||
- const: pcie_bus
|
||||
- const: pcie_aux
|
||||
|
||||
- if:
|
||||
properties:
|
||||
compatible:
|
||||
enum:
|
||||
- fsl,imx8q-pcie-ep
|
||||
then:
|
||||
properties:
|
||||
clocks:
|
||||
maxItems: 3
|
||||
clock-names:
|
||||
items:
|
||||
- const: dbi
|
||||
- const: mstr
|
||||
- const: slv
|
||||
|
||||
unevaluatedProperties: false
|
||||
|
||||
|
||||
@ -40,10 +40,11 @@ properties:
|
||||
- description: PCIe PHY clock.
|
||||
- description: Additional required clock entry for imx6sx-pcie,
|
||||
imx6sx-pcie-ep, imx8mq-pcie, imx8mq-pcie-ep.
|
||||
- description: PCIe reference clock.
|
||||
|
||||
clock-names:
|
||||
minItems: 3
|
||||
maxItems: 4
|
||||
maxItems: 5
|
||||
|
||||
interrupts:
|
||||
items:
|
||||
@ -127,7 +128,7 @@ allOf:
|
||||
then:
|
||||
properties:
|
||||
clocks:
|
||||
minItems: 4
|
||||
maxItems: 4
|
||||
clock-names:
|
||||
items:
|
||||
- const: pcie
|
||||
@ -140,11 +141,10 @@ allOf:
|
||||
compatible:
|
||||
enum:
|
||||
- fsl,imx8mq-pcie
|
||||
- fsl,imx95-pcie
|
||||
then:
|
||||
properties:
|
||||
clocks:
|
||||
minItems: 4
|
||||
maxItems: 4
|
||||
clock-names:
|
||||
items:
|
||||
- const: pcie
|
||||
@ -200,6 +200,23 @@ allOf:
|
||||
- const: mstr
|
||||
- const: slv
|
||||
|
||||
- if:
|
||||
properties:
|
||||
compatible:
|
||||
enum:
|
||||
- fsl,imx95-pcie
|
||||
then:
|
||||
properties:
|
||||
clocks:
|
||||
maxItems: 5
|
||||
clock-names:
|
||||
items:
|
||||
- const: pcie
|
||||
- const: pcie_bus
|
||||
- const: pcie_phy
|
||||
- const: pcie_aux
|
||||
- const: ref
|
||||
|
||||
unevaluatedProperties: false
|
||||
|
||||
examples:
|
||||
|
||||
@ -1,52 +0,0 @@
|
||||
NXP Layerscape PCIe Gen4 controller
|
||||
|
||||
This PCIe controller is based on the Mobiveil PCIe IP and thus inherits all
|
||||
the common properties defined in mobiveil-pcie.txt.
|
||||
|
||||
Required properties:
|
||||
- compatible: should contain the platform identifier such as:
|
||||
"fsl,lx2160a-pcie"
|
||||
- reg: base addresses and lengths of the PCIe controller register blocks.
|
||||
"csr_axi_slave": Bridge config registers
|
||||
"config_axi_slave": PCIe controller registers
|
||||
- interrupts: A list of interrupt outputs of the controller. Must contain an
|
||||
entry for each entry in the interrupt-names property.
|
||||
- interrupt-names: It could include the following entries:
|
||||
"intr": The interrupt that is asserted for controller interrupts
|
||||
"aer": Asserted for aer interrupt when chip support the aer interrupt with
|
||||
none MSI/MSI-X/INTx mode,but there is interrupt line for aer.
|
||||
"pme": Asserted for pme interrupt when chip support the pme interrupt with
|
||||
none MSI/MSI-X/INTx mode,but there is interrupt line for pme.
|
||||
- dma-coherent: Indicates that the hardware IP block can ensure the coherency
|
||||
of the data transferred from/to the IP block. This can avoid the software
|
||||
cache flush/invalid actions, and improve the performance significantly.
|
||||
- msi-parent : See the generic MSI binding described in
|
||||
Documentation/devicetree/bindings/interrupt-controller/msi.txt.
|
||||
|
||||
Example:
|
||||
|
||||
pcie@3400000 {
|
||||
compatible = "fsl,lx2160a-pcie";
|
||||
reg = <0x00 0x03400000 0x0 0x00100000 /* controller registers */
|
||||
0x80 0x00000000 0x0 0x00001000>; /* configuration space */
|
||||
reg-names = "csr_axi_slave", "config_axi_slave";
|
||||
interrupts = <GIC_SPI 108 IRQ_TYPE_LEVEL_HIGH>, /* AER interrupt */
|
||||
<GIC_SPI 108 IRQ_TYPE_LEVEL_HIGH>, /* PME interrupt */
|
||||
<GIC_SPI 108 IRQ_TYPE_LEVEL_HIGH>; /* controller interrupt */
|
||||
interrupt-names = "aer", "pme", "intr";
|
||||
#address-cells = <3>;
|
||||
#size-cells = <2>;
|
||||
device_type = "pci";
|
||||
apio-wins = <8>;
|
||||
ppio-wins = <8>;
|
||||
dma-coherent;
|
||||
bus-range = <0x0 0xff>;
|
||||
msi-parent = <&its>;
|
||||
ranges = <0x82000000 0x0 0x40000000 0x80 0x40000000 0x0 0x40000000>;
|
||||
#interrupt-cells = <1>;
|
||||
interrupt-map-mask = <0 0 0 7>;
|
||||
interrupt-map = <0000 0 0 1 &gic 0 0 GIC_SPI 109 IRQ_TYPE_LEVEL_HIGH>,
|
||||
<0000 0 0 2 &gic 0 0 GIC_SPI 110 IRQ_TYPE_LEVEL_HIGH>,
|
||||
<0000 0 0 3 &gic 0 0 GIC_SPI 111 IRQ_TYPE_LEVEL_HIGH>,
|
||||
<0000 0 0 4 &gic 0 0 GIC_SPI 112 IRQ_TYPE_LEVEL_HIGH>;
|
||||
};
|
||||
173
Documentation/devicetree/bindings/pci/mbvl,gpex40-pcie.yaml
Normal file
173
Documentation/devicetree/bindings/pci/mbvl,gpex40-pcie.yaml
Normal file
@ -0,0 +1,173 @@
|
||||
# SPDX-License-Identifier: (GPL-2.0-only OR BSD-2-Clause)
|
||||
%YAML 1.2
|
||||
---
|
||||
$id: http://devicetree.org/schemas/pci/mbvl,gpex40-pcie.yaml#
|
||||
$schema: http://devicetree.org/meta-schemas/core.yaml#
|
||||
|
||||
title: Mobiveil AXI PCIe Host Bridge
|
||||
|
||||
maintainers:
|
||||
- Frank Li <Frank.Li@nxp.com>
|
||||
|
||||
description:
|
||||
Mobiveil's GPEX 4.0 is a PCIe Gen4 host bridge IP. This configurable IP
|
||||
has up to 8 outbound and inbound windows for address translation.
|
||||
|
||||
NXP Layerscape PCIe Gen4 controller (Deprecated) based on Mobiveil's GPEX 4.0.
|
||||
|
||||
properties:
|
||||
compatible:
|
||||
enum:
|
||||
- fsl,lx2160a-pcie
|
||||
- mbvl,gpex40-pcie
|
||||
|
||||
reg:
|
||||
items:
|
||||
- description: PCIe controller registers
|
||||
- description: Bridge config registers
|
||||
- description: GPIO registers to control slot power
|
||||
- description: MSI registers
|
||||
minItems: 2
|
||||
|
||||
reg-names:
|
||||
items:
|
||||
- const: csr_axi_slave
|
||||
- const: config_axi_slave
|
||||
- const: gpio_slave
|
||||
- const: apb_csr
|
||||
minItems: 2
|
||||
|
||||
apio-wins:
|
||||
$ref: /schemas/types.yaml#/definitions/uint32
|
||||
description: |
|
||||
number of requested APIO outbound windows
|
||||
1. Config window
|
||||
2. Memory window
|
||||
default: 2
|
||||
maximum: 256
|
||||
|
||||
ppio-wins:
|
||||
$ref: /schemas/types.yaml#/definitions/uint32
|
||||
description: number of requested PPIO inbound windows
|
||||
default: 1
|
||||
maximum: 256
|
||||
|
||||
interrupt-controller: true
|
||||
|
||||
"#interrupt-cells":
|
||||
const: 1
|
||||
|
||||
interrupts:
|
||||
minItems: 1
|
||||
maxItems: 3
|
||||
|
||||
interrupt-names:
|
||||
minItems: 1
|
||||
maxItems: 3
|
||||
|
||||
dma-coherent: true
|
||||
|
||||
msi-parent: true
|
||||
|
||||
required:
|
||||
- compatible
|
||||
- reg
|
||||
- reg-names
|
||||
|
||||
allOf:
|
||||
- $ref: /schemas/pci/pci-host-bridge.yaml#
|
||||
- if:
|
||||
properties:
|
||||
compatible:
|
||||
enum:
|
||||
- fsl,lx2160a-pcie
|
||||
then:
|
||||
properties:
|
||||
reg:
|
||||
maxItems: 2
|
||||
|
||||
reg-names:
|
||||
maxItems: 2
|
||||
|
||||
interrupts:
|
||||
minItems: 3
|
||||
|
||||
interrupt-names:
|
||||
items:
|
||||
- const: aer
|
||||
- const: pme
|
||||
- const: intr
|
||||
else:
|
||||
properties:
|
||||
dma-coherent: false
|
||||
msi-parent: false
|
||||
interrupts:
|
||||
maxItems: 1
|
||||
interrupt-names: false
|
||||
|
||||
unevaluatedProperties: false
|
||||
|
||||
examples:
|
||||
- |
|
||||
#include <dt-bindings/interrupt-controller/arm-gic.h>
|
||||
|
||||
pcie@b0000000 {
|
||||
compatible = "mbvl,gpex40-pcie";
|
||||
reg = <0xb0000000 0x00010000>,
|
||||
<0xa0000000 0x00001000>,
|
||||
<0xff000000 0x00200000>,
|
||||
<0xb0010000 0x00001000>;
|
||||
reg-names = "csr_axi_slave",
|
||||
"config_axi_slave",
|
||||
"gpio_slave",
|
||||
"apb_csr";
|
||||
ranges = <0x83000000 0 0x00000000 0xa8000000 0 0x8000000>;
|
||||
#address-cells = <3>;
|
||||
#size-cells = <2>;
|
||||
device_type = "pci";
|
||||
apio-wins = <2>;
|
||||
ppio-wins = <1>;
|
||||
bus-range = <0x00 0xff>;
|
||||
interrupt-controller;
|
||||
#interrupt-cells = <1>;
|
||||
interrupt-parent = <&gic>;
|
||||
interrupts = <GIC_SPI 89 IRQ_TYPE_LEVEL_HIGH>;
|
||||
interrupt-map-mask = <0 0 0 7>;
|
||||
interrupt-map = <0 0 0 0 &pci_express 0>,
|
||||
<0 0 0 1 &pci_express 1>,
|
||||
<0 0 0 2 &pci_express 2>,
|
||||
<0 0 0 3 &pci_express 3>;
|
||||
};
|
||||
|
||||
- |
|
||||
#include <dt-bindings/interrupt-controller/arm-gic.h>
|
||||
|
||||
soc {
|
||||
#address-cells = <2>;
|
||||
#size-cells = <2>;
|
||||
pcie@3400000 {
|
||||
compatible = "fsl,lx2160a-pcie";
|
||||
reg = <0x00 0x03400000 0x0 0x00100000 /* controller registers */
|
||||
0x80 0x00000000 0x0 0x00001000>; /* configuration space */
|
||||
reg-names = "csr_axi_slave", "config_axi_slave";
|
||||
ranges = <0x82000000 0x0 0x40000000 0x80 0x40000000 0x0 0x40000000>;
|
||||
interrupts = <GIC_SPI 108 IRQ_TYPE_LEVEL_HIGH>, /* AER interrupt */
|
||||
<GIC_SPI 108 IRQ_TYPE_LEVEL_HIGH>, /* PME interrupt */
|
||||
<GIC_SPI 108 IRQ_TYPE_LEVEL_HIGH>; /* controller interrupt */
|
||||
interrupt-names = "aer", "pme", "intr";
|
||||
#address-cells = <3>;
|
||||
#size-cells = <2>;
|
||||
device_type = "pci";
|
||||
apio-wins = <8>;
|
||||
ppio-wins = <8>;
|
||||
dma-coherent;
|
||||
bus-range = <0x00 0xff>;
|
||||
msi-parent = <&its>;
|
||||
#interrupt-cells = <1>;
|
||||
interrupt-map-mask = <0 0 0 7>;
|
||||
interrupt-map = <0000 0 0 1 &gic 0 0 GIC_SPI 109 IRQ_TYPE_LEVEL_HIGH>,
|
||||
<0000 0 0 2 &gic 0 0 GIC_SPI 110 IRQ_TYPE_LEVEL_HIGH>,
|
||||
<0000 0 0 3 &gic 0 0 GIC_SPI 111 IRQ_TYPE_LEVEL_HIGH>,
|
||||
<0000 0 0 4 &gic 0 0 GIC_SPI 112 IRQ_TYPE_LEVEL_HIGH>;
|
||||
};
|
||||
};
|
||||
@ -149,7 +149,7 @@ allOf:
|
||||
then:
|
||||
properties:
|
||||
clocks:
|
||||
minItems: 4
|
||||
minItems: 6
|
||||
|
||||
clock-names:
|
||||
items:
|
||||
@ -178,7 +178,7 @@ allOf:
|
||||
then:
|
||||
properties:
|
||||
clocks:
|
||||
minItems: 4
|
||||
minItems: 6
|
||||
|
||||
clock-names:
|
||||
items:
|
||||
@ -207,6 +207,7 @@ allOf:
|
||||
properties:
|
||||
clocks:
|
||||
minItems: 4
|
||||
maxItems: 4
|
||||
|
||||
clock-names:
|
||||
items:
|
||||
|
||||
@ -17,6 +17,12 @@ properties:
|
||||
compatible:
|
||||
const: microchip,pcie-host-1.0 # PolarFire
|
||||
|
||||
reg:
|
||||
minItems: 3
|
||||
|
||||
reg-names:
|
||||
minItems: 3
|
||||
|
||||
clocks:
|
||||
description:
|
||||
Fabric Interface Controllers, FICs, are the interface between the FPGA
|
||||
@ -44,6 +50,8 @@ properties:
|
||||
items:
|
||||
pattern: '^fic[0-3]$'
|
||||
|
||||
dma-coherent: true
|
||||
|
||||
ranges:
|
||||
minItems: 1
|
||||
maxItems: 3
|
||||
@ -62,8 +70,9 @@ examples:
|
||||
pcie0: pcie@2030000000 {
|
||||
compatible = "microchip,pcie-host-1.0";
|
||||
reg = <0x0 0x70000000 0x0 0x08000000>,
|
||||
<0x0 0x43000000 0x0 0x00010000>;
|
||||
reg-names = "cfg", "apb";
|
||||
<0x0 0x43008000 0x0 0x00002000>,
|
||||
<0x0 0x4300a000 0x0 0x00002000>;
|
||||
reg-names = "cfg", "bridge", "ctrl";
|
||||
device_type = "pci";
|
||||
#address-cells = <3>;
|
||||
#size-cells = <2>;
|
||||
|
||||
@ -1,72 +0,0 @@
|
||||
* Mobiveil AXI PCIe Root Port Bridge DT description
|
||||
|
||||
Mobiveil's GPEX 4.0 is a PCIe Gen4 root port bridge IP. This configurable IP
|
||||
has up to 8 outbound and inbound windows for the address translation.
|
||||
|
||||
Required properties:
|
||||
- #address-cells: Address representation for root ports, set to <3>
|
||||
- #size-cells: Size representation for root ports, set to <2>
|
||||
- #interrupt-cells: specifies the number of cells needed to encode an
|
||||
interrupt source. The value must be 1.
|
||||
- compatible: Should contain "mbvl,gpex40-pcie"
|
||||
- reg: Should contain PCIe registers location and length
|
||||
Mandatory:
|
||||
"config_axi_slave": PCIe controller registers
|
||||
"csr_axi_slave" : Bridge config registers
|
||||
Optional:
|
||||
"gpio_slave" : GPIO registers to control slot power
|
||||
"apb_csr" : MSI registers
|
||||
|
||||
- device_type: must be "pci"
|
||||
- apio-wins : number of requested apio outbound windows
|
||||
default 2 outbound windows are configured -
|
||||
1. Config window
|
||||
2. Memory window
|
||||
- ppio-wins : number of requested ppio inbound windows
|
||||
default 1 inbound memory window is configured.
|
||||
- bus-range: PCI bus numbers covered
|
||||
- interrupt-controller: identifies the node as an interrupt controller
|
||||
- #interrupt-cells: specifies the number of cells needed to encode an
|
||||
interrupt source. The value must be 1.
|
||||
- interrupts: The interrupt line of the PCIe controller
|
||||
last cell of this field is set to 4 to
|
||||
denote it as IRQ_TYPE_LEVEL_HIGH type interrupt.
|
||||
- interrupt-map-mask,
|
||||
interrupt-map: standard PCI properties to define the mapping of the
|
||||
PCI interface to interrupt numbers.
|
||||
- ranges: ranges for the PCI memory regions (I/O space region is not
|
||||
supported by hardware)
|
||||
Please refer to the standard PCI bus binding document for a more
|
||||
detailed explanation
|
||||
|
||||
|
||||
Example:
|
||||
++++++++
|
||||
pcie0: pcie@a0000000 {
|
||||
#address-cells = <3>;
|
||||
#size-cells = <2>;
|
||||
compatible = "mbvl,gpex40-pcie";
|
||||
reg = <0xa0000000 0x00001000>,
|
||||
<0xb0000000 0x00010000>,
|
||||
<0xff000000 0x00200000>,
|
||||
<0xb0010000 0x00001000>;
|
||||
reg-names = "config_axi_slave",
|
||||
"csr_axi_slave",
|
||||
"gpio_slave",
|
||||
"apb_csr";
|
||||
device_type = "pci";
|
||||
apio-wins = <2>;
|
||||
ppio-wins = <1>;
|
||||
bus-range = <0x00000000 0x000000ff>;
|
||||
interrupt-controller;
|
||||
interrupt-parent = <&gic>;
|
||||
#interrupt-cells = <1>;
|
||||
interrupts = < 0 89 4 >;
|
||||
interrupt-map-mask = <0 0 0 7>;
|
||||
interrupt-map = <0 0 0 0 &pci_express 0>,
|
||||
<0 0 0 1 &pci_express 1>,
|
||||
<0 0 0 2 &pci_express 2>,
|
||||
<0 0 0 3 &pci_express 3>;
|
||||
ranges = < 0x83000000 0 0x00000000 0xa8000000 0 0x8000000>;
|
||||
|
||||
};
|
||||
@ -18,12 +18,18 @@ allOf:
|
||||
|
||||
properties:
|
||||
reg:
|
||||
maxItems: 2
|
||||
maxItems: 3
|
||||
minItems: 2
|
||||
|
||||
reg-names:
|
||||
items:
|
||||
oneOf:
|
||||
- items:
|
||||
- const: cfg
|
||||
- const: apb
|
||||
- items:
|
||||
- const: cfg
|
||||
- const: bridge
|
||||
- const: ctrl
|
||||
|
||||
interrupts:
|
||||
minItems: 1
|
||||
|
||||
@ -81,6 +81,10 @@ properties:
|
||||
vddpe-3v3-supply:
|
||||
description: PCIe endpoint power supply
|
||||
|
||||
operating-points-v2: true
|
||||
opp-table:
|
||||
type: object
|
||||
|
||||
required:
|
||||
- reg
|
||||
- reg-names
|
||||
|
||||
@ -70,10 +70,6 @@ properties:
|
||||
- const: msi7
|
||||
- const: global
|
||||
|
||||
operating-points-v2: true
|
||||
opp-table:
|
||||
type: object
|
||||
|
||||
resets:
|
||||
maxItems: 1
|
||||
|
||||
|
||||
Some files were not shown because too many files have changed in this diff Show More
Loading…
Reference in New Issue
Block a user