From dc08c972db067ea53d3d86e9874224c3b7b739f4 Mon Sep 17 00:00:00 2001 From: Peter Robinson Date: Sun, 6 Oct 2019 14:35:36 +0100 Subject: [PATCH] arm64: tegra: enabled nouveau on Jetson-TX2 --- drm-nouveau-Enable-GP10B-by-default.patch | 1275 +++++++++++++++++++++ kernel.spec | 2 + 2 files changed, 1277 insertions(+) create mode 100644 drm-nouveau-Enable-GP10B-by-default.patch diff --git a/drm-nouveau-Enable-GP10B-by-default.patch b/drm-nouveau-Enable-GP10B-by-default.patch new file mode 100644 index 000000000..e44a5b339 --- /dev/null +++ b/drm-nouveau-Enable-GP10B-by-default.patch @@ -0,0 +1,1275 @@ +From patchwork Mon Sep 16 15:04:02 2019 +Content-Type: text/plain; charset="utf-8" +MIME-Version: 1.0 +Content-Transfer-Encoding: 7bit +Subject: [01/11] drm/nouveau: tegra: Avoid pulsing reset twice +From: Thierry Reding +X-Patchwork-Id: 331044 +Message-Id: <20190916150412.10025-2-thierry.reding@gmail.com> +To: Ben Skeggs , Thierry Reding +Cc: linux-tegra@vger.kernel.org, nouveau@lists.freedesktop.org, + dri-devel@lists.freedesktop.org +Date: Mon, 16 Sep 2019 17:04:02 +0200 + +From: Thierry Reding + +When the GPU powergate is controlled by a generic power domain provider, +the reset will automatically be asserted and deasserted as part of the +power-ungating procedure. + +On some Jetson TX2 boards, doing an additional assert and deassert of +the GPU outside of the power-ungate procedure can cause the GPU to go +into a bad state where the memory interface can no longer access system +memory. 
+ +Signed-off-by: Thierry Reding +--- + drivers/gpu/drm/nouveau/nvkm/engine/device/tegra.c | 12 ++++++------ + 1 file changed, 6 insertions(+), 6 deletions(-) + +diff --git a/drivers/gpu/drm/nouveau/nvkm/engine/device/tegra.c b/drivers/gpu/drm/nouveau/nvkm/engine/device/tegra.c +index 0e372a190d3f..747a775121cf 100644 +--- a/drivers/gpu/drm/nouveau/nvkm/engine/device/tegra.c ++++ b/drivers/gpu/drm/nouveau/nvkm/engine/device/tegra.c +@@ -52,18 +52,18 @@ nvkm_device_tegra_power_up(struct nvkm_device_tegra *tdev) + clk_set_rate(tdev->clk_pwr, 204000000); + udelay(10); + +- reset_control_assert(tdev->rst); +- udelay(10); +- + if (!tdev->pdev->dev.pm_domain) { ++ reset_control_assert(tdev->rst); ++ udelay(10); ++ + ret = tegra_powergate_remove_clamping(TEGRA_POWERGATE_3D); + if (ret) + goto err_clamp; + udelay(10); +- } + +- reset_control_deassert(tdev->rst); +- udelay(10); ++ reset_control_deassert(tdev->rst); ++ udelay(10); ++ } + + return 0; + + +From patchwork Mon Sep 16 15:04:03 2019 +Content-Type: text/plain; charset="utf-8" +MIME-Version: 1.0 +Content-Transfer-Encoding: 7bit +Subject: [02/11] drm/nouveau: tegra: Set clock rate if not set +From: Thierry Reding +X-Patchwork-Id: 331046 +Message-Id: <20190916150412.10025-3-thierry.reding@gmail.com> +To: Ben Skeggs , Thierry Reding +Cc: linux-tegra@vger.kernel.org, nouveau@lists.freedesktop.org, + dri-devel@lists.freedesktop.org +Date: Mon, 16 Sep 2019 17:04:03 +0200 + +From: Thierry Reding + +If the GPU clock has not had a rate set, initialize it to the maximum +clock rate to make sure it does run. 
+ +Signed-off-by: Thierry Reding +--- + drivers/gpu/drm/nouveau/nvkm/engine/device/tegra.c | 12 ++++++++++++ + 1 file changed, 12 insertions(+) + +diff --git a/drivers/gpu/drm/nouveau/nvkm/engine/device/tegra.c b/drivers/gpu/drm/nouveau/nvkm/engine/device/tegra.c +index 747a775121cf..d0d52c1d4aee 100644 +--- a/drivers/gpu/drm/nouveau/nvkm/engine/device/tegra.c ++++ b/drivers/gpu/drm/nouveau/nvkm/engine/device/tegra.c +@@ -279,6 +279,7 @@ nvkm_device_tegra_new(const struct nvkm_device_tegra_func *func, + struct nvkm_device **pdevice) + { + struct nvkm_device_tegra *tdev; ++ unsigned long rate; + int ret; + + if (!(tdev = kzalloc(sizeof(*tdev), GFP_KERNEL))) +@@ -307,6 +308,17 @@ nvkm_device_tegra_new(const struct nvkm_device_tegra_func *func, + goto free; + } + ++ rate = clk_get_rate(tdev->clk); ++ if (rate == 0) { ++ ret = clk_set_rate(tdev->clk, ULONG_MAX); ++ if (ret < 0) ++ goto free; ++ ++ rate = clk_get_rate(tdev->clk); ++ ++ dev_dbg(&pdev->dev, "GPU clock set to %lu\n", rate); ++ } ++ + if (func->require_ref_clk) + tdev->clk_ref = devm_clk_get(&pdev->dev, "ref"); + if (IS_ERR(tdev->clk_ref)) { + +From patchwork Mon Sep 16 15:04:04 2019 +Content-Type: text/plain; charset="utf-8" +MIME-Version: 1.0 +Content-Transfer-Encoding: 7bit +Subject: [03/11] drm/nouveau: secboot: Read WPR configuration from GPU + registers +From: Thierry Reding +X-Patchwork-Id: 331048 +Message-Id: <20190916150412.10025-4-thierry.reding@gmail.com> +To: Ben Skeggs , Thierry Reding +Cc: linux-tegra@vger.kernel.org, nouveau@lists.freedesktop.org, + dri-devel@lists.freedesktop.org +Date: Mon, 16 Sep 2019 17:04:04 +0200 + +From: Thierry Reding + +The GPUs found on Tegra SoCs have registers that can be used to read the +WPR configuration. Use these registers instead of reaching into the +memory controller's register space to read the same information. 
+ +Signed-off-by: Thierry Reding +--- + .../drm/nouveau/nvkm/subdev/secboot/gm200.h | 2 +- + .../drm/nouveau/nvkm/subdev/secboot/gm20b.c | 81 ++++++++++++------- + .../drm/nouveau/nvkm/subdev/secboot/gp10b.c | 4 +- + 3 files changed, 53 insertions(+), 34 deletions(-) + +diff --git a/drivers/gpu/drm/nouveau/nvkm/subdev/secboot/gm200.h b/drivers/gpu/drm/nouveau/nvkm/subdev/secboot/gm200.h +index 62c5e162099a..280b1448df88 100644 +--- a/drivers/gpu/drm/nouveau/nvkm/subdev/secboot/gm200.h ++++ b/drivers/gpu/drm/nouveau/nvkm/subdev/secboot/gm200.h +@@ -41,6 +41,6 @@ int gm200_secboot_run_blob(struct nvkm_secboot *, struct nvkm_gpuobj *, + struct nvkm_falcon *); + + /* Tegra-only */ +-int gm20b_secboot_tegra_read_wpr(struct gm200_secboot *, u32); ++int gm20b_secboot_tegra_read_wpr(struct gm200_secboot *); + + #endif +diff --git a/drivers/gpu/drm/nouveau/nvkm/subdev/secboot/gm20b.c b/drivers/gpu/drm/nouveau/nvkm/subdev/secboot/gm20b.c +index df8b919dcf09..f8a543122219 100644 +--- a/drivers/gpu/drm/nouveau/nvkm/subdev/secboot/gm20b.c ++++ b/drivers/gpu/drm/nouveau/nvkm/subdev/secboot/gm20b.c +@@ -23,39 +23,65 @@ + #include "acr.h" + #include "gm200.h" + +-#define TEGRA210_MC_BASE 0x70019000 +- + #ifdef CONFIG_ARCH_TEGRA +-#define MC_SECURITY_CARVEOUT2_CFG0 0xc58 +-#define MC_SECURITY_CARVEOUT2_BOM_0 0xc5c +-#define MC_SECURITY_CARVEOUT2_BOM_HI_0 0xc60 +-#define MC_SECURITY_CARVEOUT2_SIZE_128K 0xc64 +-#define TEGRA_MC_SECURITY_CARVEOUT_CFG_LOCKED (1 << 1) + /** + * gm20b_secboot_tegra_read_wpr() - read the WPR registers on Tegra + * +- * On dGPU, we can manage the WPR region ourselves, but on Tegra the WPR region +- * is reserved from system memory by the bootloader and irreversibly locked. +- * This function reads the address and size of the pre-configured WPR region. ++ * On dGPU, we can manage the WPR region ourselves, but on Tegra this region ++ * is allocated from system memory by the secure firmware. 
The region is then ++ * marked as a "secure carveout" and irreversibly locked. Furthermore, the WPR ++ * secure carveout is also configured to be sent to the GPU via a dedicated ++ * serial bus between the memory controller and the GPU. The GPU requests this ++ * information upon leaving reset and exposes it through a FIFO register at ++ * offset 0x100cd4. ++ * ++ * The FIFO register's lower 4 bits can be used to set the read index into the ++ * FIFO. After each read of the FIFO register, the read index is incremented. ++ * ++ * Indices 2 and 3 contain the lower and upper addresses of the WPR. These are ++ * stored in units of 256 B. The WPR is inclusive of both addresses. ++ * ++ * Unfortunately, for some reason the WPR info register doesn't contain the ++ * correct values for the secure carveout. It seems like the upper address is ++ * always too small by 128 KiB - 1. Given that the secure carveout size in the ++ * memory controller configuration is specified in units of 128 KiB, it's ++ * possible that the computation of the upper address of the WPR is wrong and ++ * causes this difference. 
+ */ + int +-gm20b_secboot_tegra_read_wpr(struct gm200_secboot *gsb, u32 mc_base) ++gm20b_secboot_tegra_read_wpr(struct gm200_secboot *gsb) + { ++ struct nvkm_device *device = gsb->base.subdev.device; + struct nvkm_secboot *sb = &gsb->base; +- void __iomem *mc; +- u32 cfg; ++ u64 base, limit; ++ u32 value; + +- mc = ioremap(mc_base, 0xd00); +- if (!mc) { +- nvkm_error(&sb->subdev, "Cannot map Tegra MC registers\n"); +- return -ENOMEM; +- } +- sb->wpr_addr = ioread32_native(mc + MC_SECURITY_CARVEOUT2_BOM_0) | +- ((u64)ioread32_native(mc + MC_SECURITY_CARVEOUT2_BOM_HI_0) << 32); +- sb->wpr_size = ioread32_native(mc + MC_SECURITY_CARVEOUT2_SIZE_128K) +- << 17; +- cfg = ioread32_native(mc + MC_SECURITY_CARVEOUT2_CFG0); +- iounmap(mc); ++ /* set WPR info register to point at WPR base address register */ ++ value = nvkm_rd32(device, 0x100cd4); ++ value &= ~0xf; ++ value |= 0x2; ++ nvkm_wr32(device, 0x100cd4, value); ++ ++ /* read base address */ ++ value = nvkm_rd32(device, 0x100cd4); ++ base = (u64)(value >> 4) << 12; ++ ++ /* read limit */ ++ value = nvkm_rd32(device, 0x100cd4); ++ limit = (u64)(value >> 4) << 12; ++ ++ /* ++ * The upper address of the WPR seems to be computed wrongly and is ++ * actually SZ_128K - 1 bytes lower than it should be. Adjust the ++ * value accordingly. 
++ */ ++ limit += SZ_128K - 1; ++ ++ sb->wpr_size = limit - base + 1; ++ sb->wpr_addr = base; ++ ++ nvkm_info(&sb->subdev, "WPR: %016llx-%016llx\n", sb->wpr_addr, ++ sb->wpr_addr + sb->wpr_size - 1); + + /* Check that WPR settings are valid */ + if (sb->wpr_size == 0) { +@@ -63,11 +89,6 @@ gm20b_secboot_tegra_read_wpr(struct gm200_secboot *gsb, u32 mc_base) + return -EINVAL; + } + +- if (!(cfg & TEGRA_MC_SECURITY_CARVEOUT_CFG_LOCKED)) { +- nvkm_error(&sb->subdev, "WPR region not locked\n"); +- return -EINVAL; +- } +- + return 0; + } + #else +@@ -85,7 +106,7 @@ gm20b_secboot_oneinit(struct nvkm_secboot *sb) + struct gm200_secboot *gsb = gm200_secboot(sb); + int ret; + +- ret = gm20b_secboot_tegra_read_wpr(gsb, TEGRA210_MC_BASE); ++ ret = gm20b_secboot_tegra_read_wpr(gsb); + if (ret) + return ret; + +diff --git a/drivers/gpu/drm/nouveau/nvkm/subdev/secboot/gp10b.c b/drivers/gpu/drm/nouveau/nvkm/subdev/secboot/gp10b.c +index 28ca29d0eeee..d84e85825995 100644 +--- a/drivers/gpu/drm/nouveau/nvkm/subdev/secboot/gp10b.c ++++ b/drivers/gpu/drm/nouveau/nvkm/subdev/secboot/gp10b.c +@@ -23,15 +23,13 @@ + #include "acr.h" + #include "gm200.h" + +-#define TEGRA186_MC_BASE 0x02c10000 +- + static int + gp10b_secboot_oneinit(struct nvkm_secboot *sb) + { + struct gm200_secboot *gsb = gm200_secboot(sb); + int ret; + +- ret = gm20b_secboot_tegra_read_wpr(gsb, TEGRA186_MC_BASE); ++ ret = gm20b_secboot_tegra_read_wpr(gsb); + if (ret) + return ret; + + +From patchwork Mon Sep 16 15:04:05 2019 +Content-Type: text/plain; charset="utf-8" +MIME-Version: 1.0 +Content-Transfer-Encoding: 7bit +Subject: [04/11] drm/nouveau: gp10b: Add custom L2 cache implementation +From: Thierry Reding +X-Patchwork-Id: 331049 +Message-Id: <20190916150412.10025-5-thierry.reding@gmail.com> +To: Ben Skeggs , Thierry Reding +Cc: linux-tegra@vger.kernel.org, nouveau@lists.freedesktop.org, + dri-devel@lists.freedesktop.org +Date: Mon, 16 Sep 2019 17:04:05 +0200 + +From: Thierry Reding + +There are extra registers 
that need to be programmed to make the level 2 +cache work on GP10B, such as the stream ID register that is used when an +SMMU is used to translate memory addresses. + +Signed-off-by: Thierry Reding +--- + .../gpu/drm/nouveau/include/nvkm/subdev/ltc.h | 1 + + .../gpu/drm/nouveau/nvkm/engine/device/base.c | 2 +- + .../gpu/drm/nouveau/nvkm/subdev/ltc/Kbuild | 1 + + .../gpu/drm/nouveau/nvkm/subdev/ltc/gp10b.c | 69 +++++++++++++++++++ + .../gpu/drm/nouveau/nvkm/subdev/ltc/priv.h | 2 + + 5 files changed, 74 insertions(+), 1 deletion(-) + create mode 100644 drivers/gpu/drm/nouveau/nvkm/subdev/ltc/gp10b.c + +diff --git a/drivers/gpu/drm/nouveau/include/nvkm/subdev/ltc.h b/drivers/gpu/drm/nouveau/include/nvkm/subdev/ltc.h +index 644d527c3b96..d76f60d7d29a 100644 +--- a/drivers/gpu/drm/nouveau/include/nvkm/subdev/ltc.h ++++ b/drivers/gpu/drm/nouveau/include/nvkm/subdev/ltc.h +@@ -40,4 +40,5 @@ int gm107_ltc_new(struct nvkm_device *, int, struct nvkm_ltc **); + int gm200_ltc_new(struct nvkm_device *, int, struct nvkm_ltc **); + int gp100_ltc_new(struct nvkm_device *, int, struct nvkm_ltc **); + int gp102_ltc_new(struct nvkm_device *, int, struct nvkm_ltc **); ++int gp10b_ltc_new(struct nvkm_device *, int, struct nvkm_ltc **); + #endif +diff --git a/drivers/gpu/drm/nouveau/nvkm/engine/device/base.c b/drivers/gpu/drm/nouveau/nvkm/engine/device/base.c +index c3c7159f3411..d2d6d5f4028a 100644 +--- a/drivers/gpu/drm/nouveau/nvkm/engine/device/base.c ++++ b/drivers/gpu/drm/nouveau/nvkm/engine/device/base.c +@@ -2380,7 +2380,7 @@ nv13b_chipset = { + .fuse = gm107_fuse_new, + .ibus = gp10b_ibus_new, + .imem = gk20a_instmem_new, +- .ltc = gp102_ltc_new, ++ .ltc = gp10b_ltc_new, + .mc = gp10b_mc_new, + .mmu = gp10b_mmu_new, + .secboot = gp10b_secboot_new, +diff --git a/drivers/gpu/drm/nouveau/nvkm/subdev/ltc/Kbuild b/drivers/gpu/drm/nouveau/nvkm/subdev/ltc/Kbuild +index 2b6d36ea7067..728d75010847 100644 +--- a/drivers/gpu/drm/nouveau/nvkm/subdev/ltc/Kbuild ++++ 
b/drivers/gpu/drm/nouveau/nvkm/subdev/ltc/Kbuild +@@ -6,3 +6,4 @@ nvkm-y += nvkm/subdev/ltc/gm107.o + nvkm-y += nvkm/subdev/ltc/gm200.o + nvkm-y += nvkm/subdev/ltc/gp100.o + nvkm-y += nvkm/subdev/ltc/gp102.o ++nvkm-y += nvkm/subdev/ltc/gp10b.o +diff --git a/drivers/gpu/drm/nouveau/nvkm/subdev/ltc/gp10b.c b/drivers/gpu/drm/nouveau/nvkm/subdev/ltc/gp10b.c +new file mode 100644 +index 000000000000..4d27c6ea1552 +--- /dev/null ++++ b/drivers/gpu/drm/nouveau/nvkm/subdev/ltc/gp10b.c +@@ -0,0 +1,69 @@ ++/* ++ * Copyright (c) 2019 NVIDIA Corporation. ++ * ++ * Permission is hereby granted, free of charge, to any person obtaining a ++ * copy of this software and associated documentation files (the "Software"), ++ * to deal in the Software without restriction, including without limitation ++ * the rights to use, copy, modify, merge, publish, distribute, sublicense, ++ * and/or sell copies of the Software, and to permit persons to whom the ++ * Software is furnished to do so, subject to the following conditions: ++ * ++ * The above copyright notice and this permission notice shall be included in ++ * all copies or substantial portions of the Software. ++ * ++ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR ++ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, ++ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL ++ * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR ++ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ++ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR ++ * OTHER DEALINGS IN THE SOFTWARE. 
++ * ++ * Authors: Thierry Reding ++ */ ++ ++#include "priv.h" ++ ++static void ++gp10b_ltc_init(struct nvkm_ltc *ltc) ++{ ++ struct nvkm_device *device = ltc->subdev.device; ++#ifdef CONFIG_IOMMU_API ++ struct iommu_fwspec *spec; ++#endif ++ ++ nvkm_wr32(device, 0x17e27c, ltc->ltc_nr); ++ nvkm_wr32(device, 0x17e000, ltc->ltc_nr); ++ nvkm_wr32(device, 0x100800, ltc->ltc_nr); ++ ++#ifdef CONFIG_IOMMU_API ++ spec = dev_iommu_fwspec_get(device->dev); ++ if (spec) { ++ u32 sid = spec->ids[0] & 0xffff; ++ ++ /* stream ID */ ++ nvkm_wr32(device, 0x160000, sid << 2); ++ } ++#endif ++} ++ ++static const struct nvkm_ltc_func ++gp10b_ltc = { ++ .oneinit = gp100_ltc_oneinit, ++ .init = gp10b_ltc_init, ++ .intr = gp100_ltc_intr, ++ .cbc_clear = gm107_ltc_cbc_clear, ++ .cbc_wait = gm107_ltc_cbc_wait, ++ .zbc = 16, ++ .zbc_clear_color = gm107_ltc_zbc_clear_color, ++ .zbc_clear_depth = gm107_ltc_zbc_clear_depth, ++ .zbc_clear_stencil = gp102_ltc_zbc_clear_stencil, ++ .invalidate = gf100_ltc_invalidate, ++ .flush = gf100_ltc_flush, ++}; ++ ++int ++gp10b_ltc_new(struct nvkm_device *device, int index, struct nvkm_ltc **pltc) ++{ ++ return nvkm_ltc_new_(&gp10b_ltc, device, index, pltc); ++} +diff --git a/drivers/gpu/drm/nouveau/nvkm/subdev/ltc/priv.h b/drivers/gpu/drm/nouveau/nvkm/subdev/ltc/priv.h +index 2fcf18e46ce3..eca5a711b1b8 100644 +--- a/drivers/gpu/drm/nouveau/nvkm/subdev/ltc/priv.h ++++ b/drivers/gpu/drm/nouveau/nvkm/subdev/ltc/priv.h +@@ -46,4 +46,6 @@ void gm107_ltc_zbc_clear_depth(struct nvkm_ltc *, int, const u32); + int gp100_ltc_oneinit(struct nvkm_ltc *); + void gp100_ltc_init(struct nvkm_ltc *); + void gp100_ltc_intr(struct nvkm_ltc *); ++ ++void gp102_ltc_zbc_clear_stencil(struct nvkm_ltc *, int, const u32); + #endif + +From patchwork Mon Sep 16 15:04:06 2019 +Content-Type: text/plain; charset="utf-8" +MIME-Version: 1.0 +Content-Transfer-Encoding: 7bit +Subject: [05/11] drm/nouveau: gp10b: Use correct copy engine +From: Thierry Reding +X-Patchwork-Id: 331052 
+Message-Id: <20190916150412.10025-6-thierry.reding@gmail.com> +To: Ben Skeggs , Thierry Reding +Cc: linux-tegra@vger.kernel.org, nouveau@lists.freedesktop.org, + dri-devel@lists.freedesktop.org +Date: Mon, 16 Sep 2019 17:04:06 +0200 + +From: Thierry Reding + +gp10b uses the new engine enumeration mechanism introduced in the Pascal +architecture. As a result, the copy engine, which used to be at index 2 +for prior Tegra GPU instantiations, has now moved to index 0. Fix up the +index and also use the gp100 variant of the copy engine class because on +gp10b the PASCAL_DMA_COPY_B class is not supported. + +Signed-off-by: Thierry Reding +--- + drivers/gpu/drm/nouveau/nvkm/engine/device/base.c | 2 +- + 1 file changed, 1 insertion(+), 1 deletion(-) + +diff --git a/drivers/gpu/drm/nouveau/nvkm/engine/device/base.c b/drivers/gpu/drm/nouveau/nvkm/engine/device/base.c +index d2d6d5f4028a..99d3fa3fad89 100644 +--- a/drivers/gpu/drm/nouveau/nvkm/engine/device/base.c ++++ b/drivers/gpu/drm/nouveau/nvkm/engine/device/base.c +@@ -2387,7 +2387,7 @@ nv13b_chipset = { + .pmu = gm20b_pmu_new, + .timer = gk20a_timer_new, + .top = gk104_top_new, +- .ce[2] = gp102_ce_new, ++ .ce[0] = gp100_ce_new, + .dma = gf119_dma_new, + .fifo = gp10b_fifo_new, + .gr = gp10b_gr_new, + +From patchwork Mon Sep 16 15:04:07 2019 +Content-Type: text/plain; charset="utf-8" +MIME-Version: 1.0 +Content-Transfer-Encoding: 7bit +Subject: [06/11] drm/nouveau: gk20a: Set IOMMU bit for DMA API if appropriate +From: Thierry Reding +X-Patchwork-Id: 331053 +Message-Id: <20190916150412.10025-7-thierry.reding@gmail.com> +To: Ben Skeggs , Thierry Reding +Cc: linux-tegra@vger.kernel.org, nouveau@lists.freedesktop.org, + dri-devel@lists.freedesktop.org +Date: Mon, 16 Sep 2019 17:04:07 +0200 + +From: Thierry Reding + +Detect if the DMA API is backed by an IOMMU and set the IOMMU bit if so. +This is needed to make sure IOMMU addresses are properly translated even +if the explicit IOMMU API is not used. 
+ +Signed-off-by: Thierry Reding +--- + .../drm/nouveau/nvkm/subdev/instmem/gk20a.c | 35 +++++++++++++------ + 1 file changed, 25 insertions(+), 10 deletions(-) + +diff --git a/drivers/gpu/drm/nouveau/nvkm/subdev/instmem/gk20a.c b/drivers/gpu/drm/nouveau/nvkm/subdev/instmem/gk20a.c +index b0493f8df1fe..1120a2a7d5f1 100644 +--- a/drivers/gpu/drm/nouveau/nvkm/subdev/instmem/gk20a.c ++++ b/drivers/gpu/drm/nouveau/nvkm/subdev/instmem/gk20a.c +@@ -100,12 +100,14 @@ struct gk20a_instmem { + unsigned int vaddr_max; + struct list_head vaddr_lru; + ++ /* IOMMU mapping */ ++ unsigned int page_shift; ++ u64 iommu_mask; ++ + /* Only used if IOMMU if present */ + struct mutex *mm_mutex; + struct nvkm_mm *mm; + struct iommu_domain *domain; +- unsigned long iommu_pgshift; +- u16 iommu_bit; + + /* Only used by DMA API */ + unsigned long attrs; +@@ -357,12 +359,12 @@ gk20a_instobj_dtor_iommu(struct nvkm_memory *memory) + mutex_unlock(&imem->lock); + + /* clear IOMMU bit to unmap pages */ +- r->offset &= ~BIT(imem->iommu_bit - imem->iommu_pgshift); ++ r->offset &= ~imem->iommu_mask; + + /* Unmap pages from GPU address space and free them */ + for (i = 0; i < node->base.mn->length; i++) { + iommu_unmap(imem->domain, +- (r->offset + i) << imem->iommu_pgshift, PAGE_SIZE); ++ (r->offset + i) << imem->page_shift, PAGE_SIZE); + dma_unmap_page(dev, node->dma_addrs[i], PAGE_SIZE, + DMA_BIDIRECTIONAL); + __free_page(node->pages[i]); +@@ -440,7 +442,7 @@ gk20a_instobj_ctor_dma(struct gk20a_instmem *imem, u32 npages, u32 align, + + /* present memory for being mapped using small pages */ + node->r.type = 12; +- node->r.offset = node->handle >> 12; ++ node->r.offset = imem->iommu_mask | node->handle >> 12; + node->r.length = (npages << PAGE_SHIFT) >> 12; + + node->base.mn = &node->r; +@@ -493,7 +495,7 @@ gk20a_instobj_ctor_iommu(struct gk20a_instmem *imem, u32 npages, u32 align, + mutex_lock(imem->mm_mutex); + /* Reserve area from GPU address space */ + ret = nvkm_mm_head(imem->mm, 0, 1, npages, 
npages, +- align >> imem->iommu_pgshift, &r); ++ align >> imem->page_shift, &r); + mutex_unlock(imem->mm_mutex); + if (ret) { + nvkm_error(subdev, "IOMMU space is full!\n"); +@@ -502,7 +504,7 @@ gk20a_instobj_ctor_iommu(struct gk20a_instmem *imem, u32 npages, u32 align, + + /* Map into GPU address space */ + for (i = 0; i < npages; i++) { +- u32 offset = (r->offset + i) << imem->iommu_pgshift; ++ u32 offset = (r->offset + i) << imem->page_shift; + + ret = iommu_map(imem->domain, offset, node->dma_addrs[i], + PAGE_SIZE, IOMMU_READ | IOMMU_WRITE); +@@ -518,7 +520,7 @@ gk20a_instobj_ctor_iommu(struct gk20a_instmem *imem, u32 npages, u32 align, + } + + /* IOMMU bit tells that an address is to be resolved through the IOMMU */ +- r->offset |= BIT(imem->iommu_bit - imem->iommu_pgshift); ++ r->offset |= imem->iommu_mask; + + node->base.mn = r; + return 0; +@@ -619,11 +621,12 @@ gk20a_instmem_new(struct nvkm_device *device, int index, + imem->mm_mutex = &tdev->iommu.mutex; + imem->mm = &tdev->iommu.mm; + imem->domain = tdev->iommu.domain; +- imem->iommu_pgshift = tdev->iommu.pgshift; +- imem->iommu_bit = tdev->func->iommu_bit; ++ imem->page_shift = tdev->iommu.pgshift; + + nvkm_info(&imem->base.subdev, "using IOMMU\n"); + } else { ++ imem->page_shift = PAGE_SHIFT; ++ + imem->attrs = DMA_ATTR_NON_CONSISTENT | + DMA_ATTR_WEAK_ORDERING | + DMA_ATTR_WRITE_COMBINE; +@@ -631,5 +634,17 @@ gk20a_instmem_new(struct nvkm_device *device, int index, + nvkm_info(&imem->base.subdev, "using DMA API\n"); + } + ++ /* ++ * The IOMMU mask needs to be set if an IOMMU is used explicitly (via ++ * direct IOMMU API usage) or implicitly (via the DMA API). In both ++ * cases the device will have been attached to an IOMMU domain. 
++ */ ++ if (iommu_get_domain_for_dev(device->dev)) { ++ imem->iommu_mask = BIT_ULL(tdev->func->iommu_bit - ++ imem->page_shift); ++ nvkm_debug(&imem->base.subdev, "IOMMU mask: %016llx\n", ++ imem->iommu_mask); ++ } ++ + return 0; + } + +From patchwork Mon Sep 16 15:04:08 2019 +Content-Type: text/plain; charset="utf-8" +MIME-Version: 1.0 +Content-Transfer-Encoding: 7bit +Subject: [07/11] drm/nouveau: gk20a: Implement custom MMU class +From: Thierry Reding +X-Patchwork-Id: 331057 +Message-Id: <20190916150412.10025-8-thierry.reding@gmail.com> +To: Ben Skeggs , Thierry Reding +Cc: linux-tegra@vger.kernel.org, nouveau@lists.freedesktop.org, + dri-devel@lists.freedesktop.org +Date: Mon, 16 Sep 2019 17:04:08 +0200 + +From: Thierry Reding + +The GPU integrated in NVIDIA Tegra SoCs is connected to system memory +via two paths: one direct path to the memory controller and another path +that goes through a system MMU first. It's not typically necessary to go +through the system MMU because the GPU's MMU can already map buffers so +that they appear contiguous to the GPU. + +However, in order to support big pages, the system MMU has to be used to +combine multiple small pages into one virtually contiguous chunk so that +the GPU can then treat that as a single big page. + +In order to prepare for big page support, implement a custom MMU class +that takes care of setting the IOMMU bit when writing page tables and +when appropriate. + +This is also necessary to make sure that Nouveau works correctly on +Tegra devices where the GPU is connected to a system MMU and that IOMMU +is used to back the DMA API. Currently Nouveau assumes that the DMA API +is never backed by an IOMMU, so access to DMA-mapped buffers fault when +suddenly this assumption is no longer true. + +One situation where this can happen is on 32-bit Tegra SoCs where the +ARM architecture code automatically attaches the GPU with a DMA/IOMMU +domain. 
This is currently worked around by detaching the GPU from the +IOMMU domain at probe time. However, with Tegra186 and later this can +now also happen, but unfortunately no mechanism exists to detach from +the domain in the 64-bit ARM architecture code. + +Using this Tegra-specific MMU class ensures that DMA-mapped buffers are +properly mapped (with the IOMMU bit set) if the DMA API is backed by an +IOMMU domain. + +Signed-off-by: Thierry Reding +--- + .../gpu/drm/nouveau/nvkm/subdev/mmu/gk20a.c | 50 ++++++++++++++++++- + .../gpu/drm/nouveau/nvkm/subdev/mmu/gk20a.h | 44 ++++++++++++++++ + .../gpu/drm/nouveau/nvkm/subdev/mmu/gm20b.c | 6 ++- + .../gpu/drm/nouveau/nvkm/subdev/mmu/gp10b.c | 4 +- + drivers/gpu/drm/nouveau/nvkm/subdev/mmu/vmm.h | 1 + + .../drm/nouveau/nvkm/subdev/mmu/vmmgk20a.c | 22 +++++++- + .../drm/nouveau/nvkm/subdev/mmu/vmmgm20b.c | 4 +- + .../drm/nouveau/nvkm/subdev/mmu/vmmgp10b.c | 20 +++++++- + 8 files changed, 142 insertions(+), 9 deletions(-) + create mode 100644 drivers/gpu/drm/nouveau/nvkm/subdev/mmu/gk20a.h + +diff --git a/drivers/gpu/drm/nouveau/nvkm/subdev/mmu/gk20a.c b/drivers/gpu/drm/nouveau/nvkm/subdev/mmu/gk20a.c +index ac74965a60d4..d9a5e05b7dc7 100644 +--- a/drivers/gpu/drm/nouveau/nvkm/subdev/mmu/gk20a.c ++++ b/drivers/gpu/drm/nouveau/nvkm/subdev/mmu/gk20a.c +@@ -19,11 +19,59 @@ + * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR + * OTHER DEALINGS IN THE SOFTWARE. + */ ++ ++#include "gk20a.h" + #include "mem.h" + #include "vmm.h" + ++#include + #include + ++static void ++gk20a_mmu_ctor(const struct nvkm_mmu_func *func, struct nvkm_device *device, ++ int index, struct gk20a_mmu *mmu) ++{ ++ struct iommu_domain *domain = iommu_get_domain_for_dev(device->dev); ++ struct nvkm_device_tegra *tegra = device->func->tegra(device); ++ ++ nvkm_mmu_ctor(func, device, index, &mmu->base); ++ ++ /* ++ * If the DMA API is backed by an IOMMU, make sure the IOMMU bit is ++ * set for all buffer accesses. 
If the IOMMU is explicitly used, it ++ * is only used for instance blocks and the MMU doesn't care, since ++ * buffer objects are only mapped through the MMU, not through the ++ * IOMMU. ++ * ++ * Big page support could be implemented using explicit IOMMU usage, ++ * but the DMA API already provides that for free, so we don't worry ++ * about it for now. ++ */ ++ if (domain && !tegra->iommu.domain) { ++ mmu->iommu_mask = BIT_ULL(tegra->func->iommu_bit); ++ nvkm_debug(&mmu->base.subdev, "IOMMU mask: %llx\n", ++ mmu->iommu_mask); ++ } ++} ++ ++int ++gk20a_mmu_new_(const struct nvkm_mmu_func *func, struct nvkm_device *device, ++ int index, struct nvkm_mmu **pmmu) ++{ ++ struct gk20a_mmu *mmu; ++ ++ mmu = kzalloc(sizeof(*mmu), GFP_KERNEL); ++ if (!mmu) ++ return -ENOMEM; ++ ++ gk20a_mmu_ctor(func, device, index, mmu); ++ ++ if (pmmu) ++ *pmmu = &mmu->base; ++ ++ return 0; ++} ++ + static const struct nvkm_mmu_func + gk20a_mmu = { + .dma_bits = 40, +@@ -37,5 +85,5 @@ gk20a_mmu = { + int + gk20a_mmu_new(struct nvkm_device *device, int index, struct nvkm_mmu **pmmu) + { +- return nvkm_mmu_new_(&gk20a_mmu, device, index, pmmu); ++ return gk20a_mmu_new_(&gk20a_mmu, device, index, pmmu); + } +diff --git a/drivers/gpu/drm/nouveau/nvkm/subdev/mmu/gk20a.h b/drivers/gpu/drm/nouveau/nvkm/subdev/mmu/gk20a.h +new file mode 100644 +index 000000000000..bb81fc62509c +--- /dev/null ++++ b/drivers/gpu/drm/nouveau/nvkm/subdev/mmu/gk20a.h +@@ -0,0 +1,44 @@ ++/* ++ * Copyright (c) 2019 NVIDIA Corporation. 
++ * ++ * Permission is hereby granted, free of charge, to any person obtaining a ++ * copy of this software and associated documentation files (the "Software"), ++ * to deal in the Software without restriction, including without limitation ++ * the rights to use, copy, modify, merge, publish, distribute, sublicense, ++ * and/or sell copies of the Software, and to permit persons to whom the ++ * Software is furnished to do so, subject to the following conditions: ++ * ++ * The above copyright notice and this permission notice shall be included in ++ * all copies or substantial portions of the Software. ++ * ++ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR ++ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, ++ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL ++ * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR ++ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ++ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR ++ * OTHER DEALINGS IN THE SOFTWARE. ++ */ ++ ++#ifndef __NVKM_MMU_GK20A_H__ ++#define __NVKM_MMU_GK20A_H__ ++ ++#include "priv.h" ++ ++struct gk20a_mmu { ++ struct nvkm_mmu base; ++ ++ /* ++ * If an IOMMU is used, indicates which address bit will trigger an ++ * IOMMU translation when set (when this bit is not set, the IOMMU is ++ * bypassed). A value of 0 means an IOMMU is never used. 
++ */ ++ u64 iommu_mask; ++}; ++ ++#define gk20a_mmu(mmu) container_of(mmu, struct gk20a_mmu, base) ++ ++int gk20a_mmu_new_(const struct nvkm_mmu_func *, struct nvkm_device *, ++ int index, struct nvkm_mmu **); ++ ++#endif +diff --git a/drivers/gpu/drm/nouveau/nvkm/subdev/mmu/gm20b.c b/drivers/gpu/drm/nouveau/nvkm/subdev/mmu/gm20b.c +index 7353a94b4091..7fccd4df52a8 100644 +--- a/drivers/gpu/drm/nouveau/nvkm/subdev/mmu/gm20b.c ++++ b/drivers/gpu/drm/nouveau/nvkm/subdev/mmu/gm20b.c +@@ -19,6 +19,8 @@ + * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR + * OTHER DEALINGS IN THE SOFTWARE. + */ ++ ++#include "gk20a.h" + #include "mem.h" + #include "vmm.h" + +@@ -50,6 +52,6 @@ int + gm20b_mmu_new(struct nvkm_device *device, int index, struct nvkm_mmu **pmmu) + { + if (device->fb->page) +- return nvkm_mmu_new_(&gm20b_mmu_fixed, device, index, pmmu); +- return nvkm_mmu_new_(&gm20b_mmu, device, index, pmmu); ++ return gk20a_mmu_new_(&gm20b_mmu_fixed, device, index, pmmu); ++ return gk20a_mmu_new_(&gm20b_mmu, device, index, pmmu); + } +diff --git a/drivers/gpu/drm/nouveau/nvkm/subdev/mmu/gp10b.c b/drivers/gpu/drm/nouveau/nvkm/subdev/mmu/gp10b.c +index 0a50be9a785a..ae3cb47be3d8 100644 +--- a/drivers/gpu/drm/nouveau/nvkm/subdev/mmu/gp10b.c ++++ b/drivers/gpu/drm/nouveau/nvkm/subdev/mmu/gp10b.c +@@ -19,6 +19,8 @@ + * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR + * OTHER DEALINGS IN THE SOFTWARE. 
+ */ ++ ++#include "gk20a.h" + #include "mem.h" + #include "vmm.h" + +@@ -41,5 +43,5 @@ gp10b_mmu_new(struct nvkm_device *device, int index, struct nvkm_mmu **pmmu) + { + if (!nvkm_boolopt(device->cfgopt, "GP100MmuLayout", true)) + return gm20b_mmu_new(device, index, pmmu); +- return nvkm_mmu_new_(&gp10b_mmu, device, index, pmmu); ++ return gk20a_mmu_new_(&gp10b_mmu, device, index, pmmu); + } +diff --git a/drivers/gpu/drm/nouveau/nvkm/subdev/mmu/vmm.h b/drivers/gpu/drm/nouveau/nvkm/subdev/mmu/vmm.h +index 5e55ecbd8005..fb3a9e8bb9cd 100644 +--- a/drivers/gpu/drm/nouveau/nvkm/subdev/mmu/vmm.h ++++ b/drivers/gpu/drm/nouveau/nvkm/subdev/mmu/vmm.h +@@ -213,6 +213,7 @@ void gf100_vmm_invalidate(struct nvkm_vmm *, u32 type); + void gf100_vmm_invalidate_pdb(struct nvkm_vmm *, u64 addr); + + int gk20a_vmm_aper(enum nvkm_memory_target); ++int gk20a_vmm_valid(struct nvkm_vmm *, void *, u32, struct nvkm_vmm_map *); + + int gm200_vmm_new_(const struct nvkm_vmm_func *, const struct nvkm_vmm_func *, + struct nvkm_mmu *, bool, u64, u64, void *, u32, +diff --git a/drivers/gpu/drm/nouveau/nvkm/subdev/mmu/vmmgk20a.c b/drivers/gpu/drm/nouveau/nvkm/subdev/mmu/vmmgk20a.c +index 5a9582dce970..16d7bf727292 100644 +--- a/drivers/gpu/drm/nouveau/nvkm/subdev/mmu/vmmgk20a.c ++++ b/drivers/gpu/drm/nouveau/nvkm/subdev/mmu/vmmgk20a.c +@@ -19,6 +19,8 @@ + * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR + * OTHER DEALINGS IN THE SOFTWARE. 
+ */ ++ ++#include "gk20a.h" + #include "vmm.h" + + #include +@@ -33,12 +35,28 @@ gk20a_vmm_aper(enum nvkm_memory_target target) + } + } + ++int ++gk20a_vmm_valid(struct nvkm_vmm *vmm, void *argv, u32 argc, ++ struct nvkm_vmm_map *map) ++{ ++ struct gk20a_mmu *mmu = gk20a_mmu(vmm->mmu); ++ int ret; ++ ++ ret = gf100_vmm_valid(vmm, argv, argc, map); ++ if (ret < 0) ++ return ret; ++ ++ map->type |= mmu->iommu_mask >> 8; ++ ++ return 0; ++} ++ + static const struct nvkm_vmm_func + gk20a_vmm_17 = { + .join = gf100_vmm_join, + .part = gf100_vmm_part, + .aper = gf100_vmm_aper, +- .valid = gf100_vmm_valid, ++ .valid = gk20a_vmm_valid, + .flush = gf100_vmm_flush, + .invalidate_pdb = gf100_vmm_invalidate_pdb, + .page = { +@@ -53,7 +71,7 @@ gk20a_vmm_16 = { + .join = gf100_vmm_join, + .part = gf100_vmm_part, + .aper = gf100_vmm_aper, +- .valid = gf100_vmm_valid, ++ .valid = gk20a_vmm_valid, + .flush = gf100_vmm_flush, + .invalidate_pdb = gf100_vmm_invalidate_pdb, + .page = { +diff --git a/drivers/gpu/drm/nouveau/nvkm/subdev/mmu/vmmgm20b.c b/drivers/gpu/drm/nouveau/nvkm/subdev/mmu/vmmgm20b.c +index 96b759695dd8..7a6066d886cd 100644 +--- a/drivers/gpu/drm/nouveau/nvkm/subdev/mmu/vmmgm20b.c ++++ b/drivers/gpu/drm/nouveau/nvkm/subdev/mmu/vmmgm20b.c +@@ -26,7 +26,7 @@ gm20b_vmm_17 = { + .join = gm200_vmm_join, + .part = gf100_vmm_part, + .aper = gk20a_vmm_aper, +- .valid = gf100_vmm_valid, ++ .valid = gk20a_vmm_valid, + .flush = gf100_vmm_flush, + .invalidate_pdb = gf100_vmm_invalidate_pdb, + .page = { +@@ -42,7 +42,7 @@ gm20b_vmm_16 = { + .join = gm200_vmm_join, + .part = gf100_vmm_part, + .aper = gk20a_vmm_aper, +- .valid = gf100_vmm_valid, ++ .valid = gk20a_vmm_valid, + .flush = gf100_vmm_flush, + .invalidate_pdb = gf100_vmm_invalidate_pdb, + .page = { +diff --git a/drivers/gpu/drm/nouveau/nvkm/subdev/mmu/vmmgp10b.c b/drivers/gpu/drm/nouveau/nvkm/subdev/mmu/vmmgp10b.c +index e081239afe58..180c8f006e32 100644 +--- a/drivers/gpu/drm/nouveau/nvkm/subdev/mmu/vmmgp10b.c ++++ 
b/drivers/gpu/drm/nouveau/nvkm/subdev/mmu/vmmgp10b.c +@@ -19,14 +19,32 @@ + * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR + * OTHER DEALINGS IN THE SOFTWARE. + */ ++ ++#include "gk20a.h" + #include "vmm.h" + ++static int ++gp10b_vmm_valid(struct nvkm_vmm *vmm, void *argv, u32 argc, ++ struct nvkm_vmm_map *map) ++{ ++ struct gk20a_mmu *mmu = gk20a_mmu(vmm->mmu); ++ int ret; ++ ++ ret = gp100_vmm_valid(vmm, argv, argc, map); ++ if (ret < 0) ++ return ret; ++ ++ map->type |= mmu->iommu_mask >> 4; ++ ++ return 0; ++} ++ + static const struct nvkm_vmm_func + gp10b_vmm = { + .join = gp100_vmm_join, + .part = gf100_vmm_part, + .aper = gk20a_vmm_aper, +- .valid = gp100_vmm_valid, ++ .valid = gp10b_vmm_valid, + .flush = gp100_vmm_flush, + .mthd = gp100_vmm_mthd, + .invalidate_pdb = gp100_vmm_invalidate_pdb, + +From patchwork Mon Sep 16 15:04:09 2019 +Content-Type: text/plain; charset="utf-8" +MIME-Version: 1.0 +Content-Transfer-Encoding: 7bit +Subject: [08/11] drm/nouveau: tegra: Skip IOMMU initialization if already + attached +From: Thierry Reding +X-Patchwork-Id: 331060 +Message-Id: <20190916150412.10025-9-thierry.reding@gmail.com> +To: Ben Skeggs , Thierry Reding +Cc: linux-tegra@vger.kernel.org, nouveau@lists.freedesktop.org, + dri-devel@lists.freedesktop.org +Date: Mon, 16 Sep 2019 17:04:09 +0200 + +From: Thierry Reding + +If the GPU is already attached to an IOMMU, don't detach it and setup an +explicit IOMMU domain. Since Nouveau can now properly handle the case of +the DMA API being backed by an IOMMU, just continue using the DMA API. 
+ +Signed-off-by: Thierry Reding +--- + .../drm/nouveau/nvkm/engine/device/tegra.c | 19 +++++++------------ + 1 file changed, 7 insertions(+), 12 deletions(-) + +diff --git a/drivers/gpu/drm/nouveau/nvkm/engine/device/tegra.c b/drivers/gpu/drm/nouveau/nvkm/engine/device/tegra.c +index d0d52c1d4aee..fc652aaa41c7 100644 +--- a/drivers/gpu/drm/nouveau/nvkm/engine/device/tegra.c ++++ b/drivers/gpu/drm/nouveau/nvkm/engine/device/tegra.c +@@ -23,10 +23,6 @@ + #ifdef CONFIG_NOUVEAU_PLATFORM_DRIVER + #include "priv.h" + +-#if IS_ENABLED(CONFIG_ARM_DMA_USE_IOMMU) +-#include +-#endif +- + static int + nvkm_device_tegra_power_up(struct nvkm_device_tegra *tdev) + { +@@ -109,14 +105,13 @@ nvkm_device_tegra_probe_iommu(struct nvkm_device_tegra *tdev) + unsigned long pgsize_bitmap; + int ret; + +-#if IS_ENABLED(CONFIG_ARM_DMA_USE_IOMMU) +- if (dev->archdata.mapping) { +- struct dma_iommu_mapping *mapping = to_dma_iommu_mapping(dev); +- +- arm_iommu_detach_device(dev); +- arm_iommu_release_mapping(mapping); +- } +-#endif ++ /* ++ * Skip explicit IOMMU initialization if the GPU is already attached ++ * to an IOMMU domain. This can happen if the DMA API is backed by an ++ * IOMMU. ++ */ ++ if (iommu_get_domain_for_dev(dev)) ++ return; + + if (!tdev->func->iommu_bit) + return; + +From patchwork Mon Sep 16 15:04:10 2019 +Content-Type: text/plain; charset="utf-8" +MIME-Version: 1.0 +Content-Transfer-Encoding: 7bit +Subject: [09/11] drm/nouveau: tegra: Fall back to 32-bit DMA mask without IOMMU +From: Thierry Reding +X-Patchwork-Id: 331061 +Message-Id: <20190916150412.10025-10-thierry.reding@gmail.com> +To: Ben Skeggs , Thierry Reding +Cc: linux-tegra@vger.kernel.org, nouveau@lists.freedesktop.org, + dri-devel@lists.freedesktop.org +Date: Mon, 16 Sep 2019 17:04:10 +0200 + +From: Thierry Reding + +The GPU can usually address more than 32-bit, even without being +attached to an IOMMU. 
However, if the GPU is not attached to an IOMMU, +it's likely that there is no IOMMU in the system, in which case any +buffers allocated by Nouveau will likely end up in a region of memory +that cannot be accessed by host1x. + +Signed-off-by: Thierry Reding +--- + .../drm/nouveau/nvkm/engine/device/tegra.c | 111 +++++++++++------- + 1 file changed, 70 insertions(+), 41 deletions(-) + +diff --git a/drivers/gpu/drm/nouveau/nvkm/engine/device/tegra.c b/drivers/gpu/drm/nouveau/nvkm/engine/device/tegra.c +index fc652aaa41c7..221238a2cf53 100644 +--- a/drivers/gpu/drm/nouveau/nvkm/engine/device/tegra.c ++++ b/drivers/gpu/drm/nouveau/nvkm/engine/device/tegra.c +@@ -97,7 +97,7 @@ nvkm_device_tegra_power_down(struct nvkm_device_tegra *tdev) + return 0; + } + +-static void ++static int + nvkm_device_tegra_probe_iommu(struct nvkm_device_tegra *tdev) + { + #if IS_ENABLED(CONFIG_IOMMU_API) +@@ -111,47 +111,65 @@ nvkm_device_tegra_probe_iommu(struct nvkm_device_tegra *tdev) + * IOMMU. + */ + if (iommu_get_domain_for_dev(dev)) +- return; ++ return -ENODEV; + + if (!tdev->func->iommu_bit) +- return; ++ return -ENODEV; ++ ++ if (!iommu_present(&platform_bus_type)) ++ return -ENODEV; + + mutex_init(&tdev->iommu.mutex); + +- if (iommu_present(&platform_bus_type)) { +- tdev->iommu.domain = iommu_domain_alloc(&platform_bus_type); +- if (!tdev->iommu.domain) +- goto error; ++ tdev->iommu.domain = iommu_domain_alloc(&platform_bus_type); ++ if (!tdev->iommu.domain) ++ return -ENOMEM; + +- /* +- * A IOMMU is only usable if it supports page sizes smaller +- * or equal to the system's PAGE_SIZE, with a preference if +- * both are equal. 
+- */ +- pgsize_bitmap = tdev->iommu.domain->ops->pgsize_bitmap; +- if (pgsize_bitmap & PAGE_SIZE) { +- tdev->iommu.pgshift = PAGE_SHIFT; +- } else { +- tdev->iommu.pgshift = fls(pgsize_bitmap & ~PAGE_MASK); +- if (tdev->iommu.pgshift == 0) { +- dev_warn(dev, "unsupported IOMMU page size\n"); +- goto free_domain; +- } +- tdev->iommu.pgshift -= 1; ++ /* ++ * An IOMMU is only usable if it supports page sizes smaller or equal ++ * to the system's PAGE_SIZE, with a preference if both are equal. ++ */ ++ pgsize_bitmap = tdev->iommu.domain->ops->pgsize_bitmap; ++ if (pgsize_bitmap & PAGE_SIZE) { ++ tdev->iommu.pgshift = PAGE_SHIFT; ++ } else { ++ tdev->iommu.pgshift = fls(pgsize_bitmap & ~PAGE_MASK); ++ if (tdev->iommu.pgshift == 0) { ++ dev_warn(dev, "unsupported IOMMU page size\n"); ++ ret = -ENOTSUPP; ++ goto free_domain; + } + +- ret = iommu_attach_device(tdev->iommu.domain, dev); +- if (ret) +- goto free_domain; ++ tdev->iommu.pgshift -= 1; ++ } + +- ret = nvkm_mm_init(&tdev->iommu.mm, 0, 0, +- (1ULL << tdev->func->iommu_bit) >> +- tdev->iommu.pgshift, 1); +- if (ret) +- goto detach_device; ++ ret = iommu_attach_device(tdev->iommu.domain, dev); ++ if (ret) { ++ dev_warn(dev, "failed to attach to IOMMU: %d\n", ret); ++ goto free_domain; ++ } ++ ++ ret = nvkm_mm_init(&tdev->iommu.mm, 0, 0, ++ (1ULL << tdev->func->iommu_bit) >> ++ tdev->iommu.pgshift, 1); ++ if (ret) { ++ dev_warn(dev, "failed to initialize IOVA space: %d\n", ret); ++ goto detach_device; ++ } ++ ++ /* ++ * The IOMMU bit defines the upper limit of the GPU-addressable space. 
++ */ ++ ret = dma_set_mask(dev, DMA_BIT_MASK(tdev->func->iommu_bit)); ++ if (ret) { ++ dev_warn(dev, "failed to set DMA mask: %d\n", ret); ++ goto fini_mm; + } + +- return; ++ return 0; ++ ++fini_mm: ++ nvkm_mm_fini(&tdev->iommu.mm); + + detach_device: + iommu_detach_device(tdev->iommu.domain, dev); +@@ -159,10 +177,15 @@ nvkm_device_tegra_probe_iommu(struct nvkm_device_tegra *tdev) + free_domain: + iommu_domain_free(tdev->iommu.domain); + +-error: ++ /* reset these so that the DMA API code paths are executed */ + tdev->iommu.domain = NULL; + tdev->iommu.pgshift = 0; +- dev_err(dev, "cannot initialize IOMMU MM\n"); ++ ++ dev_warn(dev, "cannot initialize IOMMU MM\n"); ++ ++ return ret; ++#else ++ return -ENOTSUPP; + #endif + } + +@@ -327,14 +350,20 @@ nvkm_device_tegra_new(const struct nvkm_device_tegra_func *func, + goto free; + } + +- /** +- * The IOMMU bit defines the upper limit of the GPU-addressable space. +- */ +- ret = dma_set_mask(&pdev->dev, DMA_BIT_MASK(tdev->func->iommu_bit)); +- if (ret) +- goto free; +- +- nvkm_device_tegra_probe_iommu(tdev); ++ ret = nvkm_device_tegra_probe_iommu(tdev); ++ if (ret) { ++ /* ++ * If we fail to set up an IOMMU, fall back to a 32-bit DMA ++ * mask. This is not necessary for the GPU to work because it ++ * can usually address all of system memory. However, if the ++ * buffers allocated by Nouveau are meant to be shared with ++ * the display controller, we need to restrict where they can ++ * come from. 
++ */ ++ ret = dma_set_mask(&pdev->dev, DMA_BIT_MASK(32)); ++ if (ret) ++ goto free; ++ } + + ret = nvkm_device_tegra_power_up(tdev); + if (ret) + +From patchwork Mon Sep 16 15:04:11 2019 +Content-Type: text/plain; charset="utf-8" +MIME-Version: 1.0 +Content-Transfer-Encoding: 7bit +Subject: [10/11] arm64: tegra: Enable GPU on Jetson TX2 +From: Thierry Reding +X-Patchwork-Id: 331063 +Message-Id: <20190916150412.10025-11-thierry.reding@gmail.com> +To: Ben Skeggs , Thierry Reding +Cc: linux-tegra@vger.kernel.org, nouveau@lists.freedesktop.org, + dri-devel@lists.freedesktop.org +Date: Mon, 16 Sep 2019 17:04:11 +0200 + +From: Alexandre Courbot + +Enable the GPU node for the Jetson TX2 board. + +Signed-off-by: Alexandre Courbot +Signed-off-by: Thierry Reding +--- + arch/arm64/boot/dts/nvidia/tegra186-p2771-0000.dts | 4 ++++ + 1 file changed, 4 insertions(+) + +diff --git a/arch/arm64/boot/dts/nvidia/tegra186-p2771-0000.dts b/arch/arm64/boot/dts/nvidia/tegra186-p2771-0000.dts +index bdace01561ba..6f7c7c4c5c29 100644 +--- a/arch/arm64/boot/dts/nvidia/tegra186-p2771-0000.dts ++++ b/arch/arm64/boot/dts/nvidia/tegra186-p2771-0000.dts +@@ -276,6 +276,10 @@ + }; + }; + ++ gpu@17000000 { ++ status = "okay"; ++ }; ++ + gpio-keys { + compatible = "gpio-keys"; + + +From patchwork Mon Sep 16 15:04:12 2019 +Content-Type: text/plain; charset="utf-8" +MIME-Version: 1.0 +Content-Transfer-Encoding: 7bit +Subject: [11/11] arm64: tegra: Enable SMMU for GPU on Tegra186 +From: Thierry Reding +X-Patchwork-Id: 331062 +Message-Id: <20190916150412.10025-12-thierry.reding@gmail.com> +To: Ben Skeggs , Thierry Reding +Cc: linux-tegra@vger.kernel.org, nouveau@lists.freedesktop.org, + dri-devel@lists.freedesktop.org +Date: Mon, 16 Sep 2019 17:04:12 +0200 + +From: Thierry Reding + +The GPU has a connection to the ARM SMMU found on Tegra186, which can be +used to support large pages. Make sure the GPU is attached to the SMMU +to take advantage of its capabilities. 
+ +Signed-off-by: Thierry Reding +--- + arch/arm64/boot/dts/nvidia/tegra186.dtsi | 1 + + 1 file changed, 1 insertion(+) + +diff --git a/arch/arm64/boot/dts/nvidia/tegra186.dtsi b/arch/arm64/boot/dts/nvidia/tegra186.dtsi +index 47cd831fcf44..171fd4dfa58d 100644 +--- a/arch/arm64/boot/dts/nvidia/tegra186.dtsi ++++ b/arch/arm64/boot/dts/nvidia/tegra186.dtsi +@@ -1172,6 +1172,7 @@ + status = "disabled"; + + power-domains = <&bpmp TEGRA186_POWER_DOMAIN_GPU>; ++ iommus = <&smmu TEGRA186_SID_GPU>; + }; + + sysram@30000000 { diff --git a/kernel.spec b/kernel.spec index 5ce145554..5e25b197c 100644 --- a/kernel.spec +++ b/kernel.spec @@ -542,6 +542,8 @@ Patch321: arm64-tegra-Jetson-TX2-Allow-bootloader-to-configure.patch Patch322: mfd-max77620-Do-not-allocate-IRQs-upfront.patch # https://patchwork.ozlabs.org/patch/1170631/ Patch323: gpio-max77620-Use-correct-unit-for-debounce-times.patch +# https://patchwork.freedesktop.org/series/66762/ +Patch324: drm-nouveau-Enable-GP10B-by-default.patch # 400 - IBM (ppc/s390x) patches