69 lines
2.6 KiB
Diff
69 lines
2.6 KiB
Diff
From 37b556606d1217b4367e622d88cef11c65764386 Mon Sep 17 00:00:00 2001
|
|
From: Ben Skeggs <bskeggs@redhat.com>
|
|
Date: Tue, 31 Mar 2020 16:08:44 +1000
|
|
Subject: [PATCH 2/2] drm/nouveau/gr/gp107,gp108: implement workaround for HW
|
|
hanging during init
|
|
|
|
Certain boards with GP107/GP108 chipsets hang (often, but randomly) for
|
|
unknown reasons during GR initialisation.
|
|
|
|
The first tell-tale symptom of this issue is:
|
|
|
|
nouveau 0000:01:00.0: bus: MMIO read of 00000000 FAULT at 409800 [ TIMEOUT ]
|
|
|
|
appearing in dmesg, likely followed by many other failures being logged.
|
|
|
|
Karol found this workaround (WAR) for the issue a while back, but efforts to isolate
|
|
the root cause and proper fix have not yielded success so far. I've
|
|
modified the original patch to include a few more details, limit it to
|
|
GP107/GP108 by default, and added a config option to override this choice.
|
|
|
|
Signed-off-by: Ben Skeggs <bskeggs@redhat.com>
|
|
Reviewed-by: Karol Herbst <kherbst@redhat.com>
|
|
---
|
|
.../gpu/drm/nouveau/nvkm/engine/gr/gf100.c | 26 +++++++++++++++++++
|
|
1 file changed, 26 insertions(+)
|
|
|
|
diff --git a/drivers/gpu/drm/nouveau/nvkm/engine/gr/gf100.c b/drivers/gpu/drm/nouveau/nvkm/engine/gr/gf100.c
|
|
index dd8f85b8b3a7..f2f5636efac4 100644
|
|
--- a/drivers/gpu/drm/nouveau/nvkm/engine/gr/gf100.c
|
|
+++ b/drivers/gpu/drm/nouveau/nvkm/engine/gr/gf100.c
|
|
@@ -1981,8 +1981,34 @@ gf100_gr_init_(struct nvkm_gr *base)
|
|
{
|
|
struct gf100_gr *gr = gf100_gr(base);
|
|
struct nvkm_subdev *subdev = &base->engine.subdev;
|
|
+ struct nvkm_device *device = subdev->device;
|
|
+ bool reset = device->chipset == 0x137 || device->chipset == 0x138;
|
|
u32 ret;
|
|
|
|
+ /* On certain GP107/GP108 boards, we trigger a weird issue where
|
|
+ * GR will stop responding to PRI accesses after we've asked the
|
|
+ * SEC2 RTOS to boot the GR falcons. This happens with far more
|
|
+ * frequency when cold-booting a board (ie. returning from D3).
|
|
+ *
|
|
+ * The root cause for this is not known and has proven difficult
|
|
+ * to isolate, with many avenues being dead-ends.
|
|
+ *
|
|
+ * A workaround was discovered by Karol, whereby putting GR into
|
|
+ * reset for an extended period right before initialisation
|
|
+ * prevents the problem from occurring.
|
|
+ *
|
|
+ * XXX: As RM does not require any such workaround, this is more
|
|
+ * of a hack than a true fix.
|
|
+ */
|
|
+ reset = nvkm_boolopt(device->cfgopt, "NvGrResetWar", reset);
|
|
+ if (reset) {
|
|
+ nvkm_mask(device, 0x000200, 0x00001000, 0x00000000);
|
|
+ nvkm_rd32(device, 0x000200);
|
|
+ msleep(50);
|
|
+ nvkm_mask(device, 0x000200, 0x00001000, 0x00001000);
|
|
+ nvkm_rd32(device, 0x000200);
|
|
+ }
|
|
+
|
|
nvkm_pmu_pgob(gr->base.engine.subdev.device->pmu, false);
|
|
|
|
ret = nvkm_falcon_get(&gr->fecs.falcon, subdev);
|
|
--
|
|
2.25.1
|
|
|