From 8002493cec883f0be4b95350357f99e85d839fbc Mon Sep 17 00:00:00 2001 From: Igor Gnatenko Date: Thu, 14 Nov 2013 15:41:45 +0400 Subject: [PATCH 1/2] 9.2.3 upstream release Signed-off-by: Igor Gnatenko --- .gitignore | 1 + ...ix-color-inversion-on-mem-gmem-resto.patch | 31 + ...3xx-fix-viewport-on-gmem-mem-resolve.patch | 32 + ...bug-option-to-disable-scissor-optimi.patch | 113 ++ 0004-freedreno-update-register-headers.patch | 1238 +++++++++++++++++ 0005-freedreno-a3xx-some-texture-fixes.patch | 65 + 0006-freedreno-a3xx-compiler-fix-CMP.patch | 45 + ...a3xx-compiler-handle-saturate-on-dst.patch | 98 ++ ...ompiler-use-max_reg-rather-than-file.patch | 59 + ...ompiler-cat4-cannot-use-const-reg-as.patch | 104 ++ ...-segfault-when-no-color-buffer-bound.patch | 216 +++ ...ompiler-make-compiler-errors-more-us.patch | 172 +++ ...x-compiler-bit-of-re-arrange-cleanup.patch | 420 ++++++ ...edreno-a3xx-compiler-fix-SGT-SLT-etc.patch | 231 +++ 0014-freedreno-a3xx-don-t-leak-so-much.patch | 36 + ...-a3xx-compiler-better-const-handling.patch | 376 +++++ ...xx-compiler-handle-sync-flags-better.patch | 128 ++ ...dreno-updates-for-msm-drm-kms-driver.patch | 328 +++++ mesa.spec | 44 +- sources | 2 +- 20 files changed, 3736 insertions(+), 3 deletions(-) create mode 100644 0001-freedreno-a3xx-fix-color-inversion-on-mem-gmem-resto.patch create mode 100644 0002-freedreno-a3xx-fix-viewport-on-gmem-mem-resolve.patch create mode 100644 0003-freedreno-add-debug-option-to-disable-scissor-optimi.patch create mode 100644 0004-freedreno-update-register-headers.patch create mode 100644 0005-freedreno-a3xx-some-texture-fixes.patch create mode 100644 0006-freedreno-a3xx-compiler-fix-CMP.patch create mode 100644 0007-freedreno-a3xx-compiler-handle-saturate-on-dst.patch create mode 100644 0008-freedreno-a3xx-compiler-use-max_reg-rather-than-file.patch create mode 100644 0009-freedreno-a3xx-compiler-cat4-cannot-use-const-reg-as.patch create mode 100644 
0010-freedreno-fix-segfault-when-no-color-buffer-bound.patch create mode 100644 0011-freedreno-a3xx-compiler-make-compiler-errors-more-us.patch create mode 100644 0012-freedreno-a3xx-compiler-bit-of-re-arrange-cleanup.patch create mode 100644 0013-freedreno-a3xx-compiler-fix-SGT-SLT-etc.patch create mode 100644 0014-freedreno-a3xx-don-t-leak-so-much.patch create mode 100644 0015-freedreno-a3xx-compiler-better-const-handling.patch create mode 100644 0016-freedreno-a3xx-compiler-handle-sync-flags-better.patch create mode 100644 0017-freedreno-updates-for-msm-drm-kms-driver.patch diff --git a/.gitignore b/.gitignore index ebcc756..089b161 100644 --- a/.gitignore +++ b/.gitignore @@ -57,3 +57,4 @@ mesa-20100720.tar.bz2 /mesa-20130902.tar.xz /mesa-20130919.tar.xz /mesa-20131113.tar.xz +/mesa-20131114.tar.xz diff --git a/0001-freedreno-a3xx-fix-color-inversion-on-mem-gmem-resto.patch b/0001-freedreno-a3xx-fix-color-inversion-on-mem-gmem-resto.patch new file mode 100644 index 0000000..23b9bf6 --- /dev/null +++ b/0001-freedreno-a3xx-fix-color-inversion-on-mem-gmem-resto.patch @@ -0,0 +1,31 @@ +From 2d844be97fd5b6b0f02a94d4bb194c0bd19de6f9 Mon Sep 17 00:00:00 2001 +From: Rob Clark +Date: Sat, 13 Jul 2013 13:07:46 -0400 +Subject: [PATCH 01/17] freedreno/a3xx: fix color inversion on mem->gmem + restore + +Signed-off-by: Rob Clark +--- + src/gallium/drivers/freedreno/a3xx/fd3_emit.c | 6 +++--- + 1 file changed, 3 insertions(+), 3 deletions(-) + +diff --git a/src/gallium/drivers/freedreno/a3xx/fd3_emit.c b/src/gallium/drivers/freedreno/a3xx/fd3_emit.c +index a7a4bf7..b8436c9 100644 +--- a/src/gallium/drivers/freedreno/a3xx/fd3_emit.c ++++ b/src/gallium/drivers/freedreno/a3xx/fd3_emit.c +@@ -279,9 +279,9 @@ fd3_emit_gmem_restore_tex(struct fd_ringbuffer *ring, struct pipe_surface *psurf + CP_LOAD_STATE_1_EXT_SRC_ADDR(0)); + OUT_RING(ring, A3XX_TEX_CONST_0_FMT(fd3_pipe2tex(psurf->format)) | + 0x40000000 | // XXX +- fd3_tex_swiz(psurf->format, PIPE_SWIZZLE_BLUE, 
PIPE_SWIZZLE_GREEN, +- PIPE_SWIZZLE_RED, PIPE_SWIZZLE_ALPHA)); +- OUT_RING(ring, A3XX_TEX_CONST_1_FETCHSIZE(fd3_pipe2fetchsize(psurf->format)) | ++ fd3_tex_swiz(psurf->format, PIPE_SWIZZLE_RED, PIPE_SWIZZLE_GREEN, ++ PIPE_SWIZZLE_BLUE, PIPE_SWIZZLE_ALPHA)); ++ OUT_RING(ring, A3XX_TEX_CONST_1_FETCHSIZE(TFETCH_DISABLE) | + A3XX_TEX_CONST_1_WIDTH(psurf->width) | + A3XX_TEX_CONST_1_HEIGHT(psurf->height)); + OUT_RING(ring, A3XX_TEX_CONST_2_PITCH(rsc->pitch * rsc->cpp) | +-- +1.8.4.2 + diff --git a/0002-freedreno-a3xx-fix-viewport-on-gmem-mem-resolve.patch b/0002-freedreno-a3xx-fix-viewport-on-gmem-mem-resolve.patch new file mode 100644 index 0000000..eede425 --- /dev/null +++ b/0002-freedreno-a3xx-fix-viewport-on-gmem-mem-resolve.patch @@ -0,0 +1,32 @@ +From b2a32254d65c356604bbffda6e771dca0509e9ed Mon Sep 17 00:00:00 2001 +From: Rob Clark +Date: Sat, 13 Jul 2013 13:08:22 -0400 +Subject: [PATCH 02/17] freedreno/a3xx: fix viewport on gmem->mem resolve + +Signed-off-by: Rob Clark +--- + src/gallium/drivers/freedreno/a3xx/fd3_gmem.c | 8 ++++++++ + 1 file changed, 8 insertions(+) + +diff --git a/src/gallium/drivers/freedreno/a3xx/fd3_gmem.c b/src/gallium/drivers/freedreno/a3xx/fd3_gmem.c +index 1cb170a..9050166 100644 +--- a/src/gallium/drivers/freedreno/a3xx/fd3_gmem.c ++++ b/src/gallium/drivers/freedreno/a3xx/fd3_gmem.c +@@ -168,6 +168,14 @@ fd3_emit_tile_gmem2mem(struct fd_context *ctx, uint32_t xoff, uint32_t yoff, + OUT_PKT0(ring, REG_A3XX_GRAS_CL_CLIP_CNTL, 1); + OUT_RING(ring, 0x00000000); /* GRAS_CL_CLIP_CNTL */ + ++ OUT_PKT0(ring, REG_A3XX_GRAS_CL_VPORT_XOFFSET, 6); ++ OUT_RING(ring, A3XX_GRAS_CL_VPORT_XOFFSET((float)pfb->width/2.0 - 0.5)); ++ OUT_RING(ring, A3XX_GRAS_CL_VPORT_XSCALE((float)pfb->width/2.0)); ++ OUT_RING(ring, A3XX_GRAS_CL_VPORT_YOFFSET((float)pfb->height/2.0 - 0.5)); ++ OUT_RING(ring, A3XX_GRAS_CL_VPORT_YSCALE(-(float)pfb->height/2.0)); ++ OUT_RING(ring, A3XX_GRAS_CL_VPORT_ZOFFSET(0.0)); ++ OUT_RING(ring, A3XX_GRAS_CL_VPORT_ZSCALE(1.0)); ++ + 
OUT_PKT0(ring, REG_A3XX_RB_MODE_CONTROL, 1); + OUT_RING(ring, A3XX_RB_MODE_CONTROL_RENDER_MODE(RB_RESOLVE_PASS) | + A3XX_RB_MODE_CONTROL_MARB_CACHE_SPLIT_MODE); +-- +1.8.4.2 + diff --git a/0003-freedreno-add-debug-option-to-disable-scissor-optimi.patch b/0003-freedreno-add-debug-option-to-disable-scissor-optimi.patch new file mode 100644 index 0000000..e0bb580 --- /dev/null +++ b/0003-freedreno-add-debug-option-to-disable-scissor-optimi.patch @@ -0,0 +1,113 @@ +From 8b167d34bebcc9aaf67838be71cc3272728d4fe1 Mon Sep 17 00:00:00 2001 +From: Rob Clark +Date: Wed, 29 May 2013 10:16:33 -0400 +Subject: [PATCH 03/17] freedreno: add debug option to disable scissor + optimization + +Useful for testing and debugging. + +Signed-off-by: Rob Clark +--- + src/gallium/drivers/freedreno/freedreno_gmem.c | 26 +++++++++++++++--------- + src/gallium/drivers/freedreno/freedreno_screen.c | 1 + + src/gallium/drivers/freedreno/freedreno_util.h | 9 ++++---- + 3 files changed, 22 insertions(+), 14 deletions(-) + +diff --git a/src/gallium/drivers/freedreno/freedreno_gmem.c b/src/gallium/drivers/freedreno/freedreno_gmem.c +index 12633bd..197d1d9 100644 +--- a/src/gallium/drivers/freedreno/freedreno_gmem.c ++++ b/src/gallium/drivers/freedreno/freedreno_gmem.c +@@ -71,7 +71,8 @@ calculate_tiles(struct fd_context *ctx) + { + struct fd_gmem_stateobj *gmem = &ctx->gmem; + struct pipe_scissor_state *scissor = &ctx->max_scissor; +- uint32_t cpp = util_format_get_blocksize(ctx->framebuffer.cbufs[0]->format); ++ struct pipe_framebuffer_state *pfb = &ctx->framebuffer; ++ uint32_t cpp = util_format_get_blocksize(pfb->cbufs[0]->format); + uint32_t gmem_size = ctx->screen->gmemsize_bytes; + uint32_t minx, miny, width, height; + uint32_t nbins_x = 1, nbins_y = 1; +@@ -84,10 +85,17 @@ calculate_tiles(struct fd_context *ctx) + return; + } + +- minx = scissor->minx & ~31; /* round down to multiple of 32 */ +- miny = scissor->miny & ~31; +- width = scissor->maxx - minx; +- height = scissor->maxy - miny; ++ if 
(fd_mesa_debug & FD_DBG_DSCIS) { ++ minx = 0; ++ miny = 0; ++ width = pfb->width; ++ height = pfb->height; ++ } else { ++ minx = scissor->minx & ~31; /* round down to multiple of 32 */ ++ miny = scissor->miny & ~31; ++ width = scissor->maxx - minx; ++ height = scissor->maxy - miny; ++ } + + // TODO we probably could optimize this a bit if we know that + // Z or stencil is not enabled for any of the draw calls.. +@@ -132,9 +140,7 @@ static void + render_tiles(struct fd_context *ctx) + { + struct fd_gmem_stateobj *gmem = &ctx->gmem; +- uint32_t i, yoff = 0; +- +- yoff= gmem->miny; ++ uint32_t i, yoff = gmem->miny; + + ctx->emit_tile_init(ctx); + +@@ -143,13 +149,13 @@ render_tiles(struct fd_context *ctx) + uint32_t bh = gmem->bin_h; + + /* clip bin height: */ +- bh = MIN2(bh, gmem->height - yoff); ++ bh = MIN2(bh, gmem->miny + gmem->height - yoff); + + for (j = 0; j < gmem->nbins_x; j++) { + uint32_t bw = gmem->bin_w; + + /* clip bin width: */ +- bw = MIN2(bw, gmem->width - xoff); ++ bw = MIN2(bw, gmem->minx + gmem->width - xoff); + + DBG("bin_h=%d, yoff=%d, bin_w=%d, xoff=%d", + bh, yoff, bw, xoff); +diff --git a/src/gallium/drivers/freedreno/freedreno_screen.c b/src/gallium/drivers/freedreno/freedreno_screen.c +index 52d51c2..36ef8b0 100644 +--- a/src/gallium/drivers/freedreno/freedreno_screen.c ++++ b/src/gallium/drivers/freedreno/freedreno_screen.c +@@ -60,6 +60,7 @@ static const struct debug_named_value debug_options[] = { + {"disasm", FD_DBG_DISASM, "Dump TGSI and adreno shader disassembly"}, + {"dclear", FD_DBG_DCLEAR, "Mark all state dirty after clear"}, + {"dgmem", FD_DBG_DGMEM, "Mark all state dirty after GMEM tile pass"}, ++ {"dscis", FD_DBG_DSCIS, "Disable scissor optimization"}, + DEBUG_NAMED_VALUE_END + }; + +diff --git a/src/gallium/drivers/freedreno/freedreno_util.h b/src/gallium/drivers/freedreno/freedreno_util.h +index f18f0fe..b49cdfc 100644 +--- a/src/gallium/drivers/freedreno/freedreno_util.h ++++ b/src/gallium/drivers/freedreno/freedreno_util.h 
+@@ -47,10 +47,11 @@ enum adreno_pa_su_sc_draw fd_polygon_mode(unsigned mode); + enum adreno_stencil_op fd_stencil_op(unsigned op); + + +-#define FD_DBG_MSGS 0x1 +-#define FD_DBG_DISASM 0x2 +-#define FD_DBG_DCLEAR 0x4 +-#define FD_DBG_DGMEM 0x8 ++#define FD_DBG_MSGS 0x01 ++#define FD_DBG_DISASM 0x02 ++#define FD_DBG_DCLEAR 0x04 ++#define FD_DBG_DGMEM 0x08 ++#define FD_DBG_DSCIS 0x10 + extern int fd_mesa_debug; + + #define DBG(fmt, ...) \ +-- +1.8.4.2 + diff --git a/0004-freedreno-update-register-headers.patch b/0004-freedreno-update-register-headers.patch new file mode 100644 index 0000000..4654619 --- /dev/null +++ b/0004-freedreno-update-register-headers.patch @@ -0,0 +1,1238 @@ +From e1e9f69d3c90803d3c0e2d9b9396c1a06b5f0bb2 Mon Sep 17 00:00:00 2001 +From: Rob Clark +Date: Mon, 5 Aug 2013 17:57:24 -0400 +Subject: [PATCH 04/17] freedreno: update register headers + +resync w/ rnndb database + +Signed-off-by: Rob Clark +--- + src/gallium/drivers/freedreno/a2xx/a2xx.xml.h | 93 ++--- + src/gallium/drivers/freedreno/a3xx/a3xx.xml.h | 439 +++++++++++++++++++--- + src/gallium/drivers/freedreno/a3xx/fd3_emit.c | 4 +- + src/gallium/drivers/freedreno/a3xx/fd3_program.c | 2 +- + src/gallium/drivers/freedreno/a3xx/fd3_util.c | 5 +- + src/gallium/drivers/freedreno/adreno_common.xml.h | 319 +++++++++++++++- + src/gallium/drivers/freedreno/adreno_pm4.xml.h | 6 +- + src/gallium/drivers/freedreno/freedreno_util.h | 1 + + 8 files changed, 758 insertions(+), 111 deletions(-) + +diff --git a/src/gallium/drivers/freedreno/a2xx/a2xx.xml.h b/src/gallium/drivers/freedreno/a2xx/a2xx.xml.h +index bee01f1..3546386 100644 +--- a/src/gallium/drivers/freedreno/a2xx/a2xx.xml.h ++++ b/src/gallium/drivers/freedreno/a2xx/a2xx.xml.h +@@ -8,10 +8,12 @@ http://0x04.net/cgit/index.cgi/rules-ng-ng + git clone git://0x04.net/rules-ng-ng + + The rules-ng-ng source files this header was generated from are: +-- /home/robclark/src/freedreno/envytools/rnndb/a2xx.xml ( 30127 bytes, from 2013-05-05 18:29:35) 
++- /home/robclark/src/freedreno/envytools/rnndb/adreno.xml ( 327 bytes, from 2013-07-05 19:21:12) + - /home/robclark/src/freedreno/envytools/rnndb/freedreno_copyright.xml ( 1453 bytes, from 2013-03-31 16:51:27) +-- /home/robclark/src/freedreno/envytools/rnndb/adreno_common.xml ( 3094 bytes, from 2013-05-05 18:29:22) ++- /home/robclark/src/freedreno/envytools/rnndb/a2xx/a2xx.xml ( 30005 bytes, from 2013-07-19 21:30:48) ++- /home/robclark/src/freedreno/envytools/rnndb/adreno_common.xml ( 8983 bytes, from 2013-07-24 01:38:36) + - /home/robclark/src/freedreno/envytools/rnndb/adreno_pm4.xml ( 9712 bytes, from 2013-05-26 15:22:37) ++- /home/robclark/src/freedreno/envytools/rnndb/a3xx/a3xx.xml ( 51415 bytes, from 2013-08-03 14:26:05) + + Copyright (C) 2013 by the following authors: + - Rob Clark (robclark) +@@ -236,56 +238,6 @@ enum sq_tex_filter { + + #define REG_A2XX_CP_PFP_UCODE_DATA 0x000000c1 + +-#define REG_A2XX_CP_RB_BASE 0x000001c0 +- +-#define REG_A2XX_CP_RB_CNTL 0x000001c1 +- +-#define REG_A2XX_CP_RB_RPTR_ADDR 0x000001c3 +- +-#define REG_A2XX_CP_RB_RPTR 0x000001c4 +- +-#define REG_A2XX_CP_RB_WPTR 0x000001c5 +- +-#define REG_A2XX_CP_RB_WPTR_DELAY 0x000001c6 +- +-#define REG_A2XX_CP_RB_RPTR_WR 0x000001c7 +- +-#define REG_A2XX_CP_RB_WPTR_BASE 0x000001c8 +- +-#define REG_A2XX_CP_QUEUE_THRESHOLDS 0x000001d5 +- +-#define REG_A2XX_SCRATCH_UMSK 0x000001dc +- +-#define REG_A2XX_SCRATCH_ADDR 0x000001dd +- +-#define REG_A2XX_CP_STATE_DEBUG_INDEX 0x000001ec +- +-#define REG_A2XX_CP_STATE_DEBUG_DATA 0x000001ed +- +-#define REG_A2XX_CP_INT_CNTL 0x000001f2 +- +-#define REG_A2XX_CP_INT_STATUS 0x000001f3 +- +-#define REG_A2XX_CP_INT_ACK 0x000001f4 +- +-#define REG_A2XX_CP_ME_CNTL 0x000001f6 +- +-#define REG_A2XX_CP_ME_STATUS 0x000001f7 +- +-#define REG_A2XX_CP_ME_RAM_WADDR 0x000001f8 +- +-#define REG_A2XX_CP_ME_RAM_RADDR 0x000001f9 +- +-#define REG_A2XX_CP_ME_RAM_DATA 0x000001fa +- +-#define REG_A2XX_CP_DEBUG 0x000001fc +- +-#define REG_A2XX_CP_CSQ_RB_STAT 0x000001fd +- 
+-#define REG_A2XX_CP_CSQ_IB1_STAT 0x000001fe +- +-#define REG_A2XX_CP_CSQ_IB2_STAT 0x000001ff +- + #define REG_A2XX_RBBM_PERFCOUNTER1_SELECT 0x00000395 + + #define REG_A2XX_RBBM_PERFCOUNTER1_LO 0x00000397 +@@ -338,11 +290,32 @@ enum sq_tex_filter { + + #define REG_A2XX_CP_STAT 0x0000047f + +-#define REG_A2XX_SCRATCH_REG0 0x00000578 +- +-#define REG_A2XX_SCRATCH_REG2 0x0000057a +- + #define REG_A2XX_RBBM_STATUS 0x000005d0 ++#define A2XX_RBBM_STATUS_CMDFIFO_AVAIL__MASK 0x0000001f ++#define A2XX_RBBM_STATUS_CMDFIFO_AVAIL__SHIFT 0 ++static inline uint32_t A2XX_RBBM_STATUS_CMDFIFO_AVAIL(uint32_t val) ++{ ++ return ((val) << A2XX_RBBM_STATUS_CMDFIFO_AVAIL__SHIFT) & A2XX_RBBM_STATUS_CMDFIFO_AVAIL__MASK; ++} ++#define A2XX_RBBM_STATUS_TC_BUSY 0x00000020 ++#define A2XX_RBBM_STATUS_HIRQ_PENDING 0x00000100 ++#define A2XX_RBBM_STATUS_CPRQ_PENDING 0x00000200 ++#define A2XX_RBBM_STATUS_CFRQ_PENDING 0x00000400 ++#define A2XX_RBBM_STATUS_PFRQ_PENDING 0x00000800 ++#define A2XX_RBBM_STATUS_VGT_BUSY_NO_DMA 0x00001000 ++#define A2XX_RBBM_STATUS_RBBM_WU_BUSY 0x00004000 ++#define A2XX_RBBM_STATUS_CP_NRT_BUSY 0x00010000 ++#define A2XX_RBBM_STATUS_MH_BUSY 0x00040000 ++#define A2XX_RBBM_STATUS_MH_COHERENCY_BUSY 0x00080000 ++#define A2XX_RBBM_STATUS_SX_BUSY 0x00200000 ++#define A2XX_RBBM_STATUS_TPC_BUSY 0x00400000 ++#define A2XX_RBBM_STATUS_SC_CNTX_BUSY 0x01000000 ++#define A2XX_RBBM_STATUS_PA_BUSY 0x02000000 ++#define A2XX_RBBM_STATUS_VGT_BUSY 0x04000000 ++#define A2XX_RBBM_STATUS_SQ_CNTX17_BUSY 0x08000000 ++#define A2XX_RBBM_STATUS_SQ_CNTX0_BUSY 0x10000000 ++#define A2XX_RBBM_STATUS_RB_CNTX_BUSY 0x40000000 ++#define A2XX_RBBM_STATUS_GUI_ACTIVE 0x80000000 + + #define REG_A2XX_A220_VSC_BIN_SIZE 0x00000c01 + #define A2XX_A220_VSC_BIN_SIZE_WIDTH__MASK 0x0000001f +@@ -358,13 +331,13 @@ static inline uint32_t A2XX_A220_VSC_BIN_SIZE_HEIGHT(uint32_t val) + return ((val >> 5) << A2XX_A220_VSC_BIN_SIZE_HEIGHT__SHIFT) & A2XX_A220_VSC_BIN_SIZE_HEIGHT__MASK; + } + +-#define REG_A2XX_VSC_PIPE(i0) 
(0x00000c06 + 0x3*(i0)) ++static inline uint32_t REG_A2XX_VSC_PIPE(uint32_t i0) { return 0x00000c06 + 0x3*i0; } + +-#define REG_A2XX_VSC_PIPE_CONFIG(i0) (0x00000c06 + 0x3*(i0)) ++static inline uint32_t REG_A2XX_VSC_PIPE_CONFIG(uint32_t i0) { return 0x00000c06 + 0x3*i0; } + +-#define REG_A2XX_VSC_PIPE_DATA_ADDRESS(i0) (0x00000c07 + 0x3*(i0)) ++static inline uint32_t REG_A2XX_VSC_PIPE_DATA_ADDRESS(uint32_t i0) { return 0x00000c07 + 0x3*i0; } + +-#define REG_A2XX_VSC_PIPE_DATA_LENGTH(i0) (0x00000c08 + 0x3*(i0)) ++static inline uint32_t REG_A2XX_VSC_PIPE_DATA_LENGTH(uint32_t i0) { return 0x00000c08 + 0x3*i0; } + + #define REG_A2XX_PC_DEBUG_CNTL 0x00000c38 + +diff --git a/src/gallium/drivers/freedreno/a3xx/a3xx.xml.h b/src/gallium/drivers/freedreno/a3xx/a3xx.xml.h +index c7f5085..d183516 100644 +--- a/src/gallium/drivers/freedreno/a3xx/a3xx.xml.h ++++ b/src/gallium/drivers/freedreno/a3xx/a3xx.xml.h +@@ -8,10 +8,12 @@ http://0x04.net/cgit/index.cgi/rules-ng-ng + git clone git://0x04.net/rules-ng-ng + + The rules-ng-ng source files this header was generated from are: +-- /home/robclark/src/freedreno/envytools/rnndb/a3xx.xml ( 42578 bytes, from 2013-06-02 13:10:46) ++- /home/robclark/src/freedreno/envytools/rnndb/adreno.xml ( 327 bytes, from 2013-07-05 19:21:12) + - /home/robclark/src/freedreno/envytools/rnndb/freedreno_copyright.xml ( 1453 bytes, from 2013-03-31 16:51:27) +-- /home/robclark/src/freedreno/envytools/rnndb/adreno_common.xml ( 3094 bytes, from 2013-05-05 18:29:22) ++- /home/robclark/src/freedreno/envytools/rnndb/a2xx/a2xx.xml ( 30005 bytes, from 2013-07-19 21:30:48) ++- /home/robclark/src/freedreno/envytools/rnndb/adreno_common.xml ( 8983 bytes, from 2013-07-24 01:38:36) + - /home/robclark/src/freedreno/envytools/rnndb/adreno_pm4.xml ( 9712 bytes, from 2013-05-26 15:22:37) ++- /home/robclark/src/freedreno/envytools/rnndb/a3xx/a3xx.xml ( 51415 bytes, from 2013-08-03 14:26:05) + + Copyright (C) 2013 by the following authors: + - Rob Clark (robclark) +@@ -130,6 
+132,13 @@ enum a3xx_tex_fmt { + TFMT_NORM_USHORT_5551 = 6, + TFMT_NORM_USHORT_4444 = 7, + TFMT_NORM_UINT_X8Z24 = 10, ++ TFMT_NORM_UINT_NV12_UV_TILED = 17, ++ TFMT_NORM_UINT_NV12_Y_TILED = 19, ++ TFMT_NORM_UINT_NV12_UV = 21, ++ TFMT_NORM_UINT_NV12_Y = 23, ++ TFMT_NORM_UINT_I420_Y = 24, ++ TFMT_NORM_UINT_I420_U = 26, ++ TFMT_NORM_UINT_I420_V = 27, + TFMT_NORM_UINT_2_10_10_10 = 41, + TFMT_NORM_UINT_A8 = 44, + TFMT_NORM_UINT_L8_A8 = 47, +@@ -207,6 +216,37 @@ enum a3xx_tex_swiz { + A3XX_TEX_ONE = 5, + }; + ++enum a3xx_tex_type { ++ A3XX_TEX_1D = 0, ++ A3XX_TEX_2D = 1, ++ A3XX_TEX_CUBE = 2, ++ A3XX_TEX_3D = 3, ++}; ++ ++#define A3XX_INT0_RBBM_GPU_IDLE 0x00000001 ++#define A3XX_INT0_RBBM_AHB_ERROR 0x00000002 ++#define A3XX_INT0_RBBM_REG_TIMEOUT 0x00000004 ++#define A3XX_INT0_RBBM_ME_MS_TIMEOUT 0x00000008 ++#define A3XX_INT0_RBBM_PFP_MS_TIMEOUT 0x00000010 ++#define A3XX_INT0_RBBM_ATB_BUS_OVERFLOW 0x00000020 ++#define A3XX_INT0_VFD_ERROR 0x00000040 ++#define A3XX_INT0_CP_SW_INT 0x00000080 ++#define A3XX_INT0_CP_T0_PACKET_IN_IB 0x00000100 ++#define A3XX_INT0_CP_OPCODE_ERROR 0x00000200 ++#define A3XX_INT0_CP_RESERVED_BIT_ERROR 0x00000400 ++#define A3XX_INT0_CP_HW_FAULT 0x00000800 ++#define A3XX_INT0_CP_DMA 0x00001000 ++#define A3XX_INT0_CP_IB2_INT 0x00002000 ++#define A3XX_INT0_CP_IB1_INT 0x00004000 ++#define A3XX_INT0_CP_RB_INT 0x00008000 ++#define A3XX_INT0_CP_REG_PROTECT_FAULT 0x00010000 ++#define A3XX_INT0_CP_RB_DONE_TS 0x00020000 ++#define A3XX_INT0_CP_VS_DONE_TS 0x00040000 ++#define A3XX_INT0_CP_PS_DONE_TS 0x00080000 ++#define A3XX_INT0_CACHE_FLUSH_TS 0x00100000 ++#define A3XX_INT0_CP_AHB_ERROR_HALT 0x00200000 ++#define A3XX_INT0_MISC_HANG_DETECT 0x01000000 ++#define A3XX_INT0_UCHE_OOB_ACCESS 0x02000000 + #define REG_A3XX_RBBM_HW_VERSION 0x00000000 + + #define REG_A3XX_RBBM_HW_RELEASE 0x00000001 +@@ -230,6 +270,27 @@ enum a3xx_tex_swiz { + #define REG_A3XX_RBBM_GPR0_CTL 0x0000002e + + #define REG_A3XX_RBBM_STATUS 0x00000030 ++#define A3XX_RBBM_STATUS_HI_BUSY 0x00000001 
++#define A3XX_RBBM_STATUS_CP_ME_BUSY 0x00000002 ++#define A3XX_RBBM_STATUS_CP_PFP_BUSY 0x00000004 ++#define A3XX_RBBM_STATUS_CP_NRT_BUSY 0x00004000 ++#define A3XX_RBBM_STATUS_VBIF_BUSY 0x00008000 ++#define A3XX_RBBM_STATUS_TSE_BUSY 0x00010000 ++#define A3XX_RBBM_STATUS_RAS_BUSY 0x00020000 ++#define A3XX_RBBM_STATUS_RB_BUSY 0x00040000 ++#define A3XX_RBBM_STATUS_PC_DCALL_BUSY 0x00080000 ++#define A3XX_RBBM_STATUS_PC_VSD_BUSY 0x00100000 ++#define A3XX_RBBM_STATUS_VFD_BUSY 0x00200000 ++#define A3XX_RBBM_STATUS_VPC_BUSY 0x00400000 ++#define A3XX_RBBM_STATUS_UCHE_BUSY 0x00800000 ++#define A3XX_RBBM_STATUS_SP_BUSY 0x01000000 ++#define A3XX_RBBM_STATUS_TPL1_BUSY 0x02000000 ++#define A3XX_RBBM_STATUS_MARB_BUSY 0x04000000 ++#define A3XX_RBBM_STATUS_VSC_BUSY 0x08000000 ++#define A3XX_RBBM_STATUS_ARB_BUSY 0x10000000 ++#define A3XX_RBBM_STATUS_HLSQ_BUSY 0x20000000 ++#define A3XX_RBBM_STATUS_GPU_BUSY_NOHC 0x40000000 ++#define A3XX_RBBM_STATUS_GPU_BUSY 0x80000000 + + #define REG_A3XX_RBBM_WAIT_IDLE_CLOCKS_CTL 0x00000033 + +@@ -251,20 +312,202 @@ enum a3xx_tex_swiz { + + #define REG_A3XX_RBBM_PERFCTR_CTL 0x00000080 + ++#define REG_A3XX_RBBM_PERFCTR_LOAD_CMD0 0x00000081 ++ ++#define REG_A3XX_RBBM_PERFCTR_LOAD_CMD1 0x00000082 ++ ++#define REG_A3XX_RBBM_PERFCTR_LOAD_VALUE_LO 0x00000084 ++ ++#define REG_A3XX_RBBM_PERFCTR_LOAD_VALUE_HI 0x00000085 ++ ++#define REG_A3XX_RBBM_PERFCOUNTER0_SELECT 0x00000086 ++ ++#define REG_A3XX_RBBM_PERFCOUNTER1_SELECT 0x00000087 ++ + #define REG_A3XX_RBBM_GPU_BUSY_MASKED 0x00000088 + ++#define REG_A3XX_RBBM_PERFCTR_CP_0_LO 0x00000090 ++ ++#define REG_A3XX_RBBM_PERFCTR_CP_0_HI 0x00000091 ++ ++#define REG_A3XX_RBBM_PERFCTR_RBBM_0_LO 0x00000092 ++ ++#define REG_A3XX_RBBM_PERFCTR_RBBM_0_HI 0x00000093 ++ ++#define REG_A3XX_RBBM_PERFCTR_RBBM_1_LO 0x00000094 ++ ++#define REG_A3XX_RBBM_PERFCTR_RBBM_1_HI 0x00000095 ++ ++#define REG_A3XX_RBBM_PERFCTR_PC_0_LO 0x00000096 ++ ++#define REG_A3XX_RBBM_PERFCTR_PC_0_HI 0x00000097 ++ ++#define 
REG_A3XX_RBBM_PERFCTR_PC_1_LO 0x00000098 ++ ++#define REG_A3XX_RBBM_PERFCTR_PC_1_HI 0x00000099 ++ ++#define REG_A3XX_RBBM_PERFCTR_PC_2_LO 0x0000009a ++ ++#define REG_A3XX_RBBM_PERFCTR_PC_2_HI 0x0000009b ++ ++#define REG_A3XX_RBBM_PERFCTR_PC_3_LO 0x0000009c ++ ++#define REG_A3XX_RBBM_PERFCTR_PC_3_HI 0x0000009d ++ ++#define REG_A3XX_RBBM_PERFCTR_VFD_0_LO 0x0000009e ++ ++#define REG_A3XX_RBBM_PERFCTR_VFD_0_HI 0x0000009f ++ ++#define REG_A3XX_RBBM_PERFCTR_VFD_1_LO 0x000000a0 ++ ++#define REG_A3XX_RBBM_PERFCTR_VFD_1_HI 0x000000a1 ++ ++#define REG_A3XX_RBBM_PERFCTR_HLSQ_0_LO 0x000000a2 ++ ++#define REG_A3XX_RBBM_PERFCTR_HLSQ_0_HI 0x000000a3 ++ ++#define REG_A3XX_RBBM_PERFCTR_HLSQ_1_LO 0x000000a4 ++ ++#define REG_A3XX_RBBM_PERFCTR_HLSQ_1_HI 0x000000a5 ++ ++#define REG_A3XX_RBBM_PERFCTR_HLSQ_2_LO 0x000000a6 ++ ++#define REG_A3XX_RBBM_PERFCTR_HLSQ_2_HI 0x000000a7 ++ ++#define REG_A3XX_RBBM_PERFCTR_HLSQ_3_LO 0x000000a8 ++ ++#define REG_A3XX_RBBM_PERFCTR_HLSQ_3_HI 0x000000a9 ++ ++#define REG_A3XX_RBBM_PERFCTR_HLSQ_4_LO 0x000000aa ++ ++#define REG_A3XX_RBBM_PERFCTR_HLSQ_4_HI 0x000000ab ++ ++#define REG_A3XX_RBBM_PERFCTR_HLSQ_5_LO 0x000000ac ++ ++#define REG_A3XX_RBBM_PERFCTR_HLSQ_5_HI 0x000000ad ++ ++#define REG_A3XX_RBBM_PERFCTR_VPC_0_LO 0x000000ae ++ ++#define REG_A3XX_RBBM_PERFCTR_VPC_0_HI 0x000000af ++ ++#define REG_A3XX_RBBM_PERFCTR_VPC_1_LO 0x000000b0 ++ ++#define REG_A3XX_RBBM_PERFCTR_VPC_1_HI 0x000000b1 ++ ++#define REG_A3XX_RBBM_PERFCTR_TSE_0_LO 0x000000b2 ++ ++#define REG_A3XX_RBBM_PERFCTR_TSE_0_HI 0x000000b3 ++ ++#define REG_A3XX_RBBM_PERFCTR_TSE_1_LO 0x000000b4 ++ ++#define REG_A3XX_RBBM_PERFCTR_TSE_1_HI 0x000000b5 ++ ++#define REG_A3XX_RBBM_PERFCTR_RAS_0_LO 0x000000b6 ++ ++#define REG_A3XX_RBBM_PERFCTR_RAS_0_HI 0x000000b7 ++ ++#define REG_A3XX_RBBM_PERFCTR_RAS_1_LO 0x000000b8 ++ ++#define REG_A3XX_RBBM_PERFCTR_RAS_1_HI 0x000000b9 ++ ++#define REG_A3XX_RBBM_PERFCTR_UCHE_0_LO 0x000000ba ++ ++#define REG_A3XX_RBBM_PERFCTR_UCHE_0_HI 0x000000bb ++ ++#define 
REG_A3XX_RBBM_PERFCTR_UCHE_1_LO 0x000000bc ++ ++#define REG_A3XX_RBBM_PERFCTR_UCHE_1_HI 0x000000bd ++ ++#define REG_A3XX_RBBM_PERFCTR_UCHE_2_LO 0x000000be ++ ++#define REG_A3XX_RBBM_PERFCTR_UCHE_2_HI 0x000000bf ++ ++#define REG_A3XX_RBBM_PERFCTR_UCHE_3_LO 0x000000c0 ++ ++#define REG_A3XX_RBBM_PERFCTR_UCHE_3_HI 0x000000c1 ++ ++#define REG_A3XX_RBBM_PERFCTR_UCHE_4_LO 0x000000c2 ++ ++#define REG_A3XX_RBBM_PERFCTR_UCHE_4_HI 0x000000c3 ++ ++#define REG_A3XX_RBBM_PERFCTR_UCHE_5_LO 0x000000c4 ++ ++#define REG_A3XX_RBBM_PERFCTR_UCHE_5_HI 0x000000c5 ++ ++#define REG_A3XX_RBBM_PERFCTR_TP_0_LO 0x000000c6 ++ ++#define REG_A3XX_RBBM_PERFCTR_TP_0_HI 0x000000c7 ++ ++#define REG_A3XX_RBBM_PERFCTR_TP_1_LO 0x000000c8 ++ ++#define REG_A3XX_RBBM_PERFCTR_TP_1_HI 0x000000c9 ++ ++#define REG_A3XX_RBBM_PERFCTR_TP_2_LO 0x000000ca ++ ++#define REG_A3XX_RBBM_PERFCTR_TP_2_HI 0x000000cb ++ ++#define REG_A3XX_RBBM_PERFCTR_TP_3_LO 0x000000cc ++ ++#define REG_A3XX_RBBM_PERFCTR_TP_3_HI 0x000000cd ++ ++#define REG_A3XX_RBBM_PERFCTR_TP_4_LO 0x000000ce ++ ++#define REG_A3XX_RBBM_PERFCTR_TP_4_HI 0x000000cf ++ ++#define REG_A3XX_RBBM_PERFCTR_TP_5_LO 0x000000d0 ++ ++#define REG_A3XX_RBBM_PERFCTR_TP_5_HI 0x000000d1 ++ ++#define REG_A3XX_RBBM_PERFCTR_SP_0_LO 0x000000d2 ++ ++#define REG_A3XX_RBBM_PERFCTR_SP_0_HI 0x000000d3 ++ ++#define REG_A3XX_RBBM_PERFCTR_SP_1_LO 0x000000d4 ++ ++#define REG_A3XX_RBBM_PERFCTR_SP_1_HI 0x000000d5 ++ ++#define REG_A3XX_RBBM_PERFCTR_SP_2_LO 0x000000d6 ++ ++#define REG_A3XX_RBBM_PERFCTR_SP_2_HI 0x000000d7 ++ ++#define REG_A3XX_RBBM_PERFCTR_SP_3_LO 0x000000d8 ++ ++#define REG_A3XX_RBBM_PERFCTR_SP_3_HI 0x000000d9 ++ ++#define REG_A3XX_RBBM_PERFCTR_SP_4_LO 0x000000da ++ ++#define REG_A3XX_RBBM_PERFCTR_SP_4_HI 0x000000db ++ ++#define REG_A3XX_RBBM_PERFCTR_SP_5_LO 0x000000dc ++ ++#define REG_A3XX_RBBM_PERFCTR_SP_5_HI 0x000000dd ++ ++#define REG_A3XX_RBBM_PERFCTR_SP_6_LO 0x000000de ++ ++#define REG_A3XX_RBBM_PERFCTR_SP_6_HI 0x000000df ++ + #define REG_A3XX_RBBM_PERFCTR_SP_7_LO 
0x000000e0 + + #define REG_A3XX_RBBM_PERFCTR_SP_7_HI 0x000000e1 + ++#define REG_A3XX_RBBM_PERFCTR_RB_0_LO 0x000000e2 ++ ++#define REG_A3XX_RBBM_PERFCTR_RB_0_HI 0x000000e3 ++ ++#define REG_A3XX_RBBM_PERFCTR_RB_1_LO 0x000000e4 ++ ++#define REG_A3XX_RBBM_PERFCTR_RB_1_HI 0x000000e5 ++ ++#define REG_A3XX_RBBM_PERFCTR_PWR_0_LO 0x000000ea ++ ++#define REG_A3XX_RBBM_PERFCTR_PWR_0_HI 0x000000eb ++ + #define REG_A3XX_RBBM_PERFCTR_PWR_1_LO 0x000000ec + + #define REG_A3XX_RBBM_PERFCTR_PWR_1_HI 0x000000ed + + #define REG_A3XX_RBBM_RBBM_CTL 0x00000100 + +-#define REG_A3XX_RBBM_RBBM_CTL 0x00000100 +- + #define REG_A3XX_RBBM_DEBUG_BUS_CTL 0x00000111 + + #define REG_A3XX_RBBM_DEBUG_BUS_DATA_STATUS 0x00000112 +@@ -287,22 +530,20 @@ enum a3xx_tex_swiz { + + #define REG_A3XX_CP_MEQ_DATA 0x000001db + ++#define REG_A3XX_CP_PERFCOUNTER_SELECT 0x00000445 ++ + #define REG_A3XX_CP_HW_FAULT 0x0000045c + + #define REG_A3XX_CP_PROTECT_CTRL 0x0000045e + + #define REG_A3XX_CP_PROTECT_STATUS 0x0000045f + +-#define REG_A3XX_CP_PROTECT(i0) (0x00000460 + 0x1*(i0)) ++static inline uint32_t REG_A3XX_CP_PROTECT(uint32_t i0) { return 0x00000460 + 0x1*i0; } + +-#define REG_A3XX_CP_PROTECT_REG(i0) (0x00000460 + 0x1*(i0)) ++static inline uint32_t REG_A3XX_CP_PROTECT_REG(uint32_t i0) { return 0x00000460 + 0x1*i0; } + + #define REG_A3XX_CP_AHB_FAULT 0x0000054d + +-#define REG_A3XX_CP_SCRATCH_REG2 0x0000057a +- +-#define REG_A3XX_CP_SCRATCH_REG3 0x0000057b +- + #define REG_A3XX_GRAS_CL_CLIP_CNTL 0x00002040 + #define A3XX_GRAS_CL_CLIP_CNTL_IJ_PERSP_CENTER 0x00001000 + #define A3XX_GRAS_CL_CLIP_CNTL_CLIP_DISABLE 0x00010000 +@@ -528,9 +769,9 @@ static inline uint32_t A3XX_RB_MSAA_CONTROL_SAMPLE_MASK(uint32_t val) + + #define REG_A3XX_UNKNOWN_20C3 0x000020c3 + +-#define REG_A3XX_RB_MRT(i0) (0x000020c4 + 0x4*(i0)) ++static inline uint32_t REG_A3XX_RB_MRT(uint32_t i0) { return 0x000020c4 + 0x4*i0; } + +-#define REG_A3XX_RB_MRT_CONTROL(i0) (0x000020c4 + 0x4*(i0)) ++static inline uint32_t 
REG_A3XX_RB_MRT_CONTROL(uint32_t i0) { return 0x000020c4 + 0x4*i0; } + #define A3XX_RB_MRT_CONTROL_READ_DEST_ENABLE 0x00000008 + #define A3XX_RB_MRT_CONTROL_BLEND 0x00000010 + #define A3XX_RB_MRT_CONTROL_BLEND2 0x00000020 +@@ -553,7 +794,7 @@ static inline uint32_t A3XX_RB_MRT_CONTROL_COMPONENT_ENABLE(uint32_t val) + return ((val) << A3XX_RB_MRT_CONTROL_COMPONENT_ENABLE__SHIFT) & A3XX_RB_MRT_CONTROL_COMPONENT_ENABLE__MASK; + } + +-#define REG_A3XX_RB_MRT_BUF_INFO(i0) (0x000020c5 + 0x4*(i0)) ++static inline uint32_t REG_A3XX_RB_MRT_BUF_INFO(uint32_t i0) { return 0x000020c5 + 0x4*i0; } + #define A3XX_RB_MRT_BUF_INFO_COLOR_FORMAT__MASK 0x0000003f + #define A3XX_RB_MRT_BUF_INFO_COLOR_FORMAT__SHIFT 0 + static inline uint32_t A3XX_RB_MRT_BUF_INFO_COLOR_FORMAT(enum a3xx_color_fmt val) +@@ -579,7 +820,7 @@ static inline uint32_t A3XX_RB_MRT_BUF_INFO_COLOR_BUF_PITCH(uint32_t val) + return ((val >> 5) << A3XX_RB_MRT_BUF_INFO_COLOR_BUF_PITCH__SHIFT) & A3XX_RB_MRT_BUF_INFO_COLOR_BUF_PITCH__MASK; + } + +-#define REG_A3XX_RB_MRT_BUF_BASE(i0) (0x000020c6 + 0x4*(i0)) ++static inline uint32_t REG_A3XX_RB_MRT_BUF_BASE(uint32_t i0) { return 0x000020c6 + 0x4*i0; } + #define A3XX_RB_MRT_BUF_BASE_COLOR_BUF_BASE__MASK 0xfffffff0 + #define A3XX_RB_MRT_BUF_BASE_COLOR_BUF_BASE__SHIFT 4 + static inline uint32_t A3XX_RB_MRT_BUF_BASE_COLOR_BUF_BASE(uint32_t val) +@@ -587,7 +828,7 @@ static inline uint32_t A3XX_RB_MRT_BUF_BASE_COLOR_BUF_BASE(uint32_t val) + return ((val >> 5) << A3XX_RB_MRT_BUF_BASE_COLOR_BUF_BASE__SHIFT) & A3XX_RB_MRT_BUF_BASE_COLOR_BUF_BASE__MASK; + } + +-#define REG_A3XX_RB_MRT_BLEND_CONTROL(i0) (0x000020c7 + 0x4*(i0)) ++static inline uint32_t REG_A3XX_RB_MRT_BLEND_CONTROL(uint32_t i0) { return 0x000020c7 + 0x4*i0; } + #define A3XX_RB_MRT_BLEND_CONTROL_RGB_SRC_FACTOR__MASK 0x0000001f + #define A3XX_RB_MRT_BLEND_CONTROL_RGB_SRC_FACTOR__SHIFT 0 + static inline uint32_t A3XX_RB_MRT_BLEND_CONTROL_RGB_SRC_FACTOR(enum adreno_rb_blend_factor val) +@@ -627,12 +868,60 @@ static 
inline uint32_t A3XX_RB_MRT_BLEND_CONTROL_ALPHA_DEST_FACTOR(enum adreno_r + #define A3XX_RB_MRT_BLEND_CONTROL_CLAMP_ENABLE 0x20000000 + + #define REG_A3XX_RB_BLEND_RED 0x000020e4 ++#define A3XX_RB_BLEND_RED_UINT__MASK 0x000000ff ++#define A3XX_RB_BLEND_RED_UINT__SHIFT 0 ++static inline uint32_t A3XX_RB_BLEND_RED_UINT(uint32_t val) ++{ ++ return ((val) << A3XX_RB_BLEND_RED_UINT__SHIFT) & A3XX_RB_BLEND_RED_UINT__MASK; ++} ++#define A3XX_RB_BLEND_RED_FLOAT__MASK 0xffff0000 ++#define A3XX_RB_BLEND_RED_FLOAT__SHIFT 16 ++static inline uint32_t A3XX_RB_BLEND_RED_FLOAT(float val) ++{ ++ return ((util_float_to_half(val)) << A3XX_RB_BLEND_RED_FLOAT__SHIFT) & A3XX_RB_BLEND_RED_FLOAT__MASK; ++} + + #define REG_A3XX_RB_BLEND_GREEN 0x000020e5 ++#define A3XX_RB_BLEND_GREEN_UINT__MASK 0x000000ff ++#define A3XX_RB_BLEND_GREEN_UINT__SHIFT 0 ++static inline uint32_t A3XX_RB_BLEND_GREEN_UINT(uint32_t val) ++{ ++ return ((val) << A3XX_RB_BLEND_GREEN_UINT__SHIFT) & A3XX_RB_BLEND_GREEN_UINT__MASK; ++} ++#define A3XX_RB_BLEND_GREEN_FLOAT__MASK 0xffff0000 ++#define A3XX_RB_BLEND_GREEN_FLOAT__SHIFT 16 ++static inline uint32_t A3XX_RB_BLEND_GREEN_FLOAT(float val) ++{ ++ return ((util_float_to_half(val)) << A3XX_RB_BLEND_GREEN_FLOAT__SHIFT) & A3XX_RB_BLEND_GREEN_FLOAT__MASK; ++} + + #define REG_A3XX_RB_BLEND_BLUE 0x000020e6 ++#define A3XX_RB_BLEND_BLUE_UINT__MASK 0x000000ff ++#define A3XX_RB_BLEND_BLUE_UINT__SHIFT 0 ++static inline uint32_t A3XX_RB_BLEND_BLUE_UINT(uint32_t val) ++{ ++ return ((val) << A3XX_RB_BLEND_BLUE_UINT__SHIFT) & A3XX_RB_BLEND_BLUE_UINT__MASK; ++} ++#define A3XX_RB_BLEND_BLUE_FLOAT__MASK 0xffff0000 ++#define A3XX_RB_BLEND_BLUE_FLOAT__SHIFT 16 ++static inline uint32_t A3XX_RB_BLEND_BLUE_FLOAT(float val) ++{ ++ return ((util_float_to_half(val)) << A3XX_RB_BLEND_BLUE_FLOAT__SHIFT) & A3XX_RB_BLEND_BLUE_FLOAT__MASK; ++} + + #define REG_A3XX_RB_BLEND_ALPHA 0x000020e7 ++#define A3XX_RB_BLEND_ALPHA_UINT__MASK 0x000000ff ++#define A3XX_RB_BLEND_ALPHA_UINT__SHIFT 0 ++static inline 
uint32_t A3XX_RB_BLEND_ALPHA_UINT(uint32_t val) ++{ ++ return ((val) << A3XX_RB_BLEND_ALPHA_UINT__SHIFT) & A3XX_RB_BLEND_ALPHA_UINT__MASK; ++} ++#define A3XX_RB_BLEND_ALPHA_FLOAT__MASK 0xffff0000 ++#define A3XX_RB_BLEND_ALPHA_FLOAT__SHIFT 16 ++static inline uint32_t A3XX_RB_BLEND_ALPHA_FLOAT(float val) ++{ ++ return ((util_float_to_half(val)) << A3XX_RB_BLEND_ALPHA_FLOAT__SHIFT) & A3XX_RB_BLEND_ALPHA_FLOAT__MASK; ++} + + #define REG_A3XX_UNKNOWN_20E8 0x000020e8 + +@@ -1063,9 +1352,9 @@ static inline uint32_t A3XX_VFD_CONTROL_1_REGID4INST(uint32_t val) + + #define REG_A3XX_VFD_INDEX_OFFSET 0x00002245 + +-#define REG_A3XX_VFD_FETCH(i0) (0x00002246 + 0x2*(i0)) ++static inline uint32_t REG_A3XX_VFD_FETCH(uint32_t i0) { return 0x00002246 + 0x2*i0; } + +-#define REG_A3XX_VFD_FETCH_INSTR_0(i0) (0x00002246 + 0x2*(i0)) ++static inline uint32_t REG_A3XX_VFD_FETCH_INSTR_0(uint32_t i0) { return 0x00002246 + 0x2*i0; } + #define A3XX_VFD_FETCH_INSTR_0_FETCHSIZE__MASK 0x0000007f + #define A3XX_VFD_FETCH_INSTR_0_FETCHSIZE__SHIFT 0 + static inline uint32_t A3XX_VFD_FETCH_INSTR_0_FETCHSIZE(uint32_t val) +@@ -1092,11 +1381,11 @@ static inline uint32_t A3XX_VFD_FETCH_INSTR_0_STEPRATE(uint32_t val) + return ((val) << A3XX_VFD_FETCH_INSTR_0_STEPRATE__SHIFT) & A3XX_VFD_FETCH_INSTR_0_STEPRATE__MASK; + } + +-#define REG_A3XX_VFD_FETCH_INSTR_1(i0) (0x00002247 + 0x2*(i0)) ++static inline uint32_t REG_A3XX_VFD_FETCH_INSTR_1(uint32_t i0) { return 0x00002247 + 0x2*i0; } + +-#define REG_A3XX_VFD_DECODE(i0) (0x00002266 + 0x1*(i0)) ++static inline uint32_t REG_A3XX_VFD_DECODE(uint32_t i0) { return 0x00002266 + 0x1*i0; } + +-#define REG_A3XX_VFD_DECODE_INSTR(i0) (0x00002266 + 0x1*(i0)) ++static inline uint32_t REG_A3XX_VFD_DECODE_INSTR(uint32_t i0) { return 0x00002266 + 0x1*i0; } + #define A3XX_VFD_DECODE_INSTR_WRITEMASK__MASK 0x0000000f + #define A3XX_VFD_DECODE_INSTR_WRITEMASK__SHIFT 0 + static inline uint32_t A3XX_VFD_DECODE_INSTR_WRITEMASK(uint32_t val) +@@ -1173,13 +1462,13 @@ static inline 
uint32_t A3XX_VPC_PACK_NUMNONPOSVSVAR(uint32_t val) + return ((val) << A3XX_VPC_PACK_NUMNONPOSVSVAR__SHIFT) & A3XX_VPC_PACK_NUMNONPOSVSVAR__MASK; + } + +-#define REG_A3XX_VPC_VARYING_INTERP(i0) (0x00002282 + 0x1*(i0)) ++static inline uint32_t REG_A3XX_VPC_VARYING_INTERP(uint32_t i0) { return 0x00002282 + 0x1*i0; } + +-#define REG_A3XX_VPC_VARYING_INTERP_MODE(i0) (0x00002282 + 0x1*(i0)) ++static inline uint32_t REG_A3XX_VPC_VARYING_INTERP_MODE(uint32_t i0) { return 0x00002282 + 0x1*i0; } + +-#define REG_A3XX_VPC_VARYING_PS_REPL(i0) (0x00002286 + 0x1*(i0)) ++static inline uint32_t REG_A3XX_VPC_VARYING_PS_REPL(uint32_t i0) { return 0x00002286 + 0x1*i0; } + +-#define REG_A3XX_VPC_VARYING_PS_REPL_MODE(i0) (0x00002286 + 0x1*(i0)) ++static inline uint32_t REG_A3XX_VPC_VARYING_PS_REPL_MODE(uint32_t i0) { return 0x00002286 + 0x1*i0; } + + #define REG_A3XX_VPC_VARY_CYLWRAP_ENABLE_0 0x0000228a + +@@ -1293,9 +1582,9 @@ static inline uint32_t A3XX_SP_VS_PARAM_REG_TOTALVSOUTVAR(uint32_t val) + return ((val) << A3XX_SP_VS_PARAM_REG_TOTALVSOUTVAR__SHIFT) & A3XX_SP_VS_PARAM_REG_TOTALVSOUTVAR__MASK; + } + +-#define REG_A3XX_SP_VS_OUT(i0) (0x000022c7 + 0x1*(i0)) ++static inline uint32_t REG_A3XX_SP_VS_OUT(uint32_t i0) { return 0x000022c7 + 0x1*i0; } + +-#define REG_A3XX_SP_VS_OUT_REG(i0) (0x000022c7 + 0x1*(i0)) ++static inline uint32_t REG_A3XX_SP_VS_OUT_REG(uint32_t i0) { return 0x000022c7 + 0x1*i0; } + #define A3XX_SP_VS_OUT_REG_A_REGID__MASK 0x000001ff + #define A3XX_SP_VS_OUT_REG_A_REGID__SHIFT 0 + static inline uint32_t A3XX_SP_VS_OUT_REG_A_REGID(uint32_t val) +@@ -1321,9 +1610,9 @@ static inline uint32_t A3XX_SP_VS_OUT_REG_B_COMPMASK(uint32_t val) + return ((val) << A3XX_SP_VS_OUT_REG_B_COMPMASK__SHIFT) & A3XX_SP_VS_OUT_REG_B_COMPMASK__MASK; + } + +-#define REG_A3XX_SP_VS_VPC_DST(i0) (0x000022d0 + 0x1*(i0)) ++static inline uint32_t REG_A3XX_SP_VS_VPC_DST(uint32_t i0) { return 0x000022d0 + 0x1*i0; } + +-#define REG_A3XX_SP_VS_VPC_DST_REG(i0) (0x000022d0 + 0x1*(i0)) ++static 
inline uint32_t REG_A3XX_SP_VS_VPC_DST_REG(uint32_t i0) { return 0x000022d0 + 0x1*i0; } + #define A3XX_SP_VS_VPC_DST_REG_OUTLOC0__MASK 0x000000ff + #define A3XX_SP_VS_VPC_DST_REG_OUTLOC0__SHIFT 0 + static inline uint32_t A3XX_SP_VS_VPC_DST_REG_OUTLOC0(uint32_t val) +@@ -1480,9 +1769,9 @@ static inline uint32_t A3XX_SP_FS_OBJ_OFFSET_REG_SHADEROBJOFFSET(uint32_t val) + + #define REG_A3XX_SP_FS_OUTPUT_REG 0x000022ec + +-#define REG_A3XX_SP_FS_MRT(i0) (0x000022f0 + 0x1*(i0)) ++static inline uint32_t REG_A3XX_SP_FS_MRT(uint32_t i0) { return 0x000022f0 + 0x1*i0; } + +-#define REG_A3XX_SP_FS_MRT_REG(i0) (0x000022f0 + 0x1*(i0)) ++static inline uint32_t REG_A3XX_SP_FS_MRT_REG(uint32_t i0) { return 0x000022f0 + 0x1*i0; } + #define A3XX_SP_FS_MRT_REG_REGID__MASK 0x000000ff + #define A3XX_SP_FS_MRT_REG_REGID__SHIFT 0 + static inline uint32_t A3XX_SP_FS_MRT_REG_REGID(uint32_t val) +@@ -1491,9 +1780,9 @@ static inline uint32_t A3XX_SP_FS_MRT_REG_REGID(uint32_t val) + } + #define A3XX_SP_FS_MRT_REG_HALF_PRECISION 0x00000100 + +-#define REG_A3XX_SP_FS_IMAGE_OUTPUT(i0) (0x000022f4 + 0x1*(i0)) ++static inline uint32_t REG_A3XX_SP_FS_IMAGE_OUTPUT(uint32_t i0) { return 0x000022f4 + 0x1*i0; } + +-#define REG_A3XX_SP_FS_IMAGE_OUTPUT_REG(i0) (0x000022f4 + 0x1*(i0)) ++static inline uint32_t REG_A3XX_SP_FS_IMAGE_OUTPUT_REG(uint32_t i0) { return 0x000022f4 + 0x1*i0; } + #define A3XX_SP_FS_IMAGE_OUTPUT_REG_MRTFORMAT__MASK 0x0000003f + #define A3XX_SP_FS_IMAGE_OUTPUT_REG_MRTFORMAT__SHIFT 0 + static inline uint32_t A3XX_SP_FS_IMAGE_OUTPUT_REG_MRTFORMAT(enum a3xx_color_fmt val) +@@ -1607,9 +1896,9 @@ static inline uint32_t A3XX_VSC_BIN_SIZE_HEIGHT(uint32_t val) + + #define REG_A3XX_VSC_SIZE_ADDRESS 0x00000c02 + +-#define REG_A3XX_VSC_PIPE(i0) (0x00000c06 + 0x3*(i0)) ++static inline uint32_t REG_A3XX_VSC_PIPE(uint32_t i0) { return 0x00000c06 + 0x3*i0; } + +-#define REG_A3XX_VSC_PIPE_CONFIG(i0) (0x00000c06 + 0x3*(i0)) ++static inline uint32_t REG_A3XX_VSC_PIPE_CONFIG(uint32_t i0) { return 
0x00000c06 + 0x3*i0; } + #define A3XX_VSC_PIPE_CONFIG_X__MASK 0x000003ff + #define A3XX_VSC_PIPE_CONFIG_X__SHIFT 0 + static inline uint32_t A3XX_VSC_PIPE_CONFIG_X(uint32_t val) +@@ -1635,26 +1924,46 @@ static inline uint32_t A3XX_VSC_PIPE_CONFIG_H(uint32_t val) + return ((val) << A3XX_VSC_PIPE_CONFIG_H__SHIFT) & A3XX_VSC_PIPE_CONFIG_H__MASK; + } + +-#define REG_A3XX_VSC_PIPE_DATA_ADDRESS(i0) (0x00000c07 + 0x3*(i0)) ++static inline uint32_t REG_A3XX_VSC_PIPE_DATA_ADDRESS(uint32_t i0) { return 0x00000c07 + 0x3*i0; } + +-#define REG_A3XX_VSC_PIPE_DATA_LENGTH(i0) (0x00000c08 + 0x3*(i0)) ++static inline uint32_t REG_A3XX_VSC_PIPE_DATA_LENGTH(uint32_t i0) { return 0x00000c08 + 0x3*i0; } + + #define REG_A3XX_UNKNOWN_0C3D 0x00000c3d + ++#define REG_A3XX_PC_PERFCOUNTER0_SELECT 0x00000c48 ++ ++#define REG_A3XX_PC_PERFCOUNTER1_SELECT 0x00000c49 ++ ++#define REG_A3XX_PC_PERFCOUNTER2_SELECT 0x00000c4a ++ ++#define REG_A3XX_PC_PERFCOUNTER3_SELECT 0x00000c4b ++ + #define REG_A3XX_UNKNOWN_0C81 0x00000c81 + +-#define REG_A3XX_GRAS_CL_USER_PLANE(i0) (0x00000ca0 + 0x4*(i0)) ++#define REG_A3XX_GRAS_PERFCOUNTER0_SELECT 0x00000c88 ++ ++#define REG_A3XX_GRAS_PERFCOUNTER1_SELECT 0x00000c89 + +-#define REG_A3XX_GRAS_CL_USER_PLANE_X(i0) (0x00000ca0 + 0x4*(i0)) ++#define REG_A3XX_GRAS_PERFCOUNTER2_SELECT 0x00000c8a + +-#define REG_A3XX_GRAS_CL_USER_PLANE_Y(i0) (0x00000ca1 + 0x4*(i0)) ++#define REG_A3XX_GRAS_PERFCOUNTER3_SELECT 0x00000c8b + +-#define REG_A3XX_GRAS_CL_USER_PLANE_Z(i0) (0x00000ca2 + 0x4*(i0)) ++static inline uint32_t REG_A3XX_GRAS_CL_USER_PLANE(uint32_t i0) { return 0x00000ca0 + 0x4*i0; } + +-#define REG_A3XX_GRAS_CL_USER_PLANE_W(i0) (0x00000ca3 + 0x4*(i0)) ++static inline uint32_t REG_A3XX_GRAS_CL_USER_PLANE_X(uint32_t i0) { return 0x00000ca0 + 0x4*i0; } ++ ++static inline uint32_t REG_A3XX_GRAS_CL_USER_PLANE_Y(uint32_t i0) { return 0x00000ca1 + 0x4*i0; } ++ ++static inline uint32_t REG_A3XX_GRAS_CL_USER_PLANE_Z(uint32_t i0) { return 0x00000ca2 + 0x4*i0; } ++ ++static inline 
uint32_t REG_A3XX_GRAS_CL_USER_PLANE_W(uint32_t i0) { return 0x00000ca3 + 0x4*i0; } + + #define REG_A3XX_RB_GMEM_BASE_ADDR 0x00000cc0 + ++#define REG_A3XX_RB_PERFCOUNTER0_SELECT 0x00000cc6 ++ ++#define REG_A3XX_RB_PERFCOUNTER1_SELECT 0x00000cc7 ++ + #define REG_A3XX_RB_WINDOW_SIZE 0x00000ce0 + #define A3XX_RB_WINDOW_SIZE_WIDTH__MASK 0x00003fff + #define A3XX_RB_WINDOW_SIZE_WIDTH__SHIFT 0 +@@ -1669,18 +1978,46 @@ static inline uint32_t A3XX_RB_WINDOW_SIZE_HEIGHT(uint32_t val) + return ((val) << A3XX_RB_WINDOW_SIZE_HEIGHT__SHIFT) & A3XX_RB_WINDOW_SIZE_HEIGHT__MASK; + } + +-#define REG_A3XX_UNKNOWN_0E00 0x00000e00 ++#define REG_A3XX_HLSQ_PERFCOUNTER0_SELECT 0x00000e00 ++ ++#define REG_A3XX_HLSQ_PERFCOUNTER1_SELECT 0x00000e01 ++ ++#define REG_A3XX_HLSQ_PERFCOUNTER2_SELECT 0x00000e02 ++ ++#define REG_A3XX_HLSQ_PERFCOUNTER3_SELECT 0x00000e03 ++ ++#define REG_A3XX_HLSQ_PERFCOUNTER4_SELECT 0x00000e04 ++ ++#define REG_A3XX_HLSQ_PERFCOUNTER5_SELECT 0x00000e05 + + #define REG_A3XX_UNKNOWN_0E43 0x00000e43 + + #define REG_A3XX_VFD_PERFCOUNTER0_SELECT 0x00000e44 + ++#define REG_A3XX_VFD_PERFCOUNTER1_SELECT 0x00000e45 ++ + #define REG_A3XX_VPC_VPC_DEBUG_RAM_SEL 0x00000e61 + + #define REG_A3XX_VPC_VPC_DEBUG_RAM_READ 0x00000e62 + ++#define REG_A3XX_VPC_PERFCOUNTER0_SELECT 0x00000e64 ++ ++#define REG_A3XX_VPC_PERFCOUNTER1_SELECT 0x00000e65 ++ + #define REG_A3XX_UCHE_CACHE_MODE_CONTROL_REG 0x00000e82 + ++#define REG_A3XX_UCHE_PERFCOUNTER0_SELECT 0x00000e84 ++ ++#define REG_A3XX_UCHE_PERFCOUNTER1_SELECT 0x00000e85 ++ ++#define REG_A3XX_UCHE_PERFCOUNTER2_SELECT 0x00000e86 ++ ++#define REG_A3XX_UCHE_PERFCOUNTER3_SELECT 0x00000e87 ++ ++#define REG_A3XX_UCHE_PERFCOUNTER4_SELECT 0x00000e88 ++ ++#define REG_A3XX_UCHE_PERFCOUNTER5_SELECT 0x00000e89 ++ + #define REG_A3XX_UCHE_CACHE_INVALIDATE0_REG 0x00000ea0 + #define A3XX_UCHE_CACHE_INVALIDATE0_REG_ADDR__MASK 0x0fffffff + #define A3XX_UCHE_CACHE_INVALIDATE0_REG_ADDR__SHIFT 0 +@@ -1724,6 +2061,18 @@ static inline uint32_t 
A3XX_UCHE_CACHE_INVALIDATE1_REG_OPCODE(enum a3xx_cache_op + + #define REG_A3XX_UNKNOWN_0F03 0x00000f03 + ++#define REG_A3XX_TP_PERFCOUNTER0_SELECT 0x00000f04 ++ ++#define REG_A3XX_TP_PERFCOUNTER1_SELECT 0x00000f05 ++ ++#define REG_A3XX_TP_PERFCOUNTER2_SELECT 0x00000f06 ++ ++#define REG_A3XX_TP_PERFCOUNTER3_SELECT 0x00000f07 ++ ++#define REG_A3XX_TP_PERFCOUNTER4_SELECT 0x00000f08 ++ ++#define REG_A3XX_TP_PERFCOUNTER5_SELECT 0x00000f09 ++ + #define REG_A3XX_TEX_SAMP_0 0x00000000 + #define A3XX_TEX_SAMP_0_XY_MAG__MASK 0x0000000c + #define A3XX_TEX_SAMP_0_XY_MAG__SHIFT 2 +@@ -1791,6 +2140,12 @@ static inline uint32_t A3XX_TEX_CONST_0_FMT(enum a3xx_tex_fmt val) + { + return ((val) << A3XX_TEX_CONST_0_FMT__SHIFT) & A3XX_TEX_CONST_0_FMT__MASK; + } ++#define A3XX_TEX_CONST_0_TYPE__MASK 0xc0000000 ++#define A3XX_TEX_CONST_0_TYPE__SHIFT 30 ++static inline uint32_t A3XX_TEX_CONST_0_TYPE(enum a3xx_tex_type val) ++{ ++ return ((val) << A3XX_TEX_CONST_0_TYPE__SHIFT) & A3XX_TEX_CONST_0_TYPE__MASK; ++} + + #define REG_A3XX_TEX_CONST_1 0x00000001 + #define A3XX_TEX_CONST_1_HEIGHT__MASK 0x00003fff +diff --git a/src/gallium/drivers/freedreno/a3xx/fd3_emit.c b/src/gallium/drivers/freedreno/a3xx/fd3_emit.c +index b8436c9..5ffd561 100644 +--- a/src/gallium/drivers/freedreno/a3xx/fd3_emit.c ++++ b/src/gallium/drivers/freedreno/a3xx/fd3_emit.c +@@ -536,8 +536,8 @@ fd3_emit_restore(struct fd_context *ctx) + OUT_PKT0(ring, REG_A3XX_UNKNOWN_0C3D, 1); + OUT_RING(ring, 0x00000001); /* UNKNOWN_0C3D */ + +- OUT_PKT0(ring, REG_A3XX_UNKNOWN_0E00, 1); +- OUT_RING(ring, 0x00000000); /* UNKNOWN_0E00 */ ++ OUT_PKT0(ring, REG_A3XX_HLSQ_PERFCOUNTER0_SELECT, 1); ++ OUT_RING(ring, 0x00000000); /* HLSQ_PERFCOUNTER0_SELECT */ + + OUT_PKT0(ring, REG_A3XX_HLSQ_CONST_VSPRESV_RANGE_REG, 2); + OUT_RING(ring, A3XX_HLSQ_CONST_VSPRESV_RANGE_REG_STARTENTRY(0) | +diff --git a/src/gallium/drivers/freedreno/a3xx/fd3_program.c b/src/gallium/drivers/freedreno/a3xx/fd3_program.c +index b5a027e..259c2dd 100644 +--- 
a/src/gallium/drivers/freedreno/a3xx/fd3_program.c ++++ b/src/gallium/drivers/freedreno/a3xx/fd3_program.c +@@ -249,7 +249,7 @@ fd3_program_emit(struct fd_ringbuffer *ring, + */ + for (i = 0; i < 6; i++) { + OUT_PKT0(ring, REG_A3XX_SP_PERFCOUNTER0_SELECT, 1); +- OUT_RING(ring, 0x00000000); /* SP_PERFCOUNTER4_SELECT */ ++ OUT_RING(ring, 0x00000000); /* SP_PERFCOUNTER0_SELECT */ + + OUT_PKT0(ring, REG_A3XX_SP_PERFCOUNTER4_SELECT, 1); + OUT_RING(ring, 0x00000000); /* SP_PERFCOUNTER4_SELECT */ +diff --git a/src/gallium/drivers/freedreno/a3xx/fd3_util.c b/src/gallium/drivers/freedreno/a3xx/fd3_util.c +index a08bc23..6537fb7 100644 +--- a/src/gallium/drivers/freedreno/a3xx/fd3_util.c ++++ b/src/gallium/drivers/freedreno/a3xx/fd3_util.c +@@ -306,10 +306,11 @@ fd3_pipe2swap(enum pipe_format format) + case PIPE_FORMAT_B8G8R8A8_UNORM: + case PIPE_FORMAT_B8G8R8X8_UNORM: + return WXYZ; ++ ++ case PIPE_FORMAT_R8G8B8A8_UNORM: ++ case PIPE_FORMAT_R8G8B8X8_UNORM: + case PIPE_FORMAT_Z24X8_UNORM: + case PIPE_FORMAT_Z24_UNORM_S8_UINT: +- return WZYX; +- + default: + return WZYX; + } +diff --git a/src/gallium/drivers/freedreno/adreno_common.xml.h b/src/gallium/drivers/freedreno/adreno_common.xml.h +index b119812..61979d4 100644 +--- a/src/gallium/drivers/freedreno/adreno_common.xml.h ++++ b/src/gallium/drivers/freedreno/adreno_common.xml.h +@@ -8,10 +8,12 @@ http://0x04.net/cgit/index.cgi/rules-ng-ng + git clone git://0x04.net/rules-ng-ng + + The rules-ng-ng source files this header was generated from are: +-- /home/robclark/src/freedreno/envytools/rnndb/a3xx.xml ( 42578 bytes, from 2013-06-02 13:10:46) ++- /home/robclark/src/freedreno/envytools/rnndb/adreno.xml ( 327 bytes, from 2013-07-05 19:21:12) + - /home/robclark/src/freedreno/envytools/rnndb/freedreno_copyright.xml ( 1453 bytes, from 2013-03-31 16:51:27) +-- /home/robclark/src/freedreno/envytools/rnndb/adreno_common.xml ( 3094 bytes, from 2013-05-05 18:29:22) ++- /home/robclark/src/freedreno/envytools/rnndb/a2xx/a2xx.xml ( 
30005 bytes, from 2013-07-19 21:30:48) ++- /home/robclark/src/freedreno/envytools/rnndb/adreno_common.xml ( 8983 bytes, from 2013-07-24 01:38:36) + - /home/robclark/src/freedreno/envytools/rnndb/adreno_pm4.xml ( 9712 bytes, from 2013-05-26 15:22:37) ++- /home/robclark/src/freedreno/envytools/rnndb/a3xx/a3xx.xml ( 51415 bytes, from 2013-08-03 14:26:05) + + Copyright (C) 2013 by the following authors: + - Rob Clark (robclark) +@@ -113,5 +115,318 @@ enum adreno_rb_depth_format { + DEPTHX_24_8 = 1, + }; + ++enum adreno_mmu_clnt_beh { ++ BEH_NEVR = 0, ++ BEH_TRAN_RNG = 1, ++ BEH_TRAN_FLT = 2, ++}; ++ ++#define REG_AXXX_MH_MMU_CONFIG 0x00000040 ++#define AXXX_MH_MMU_CONFIG_MMU_ENABLE 0x00000001 ++#define AXXX_MH_MMU_CONFIG_SPLIT_MODE_ENABLE 0x00000002 ++#define AXXX_MH_MMU_CONFIG_RB_W_CLNT_BEHAVIOR__MASK 0x00000030 ++#define AXXX_MH_MMU_CONFIG_RB_W_CLNT_BEHAVIOR__SHIFT 4 ++static inline uint32_t AXXX_MH_MMU_CONFIG_RB_W_CLNT_BEHAVIOR(enum adreno_mmu_clnt_beh val) ++{ ++ return ((val) << AXXX_MH_MMU_CONFIG_RB_W_CLNT_BEHAVIOR__SHIFT) & AXXX_MH_MMU_CONFIG_RB_W_CLNT_BEHAVIOR__MASK; ++} ++#define AXXX_MH_MMU_CONFIG_CP_W_CLNT_BEHAVIOR__MASK 0x000000c0 ++#define AXXX_MH_MMU_CONFIG_CP_W_CLNT_BEHAVIOR__SHIFT 6 ++static inline uint32_t AXXX_MH_MMU_CONFIG_CP_W_CLNT_BEHAVIOR(enum adreno_mmu_clnt_beh val) ++{ ++ return ((val) << AXXX_MH_MMU_CONFIG_CP_W_CLNT_BEHAVIOR__SHIFT) & AXXX_MH_MMU_CONFIG_CP_W_CLNT_BEHAVIOR__MASK; ++} ++#define AXXX_MH_MMU_CONFIG_CP_R0_CLNT_BEHAVIOR__MASK 0x00000300 ++#define AXXX_MH_MMU_CONFIG_CP_R0_CLNT_BEHAVIOR__SHIFT 8 ++static inline uint32_t AXXX_MH_MMU_CONFIG_CP_R0_CLNT_BEHAVIOR(enum adreno_mmu_clnt_beh val) ++{ ++ return ((val) << AXXX_MH_MMU_CONFIG_CP_R0_CLNT_BEHAVIOR__SHIFT) & AXXX_MH_MMU_CONFIG_CP_R0_CLNT_BEHAVIOR__MASK; ++} ++#define AXXX_MH_MMU_CONFIG_CP_R1_CLNT_BEHAVIOR__MASK 0x00000c00 ++#define AXXX_MH_MMU_CONFIG_CP_R1_CLNT_BEHAVIOR__SHIFT 10 ++static inline uint32_t AXXX_MH_MMU_CONFIG_CP_R1_CLNT_BEHAVIOR(enum adreno_mmu_clnt_beh val) ++{ ++ 
return ((val) << AXXX_MH_MMU_CONFIG_CP_R1_CLNT_BEHAVIOR__SHIFT) & AXXX_MH_MMU_CONFIG_CP_R1_CLNT_BEHAVIOR__MASK; ++} ++#define AXXX_MH_MMU_CONFIG_CP_R2_CLNT_BEHAVIOR__MASK 0x00003000 ++#define AXXX_MH_MMU_CONFIG_CP_R2_CLNT_BEHAVIOR__SHIFT 12 ++static inline uint32_t AXXX_MH_MMU_CONFIG_CP_R2_CLNT_BEHAVIOR(enum adreno_mmu_clnt_beh val) ++{ ++ return ((val) << AXXX_MH_MMU_CONFIG_CP_R2_CLNT_BEHAVIOR__SHIFT) & AXXX_MH_MMU_CONFIG_CP_R2_CLNT_BEHAVIOR__MASK; ++} ++#define AXXX_MH_MMU_CONFIG_CP_R3_CLNT_BEHAVIOR__MASK 0x0000c000 ++#define AXXX_MH_MMU_CONFIG_CP_R3_CLNT_BEHAVIOR__SHIFT 14 ++static inline uint32_t AXXX_MH_MMU_CONFIG_CP_R3_CLNT_BEHAVIOR(enum adreno_mmu_clnt_beh val) ++{ ++ return ((val) << AXXX_MH_MMU_CONFIG_CP_R3_CLNT_BEHAVIOR__SHIFT) & AXXX_MH_MMU_CONFIG_CP_R3_CLNT_BEHAVIOR__MASK; ++} ++#define AXXX_MH_MMU_CONFIG_CP_R4_CLNT_BEHAVIOR__MASK 0x00030000 ++#define AXXX_MH_MMU_CONFIG_CP_R4_CLNT_BEHAVIOR__SHIFT 16 ++static inline uint32_t AXXX_MH_MMU_CONFIG_CP_R4_CLNT_BEHAVIOR(enum adreno_mmu_clnt_beh val) ++{ ++ return ((val) << AXXX_MH_MMU_CONFIG_CP_R4_CLNT_BEHAVIOR__SHIFT) & AXXX_MH_MMU_CONFIG_CP_R4_CLNT_BEHAVIOR__MASK; ++} ++#define AXXX_MH_MMU_CONFIG_VGT_R0_CLNT_BEHAVIOR__MASK 0x000c0000 ++#define AXXX_MH_MMU_CONFIG_VGT_R0_CLNT_BEHAVIOR__SHIFT 18 ++static inline uint32_t AXXX_MH_MMU_CONFIG_VGT_R0_CLNT_BEHAVIOR(enum adreno_mmu_clnt_beh val) ++{ ++ return ((val) << AXXX_MH_MMU_CONFIG_VGT_R0_CLNT_BEHAVIOR__SHIFT) & AXXX_MH_MMU_CONFIG_VGT_R0_CLNT_BEHAVIOR__MASK; ++} ++#define AXXX_MH_MMU_CONFIG_VGT_R1_CLNT_BEHAVIOR__MASK 0x00300000 ++#define AXXX_MH_MMU_CONFIG_VGT_R1_CLNT_BEHAVIOR__SHIFT 20 ++static inline uint32_t AXXX_MH_MMU_CONFIG_VGT_R1_CLNT_BEHAVIOR(enum adreno_mmu_clnt_beh val) ++{ ++ return ((val) << AXXX_MH_MMU_CONFIG_VGT_R1_CLNT_BEHAVIOR__SHIFT) & AXXX_MH_MMU_CONFIG_VGT_R1_CLNT_BEHAVIOR__MASK; ++} ++#define AXXX_MH_MMU_CONFIG_TC_R_CLNT_BEHAVIOR__MASK 0x00c00000 ++#define AXXX_MH_MMU_CONFIG_TC_R_CLNT_BEHAVIOR__SHIFT 22 ++static inline uint32_t 
AXXX_MH_MMU_CONFIG_TC_R_CLNT_BEHAVIOR(enum adreno_mmu_clnt_beh val) ++{ ++ return ((val) << AXXX_MH_MMU_CONFIG_TC_R_CLNT_BEHAVIOR__SHIFT) & AXXX_MH_MMU_CONFIG_TC_R_CLNT_BEHAVIOR__MASK; ++} ++#define AXXX_MH_MMU_CONFIG_PA_W_CLNT_BEHAVIOR__MASK 0x03000000 ++#define AXXX_MH_MMU_CONFIG_PA_W_CLNT_BEHAVIOR__SHIFT 24 ++static inline uint32_t AXXX_MH_MMU_CONFIG_PA_W_CLNT_BEHAVIOR(enum adreno_mmu_clnt_beh val) ++{ ++ return ((val) << AXXX_MH_MMU_CONFIG_PA_W_CLNT_BEHAVIOR__SHIFT) & AXXX_MH_MMU_CONFIG_PA_W_CLNT_BEHAVIOR__MASK; ++} ++ ++#define REG_AXXX_MH_MMU_VA_RANGE 0x00000041 ++ ++#define REG_AXXX_MH_MMU_PT_BASE 0x00000042 ++ ++#define REG_AXXX_MH_MMU_PAGE_FAULT 0x00000043 ++ ++#define REG_AXXX_MH_MMU_TRAN_ERROR 0x00000044 ++ ++#define REG_AXXX_MH_MMU_INVALIDATE 0x00000045 ++ ++#define REG_AXXX_MH_MMU_MPU_BASE 0x00000046 ++ ++#define REG_AXXX_MH_MMU_MPU_END 0x00000047 ++ ++#define REG_AXXX_CP_RB_BASE 0x000001c0 ++ ++#define REG_AXXX_CP_RB_CNTL 0x000001c1 ++#define AXXX_CP_RB_CNTL_BUFSZ__MASK 0x0000003f ++#define AXXX_CP_RB_CNTL_BUFSZ__SHIFT 0 ++static inline uint32_t AXXX_CP_RB_CNTL_BUFSZ(uint32_t val) ++{ ++ return ((val) << AXXX_CP_RB_CNTL_BUFSZ__SHIFT) & AXXX_CP_RB_CNTL_BUFSZ__MASK; ++} ++#define AXXX_CP_RB_CNTL_BLKSZ__MASK 0x00003f00 ++#define AXXX_CP_RB_CNTL_BLKSZ__SHIFT 8 ++static inline uint32_t AXXX_CP_RB_CNTL_BLKSZ(uint32_t val) ++{ ++ return ((val) << AXXX_CP_RB_CNTL_BLKSZ__SHIFT) & AXXX_CP_RB_CNTL_BLKSZ__MASK; ++} ++#define AXXX_CP_RB_CNTL_BUF_SWAP__MASK 0x00030000 ++#define AXXX_CP_RB_CNTL_BUF_SWAP__SHIFT 16 ++static inline uint32_t AXXX_CP_RB_CNTL_BUF_SWAP(uint32_t val) ++{ ++ return ((val) << AXXX_CP_RB_CNTL_BUF_SWAP__SHIFT) & AXXX_CP_RB_CNTL_BUF_SWAP__MASK; ++} ++#define AXXX_CP_RB_CNTL_POLL_EN 0x00100000 ++#define AXXX_CP_RB_CNTL_NO_UPDATE 0x08000000 ++#define AXXX_CP_RB_CNTL_RPTR_WR_EN 0x80000000 ++ ++#define REG_AXXX_CP_RB_RPTR_ADDR 0x000001c3 ++#define AXXX_CP_RB_RPTR_ADDR_SWAP__MASK 0x00000003 ++#define AXXX_CP_RB_RPTR_ADDR_SWAP__SHIFT 0 ++static inline 
uint32_t AXXX_CP_RB_RPTR_ADDR_SWAP(uint32_t val) ++{ ++ return ((val) << AXXX_CP_RB_RPTR_ADDR_SWAP__SHIFT) & AXXX_CP_RB_RPTR_ADDR_SWAP__MASK; ++} ++#define AXXX_CP_RB_RPTR_ADDR_ADDR__MASK 0xfffffffc ++#define AXXX_CP_RB_RPTR_ADDR_ADDR__SHIFT 2 ++static inline uint32_t AXXX_CP_RB_RPTR_ADDR_ADDR(uint32_t val) ++{ ++ return ((val >> 2) << AXXX_CP_RB_RPTR_ADDR_ADDR__SHIFT) & AXXX_CP_RB_RPTR_ADDR_ADDR__MASK; ++} ++ ++#define REG_AXXX_CP_RB_RPTR 0x000001c4 ++ ++#define REG_AXXX_CP_RB_WPTR 0x000001c5 ++ ++#define REG_AXXX_CP_RB_WPTR_DELAY 0x000001c6 ++ ++#define REG_AXXX_CP_RB_RPTR_WR 0x000001c7 ++ ++#define REG_AXXX_CP_RB_WPTR_BASE 0x000001c8 ++ ++#define REG_AXXX_CP_QUEUE_THRESHOLDS 0x000001d5 ++#define AXXX_CP_QUEUE_THRESHOLDS_CSQ_IB1_START__MASK 0x0000000f ++#define AXXX_CP_QUEUE_THRESHOLDS_CSQ_IB1_START__SHIFT 0 ++static inline uint32_t AXXX_CP_QUEUE_THRESHOLDS_CSQ_IB1_START(uint32_t val) ++{ ++ return ((val) << AXXX_CP_QUEUE_THRESHOLDS_CSQ_IB1_START__SHIFT) & AXXX_CP_QUEUE_THRESHOLDS_CSQ_IB1_START__MASK; ++} ++#define AXXX_CP_QUEUE_THRESHOLDS_CSQ_IB2_START__MASK 0x00000f00 ++#define AXXX_CP_QUEUE_THRESHOLDS_CSQ_IB2_START__SHIFT 8 ++static inline uint32_t AXXX_CP_QUEUE_THRESHOLDS_CSQ_IB2_START(uint32_t val) ++{ ++ return ((val) << AXXX_CP_QUEUE_THRESHOLDS_CSQ_IB2_START__SHIFT) & AXXX_CP_QUEUE_THRESHOLDS_CSQ_IB2_START__MASK; ++} ++#define AXXX_CP_QUEUE_THRESHOLDS_CSQ_ST_START__MASK 0x000f0000 ++#define AXXX_CP_QUEUE_THRESHOLDS_CSQ_ST_START__SHIFT 16 ++static inline uint32_t AXXX_CP_QUEUE_THRESHOLDS_CSQ_ST_START(uint32_t val) ++{ ++ return ((val) << AXXX_CP_QUEUE_THRESHOLDS_CSQ_ST_START__SHIFT) & AXXX_CP_QUEUE_THRESHOLDS_CSQ_ST_START__MASK; ++} ++ ++#define REG_AXXX_CP_MEQ_THRESHOLDS 0x000001d6 ++ ++#define REG_AXXX_CP_CSQ_AVAIL 0x000001d7 ++#define AXXX_CP_CSQ_AVAIL_RING__MASK 0x0000007f ++#define AXXX_CP_CSQ_AVAIL_RING__SHIFT 0 ++static inline uint32_t AXXX_CP_CSQ_AVAIL_RING(uint32_t val) ++{ ++ return ((val) << AXXX_CP_CSQ_AVAIL_RING__SHIFT) & 
AXXX_CP_CSQ_AVAIL_RING__MASK; ++} ++#define AXXX_CP_CSQ_AVAIL_IB1__MASK 0x00007f00 ++#define AXXX_CP_CSQ_AVAIL_IB1__SHIFT 8 ++static inline uint32_t AXXX_CP_CSQ_AVAIL_IB1(uint32_t val) ++{ ++ return ((val) << AXXX_CP_CSQ_AVAIL_IB1__SHIFT) & AXXX_CP_CSQ_AVAIL_IB1__MASK; ++} ++#define AXXX_CP_CSQ_AVAIL_IB2__MASK 0x007f0000 ++#define AXXX_CP_CSQ_AVAIL_IB2__SHIFT 16 ++static inline uint32_t AXXX_CP_CSQ_AVAIL_IB2(uint32_t val) ++{ ++ return ((val) << AXXX_CP_CSQ_AVAIL_IB2__SHIFT) & AXXX_CP_CSQ_AVAIL_IB2__MASK; ++} ++ ++#define REG_AXXX_CP_STQ_AVAIL 0x000001d8 ++#define AXXX_CP_STQ_AVAIL_ST__MASK 0x0000007f ++#define AXXX_CP_STQ_AVAIL_ST__SHIFT 0 ++static inline uint32_t AXXX_CP_STQ_AVAIL_ST(uint32_t val) ++{ ++ return ((val) << AXXX_CP_STQ_AVAIL_ST__SHIFT) & AXXX_CP_STQ_AVAIL_ST__MASK; ++} ++ ++#define REG_AXXX_CP_MEQ_AVAIL 0x000001d9 ++#define AXXX_CP_MEQ_AVAIL_MEQ__MASK 0x0000001f ++#define AXXX_CP_MEQ_AVAIL_MEQ__SHIFT 0 ++static inline uint32_t AXXX_CP_MEQ_AVAIL_MEQ(uint32_t val) ++{ ++ return ((val) << AXXX_CP_MEQ_AVAIL_MEQ__SHIFT) & AXXX_CP_MEQ_AVAIL_MEQ__MASK; ++} ++ ++#define REG_AXXX_SCRATCH_UMSK 0x000001dc ++#define AXXX_SCRATCH_UMSK_UMSK__MASK 0x000000ff ++#define AXXX_SCRATCH_UMSK_UMSK__SHIFT 0 ++static inline uint32_t AXXX_SCRATCH_UMSK_UMSK(uint32_t val) ++{ ++ return ((val) << AXXX_SCRATCH_UMSK_UMSK__SHIFT) & AXXX_SCRATCH_UMSK_UMSK__MASK; ++} ++#define AXXX_SCRATCH_UMSK_SWAP__MASK 0x00030000 ++#define AXXX_SCRATCH_UMSK_SWAP__SHIFT 16 ++static inline uint32_t AXXX_SCRATCH_UMSK_SWAP(uint32_t val) ++{ ++ return ((val) << AXXX_SCRATCH_UMSK_SWAP__SHIFT) & AXXX_SCRATCH_UMSK_SWAP__MASK; ++} ++ ++#define REG_AXXX_SCRATCH_ADDR 0x000001dd ++ ++#define REG_AXXX_CP_ME_RDADDR 0x000001ea ++ ++#define REG_AXXX_CP_STATE_DEBUG_INDEX 0x000001ec ++ ++#define REG_AXXX_CP_STATE_DEBUG_DATA 0x000001ed ++ ++#define REG_AXXX_CP_INT_CNTL 0x000001f2 ++ ++#define REG_AXXX_CP_INT_STATUS 0x000001f3 ++ ++#define REG_AXXX_CP_INT_ACK 0x000001f4 ++ ++#define REG_AXXX_CP_ME_CNTL 0x000001f6 
++ ++#define REG_AXXX_CP_ME_STATUS 0x000001f7 ++ ++#define REG_AXXX_CP_ME_RAM_WADDR 0x000001f8 ++ ++#define REG_AXXX_CP_ME_RAM_RADDR 0x000001f9 ++ ++#define REG_AXXX_CP_ME_RAM_DATA 0x000001fa ++ ++#define REG_AXXX_CP_DEBUG 0x000001fc ++#define AXXX_CP_DEBUG_PREDICATE_DISABLE 0x00800000 ++#define AXXX_CP_DEBUG_PROG_END_PTR_ENABLE 0x01000000 ++#define AXXX_CP_DEBUG_MIU_128BIT_WRITE_ENABLE 0x02000000 ++#define AXXX_CP_DEBUG_PREFETCH_PASS_NOPS 0x04000000 ++#define AXXX_CP_DEBUG_DYNAMIC_CLK_DISABLE 0x08000000 ++#define AXXX_CP_DEBUG_PREFETCH_MATCH_DISABLE 0x10000000 ++#define AXXX_CP_DEBUG_SIMPLE_ME_FLOW_CONTROL 0x40000000 ++#define AXXX_CP_DEBUG_MIU_WRITE_PACK_DISABLE 0x80000000 ++ ++#define REG_AXXX_CP_CSQ_RB_STAT 0x000001fd ++#define AXXX_CP_CSQ_RB_STAT_RPTR__MASK 0x0000007f ++#define AXXX_CP_CSQ_RB_STAT_RPTR__SHIFT 0 ++static inline uint32_t AXXX_CP_CSQ_RB_STAT_RPTR(uint32_t val) ++{ ++ return ((val) << AXXX_CP_CSQ_RB_STAT_RPTR__SHIFT) & AXXX_CP_CSQ_RB_STAT_RPTR__MASK; ++} ++#define AXXX_CP_CSQ_RB_STAT_WPTR__MASK 0x007f0000 ++#define AXXX_CP_CSQ_RB_STAT_WPTR__SHIFT 16 ++static inline uint32_t AXXX_CP_CSQ_RB_STAT_WPTR(uint32_t val) ++{ ++ return ((val) << AXXX_CP_CSQ_RB_STAT_WPTR__SHIFT) & AXXX_CP_CSQ_RB_STAT_WPTR__MASK; ++} ++ ++#define REG_AXXX_CP_CSQ_IB1_STAT 0x000001fe ++#define AXXX_CP_CSQ_IB1_STAT_RPTR__MASK 0x0000007f ++#define AXXX_CP_CSQ_IB1_STAT_RPTR__SHIFT 0 ++static inline uint32_t AXXX_CP_CSQ_IB1_STAT_RPTR(uint32_t val) ++{ ++ return ((val) << AXXX_CP_CSQ_IB1_STAT_RPTR__SHIFT) & AXXX_CP_CSQ_IB1_STAT_RPTR__MASK; ++} ++#define AXXX_CP_CSQ_IB1_STAT_WPTR__MASK 0x007f0000 ++#define AXXX_CP_CSQ_IB1_STAT_WPTR__SHIFT 16 ++static inline uint32_t AXXX_CP_CSQ_IB1_STAT_WPTR(uint32_t val) ++{ ++ return ((val) << AXXX_CP_CSQ_IB1_STAT_WPTR__SHIFT) & AXXX_CP_CSQ_IB1_STAT_WPTR__MASK; ++} ++ ++#define REG_AXXX_CP_CSQ_IB2_STAT 0x000001ff ++#define AXXX_CP_CSQ_IB2_STAT_RPTR__MASK 0x0000007f ++#define AXXX_CP_CSQ_IB2_STAT_RPTR__SHIFT 0 ++static inline uint32_t 
AXXX_CP_CSQ_IB2_STAT_RPTR(uint32_t val) ++{ ++ return ((val) << AXXX_CP_CSQ_IB2_STAT_RPTR__SHIFT) & AXXX_CP_CSQ_IB2_STAT_RPTR__MASK; ++} ++#define AXXX_CP_CSQ_IB2_STAT_WPTR__MASK 0x007f0000 ++#define AXXX_CP_CSQ_IB2_STAT_WPTR__SHIFT 16 ++static inline uint32_t AXXX_CP_CSQ_IB2_STAT_WPTR(uint32_t val) ++{ ++ return ((val) << AXXX_CP_CSQ_IB2_STAT_WPTR__SHIFT) & AXXX_CP_CSQ_IB2_STAT_WPTR__MASK; ++} ++ ++#define REG_AXXX_CP_SCRATCH_REG0 0x00000578 ++ ++#define REG_AXXX_CP_SCRATCH_REG1 0x00000579 ++ ++#define REG_AXXX_CP_SCRATCH_REG2 0x0000057a ++ ++#define REG_AXXX_CP_SCRATCH_REG3 0x0000057b ++ ++#define REG_AXXX_CP_SCRATCH_REG4 0x0000057c ++ ++#define REG_AXXX_CP_SCRATCH_REG5 0x0000057d ++ ++#define REG_AXXX_CP_SCRATCH_REG6 0x0000057e ++ ++#define REG_AXXX_CP_SCRATCH_REG7 0x0000057f ++ ++#define REG_AXXX_CP_ME_CF_EVENT_SRC 0x0000060a ++ ++#define REG_AXXX_CP_ME_CF_EVENT_ADDR 0x0000060b ++ ++#define REG_AXXX_CP_ME_CF_EVENT_DATA 0x0000060c ++ ++#define REG_AXXX_CP_ME_NRT_ADDR 0x0000060d ++ ++#define REG_AXXX_CP_ME_NRT_DATA 0x0000060e ++ + + #endif /* ADRENO_COMMON_XML */ +diff --git a/src/gallium/drivers/freedreno/adreno_pm4.xml.h b/src/gallium/drivers/freedreno/adreno_pm4.xml.h +index d3a7bac..94c13f4 100644 +--- a/src/gallium/drivers/freedreno/adreno_pm4.xml.h ++++ b/src/gallium/drivers/freedreno/adreno_pm4.xml.h +@@ -8,10 +8,12 @@ http://0x04.net/cgit/index.cgi/rules-ng-ng + git clone git://0x04.net/rules-ng-ng + + The rules-ng-ng source files this header was generated from are: +-- /home/robclark/src/freedreno/envytools/rnndb/a3xx.xml ( 42578 bytes, from 2013-06-02 13:10:46) ++- /home/robclark/src/freedreno/envytools/rnndb/adreno.xml ( 327 bytes, from 2013-07-05 19:21:12) + - /home/robclark/src/freedreno/envytools/rnndb/freedreno_copyright.xml ( 1453 bytes, from 2013-03-31 16:51:27) +-- /home/robclark/src/freedreno/envytools/rnndb/adreno_common.xml ( 3094 bytes, from 2013-05-05 18:29:22) ++- /home/robclark/src/freedreno/envytools/rnndb/a2xx/a2xx.xml ( 30005 bytes, 
from 2013-07-19 21:30:48) ++- /home/robclark/src/freedreno/envytools/rnndb/adreno_common.xml ( 8983 bytes, from 2013-07-24 01:38:36) + - /home/robclark/src/freedreno/envytools/rnndb/adreno_pm4.xml ( 9712 bytes, from 2013-05-26 15:22:37) ++- /home/robclark/src/freedreno/envytools/rnndb/a3xx/a3xx.xml ( 51415 bytes, from 2013-08-03 14:26:05) + + Copyright (C) 2013 by the following authors: + - Rob Clark (robclark) +diff --git a/src/gallium/drivers/freedreno/freedreno_util.h b/src/gallium/drivers/freedreno/freedreno_util.h +index b49cdfc..22857d2 100644 +--- a/src/gallium/drivers/freedreno/freedreno_util.h ++++ b/src/gallium/drivers/freedreno/freedreno_util.h +@@ -35,6 +35,7 @@ + #include "pipe/p_format.h" + #include "util/u_debug.h" + #include "util/u_math.h" ++#include "util/u_half.h" + + #include "adreno_common.xml.h" + #include "adreno_pm4.xml.h" +-- +1.8.4.2 + diff --git a/0005-freedreno-a3xx-some-texture-fixes.patch b/0005-freedreno-a3xx-some-texture-fixes.patch new file mode 100644 index 0000000..4fd4c68 --- /dev/null +++ b/0005-freedreno-a3xx-some-texture-fixes.patch @@ -0,0 +1,65 @@ +From 3da8868b5df98d8544091feeea7b6bb0f736324f Mon Sep 17 00:00:00 2001 +From: Rob Clark +Date: Mon, 5 Aug 2013 18:03:33 -0400 +Subject: [PATCH 05/17] freedreno/a3xx: some texture fixes + +Stop hard coding bits that indicate texture type (2d/3d/cube/etc). 
+ +Signed-off-by: Rob Clark +--- + src/gallium/drivers/freedreno/a3xx/fd3_texture.c | 25 +++++++++++++++++++++++- + 1 file changed, 24 insertions(+), 1 deletion(-) + +diff --git a/src/gallium/drivers/freedreno/a3xx/fd3_texture.c b/src/gallium/drivers/freedreno/a3xx/fd3_texture.c +index ae08b8a..e56325b 100644 +--- a/src/gallium/drivers/freedreno/a3xx/fd3_texture.c ++++ b/src/gallium/drivers/freedreno/a3xx/fd3_texture.c +@@ -87,6 +87,7 @@ fd3_sampler_state_create(struct pipe_context *pctx, + so->base = *cso; + + so->texsamp0 = ++ COND(!cso->normalized_coords, A3XX_TEX_SAMP_0_UNNORM_COORDS) | + A3XX_TEX_SAMP_0_XY_MAG(tex_filter(cso->mag_img_filter)) | + A3XX_TEX_SAMP_0_XY_MIN(tex_filter(cso->min_img_filter)) | + A3XX_TEX_SAMP_0_WRAP_S(tex_clamp(cso->wrap_s)) | +@@ -97,6 +98,28 @@ fd3_sampler_state_create(struct pipe_context *pctx, + return so; + } + ++static enum a3xx_tex_type ++tex_type(unsigned target) ++{ ++ switch (target) { ++ default: ++ assert(0); ++ case PIPE_BUFFER: ++ case PIPE_TEXTURE_1D: ++ case PIPE_TEXTURE_1D_ARRAY: ++ return A3XX_TEX_1D; ++ case PIPE_TEXTURE_RECT: ++ case PIPE_TEXTURE_2D: ++ case PIPE_TEXTURE_2D_ARRAY: ++ return A3XX_TEX_2D; ++ case PIPE_TEXTURE_3D: ++ return A3XX_TEX_3D; ++ case PIPE_TEXTURE_CUBE: ++ case PIPE_TEXTURE_CUBE_ARRAY: ++ return A3XX_TEX_CUBE; ++ } ++} ++ + static struct pipe_sampler_view * + fd3_sampler_view_create(struct pipe_context *pctx, struct pipe_resource *prsc, + const struct pipe_sampler_view *cso) +@@ -116,7 +139,7 @@ fd3_sampler_view_create(struct pipe_context *pctx, struct pipe_resource *prsc, + so->tex_resource = rsc; + + so->texconst0 = +- 0x40000000 | /* ??? 
*/ ++ A3XX_TEX_CONST_0_TYPE(tex_type(prsc->target)) | + A3XX_TEX_CONST_0_FMT(fd3_pipe2tex(cso->format)) | + fd3_tex_swiz(cso->format, cso->swizzle_r, cso->swizzle_g, + cso->swizzle_b, cso->swizzle_a); +-- +1.8.4.2 + diff --git a/0006-freedreno-a3xx-compiler-fix-CMP.patch b/0006-freedreno-a3xx-compiler-fix-CMP.patch new file mode 100644 index 0000000..ded5b9d --- /dev/null +++ b/0006-freedreno-a3xx-compiler-fix-CMP.patch @@ -0,0 +1,45 @@ +From 83e65320012f327d2e8f1573443b2e20f059e76f Mon Sep 17 00:00:00 2001 +From: Rob Clark +Date: Tue, 20 Aug 2013 13:46:30 -0400 +Subject: [PATCH 06/17] freedreno/a3xx/compiler: fix CMP + +The 1st src to add.s needs (r) flag (repeat), otherwise it will end up: + + add.s dst.xyzw, tmp.xxxx -1 + +instead of: + + add.s dst.xyzw, tmp.xyzw, -1 + +Also, if we are using a temporary dst to avoid clobbering one of the src +registers, we actually need to use that as the dst for the sel +instruction. + +Signed-off-by: Rob Clark +--- + src/gallium/drivers/freedreno/a3xx/fd3_compiler.c | 4 ++-- + 1 file changed, 2 insertions(+), 2 deletions(-) + +diff --git a/src/gallium/drivers/freedreno/a3xx/fd3_compiler.c b/src/gallium/drivers/freedreno/a3xx/fd3_compiler.c +index eabe21c..07bede4 100644 +--- a/src/gallium/drivers/freedreno/a3xx/fd3_compiler.c ++++ b/src/gallium/drivers/freedreno/a3xx/fd3_compiler.c +@@ -790,13 +790,13 @@ trans_cmp(const struct instr_translater *t, + instr = ir3_instr_create(ctx->ir, 2, OPC_ADD_S); + instr->repeat = 3; + add_dst_reg(ctx, instr, &tmp_dst, 0); +- add_src_reg(ctx, instr, &tmp_src, 0); ++ add_src_reg(ctx, instr, &tmp_src, 0)->flags |= IR3_REG_R; + ir3_reg_create(instr, 0, IR3_REG_IMMED)->iim_val = -1; + + /* sel.{f32,f16} dst, src2, tmp, src1 */ + instr = ir3_instr_create(ctx->ir, 3, ctx->so->half_precision ? 
+ OPC_SEL_F16 : OPC_SEL_F32); +- vectorize(ctx, instr, &inst->Dst[0].Register, 3, ++ vectorize(ctx, instr, dst, 3, + &inst->Src[2].Register, 0, + &tmp_src, 0, + &inst->Src[1].Register, 0); +-- +1.8.4.2 + diff --git a/0007-freedreno-a3xx-compiler-handle-saturate-on-dst.patch b/0007-freedreno-a3xx-compiler-handle-saturate-on-dst.patch new file mode 100644 index 0000000..362c04f --- /dev/null +++ b/0007-freedreno-a3xx-compiler-handle-saturate-on-dst.patch @@ -0,0 +1,98 @@ +From c83387438633233ae6bcc55e1f4eaa2793ce7449 Mon Sep 17 00:00:00 2001 +From: Rob Clark +Date: Tue, 20 Aug 2013 13:51:35 -0400 +Subject: [PATCH 07/17] freedreno/a3xx/compiler: handle saturate on dst + +Sometimes things other than color dst need saturating, like if there is +a 'clamp(foo, 0.0, 1.0)'. So for saturated dst add the extra +instructions to fix up dst. + +Signed-off-by: Rob Clark +--- + src/gallium/drivers/freedreno/a3xx/fd3_compiler.c | 49 +++++++++++++++++++++++ + 1 file changed, 49 insertions(+) + +diff --git a/src/gallium/drivers/freedreno/a3xx/fd3_compiler.c b/src/gallium/drivers/freedreno/a3xx/fd3_compiler.c +index 07bede4..e2c7853 100644 +--- a/src/gallium/drivers/freedreno/a3xx/fd3_compiler.c ++++ b/src/gallium/drivers/freedreno/a3xx/fd3_compiler.c +@@ -131,6 +131,11 @@ struct fd3_compile_context { + struct tgsi_src_register tmp_src; + }; + ++ ++static void vectorize(struct fd3_compile_context *ctx, ++ struct ir3_instruction *instr, struct tgsi_dst_register *dst, ++ int nsrcs, ...); ++ + static unsigned + compile_init(struct fd3_compile_context *ctx, struct fd3_shader_stateobj *so, + const struct tgsi_token *tokens) +@@ -234,6 +239,10 @@ add_src_reg(struct fd3_compile_context *ctx, struct ir3_instruction *instr, + flags |= IR3_REG_CONST; + num = src->Index + ctx->base_reg[src->File]; + break; ++ case TGSI_FILE_OUTPUT: ++ /* NOTE: we should only end up w/ OUTPUT file for things like ++ * clamp()'ing saturated dst instructions ++ */ + case TGSI_FILE_INPUT: + case TGSI_FILE_TEMPORARY: 
+ num = src->Index + ctx->base_reg[src->File]; +@@ -407,6 +416,35 @@ create_mov(struct fd3_compile_context *ctx, struct tgsi_dst_register *dst, + + } + ++static void ++create_clamp(struct fd3_compile_context *ctx, struct tgsi_dst_register *dst, ++ struct tgsi_src_register *minval, struct tgsi_src_register *maxval) ++{ ++ struct ir3_instruction *instr; ++ struct tgsi_src_register src; ++ ++ src_from_dst(&src, dst); ++ ++ instr = ir3_instr_create(ctx->ir, 2, OPC_MAX_F); ++ vectorize(ctx, instr, dst, 2, &src, 0, minval, 0); ++ ++ instr = ir3_instr_create(ctx->ir, 2, OPC_MIN_F); ++ vectorize(ctx, instr, dst, 2, &src, 0, maxval, 0); ++} ++ ++static void ++create_clamp_imm(struct fd3_compile_context *ctx, ++ struct tgsi_dst_register *dst, ++ uint32_t minval, uint32_t maxval) ++{ ++ struct tgsi_src_register minconst, maxconst; ++ ++ get_immediate(ctx, &minconst, minval); ++ get_immediate(ctx, &maxconst, maxval); ++ ++ create_clamp(ctx, dst, &minconst, &maxconst); ++} ++ + static struct tgsi_dst_register * + get_dst(struct fd3_compile_context *ctx, struct tgsi_full_instruction *inst) + { +@@ -1229,6 +1267,17 @@ compile_instructions(struct fd3_compile_context *ctx) + assert(0); + } + ++ switch (inst->Instruction.Saturate) { ++ case TGSI_SAT_ZERO_ONE: ++ create_clamp_imm(ctx, &inst->Dst[0].Register, ++ fui(0.0), fui(1.0)); ++ break; ++ case TGSI_SAT_MINUS_PLUS_ONE: ++ create_clamp_imm(ctx, &inst->Dst[0].Register, ++ fui(-1.0), fui(1.0)); ++ break; ++ } ++ + break; + } + default: +-- +1.8.4.2 + diff --git a/0008-freedreno-a3xx-compiler-use-max_reg-rather-than-file.patch b/0008-freedreno-a3xx-compiler-use-max_reg-rather-than-file.patch new file mode 100644 index 0000000..c1f76c3 --- /dev/null +++ b/0008-freedreno-a3xx-compiler-use-max_reg-rather-than-file.patch @@ -0,0 +1,59 @@ +From 5394a872f30022f64e6b2b58ef983b1fe5f6c08d Mon Sep 17 00:00:00 2001 +From: Rob Clark +Date: Tue, 20 Aug 2013 13:54:01 -0400 +Subject: [PATCH 08/17] freedreno/a3xx/compiler: use max_reg rather than + 
file_count + +Our current (rather naive) register assignment is based on mapping +different register files (INPUT, OUTPUT, TEMP, CONST, etc) based on the +max register index of the preceding file. But in some cases, the lowest +used register in a file might not be zero. In which case +file_count[file] != file_max[file] + 1. + +Signed-off-by: Rob Clark +--- + src/gallium/drivers/freedreno/a3xx/fd3_compiler.c | 14 +++++++------- + 1 file changed, 7 insertions(+), 7 deletions(-) + +diff --git a/src/gallium/drivers/freedreno/a3xx/fd3_compiler.c b/src/gallium/drivers/freedreno/a3xx/fd3_compiler.c +index e2c7853..dc5c873 100644 +--- a/src/gallium/drivers/freedreno/a3xx/fd3_compiler.c ++++ b/src/gallium/drivers/freedreno/a3xx/fd3_compiler.c +@@ -159,19 +159,19 @@ compile_init(struct fd3_compile_context *ctx, struct fd3_shader_stateobj *so, + /* Immediates go after constants: */ + ctx->base_reg[TGSI_FILE_CONSTANT] = 0; + ctx->base_reg[TGSI_FILE_IMMEDIATE] = +- ctx->info.file_count[TGSI_FILE_CONSTANT]; ++ ctx->info.file_max[TGSI_FILE_CONSTANT] + 1; + + /* Temporaries after outputs after inputs: */ + ctx->base_reg[TGSI_FILE_INPUT] = 0; + ctx->base_reg[TGSI_FILE_OUTPUT] = +- ctx->info.file_count[TGSI_FILE_INPUT]; ++ ctx->info.file_max[TGSI_FILE_INPUT] + 1; + ctx->base_reg[TGSI_FILE_TEMPORARY] = +- ctx->info.file_count[TGSI_FILE_INPUT] + +- ctx->info.file_count[TGSI_FILE_OUTPUT]; ++ ctx->info.file_max[TGSI_FILE_INPUT] + 1 + ++ ctx->info.file_max[TGSI_FILE_OUTPUT] + 1; + + so->first_immediate = ctx->base_reg[TGSI_FILE_IMMEDIATE]; +- ctx->immediate_idx = 4 * (ctx->info.file_count[TGSI_FILE_CONSTANT] + +- ctx->info.file_count[TGSI_FILE_IMMEDIATE]); ++ ctx->immediate_idx = 4 * (ctx->info.file_max[TGSI_FILE_CONSTANT] + 1 + ++ ctx->info.file_max[TGSI_FILE_IMMEDIATE] + 1); + + ret = tgsi_parse_init(&ctx->parser, tokens); + if (ret != TGSI_PARSE_OK) +@@ -309,7 +309,7 @@ get_internal_temp(struct fd3_compile_context *ctx, + /* assign next temporary: */ + n = ctx->num_internal_temps++; + 
+- tmp_dst->Index = ctx->info.file_count[TGSI_FILE_TEMPORARY] + n; ++ tmp_dst->Index = ctx->info.file_max[TGSI_FILE_TEMPORARY] + n + 1; + + src_from_dst(tmp_src, tmp_dst); + } +-- +1.8.4.2 + diff --git a/0009-freedreno-a3xx-compiler-cat4-cannot-use-const-reg-as.patch b/0009-freedreno-a3xx-compiler-cat4-cannot-use-const-reg-as.patch new file mode 100644 index 0000000..a43dde8 --- /dev/null +++ b/0009-freedreno-a3xx-compiler-cat4-cannot-use-const-reg-as.patch @@ -0,0 +1,104 @@ +From f3a7e28fe47ec547c1c9b561b04af208ae2f0f04 Mon Sep 17 00:00:00 2001 +From: Rob Clark +Date: Tue, 20 Aug 2013 13:57:22 -0400 +Subject: [PATCH 09/17] freedreno/a3xx/compiler: cat4 cannot use const reg as + src + +Category 4 instructions (rsq, rcp, sqrt, etc) seem to be unable to take +a const register as src. In these cases we need to move the src to a +temporary gpr first. + +This is the second case of such a restriction, where the instruction +encoding appears to support a const src, but in fact the hw appears to +ignore that bit. So split things out into a helper that can be re-used +for any instructions which have this limitation. 
+ +Signed-off-by: Rob Clark +--- + src/gallium/drivers/freedreno/a3xx/fd3_compiler.c | 37 +++++++++++++++++------ + 1 file changed, 27 insertions(+), 10 deletions(-) + +diff --git a/src/gallium/drivers/freedreno/a3xx/fd3_compiler.c b/src/gallium/drivers/freedreno/a3xx/fd3_compiler.c +index dc5c873..772c7d2 100644 +--- a/src/gallium/drivers/freedreno/a3xx/fd3_compiler.c ++++ b/src/gallium/drivers/freedreno/a3xx/fd3_compiler.c +@@ -135,6 +135,8 @@ struct fd3_compile_context { + static void vectorize(struct fd3_compile_context *ctx, + struct ir3_instruction *instr, struct tgsi_dst_register *dst, + int nsrcs, ...); ++static void create_mov(struct fd3_compile_context *ctx, ++ struct tgsi_dst_register *dst, struct tgsi_src_register *src); + + static unsigned + compile_init(struct fd3_compile_context *ctx, struct fd3_shader_stateobj *so, +@@ -374,6 +376,23 @@ get_immediate(struct fd3_compile_context *ctx, + reg->SwizzleW = swiz2tgsi[swiz]; + } + ++/* for instructions that cannot take a const register as src, if needed ++ * generate a move to temporary gpr: ++ */ ++static struct tgsi_src_register * ++get_unconst(struct fd3_compile_context *ctx, struct tgsi_src_register *src, ++ struct tgsi_src_register *tmp_src) ++{ ++ static struct tgsi_dst_register tmp_dst; ++ if ((src->File == TGSI_FILE_CONSTANT) || ++ (src->File == TGSI_FILE_IMMEDIATE)) { ++ get_internal_temp(ctx, &tmp_dst, tmp_src); ++ create_mov(ctx, &tmp_dst, src); ++ src = tmp_src; ++ } ++ return src; ++} ++ + static type_t + get_type(struct fd3_compile_context *ctx) + { +@@ -1027,8 +1046,7 @@ instr_cat3(const struct instr_translater *t, + struct tgsi_full_instruction *inst) + { + struct tgsi_dst_register *dst = get_dst(ctx, inst); +- struct tgsi_src_register *src1 = &inst->Src[1].Register; +- struct tgsi_dst_register tmp_dst; ++ struct tgsi_src_register *src1; + struct tgsi_src_register tmp_src; + struct ir3_instruction *instr; + +@@ -1038,12 +1056,7 @@ instr_cat3(const struct instr_translater *t, + * const. 
Not sure if this is a hw bug, or simply that the + * disassembler lies. + */ +- if ((src1->File == TGSI_FILE_CONSTANT) || +- (src1->File == TGSI_FILE_IMMEDIATE)) { +- get_internal_temp(ctx, &tmp_dst, &tmp_src); +- create_mov(ctx, &tmp_dst, src1); +- src1 = &tmp_src; +- } ++ src1 = get_unconst(ctx, &inst->Src[1].Register, &tmp_src); + + instr = ir3_instr_create(ctx->ir, 3, + ctx->so->half_precision ? t->hopc : t->opc); +@@ -1060,13 +1073,17 @@ instr_cat4(const struct instr_translater *t, + struct tgsi_full_instruction *inst) + { + struct tgsi_dst_register *dst = get_dst(ctx, inst); ++ struct tgsi_src_register *src; ++ struct tgsi_src_register tmp_src; + struct ir3_instruction *instr; + ++ /* seems like blob compiler avoids const as src.. */ ++ src = get_unconst(ctx, &inst->Src[0].Register, &tmp_src); ++ + ir3_instr_create(ctx->ir, 0, OPC_NOP)->repeat = 5; + instr = ir3_instr_create(ctx->ir, 4, t->opc); + +- vectorize(ctx, instr, dst, 1, +- &inst->Src[0].Register, 0); ++ vectorize(ctx, instr, dst, 1, src, 0); + + regmask_set(ctx->needs_ss, instr->regs[0]); + +-- +1.8.4.2 + diff --git a/0010-freedreno-fix-segfault-when-no-color-buffer-bound.patch b/0010-freedreno-fix-segfault-when-no-color-buffer-bound.patch new file mode 100644 index 0000000..8978dd6 --- /dev/null +++ b/0010-freedreno-fix-segfault-when-no-color-buffer-bound.patch @@ -0,0 +1,216 @@ +From 12da4c1a6aa4b2a9cc337f669986a63c59fc3095 Mon Sep 17 00:00:00 2001 +From: Rob Clark +Date: Wed, 21 Aug 2013 13:20:05 -0400 +Subject: [PATCH 10/17] freedreno: fix segfault when no color buffer bound + +Don't crash when no color buffer bound. Something caught when starting +to run piglit, fixes a hanful of piglit tests. 
+ +Signed-off-by: Rob Clark +--- + src/gallium/drivers/freedreno/a2xx/fd2_gmem.c | 6 +++--- + src/gallium/drivers/freedreno/a3xx/fd3_gmem.c | 15 +++++++++++---- + src/gallium/drivers/freedreno/freedreno_context.c | 3 ++- + src/gallium/drivers/freedreno/freedreno_draw.c | 4 ++-- + src/gallium/drivers/freedreno/freedreno_gmem.c | 18 +++++++++++------- + src/gallium/drivers/freedreno/freedreno_state.c | 2 +- + src/gallium/drivers/freedreno/freedreno_util.h | 10 ++++++++++ + 7 files changed, 40 insertions(+), 18 deletions(-) + +diff --git a/src/gallium/drivers/freedreno/a2xx/fd2_gmem.c b/src/gallium/drivers/freedreno/a2xx/fd2_gmem.c +index e239eed..93695bc 100644 +--- a/src/gallium/drivers/freedreno/a2xx/fd2_gmem.c ++++ b/src/gallium/drivers/freedreno/a2xx/fd2_gmem.c +@@ -337,7 +337,7 @@ fd2_emit_tile_init(struct fd_context *ctx) + struct fd_ringbuffer *ring = ctx->ring; + struct pipe_framebuffer_state *pfb = &ctx->framebuffer; + struct fd_gmem_stateobj *gmem = &ctx->gmem; +- enum pipe_format format = pfb->cbufs[0]->format; ++ enum pipe_format format = pipe_surface_format(pfb->cbufs[0]); + uint32_t reg; + + OUT_PKT3(ring, CP_SET_CONSTANT, 4); +@@ -358,7 +358,7 @@ fd2_emit_tile_prep(struct fd_context *ctx, uint32_t xoff, uint32_t yoff, + { + struct fd_ringbuffer *ring = ctx->ring; + struct pipe_framebuffer_state *pfb = &ctx->framebuffer; +- enum pipe_format format = pfb->cbufs[0]->format; ++ enum pipe_format format = pipe_surface_format(pfb->cbufs[0]); + + OUT_PKT3(ring, CP_SET_CONSTANT, 2); + OUT_RING(ring, CP_REG(REG_A2XX_RB_COLOR_INFO)); +@@ -379,7 +379,7 @@ fd2_emit_tile_renderprep(struct fd_context *ctx, uint32_t xoff, uint32_t yoff, + { + struct fd_ringbuffer *ring = ctx->ring; + struct pipe_framebuffer_state *pfb = &ctx->framebuffer; +- enum pipe_format format = pfb->cbufs[0]->format; ++ enum pipe_format format = pipe_surface_format(pfb->cbufs[0]); + + OUT_PKT3(ring, CP_SET_CONSTANT, 2); + OUT_RING(ring, CP_REG(REG_A2XX_RB_COLOR_INFO)); +diff --git 
a/src/gallium/drivers/freedreno/a3xx/fd3_gmem.c b/src/gallium/drivers/freedreno/a3xx/fd3_gmem.c +index 9050166..b9d0580 100644 +--- a/src/gallium/drivers/freedreno/a3xx/fd3_gmem.c ++++ b/src/gallium/drivers/freedreno/a3xx/fd3_gmem.c +@@ -214,8 +214,12 @@ fd3_emit_tile_gmem2mem(struct fd_context *ctx, uint32_t xoff, uint32_t yoff, + }, 1); + + if (ctx->resolve & (FD_BUFFER_DEPTH | FD_BUFFER_STENCIL)) { +- uint32_t base = depth_base(&ctx->gmem) * +- fd_resource(pfb->cbufs[0]->texture)->cpp; ++ uint32_t base = 0; ++ if (pfb->cbufs[0]) { ++ struct fd_resource *rsc = ++ fd_resource(pfb->cbufs[0]->texture); ++ base = depth_base(&ctx->gmem) * rsc->cpp; ++ } + emit_gmem2mem_surf(ring, RB_COPY_DEPTH_STENCIL, base, pfb->zsbuf); + } + +@@ -410,8 +414,11 @@ static void + fd3_emit_sysmem_prep(struct fd_context *ctx) + { + struct pipe_framebuffer_state *pfb = &ctx->framebuffer; +- struct fd_resource *rsc = fd_resource(pfb->cbufs[0]->texture); + struct fd_ringbuffer *ring = ctx->ring; ++ uint32_t pitch = 0; ++ ++ if (pfb->cbufs[0]) ++ pitch = fd_resource(pfb->cbufs[0]->texture)->pitch; + + fd3_emit_restore(ctx); + +@@ -422,7 +429,7 @@ fd3_emit_sysmem_prep(struct fd_context *ctx) + emit_mrt(ring, pfb->nr_cbufs, pfb->cbufs, NULL, 0); + + fd3_emit_rbrc_tile_state(ring, +- A3XX_RB_RENDER_CONTROL_BIN_WIDTH(rsc->pitch)); ++ A3XX_RB_RENDER_CONTROL_BIN_WIDTH(pitch)); + + /* setup scissor/offset for current tile: */ + OUT_PKT0(ring, REG_A3XX_PA_SC_WINDOW_OFFSET, 1); +diff --git a/src/gallium/drivers/freedreno/freedreno_context.c b/src/gallium/drivers/freedreno/freedreno_context.c +index 44d525b..1d03351 100644 +--- a/src/gallium/drivers/freedreno/freedreno_context.c ++++ b/src/gallium/drivers/freedreno/freedreno_context.c +@@ -86,7 +86,8 @@ fd_context_render(struct pipe_context *pctx) + ctx->gmem_reason = 0; + ctx->num_draws = 0; + +- fd_resource(pfb->cbufs[0]->texture)->dirty = false; ++ if (pfb->cbufs[0]) ++ fd_resource(pfb->cbufs[0]->texture)->dirty = false; + if (pfb->zsbuf) + 
fd_resource(pfb->zsbuf->texture)->dirty = false; + } +diff --git a/src/gallium/drivers/freedreno/freedreno_draw.c b/src/gallium/drivers/freedreno/freedreno_draw.c +index b02b8b9..d4f8d34 100644 +--- a/src/gallium/drivers/freedreno/freedreno_draw.c ++++ b/src/gallium/drivers/freedreno/freedreno_draw.c +@@ -193,8 +193,8 @@ fd_clear(struct pipe_context *pctx, unsigned buffers, + } + + DBG("%x depth=%f, stencil=%u (%s/%s)", buffers, depth, stencil, +- util_format_name(pfb->cbufs[0]->format), +- pfb->zsbuf ? util_format_name(pfb->zsbuf->format) : "none"); ++ util_format_short_name(pipe_surface_format(pfb->cbufs[0])), ++ util_format_short_name(pipe_surface_format(pfb->zsbuf))); + + ctx->clear(ctx, buffers, color, depth, stencil); + +diff --git a/src/gallium/drivers/freedreno/freedreno_gmem.c b/src/gallium/drivers/freedreno/freedreno_gmem.c +index 197d1d9..3d959c6 100644 +--- a/src/gallium/drivers/freedreno/freedreno_gmem.c ++++ b/src/gallium/drivers/freedreno/freedreno_gmem.c +@@ -72,12 +72,15 @@ calculate_tiles(struct fd_context *ctx) + struct fd_gmem_stateobj *gmem = &ctx->gmem; + struct pipe_scissor_state *scissor = &ctx->max_scissor; + struct pipe_framebuffer_state *pfb = &ctx->framebuffer; +- uint32_t cpp = util_format_get_blocksize(pfb->cbufs[0]->format); + uint32_t gmem_size = ctx->screen->gmemsize_bytes; + uint32_t minx, miny, width, height; + uint32_t nbins_x = 1, nbins_y = 1; + uint32_t bin_w, bin_h; + uint32_t max_width = 992; ++ uint32_t cpp = 4; ++ ++ if (pfb->cbufs[0]) ++ cpp = util_format_get_blocksize(pfb->cbufs[0]->format); + + if ((gmem->cpp == cpp) && + !memcmp(&gmem->scissor, scissor, sizeof(gmem->scissor))) { +@@ -211,15 +214,15 @@ fd_gmem_render_tiles(struct pipe_context *pctx) + + if (sysmem) { + DBG("rendering sysmem (%s/%s)", +- util_format_name(pfb->cbufs[0]->format), +- pfb->zsbuf ? 
util_format_name(pfb->zsbuf->format) : "none"); ++ util_format_short_name(pipe_surface_format(pfb->cbufs[0])), ++ util_format_short_name(pipe_surface_format(pfb->zsbuf))); + render_sysmem(ctx); + } else { + struct fd_gmem_stateobj *gmem = &ctx->gmem; +- DBG("rendering %dx%d tiles (%s/%s)", gmem->nbins_x, gmem->nbins_y, +- util_format_name(pfb->cbufs[0]->format), +- pfb->zsbuf ? util_format_name(pfb->zsbuf->format) : "none"); + calculate_tiles(ctx); ++ DBG("rendering %dx%d tiles (%s/%s)", gmem->nbins_x, gmem->nbins_y, ++ util_format_short_name(pipe_surface_format(pfb->cbufs[0])), ++ util_format_short_name(pipe_surface_format(pfb->zsbuf))); + render_tiles(ctx); + } + +@@ -231,7 +234,8 @@ fd_gmem_render_tiles(struct pipe_context *pctx) + + /* update timestamps on render targets: */ + timestamp = fd_ringbuffer_timestamp(ctx->ring); +- fd_resource(pfb->cbufs[0]->texture)->timestamp = timestamp; ++ if (pfb->cbufs[0]) ++ fd_resource(pfb->cbufs[0]->texture)->timestamp = timestamp; + if (pfb->zsbuf) + fd_resource(pfb->zsbuf->texture)->timestamp = timestamp; + +diff --git a/src/gallium/drivers/freedreno/freedreno_state.c b/src/gallium/drivers/freedreno/freedreno_state.c +index 2f5d52c..f5290a9 100644 +--- a/src/gallium/drivers/freedreno/freedreno_state.c ++++ b/src/gallium/drivers/freedreno/freedreno_state.c +@@ -120,7 +120,7 @@ fd_set_framebuffer_state(struct pipe_context *pctx, + unsigned i; + + DBG("%d: cbufs[0]=%p, zsbuf=%p", ctx->needs_flush, +- cso->cbufs[0], cso->zsbuf); ++ framebuffer->cbufs[0], framebuffer->zsbuf); + + fd_context_render(pctx); + +diff --git a/src/gallium/drivers/freedreno/freedreno_util.h b/src/gallium/drivers/freedreno/freedreno_util.h +index 22857d2..9f10686 100644 +--- a/src/gallium/drivers/freedreno/freedreno_util.h ++++ b/src/gallium/drivers/freedreno/freedreno_util.h +@@ -33,6 +33,7 @@ + #include + + #include "pipe/p_format.h" ++#include "pipe/p_state.h" + #include "util/u_debug.h" + #include "util/u_math.h" + #include "util/u_half.h" +@@ 
-79,6 +80,15 @@ static inline uint32_t DRAW(enum pc_di_primtype prim_type, + (1 << 14); + } + ++ ++static inline enum pipe_format ++pipe_surface_format(struct pipe_surface *psurf) ++{ ++ if (!psurf) ++ return PIPE_FORMAT_NONE; ++ return psurf->format; ++} ++ + #define LOG_DWORDS 0 + + +-- +1.8.4.2 + diff --git a/0011-freedreno-a3xx-compiler-make-compiler-errors-more-us.patch b/0011-freedreno-a3xx-compiler-make-compiler-errors-more-us.patch new file mode 100644 index 0000000..53aecaa --- /dev/null +++ b/0011-freedreno-a3xx-compiler-make-compiler-errors-more-us.patch @@ -0,0 +1,172 @@ +From c726a6a907f119dfc4fb1c26fef7babf51dc1dea Mon Sep 17 00:00:00 2001 +From: Rob Clark +Date: Sat, 24 Aug 2013 12:56:22 -0400 +Subject: [PATCH 11/17] freedreno/a3xx/compiler: make compiler errors more + useful + +We probably should get rid of assert() entirely, but at this stage it is +more useful for things to crash where we can catch it in a debugger. +With compile_error() we have a single place to set an error flag (to +bail out and return an error on the next instruction) so that will be a +small change later when enough of the compiler bugs are sorted. + +But re-arrange/cleanup the error/assert stuff so we at least get a dump +of the TGSI that triggered it. So we see some useful output in piglit +logs. + +Signed-off-by: Rob Clark +--- + src/gallium/drivers/freedreno/a3xx/fd3_compiler.c | 47 +++++++++++++++-------- + src/gallium/drivers/freedreno/a3xx/ir-a3xx.h | 3 +- + 2 files changed, 33 insertions(+), 17 deletions(-) + +diff --git a/src/gallium/drivers/freedreno/a3xx/fd3_compiler.c b/src/gallium/drivers/freedreno/a3xx/fd3_compiler.c +index 772c7d2..e6c5bb7 100644 +--- a/src/gallium/drivers/freedreno/a3xx/fd3_compiler.c ++++ b/src/gallium/drivers/freedreno/a3xx/fd3_compiler.c +@@ -185,6 +185,21 @@ compile_init(struct fd3_compile_context *ctx, struct fd3_shader_stateobj *so, + } + + static void ++compile_error(struct fd3_compile_context *ctx, const char *format, ...) 
++{ ++ va_list ap; ++ va_start(ap, format); ++ _debug_vprintf(format, ap); ++ va_end(ap); ++ tgsi_dump(ctx->tokens, 0); ++ assert(0); ++} ++ ++#define compile_assert(ctx, cond) do { \ ++ if (!(cond)) compile_error((ctx), "failed assert: "#cond"\n"); \ ++ } while (0) ++ ++static void + compile_free(struct fd3_compile_context *ctx) + { + tgsi_parse_free(&ctx->parser); +@@ -212,9 +227,8 @@ add_dst_reg(struct fd3_compile_context *ctx, struct ir3_instruction *instr, + num = dst->Index + ctx->base_reg[dst->File]; + break; + default: +- DBG("unsupported dst register file: %s", ++ compile_error(ctx, "unsupported dst register file: %s\n", + tgsi_file_name(dst->File)); +- assert(0); + break; + } + +@@ -250,9 +264,8 @@ add_src_reg(struct fd3_compile_context *ctx, struct ir3_instruction *instr, + num = src->Index + ctx->base_reg[src->File]; + break; + default: +- DBG("unsupported src register file: %s", ++ compile_error(ctx, "unsupported src register file: %s\n", + tgsi_file_name(src->File)); +- assert(0); + break; + } + +@@ -329,6 +342,13 @@ get_internal_temp_repl(struct fd3_compile_context *ctx, + tmp_src->SwizzleZ = tmp_src->SwizzleW = TGSI_SWIZZLE_X; + } + ++static inline bool ++is_const(struct tgsi_src_register *src) ++{ ++ return (src->File == TGSI_FILE_CONSTANT) || ++ (src->File == TGSI_FILE_IMMEDIATE); ++} ++ + static void + get_immediate(struct fd3_compile_context *ctx, + struct tgsi_src_register *reg, uint32_t val) +@@ -578,8 +598,7 @@ trans_dotp(const struct instr_translater *t, + * is a const. Not sure if this is a hw bug, or simply that the + * disassembler lies. 
+ */ +- if ((src1->File == TGSI_FILE_IMMEDIATE) || +- (src1->File == TGSI_FILE_CONSTANT)) { ++ if (is_const(src1)) { + + /* the mov to tmp unswizzles src1, so now we have tmp.xyzw: + */ +@@ -768,7 +787,7 @@ trans_samp(const struct instr_translater *t, + flags |= IR3_INSTR_P; + break; + default: +- assert(0); ++ compile_assert(ctx, 0); + break; + } + +@@ -1187,7 +1206,7 @@ decl_out(struct fd3_compile_context *ctx, struct tgsi_full_declaration *decl) + unsigned name = decl->Semantic.Name; + unsigned i; + +- assert(decl->Declaration.Semantic); // TODO is this ever not true? ++ compile_assert(ctx, decl->Declaration.Semantic); // TODO is this ever not true? + + DBG("decl out[%d] -> r%d", name, decl->Range.First + base); // XXX + +@@ -1207,9 +1226,8 @@ decl_out(struct fd3_compile_context *ctx, struct tgsi_full_declaration *decl) + so->outputs[so->outputs_count++].regid = regid(i + base, 0); + break; + default: +- DBG("unknown VS semantic name: %s", ++ compile_error(ctx, "unknown VS semantic name: %s\n", + tgsi_semantic_names[name]); +- assert(0); + } + } else { + switch (name) { +@@ -1217,9 +1235,8 @@ decl_out(struct fd3_compile_context *ctx, struct tgsi_full_declaration *decl) + so->color_regid = regid(decl->Range.First + base, 0); + break; + default: +- DBG("unknown VS semantic name: %s", ++ compile_error(ctx, "unknown VS semantic name: %s\n", + tgsi_semantic_names[name]); +- assert(0); + } + } + } +@@ -1278,10 +1295,8 @@ compile_instructions(struct fd3_compile_context *ctx) + t->fxn(t, ctx, inst); + ctx->num_internal_temps = 0; + } else { +- debug_printf("unknown TGSI opc: %s\n", ++ compile_error(ctx, "unknown TGSI opc: %s\n", + tgsi_get_opcode_name(opc)); +- tgsi_dump(ctx->tokens, 0); +- assert(0); + } + + switch (inst->Instruction.Saturate) { +@@ -1319,6 +1334,8 @@ fd3_compile_shader(struct fd3_shader_stateobj *so, + + so->ir = ir3_shader_create(); + ++ assert(so->ir); ++ + so->color_regid = regid(63,0); + so->pos_regid = regid(63,0); + so->psize_regid = 
regid(63,0); +diff --git a/src/gallium/drivers/freedreno/a3xx/ir-a3xx.h b/src/gallium/drivers/freedreno/a3xx/ir-a3xx.h +index 2fedc7b..61c01a7 100644 +--- a/src/gallium/drivers/freedreno/a3xx/ir-a3xx.h ++++ b/src/gallium/drivers/freedreno/a3xx/ir-a3xx.h +@@ -166,8 +166,7 @@ struct ir3_instruction { + }; + }; + +-/* this is just large to cope w/ the large test *.asm: */ +-#define MAX_INSTRS 10240 ++#define MAX_INSTRS 1024 + + struct ir3_shader { + unsigned instrs_count; +-- +1.8.4.2 + diff --git a/0012-freedreno-a3xx-compiler-bit-of-re-arrange-cleanup.patch b/0012-freedreno-a3xx-compiler-bit-of-re-arrange-cleanup.patch new file mode 100644 index 0000000..a96fc8d --- /dev/null +++ b/0012-freedreno-a3xx-compiler-bit-of-re-arrange-cleanup.patch @@ -0,0 +1,420 @@ +From ca5514b85161d480fb711ac26d74fc447e1e9bda Mon Sep 17 00:00:00 2001 +From: Rob Clark +Date: Sat, 24 Aug 2013 13:00:07 -0400 +Subject: [PATCH 12/17] freedreno/a3xx/compiler: bit of re-arrange/cleanup + +It seems there are a number of cases where instructions have limitations +about taking reading src's from const register file, so make +get_unconst() a bit easier to use. 
+ +Signed-off-by: Rob Clark +--- + src/gallium/drivers/freedreno/a3xx/fd3_compiler.c | 132 ++++++++++++---------- + 1 file changed, 71 insertions(+), 61 deletions(-) + +diff --git a/src/gallium/drivers/freedreno/a3xx/fd3_compiler.c b/src/gallium/drivers/freedreno/a3xx/fd3_compiler.c +index e6c5bb7..b5cdda8 100644 +--- a/src/gallium/drivers/freedreno/a3xx/fd3_compiler.c ++++ b/src/gallium/drivers/freedreno/a3xx/fd3_compiler.c +@@ -91,6 +91,7 @@ struct fd3_compile_context { + + unsigned next_inloc; + unsigned num_internal_temps; ++ struct tgsi_src_register internal_temps[6]; + + /* track registers which need to synchronize w/ "complex alu" cat3 + * instruction pipeline: +@@ -128,7 +129,7 @@ struct fd3_compile_context { + * up the vector operation + */ + struct tgsi_dst_register tmp_dst; +- struct tgsi_src_register tmp_src; ++ struct tgsi_src_register *tmp_src; + }; + + +@@ -309,11 +310,11 @@ src_from_dst(struct tgsi_src_register *src, struct tgsi_dst_register *dst) + /* Get internal-temp src/dst to use for a sequence of instructions + * generated by a single TGSI op. 
+ */ +-static void ++static struct tgsi_src_register * + get_internal_temp(struct fd3_compile_context *ctx, +- struct tgsi_dst_register *tmp_dst, +- struct tgsi_src_register *tmp_src) ++ struct tgsi_dst_register *tmp_dst) + { ++ struct tgsi_src_register *tmp_src; + int n; + + tmp_dst->File = TGSI_FILE_TEMPORARY; +@@ -323,23 +324,28 @@ get_internal_temp(struct fd3_compile_context *ctx, + + /* assign next temporary: */ + n = ctx->num_internal_temps++; ++ compile_assert(ctx, n < ARRAY_SIZE(ctx->internal_temps)); ++ tmp_src = &ctx->internal_temps[n]; + + tmp_dst->Index = ctx->info.file_max[TGSI_FILE_TEMPORARY] + n + 1; + + src_from_dst(tmp_src, tmp_dst); ++ ++ return tmp_src; + } + + /* same as get_internal_temp, but w/ src.xxxx (for instructions that + * replicate their results) + */ +-static void ++static struct tgsi_src_register * + get_internal_temp_repl(struct fd3_compile_context *ctx, +- struct tgsi_dst_register *tmp_dst, +- struct tgsi_src_register *tmp_src) ++ struct tgsi_dst_register *tmp_dst) + { +- get_internal_temp(ctx, tmp_dst, tmp_src); ++ struct tgsi_src_register *tmp_src = ++ get_internal_temp(ctx, tmp_dst); + tmp_src->SwizzleX = tmp_src->SwizzleY = + tmp_src->SwizzleZ = tmp_src->SwizzleW = TGSI_SWIZZLE_X; ++ return tmp_src; + } + + static inline bool +@@ -349,6 +355,22 @@ is_const(struct tgsi_src_register *src) + (src->File == TGSI_FILE_IMMEDIATE); + } + ++/* for instructions that cannot take a const register as src, if needed ++ * generate a move to temporary gpr: ++ */ ++static struct tgsi_src_register * ++get_unconst(struct fd3_compile_context *ctx, struct tgsi_src_register *src) ++{ ++ if (is_const(src)) { ++ static struct tgsi_dst_register tmp_dst; ++ struct tgsi_src_register *tmp_src = ++ get_internal_temp(ctx, &tmp_dst); ++ create_mov(ctx, &tmp_dst, src); ++ src = tmp_src; ++ } ++ return src; ++} ++ + static void + get_immediate(struct fd3_compile_context *ctx, + struct tgsi_src_register *reg, uint32_t val) +@@ -396,27 +418,16 @@ 
get_immediate(struct fd3_compile_context *ctx, + reg->SwizzleW = swiz2tgsi[swiz]; + } + +-/* for instructions that cannot take a const register as src, if needed +- * generate a move to temporary gpr: +- */ +-static struct tgsi_src_register * +-get_unconst(struct fd3_compile_context *ctx, struct tgsi_src_register *src, +- struct tgsi_src_register *tmp_src) ++static type_t ++get_ftype(struct fd3_compile_context *ctx) + { +- static struct tgsi_dst_register tmp_dst; +- if ((src->File == TGSI_FILE_CONSTANT) || +- (src->File == TGSI_FILE_IMMEDIATE)) { +- get_internal_temp(ctx, &tmp_dst, tmp_src); +- create_mov(ctx, &tmp_dst, src); +- src = tmp_src; +- } +- return src; ++ return ctx->so->half_precision ? TYPE_F16 : TYPE_F32; + } + + static type_t +-get_type(struct fd3_compile_context *ctx) ++get_utype(struct fd3_compile_context *ctx) + { +- return ctx->so->half_precision ? TYPE_F16 : TYPE_F32; ++ return ctx->so->half_precision ? TYPE_U16 : TYPE_U32; + } + + static unsigned +@@ -436,7 +447,7 @@ static void + create_mov(struct fd3_compile_context *ctx, struct tgsi_dst_register *dst, + struct tgsi_src_register *src) + { +- type_t type_mov = get_type(ctx); ++ type_t type_mov = get_ftype(ctx); + unsigned i; + + for (i = 0; i < 4; i++) { +@@ -492,7 +503,7 @@ get_dst(struct fd3_compile_context *ctx, struct tgsi_full_instruction *inst) + for (i = 0; i < inst->Instruction.NumSrcRegs; i++) { + struct tgsi_src_register *src = &inst->Src[i].Register; + if ((src->File == dst->File) && (src->Index == dst->Index)) { +- get_internal_temp(ctx, &ctx->tmp_dst, &ctx->tmp_src); ++ ctx->tmp_src = get_internal_temp(ctx, &ctx->tmp_dst); + ctx->tmp_dst.WriteMask = dst->WriteMask; + dst = &ctx->tmp_dst; + break; +@@ -507,7 +518,7 @@ put_dst(struct fd3_compile_context *ctx, struct tgsi_full_instruction *inst, + { + /* if necessary, add mov back into original dst: */ + if (dst != &inst->Dst[0].Register) { +- create_mov(ctx, &inst->Dst[0].Register, &ctx->tmp_src); ++ create_mov(ctx, 
&inst->Dst[0].Register, ctx->tmp_src); + } + } + +@@ -580,7 +591,7 @@ trans_dotp(const struct instr_translater *t, + { + struct ir3_instruction *instr; + struct tgsi_dst_register tmp_dst; +- struct tgsi_src_register tmp_src; ++ struct tgsi_src_register *tmp_src; + struct tgsi_dst_register *dst = &inst->Dst[0].Register; + struct tgsi_src_register *src0 = &inst->Src[0].Register; + struct tgsi_src_register *src1 = &inst->Src[1].Register; +@@ -590,7 +601,7 @@ trans_dotp(const struct instr_translater *t, + unsigned n = t->arg; /* number of components */ + unsigned i; + +- get_internal_temp_repl(ctx, &tmp_dst, &tmp_src); ++ tmp_src = get_internal_temp_repl(ctx, &tmp_dst); + + /* Blob compiler never seems to use a const in src1 position for + * mad.*, although there does seem (according to disassembler +@@ -609,7 +620,7 @@ trans_dotp(const struct instr_translater *t, + * because after that point we no longer need tmp.x: + */ + create_mov(ctx, &tmp_dst, src1); +- src1 = &tmp_src; ++ src1 = tmp_src; + } + + instr = ir3_instr_create(ctx->ir, 2, OPC_MUL_F); +@@ -624,7 +635,7 @@ trans_dotp(const struct instr_translater *t, + add_dst_reg(ctx, instr, &tmp_dst, 0); + add_src_reg(ctx, instr, src0, swiz0[i]); + add_src_reg(ctx, instr, src1, swiz1[i]); +- add_src_reg(ctx, instr, &tmp_src, 0); ++ add_src_reg(ctx, instr, tmp_src, 0); + } + + /* DPH(a,b) = (a.x * b.x) + (a.y * b.y) + (a.z * b.z) + b.w */ +@@ -634,7 +645,7 @@ trans_dotp(const struct instr_translater *t, + instr = ir3_instr_create(ctx->ir, 2, OPC_ADD_F); + add_dst_reg(ctx, instr, &tmp_dst, 0); + add_src_reg(ctx, instr, src1, swiz1[i]); +- add_src_reg(ctx, instr, &tmp_src, 0); ++ add_src_reg(ctx, instr, tmp_src, 0); + + n++; + } +@@ -646,7 +657,7 @@ trans_dotp(const struct instr_translater *t, + ir3_instr_create(ctx->ir, 0, OPC_NOP); + } + +- create_mov(ctx, dst, &tmp_src); ++ create_mov(ctx, dst, tmp_src); + } + + /* LRP(a,b,c) = (a * b) + ((1 - a) * c) */ +@@ -657,11 +668,11 @@ trans_lrp(const struct instr_translater 
*t, + { + struct ir3_instruction *instr; + struct tgsi_dst_register tmp_dst1, tmp_dst2; +- struct tgsi_src_register tmp_src1, tmp_src2; ++ struct tgsi_src_register *tmp_src1, *tmp_src2; + struct tgsi_src_register tmp_const; + +- get_internal_temp(ctx, &tmp_dst1, &tmp_src1); +- get_internal_temp(ctx, &tmp_dst2, &tmp_src2); ++ tmp_src1 = get_internal_temp(ctx, &tmp_dst1); ++ tmp_src2 = get_internal_temp(ctx, &tmp_dst2); + + get_immediate(ctx, &tmp_const, fui(1.0)); + +@@ -680,14 +691,14 @@ trans_lrp(const struct instr_translater *t, + /* tmp2 = tmp2 * c */ + instr = ir3_instr_create(ctx->ir, 2, OPC_MUL_F); + vectorize(ctx, instr, &tmp_dst2, 2, +- &tmp_src2, 0, ++ tmp_src2, 0, + &inst->Src[2].Register, 0); + + /* dst = tmp1 + tmp2 */ + instr = ir3_instr_create(ctx->ir, 2, OPC_ADD_F); + vectorize(ctx, instr, &inst->Dst[0].Register, 2, +- &tmp_src1, 0, +- &tmp_src2, 0); ++ tmp_src1, 0, ++ tmp_src2, 0); + } + + /* FRC(x) = x - FLOOR(x) */ +@@ -698,9 +709,9 @@ trans_frac(const struct instr_translater *t, + { + struct ir3_instruction *instr; + struct tgsi_dst_register tmp_dst; +- struct tgsi_src_register tmp_src; ++ struct tgsi_src_register *tmp_src; + +- get_internal_temp(ctx, &tmp_dst, &tmp_src); ++ tmp_src = get_internal_temp(ctx, &tmp_dst); + + /* tmp = FLOOR(x) */ + instr = ir3_instr_create(ctx->ir, 2, OPC_FLOOR_F); +@@ -711,7 +722,7 @@ trans_frac(const struct instr_translater *t, + instr = ir3_instr_create(ctx->ir, 2, OPC_ADD_F); + vectorize(ctx, instr, &inst->Dst[0].Register, 2, + &inst->Src[0].Register, 0, +- &tmp_src, IR3_REG_NEGATE); ++ tmp_src, IR3_REG_NEGATE); + } + + /* POW(a,b) = EXP2(b * LOG2(a)) */ +@@ -723,12 +734,12 @@ trans_pow(const struct instr_translater *t, + struct ir3_instruction *instr; + struct ir3_register *r; + struct tgsi_dst_register tmp_dst; +- struct tgsi_src_register tmp_src; ++ struct tgsi_src_register *tmp_src; + struct tgsi_dst_register *dst = &inst->Dst[0].Register; + struct tgsi_src_register *src0 = &inst->Src[0].Register; + struct 
tgsi_src_register *src1 = &inst->Src[1].Register; + +- get_internal_temp_repl(ctx, &tmp_dst, &tmp_src); ++ tmp_src = get_internal_temp_repl(ctx, &tmp_dst); + + /* log2 Rtmp, Rsrc0 */ + ir3_instr_create(ctx->ir, 0, OPC_NOP)->repeat = 5; +@@ -740,7 +751,7 @@ trans_pow(const struct instr_translater *t, + /* mul.f Rtmp, Rtmp, Rsrc1 */ + instr = ir3_instr_create(ctx->ir, 2, OPC_MUL_F); + add_dst_reg(ctx, instr, &tmp_dst, 0); +- add_src_reg(ctx, instr, &tmp_src, 0); ++ add_src_reg(ctx, instr, tmp_src, 0); + add_src_reg(ctx, instr, src1, src1->SwizzleX); + + /* blob compiler seems to ensure there are at least 6 instructions +@@ -752,10 +763,10 @@ trans_pow(const struct instr_translater *t, + /* exp2 Rdst, Rtmp */ + instr = ir3_instr_create(ctx->ir, 4, OPC_EXP2); + r = add_dst_reg(ctx, instr, &tmp_dst, 0); +- add_src_reg(ctx, instr, &tmp_src, 0); ++ add_src_reg(ctx, instr, tmp_src, 0); + regmask_set(ctx->needs_ss, r); + +- create_mov(ctx, dst, &tmp_src); ++ create_mov(ctx, dst, tmp_src); + } + + /* texture fetch/sample instructions: */ +@@ -766,8 +777,6 @@ trans_samp(const struct instr_translater *t, + { + struct ir3_register *r; + struct ir3_instruction *instr; +- struct tgsi_dst_register tmp_dst; +- struct tgsi_src_register tmp_src; + struct tgsi_src_register *coord = &inst->Src[0].Register; + struct tgsi_src_register *samp = &inst->Src[1].Register; + unsigned tex = inst->Texture.Texture; +@@ -802,10 +811,13 @@ trans_samp(const struct instr_translater *t, + */ + for (i = 1; (i < 4) && (order[i] >= 0); i++) { + if (src_swiz(coord, i) != (src_swiz(coord, 0) + order[i])) { +- type_t type_mov = get_type(ctx); ++ struct tgsi_dst_register tmp_dst; ++ struct tgsi_src_register *tmp_src; ++ ++ type_t type_mov = get_ftype(ctx); + + /* need to move things around: */ +- get_internal_temp(ctx, &tmp_dst, &tmp_src); ++ tmp_src = get_internal_temp(ctx, &tmp_dst); + + for (j = 0; (j < 4) && (order[j] >= 0); j++) { + instr = ir3_instr_create(ctx->ir, 1, 0); +@@ -816,7 +828,7 @@ 
trans_samp(const struct instr_translater *t, + src_swiz(coord, order[j])); + } + +- coord = &tmp_src; ++ coord = tmp_src; + + if (j < 4) + ir3_instr_create(ctx->ir, 0, OPC_NOP)->repeat = 4 - j - 1; +@@ -826,7 +838,7 @@ trans_samp(const struct instr_translater *t, + } + + instr = ir3_instr_create(ctx->ir, 5, t->opc); +- instr->cat5.type = get_type(ctx); ++ instr->cat5.type = get_ftype(ctx); + instr->cat5.samp = samp->Index; + instr->cat5.tex = samp->Index; + instr->flags |= flags; +@@ -847,12 +859,12 @@ trans_cmp(const struct instr_translater *t, + { + struct ir3_instruction *instr; + struct tgsi_dst_register tmp_dst; +- struct tgsi_src_register tmp_src; ++ struct tgsi_src_register *tmp_src; + struct tgsi_src_register constval; + /* final instruction uses original src1 and src2, so we need get_dst() */ + struct tgsi_dst_register *dst = get_dst(ctx, inst); + +- get_internal_temp(ctx, &tmp_dst, &tmp_src); ++ tmp_src = get_internal_temp(ctx, &tmp_dst); + + /* cmps.f.ge tmp, src0, 0.0 */ + instr = ir3_instr_create(ctx->ir, 2, OPC_CMPS_F); +@@ -866,7 +878,7 @@ trans_cmp(const struct instr_translater *t, + instr = ir3_instr_create(ctx->ir, 2, OPC_ADD_S); + instr->repeat = 3; + add_dst_reg(ctx, instr, &tmp_dst, 0); +- add_src_reg(ctx, instr, &tmp_src, 0)->flags |= IR3_REG_R; ++ add_src_reg(ctx, instr, tmp_src, 0)->flags |= IR3_REG_R; + ir3_reg_create(instr, 0, IR3_REG_IMMED)->iim_val = -1; + + /* sel.{f32,f16} dst, src2, tmp, src1 */ +@@ -874,7 +886,7 @@ trans_cmp(const struct instr_translater *t, + OPC_SEL_F16 : OPC_SEL_F32); + vectorize(ctx, instr, dst, 3, + &inst->Src[2].Register, 0, +- &tmp_src, 0, ++ tmp_src, 0, + &inst->Src[1].Register, 0); + + put_dst(ctx, inst, dst); +@@ -1066,7 +1078,6 @@ instr_cat3(const struct instr_translater *t, + { + struct tgsi_dst_register *dst = get_dst(ctx, inst); + struct tgsi_src_register *src1; +- struct tgsi_src_register tmp_src; + struct ir3_instruction *instr; + + /* Blob compiler never seems to use a const in src1 position.. 
+@@ -1075,7 +1086,7 @@ instr_cat3(const struct instr_translater *t, + * const. Not sure if this is a hw bug, or simply that the + * disassembler lies. + */ +- src1 = get_unconst(ctx, &inst->Src[1].Register, &tmp_src); ++ src1 = get_unconst(ctx, &inst->Src[1].Register); + + instr = ir3_instr_create(ctx->ir, 3, + ctx->so->half_precision ? t->hopc : t->opc); +@@ -1093,11 +1104,10 @@ instr_cat4(const struct instr_translater *t, + { + struct tgsi_dst_register *dst = get_dst(ctx, inst); + struct tgsi_src_register *src; +- struct tgsi_src_register tmp_src; + struct ir3_instruction *instr; + + /* seems like blob compiler avoids const as src.. */ +- src = get_unconst(ctx, &inst->Src[0].Register, &tmp_src); ++ src = get_unconst(ctx, &inst->Src[0].Register); + + ir3_instr_create(ctx->ir, 0, OPC_NOP)->repeat = 5; + instr = ir3_instr_create(ctx->ir, 4, t->opc); +-- +1.8.4.2 + diff --git a/0013-freedreno-a3xx-compiler-fix-SGT-SLT-etc.patch b/0013-freedreno-a3xx-compiler-fix-SGT-SLT-etc.patch new file mode 100644 index 0000000..621070a --- /dev/null +++ b/0013-freedreno-a3xx-compiler-fix-SGT-SLT-etc.patch @@ -0,0 +1,231 @@ +From c20aa295ec0e1f7b70986a32ef2d74e5097cf640 Mon Sep 17 00:00:00 2001 +From: Rob Clark +Date: Sat, 24 Aug 2013 13:02:53 -0400 +Subject: [PATCH 13/17] freedreno/a3xx/compiler: fix SGT/SLT/etc + +The cmps.f.* instruction doesn't actually seem to give a float 1.0 or +0.0 output. It either needs a cov.u16f16 or add.s + sel.f16. This +makes SGT/SLT/etc more similar to CMP, so handle them in trans_cmp(). + +This fixes a bunch of piglit tests. 
+ +Signed-off-by: Rob Clark +--- + src/gallium/drivers/freedreno/a3xx/fd3_compiler.c | 154 ++++++++++++++++++---- + 1 file changed, 125 insertions(+), 29 deletions(-) + +diff --git a/src/gallium/drivers/freedreno/a3xx/fd3_compiler.c b/src/gallium/drivers/freedreno/a3xx/fd3_compiler.c +index b5cdda8..477053b 100644 +--- a/src/gallium/drivers/freedreno/a3xx/fd3_compiler.c ++++ b/src/gallium/drivers/freedreno/a3xx/fd3_compiler.c +@@ -851,7 +851,39 @@ trans_samp(const struct instr_translater *t, + regmask_set(ctx->needs_sy, r); + } + +-/* CMP(a,b,c) = (a < 0) ? b : c */ ++/* ++ * SEQ(a,b) = (a == b) ? 1.0 : 0.0 ++ * cmps.f.eq tmp0, b, a ++ * cov.u16f16 dst, tmp0 ++ * ++ * SNE(a,b) = (a != b) ? 1.0 : 0.0 ++ * cmps.f.eq tmp0, b, a ++ * add.s tmp0, tmp0, -1 ++ * sel.f16 dst, {0.0}, tmp0, {1.0} ++ * ++ * SGE(a,b) = (a >= b) ? 1.0 : 0.0 ++ * cmps.f.ge tmp0, a, b ++ * cov.u16f16 dst, tmp0 ++ * ++ * SLE(a,b) = (a <= b) ? 1.0 : 0.0 ++ * cmps.f.ge tmp0, b, a ++ * cov.u16f16 dst, tmp0 ++ * ++ * SGT(a,b) = (a > b) ? 1.0 : 0.0 ++ * cmps.f.ge tmp0, b, a ++ * add.s tmp0, tmp0, -1 ++ * sel.f16 dst, {0.0}, tmp0, {1.0} ++ * ++ * SLT(a,b) = (a < b) ? 1.0 : 0.0 ++ * cmps.f.ge tmp0, a, b ++ * add.s tmp0, tmp0, -1 ++ * sel.f16 dst, {0.0}, tmp0, {1.0} ++ * ++ * CMP(a,b,c) = (a < 0.0) ? 
b : c ++ * cmps.f.ge tmp0, a, {0.0} ++ * add.s tmp0, tmp0, -1 ++ * sel.f16 dst, c, tmp0, b ++ */ + static void + trans_cmp(const struct instr_translater *t, + struct fd3_compile_context *ctx, +@@ -860,34 +892,97 @@ trans_cmp(const struct instr_translater *t, + struct ir3_instruction *instr; + struct tgsi_dst_register tmp_dst; + struct tgsi_src_register *tmp_src; +- struct tgsi_src_register constval; +- /* final instruction uses original src1 and src2, so we need get_dst() */ ++ struct tgsi_src_register constval0, constval1; ++ /* final instruction for CMP() uses orig src1 and src2: */ + struct tgsi_dst_register *dst = get_dst(ctx, inst); ++ struct tgsi_src_register *a0, *a1; ++ unsigned condition; + + tmp_src = get_internal_temp(ctx, &tmp_dst); + +- /* cmps.f.ge tmp, src0, 0.0 */ ++ switch (t->tgsi_opc) { ++ case TGSI_OPCODE_SEQ: ++ case TGSI_OPCODE_SNE: ++ a0 = &inst->Src[1].Register; /* b */ ++ a1 = &inst->Src[0].Register; /* a */ ++ condition = IR3_COND_EQ; ++ break; ++ case TGSI_OPCODE_SGE: ++ case TGSI_OPCODE_SLT: ++ a0 = &inst->Src[0].Register; /* a */ ++ a1 = &inst->Src[1].Register; /* b */ ++ condition = IR3_COND_GE; ++ break; ++ case TGSI_OPCODE_SLE: ++ case TGSI_OPCODE_SGT: ++ a0 = &inst->Src[1].Register; /* b */ ++ a1 = &inst->Src[0].Register; /* a */ ++ condition = IR3_COND_GE; ++ break; ++ case TGSI_OPCODE_CMP: ++ get_immediate(ctx, &constval0, fui(0.0)); ++ a0 = &inst->Src[0].Register; /* a */ ++ a1 = &constval0; /* {0.0} */ ++ condition = IR3_COND_GE; ++ break; ++ default: ++ compile_assert(ctx, 0); ++ return; ++ } ++ ++ /* NOTE: seems blob compiler will move a const to a gpr if both ++ * src args to cmps.f are const. Need to check if this applies ++ * to other instructions.. 
++ */ ++ if (is_const(a0) && is_const(a1)) ++ a0 = get_unconst(ctx, a0); ++ ++ /* cmps.f.ge tmp, a0, a1 */ + instr = ir3_instr_create(ctx->ir, 2, OPC_CMPS_F); +- instr->cat2.condition = IR3_COND_GE; +- get_immediate(ctx, &constval, fui(0.0)); +- vectorize(ctx, instr, &tmp_dst, 2, +- &inst->Src[0].Register, 0, +- &constval, 0); ++ instr->cat2.condition = condition; ++ vectorize(ctx, instr, &tmp_dst, 2, a0, 0, a1, 0); + +- /* add.s tmp, tmp, -1 */ +- instr = ir3_instr_create(ctx->ir, 2, OPC_ADD_S); +- instr->repeat = 3; +- add_dst_reg(ctx, instr, &tmp_dst, 0); +- add_src_reg(ctx, instr, tmp_src, 0)->flags |= IR3_REG_R; +- ir3_reg_create(instr, 0, IR3_REG_IMMED)->iim_val = -1; ++ switch (t->tgsi_opc) { ++ case TGSI_OPCODE_SEQ: ++ case TGSI_OPCODE_SGE: ++ case TGSI_OPCODE_SLE: ++ /* cov.u16f16 dst, tmp0 */ ++ instr = ir3_instr_create(ctx->ir, 1, 0); ++ instr->cat1.src_type = get_utype(ctx); ++ instr->cat1.dst_type = get_ftype(ctx); ++ vectorize(ctx, instr, dst, 1, tmp_src, 0); ++ break; ++ case TGSI_OPCODE_SNE: ++ case TGSI_OPCODE_SGT: ++ case TGSI_OPCODE_SLT: ++ case TGSI_OPCODE_CMP: ++ /* add.s tmp, tmp, -1 */ ++ instr = ir3_instr_create(ctx->ir, 2, OPC_ADD_S); ++ instr->repeat = 3; ++ add_dst_reg(ctx, instr, &tmp_dst, 0); ++ add_src_reg(ctx, instr, tmp_src, 0)->flags |= IR3_REG_R; ++ ir3_reg_create(instr, 0, IR3_REG_IMMED)->iim_val = -1; ++ ++ if (t->tgsi_opc == TGSI_OPCODE_CMP) { ++ /* sel.{f32,f16} dst, src2, tmp, src1 */ ++ instr = ir3_instr_create(ctx->ir, 3, ++ ctx->so->half_precision ? OPC_SEL_F16 : OPC_SEL_F32); ++ vectorize(ctx, instr, dst, 3, ++ &inst->Src[2].Register, 0, ++ tmp_src, 0, ++ &inst->Src[1].Register, 0); ++ } else { ++ get_immediate(ctx, &constval0, fui(0.0)); ++ get_immediate(ctx, &constval1, fui(1.0)); ++ /* sel.{f32,f16} dst, {0.0}, tmp0, {1.0} */ ++ instr = ir3_instr_create(ctx->ir, 3, ++ ctx->so->half_precision ? 
OPC_SEL_F16 : OPC_SEL_F32); ++ vectorize(ctx, instr, dst, 3, ++ &constval0, 0, tmp_src, 0, &constval1, 0); ++ } + +- /* sel.{f32,f16} dst, src2, tmp, src1 */ +- instr = ir3_instr_create(ctx->ir, 3, ctx->so->half_precision ? +- OPC_SEL_F16 : OPC_SEL_F32); +- vectorize(ctx, instr, dst, 3, +- &inst->Src[2].Register, 0, +- tmp_src, 0, +- &inst->Src[1].Register, 0); ++ break; ++ } + + put_dst(ctx, inst, dst); + } +@@ -948,8 +1043,8 @@ trans_if(const struct instr_translater *t, + + instr = ir3_instr_create(ctx->ir, 2, OPC_CMPS_F); + ir3_reg_create(instr, regid(REG_P0, 0), 0); +- add_src_reg(ctx, instr, &constval, constval.SwizzleX); + add_src_reg(ctx, instr, src, src->SwizzleX); ++ add_src_reg(ctx, instr, &constval, constval.SwizzleX); + instr->cat2.condition = IR3_COND_EQ; + + instr = ir3_instr_create(ctx->ir, 0, OPC_BR); +@@ -1033,10 +1128,6 @@ instr_cat2(const struct instr_translater *t, + instr = ir3_instr_create(ctx->ir, 2, t->opc); + + switch (t->tgsi_opc) { +- case TGSI_OPCODE_SLT: +- case TGSI_OPCODE_SGE: +- instr->cat2.condition = t->arg; +- break; + case TGSI_OPCODE_ABS: + src0_flags = IR3_REG_ABS; + break; +@@ -1135,12 +1226,11 @@ static const struct instr_translater translaters[TGSI_OPCODE_LAST] = { + INSTR(DPH, trans_dotp, .arg = 3), /* almost like DP3 */ + INSTR(MIN, instr_cat2, .opc = OPC_MIN_F), + INSTR(MAX, instr_cat2, .opc = OPC_MAX_F), +- INSTR(SLT, instr_cat2, .opc = OPC_CMPS_F, .arg = IR3_COND_LT), +- INSTR(SGE, instr_cat2, .opc = OPC_CMPS_F, .arg = IR3_COND_GE), + INSTR(MAD, instr_cat3, .opc = OPC_MAD_F32, .hopc = OPC_MAD_F16), + INSTR(LRP, trans_lrp), + INSTR(FRC, trans_frac), + INSTR(FLR, instr_cat2, .opc = OPC_FLOOR_F), ++ INSTR(ARL, instr_cat2, .opc = OPC_FLOOR_F), + INSTR(EX2, instr_cat4, .opc = OPC_EXP2), + INSTR(LG2, instr_cat4, .opc = OPC_LOG2), + INSTR(POW, trans_pow), +@@ -1149,6 +1239,12 @@ static const struct instr_translater translaters[TGSI_OPCODE_LAST] = { + INSTR(SIN, instr_cat4, .opc = OPC_COS), + INSTR(TEX, trans_samp, .opc = 
OPC_SAM, .arg = TGSI_OPCODE_TEX), + INSTR(TXP, trans_samp, .opc = OPC_SAM, .arg = TGSI_OPCODE_TXP), ++ INSTR(SGT, trans_cmp), ++ INSTR(SLT, trans_cmp), ++ INSTR(SGE, trans_cmp), ++ INSTR(SLE, trans_cmp), ++ INSTR(SNE, trans_cmp), ++ INSTR(SEQ, trans_cmp), + INSTR(CMP, trans_cmp), + INSTR(IF, trans_if), + INSTR(ELSE, trans_else), +-- +1.8.4.2 + diff --git a/0014-freedreno-a3xx-don-t-leak-so-much.patch b/0014-freedreno-a3xx-don-t-leak-so-much.patch new file mode 100644 index 0000000..0ac9110 --- /dev/null +++ b/0014-freedreno-a3xx-don-t-leak-so-much.patch @@ -0,0 +1,36 @@ +From 0b2c5119cb772751edb3c42c9c0545443e26fd7f Mon Sep 17 00:00:00 2001 +From: Rob Clark +Date: Mon, 17 Jun 2013 20:11:54 -0400 +Subject: [PATCH 14/17] freedreno/a3xx: don't leak so much + +Signed-off-by: Rob Clark +--- + src/gallium/drivers/freedreno/a3xx/fd3_context.c | 11 +++++++++++ + 1 file changed, 11 insertions(+) + +diff --git a/src/gallium/drivers/freedreno/a3xx/fd3_context.c b/src/gallium/drivers/freedreno/a3xx/fd3_context.c +index 3ae9b29..589aeed 100644 +--- a/src/gallium/drivers/freedreno/a3xx/fd3_context.c ++++ b/src/gallium/drivers/freedreno/a3xx/fd3_context.c +@@ -40,7 +40,18 @@ + static void + fd3_context_destroy(struct pipe_context *pctx) + { ++ struct fd3_context *fd3_ctx = fd3_context(fd_context(pctx)); ++ + fd3_prog_fini(pctx); ++ ++ fd_bo_del(fd3_ctx->vs_pvt_mem); ++ fd_bo_del(fd3_ctx->fs_pvt_mem); ++ fd_bo_del(fd3_ctx->vsc_size_mem); ++ fd_bo_del(fd3_ctx->vsc_pipe_mem); ++ ++ pipe_resource_reference(&fd3_ctx->solid_vbuf, NULL); ++ pipe_resource_reference(&fd3_ctx->blit_texcoord_vbuf, NULL); ++ + fd_context_destroy(pctx); + } + +-- +1.8.4.2 + diff --git a/0015-freedreno-a3xx-compiler-better-const-handling.patch b/0015-freedreno-a3xx-compiler-better-const-handling.patch new file mode 100644 index 0000000..221e083 --- /dev/null +++ b/0015-freedreno-a3xx-compiler-better-const-handling.patch @@ -0,0 +1,376 @@ +From f1998c8aa7d82006f9ef7e6710a0f68f30bfc109 Mon Sep 17 00:00:00 2001 
+From: Rob Clark +Date: Sat, 24 Aug 2013 17:30:50 -0400 +Subject: [PATCH 15/17] freedreno/a3xx/compiler: better const handling + +Seems like most/all instructions have some restrictions about const src +registers. It seems like the 2 src (cat2) instructions can take at most +one const, and the 3 src (cat3) instructions can take at most one const +in the first 2 arguments. And so on. Handle this properly now. + +Signed-off-by: Rob Clark +--- + src/gallium/drivers/freedreno/a3xx/fd3_compiler.c | 211 +++++++++++++--------- + 1 file changed, 121 insertions(+), 90 deletions(-) + +diff --git a/src/gallium/drivers/freedreno/a3xx/fd3_compiler.c b/src/gallium/drivers/freedreno/a3xx/fd3_compiler.c +index 477053b..dcdd2d9 100644 +--- a/src/gallium/drivers/freedreno/a3xx/fd3_compiler.c ++++ b/src/gallium/drivers/freedreno/a3xx/fd3_compiler.c +@@ -355,20 +355,47 @@ is_const(struct tgsi_src_register *src) + (src->File == TGSI_FILE_IMMEDIATE); + } + ++static type_t ++get_ftype(struct fd3_compile_context *ctx) ++{ ++ return ctx->so->half_precision ? TYPE_F16 : TYPE_F32; ++} ++ ++static type_t ++get_utype(struct fd3_compile_context *ctx) ++{ ++ return ctx->so->half_precision ? 
TYPE_U16 : TYPE_U32; ++} ++ ++static unsigned ++src_swiz(struct tgsi_src_register *src, int chan) ++{ ++ switch (chan) { ++ case 0: return src->SwizzleX; ++ case 1: return src->SwizzleY; ++ case 2: return src->SwizzleZ; ++ case 3: return src->SwizzleW; ++ } ++ assert(0); ++ return 0; ++} ++ + /* for instructions that cannot take a const register as src, if needed + * generate a move to temporary gpr: + */ + static struct tgsi_src_register * + get_unconst(struct fd3_compile_context *ctx, struct tgsi_src_register *src) + { +- if (is_const(src)) { +- static struct tgsi_dst_register tmp_dst; +- struct tgsi_src_register *tmp_src = +- get_internal_temp(ctx, &tmp_dst); +- create_mov(ctx, &tmp_dst, src); +- src = tmp_src; +- } +- return src; ++ struct tgsi_dst_register tmp_dst; ++ struct tgsi_src_register *tmp_src; ++ ++ compile_assert(ctx, is_const(src)); ++ ++ tmp_src = get_internal_temp(ctx, &tmp_dst); ++ ++ create_mov(ctx, &tmp_dst, src); ++ ++ return tmp_src; + } + + static void +@@ -418,31 +445,6 @@ get_immediate(struct fd3_compile_context *ctx, + reg->SwizzleW = swiz2tgsi[swiz]; + } + +-static type_t +-get_ftype(struct fd3_compile_context *ctx) +-{ +- return ctx->so->half_precision ? TYPE_F16 : TYPE_F32; +-} +- +-static type_t +-get_utype(struct fd3_compile_context *ctx) +-{ +- return ctx->so->half_precision ? 
TYPE_U16 : TYPE_U32; +-} +- +-static unsigned +-src_swiz(struct tgsi_src_register *src, int chan) +-{ +- switch (chan) { +- case 0: return src->SwizzleX; +- case 1: return src->SwizzleY; +- case 2: return src->SwizzleZ; +- case 3: return src->SwizzleW; +- } +- assert(0); +- return 0; +-} +- + static void + create_mov(struct fd3_compile_context *ctx, struct tgsi_dst_register *dst, + struct tgsi_src_register *src) +@@ -463,7 +465,6 @@ create_mov(struct fd3_compile_context *ctx, struct tgsi_dst_register *dst, + ir3_instr_create(ctx->ir, 0, OPC_NOP); + } + } +- + } + + static void +@@ -584,6 +585,15 @@ vectorize(struct fd3_compile_context *ctx, struct ir3_instruction *instr, + * native instructions: + */ + ++static inline void ++get_swiz(unsigned *swiz, struct tgsi_src_register *src) ++{ ++ swiz[0] = src->SwizzleX; ++ swiz[1] = src->SwizzleY; ++ swiz[2] = src->SwizzleZ; ++ swiz[3] = src->SwizzleW; ++} ++ + static void + trans_dotp(const struct instr_translater *t, + struct fd3_compile_context *ctx, +@@ -595,34 +605,31 @@ trans_dotp(const struct instr_translater *t, + struct tgsi_dst_register *dst = &inst->Dst[0].Register; + struct tgsi_src_register *src0 = &inst->Src[0].Register; + struct tgsi_src_register *src1 = &inst->Src[1].Register; +- unsigned swiz0[] = { src0->SwizzleX, src0->SwizzleY, src0->SwizzleZ, src0->SwizzleW }; +- unsigned swiz1[] = { src1->SwizzleX, src1->SwizzleY, src1->SwizzleZ, src1->SwizzleW }; ++ unsigned swiz0[4]; ++ unsigned swiz1[4]; + opc_t opc_mad = ctx->so->half_precision ? OPC_MAD_F16 : OPC_MAD_F32; + unsigned n = t->arg; /* number of components */ +- unsigned i; ++ unsigned i, swapped = 0; + + tmp_src = get_internal_temp_repl(ctx, &tmp_dst); + +- /* Blob compiler never seems to use a const in src1 position for +- * mad.*, although there does seem (according to disassembler +- * hidden in libllvm-a3xx.so) to be a bit to indicate that src1 +- * is a const. Not sure if this is a hw bug, or simply that the +- * disassembler lies. 
++ /* in particular, can't handle const for src1 for cat3/mad: + */ + if (is_const(src1)) { +- +- /* the mov to tmp unswizzles src1, so now we have tmp.xyzw: +- */ +- for (i = 0; i < 4; i++) +- swiz1[i] = i; +- +- /* the first mul.f will clobber tmp.x, but that is ok +- * because after that point we no longer need tmp.x: +- */ +- create_mov(ctx, &tmp_dst, src1); +- src1 = tmp_src; ++ if (!is_const(src0)) { ++ struct tgsi_src_register *tmp; ++ tmp = src0; ++ src0 = src1; ++ src1 = tmp; ++ swapped = 1; ++ } else { ++ src0 = get_unconst(ctx, src0); ++ } + } + ++ get_swiz(swiz0, src0); ++ get_swiz(swiz1, src1); ++ + instr = ir3_instr_create(ctx->ir, 2, OPC_MUL_F); + add_dst_reg(ctx, instr, &tmp_dst, 0); + add_src_reg(ctx, instr, src0, swiz0[0]); +@@ -640,22 +647,20 @@ trans_dotp(const struct instr_translater *t, + + /* DPH(a,b) = (a.x * b.x) + (a.y * b.y) + (a.z * b.z) + b.w */ + if (t->tgsi_opc == TGSI_OPCODE_DPH) { +- ir3_instr_create(ctx->ir, 0, OPC_NOP); ++ ir3_instr_create(ctx->ir, 0, OPC_NOP)->repeat = 1; + + instr = ir3_instr_create(ctx->ir, 2, OPC_ADD_F); + add_dst_reg(ctx, instr, &tmp_dst, 0); +- add_src_reg(ctx, instr, src1, swiz1[i]); ++ if (swapped) ++ add_src_reg(ctx, instr, src0, swiz0[i]); ++ else ++ add_src_reg(ctx, instr, src1, swiz1[i]); + add_src_reg(ctx, instr, tmp_src, 0); + + n++; + } + +- ir3_instr_create(ctx->ir, 0, OPC_NOP); +- +- /* pad out to multiple of 4 scalar instructions: */ +- for (i = 2 * n; i % 4; i++) { +- ir3_instr_create(ctx->ir, 0, OPC_NOP); +- } ++ ir3_instr_create(ctx->ir, 0, OPC_NOP)->repeat = 2; + + create_mov(ctx, dst, tmp_src); + } +@@ -670,6 +675,11 @@ trans_lrp(const struct instr_translater *t, + struct tgsi_dst_register tmp_dst1, tmp_dst2; + struct tgsi_src_register *tmp_src1, *tmp_src2; + struct tgsi_src_register tmp_const; ++ struct tgsi_src_register *src0 = &inst->Src[0].Register; ++ struct tgsi_src_register *src1 = &inst->Src[1].Register; ++ ++ if (is_const(src0) && is_const(src1)) ++ src0 = get_unconst(ctx, src0); + 
+ tmp_src1 = get_internal_temp(ctx, &tmp_dst1); + tmp_src2 = get_internal_temp(ctx, &tmp_dst2); +@@ -678,15 +688,12 @@ trans_lrp(const struct instr_translater *t, + + /* tmp1 = (a * b) */ + instr = ir3_instr_create(ctx->ir, 2, OPC_MUL_F); +- vectorize(ctx, instr, &tmp_dst1, 2, +- &inst->Src[0].Register, 0, +- &inst->Src[1].Register, 0); ++ vectorize(ctx, instr, &tmp_dst1, 2, src0, 0, src1, 0); + + /* tmp2 = (1 - a) */ + instr = ir3_instr_create(ctx->ir, 2, OPC_ADD_F); +- vectorize(ctx, instr, &tmp_dst2, 2, +- &tmp_const, 0, +- &inst->Src[0].Register, IR3_REG_NEGATE); ++ vectorize(ctx, instr, &tmp_dst2, 2, &tmp_const, 0, ++ src0, IR3_REG_NEGATE); + + /* tmp2 = tmp2 * c */ + instr = ir3_instr_create(ctx->ir, 2, OPC_MUL_F); +@@ -930,10 +937,6 @@ trans_cmp(const struct instr_translater *t, + return; + } + +- /* NOTE: seems blob compiler will move a const to a gpr if both +- * src args to cmps.f are const. Need to check if this applies +- * to other instructions.. +- */ + if (is_const(a0) && is_const(a1)) + a0 = get_unconst(ctx, a0); + +@@ -1041,6 +1044,9 @@ trans_if(const struct instr_translater *t, + + get_immediate(ctx, &constval, fui(0.0)); + ++ if (is_const(src)) ++ src = get_unconst(ctx, src); ++ + instr = ir3_instr_create(ctx->ir, 2, OPC_CMPS_F); + ir3_reg_create(instr, regid(REG_P0, 0), 0); + add_src_reg(ctx, instr, src, src->SwizzleX); +@@ -1122,11 +1128,11 @@ instr_cat2(const struct instr_translater *t, + struct tgsi_full_instruction *inst) + { + struct tgsi_dst_register *dst = get_dst(ctx, inst); ++ struct tgsi_src_register *src0 = &inst->Src[0].Register; ++ struct tgsi_src_register *src1 = &inst->Src[1].Register; + struct ir3_instruction *instr; + unsigned src0_flags = 0; + +- instr = ir3_instr_create(ctx->ir, 2, t->opc); +- + switch (t->tgsi_opc) { + case TGSI_OPCODE_ABS: + src0_flags = IR3_REG_ABS; +@@ -1149,41 +1155,65 @@ instr_cat2(const struct instr_translater *t, + case OPC_SETRM: + case OPC_CBITS_B: + /* these only have one src reg */ +- 
vectorize(ctx, instr, dst, 1, +- &inst->Src[0].Register, src0_flags); ++ instr = ir3_instr_create(ctx->ir, 2, t->opc); ++ vectorize(ctx, instr, dst, 1, src0, src0_flags); + break; + default: +- vectorize(ctx, instr, dst, 2, +- &inst->Src[0].Register, src0_flags, +- &inst->Src[1].Register, 0); ++ if (is_const(src0) && is_const(src1)) ++ src0 = get_unconst(ctx, src0); ++ ++ instr = ir3_instr_create(ctx->ir, 2, t->opc); ++ vectorize(ctx, instr, dst, 2, src0, src0_flags, src1, 0); + break; + } + + put_dst(ctx, inst, dst); + } + ++static bool is_mad(opc_t opc) ++{ ++ switch (opc) { ++ case OPC_MAD_U16: ++ case OPC_MADSH_U16: ++ case OPC_MAD_S16: ++ case OPC_MADSH_M16: ++ case OPC_MAD_U24: ++ case OPC_MAD_S24: ++ case OPC_MAD_F16: ++ case OPC_MAD_F32: ++ return true; ++ default: ++ return false; ++ } ++} ++ + static void + instr_cat3(const struct instr_translater *t, + struct fd3_compile_context *ctx, + struct tgsi_full_instruction *inst) + { + struct tgsi_dst_register *dst = get_dst(ctx, inst); +- struct tgsi_src_register *src1; ++ struct tgsi_src_register *src0 = &inst->Src[0].Register; ++ struct tgsi_src_register *src1 = &inst->Src[1].Register; + struct ir3_instruction *instr; + +- /* Blob compiler never seems to use a const in src1 position.. +- * although there does seem (according to disassembler hidden +- * in libllvm-a3xx.so) to be a bit to indicate that src1 is a +- * const. Not sure if this is a hw bug, or simply that the +- * disassembler lies. ++ /* in particular, can't handle const for src1 for cat3.. ++ * for mad, we can swap first two src's if needed: + */ +- src1 = get_unconst(ctx, &inst->Src[1].Register); ++ if (is_const(src1)) { ++ if (is_mad(t->opc) && !is_const(src0)) { ++ struct tgsi_src_register *tmp; ++ tmp = src0; ++ src0 = src1; ++ src1 = tmp; ++ } else { ++ src0 = get_unconst(ctx, src0); ++ } ++ } + + instr = ir3_instr_create(ctx->ir, 3, + ctx->so->half_precision ? 
t->hopc : t->opc); +- vectorize(ctx, instr, dst, 3, +- &inst->Src[0].Register, 0, +- src1, 0, ++ vectorize(ctx, instr, dst, 3, src0, 0, src1, 0, + &inst->Src[2].Register, 0); + put_dst(ctx, inst, dst); + } +@@ -1194,11 +1224,12 @@ instr_cat4(const struct instr_translater *t, + struct tgsi_full_instruction *inst) + { + struct tgsi_dst_register *dst = get_dst(ctx, inst); +- struct tgsi_src_register *src; ++ struct tgsi_src_register *src = &inst->Src[0].Register; + struct ir3_instruction *instr; + + /* seems like blob compiler avoids const as src.. */ +- src = get_unconst(ctx, &inst->Src[0].Register); ++ if (is_const(src)) ++ src = get_unconst(ctx, src); + + ir3_instr_create(ctx->ir, 0, OPC_NOP)->repeat = 5; + instr = ir3_instr_create(ctx->ir, 4, t->opc); +-- +1.8.4.2 + diff --git a/0016-freedreno-a3xx-compiler-handle-sync-flags-better.patch b/0016-freedreno-a3xx-compiler-handle-sync-flags-better.patch new file mode 100644 index 0000000..279b118 --- /dev/null +++ b/0016-freedreno-a3xx-compiler-handle-sync-flags-better.patch @@ -0,0 +1,128 @@ +From 4f0be333e7ee93fbb006c5570a594e49b4441731 Mon Sep 17 00:00:00 2001 +From: Rob Clark +Date: Tue, 27 Aug 2013 19:24:53 -0400 +Subject: [PATCH 16/17] freedreno/a3xx/compiler: handle sync flags better + +We need to set the flag on all the .xyzw components that are written by +the instruction, not just on .x. Otherwise a later use of rN.y (for +example) will not trigger the appropriate sync bit to be set. 
+ +Signed-off-by: Rob Clark +--- + src/gallium/drivers/freedreno/a3xx/fd3_compiler.c | 50 +++++++++++++++-------- + 1 file changed, 34 insertions(+), 16 deletions(-) + +diff --git a/src/gallium/drivers/freedreno/a3xx/fd3_compiler.c b/src/gallium/drivers/freedreno/a3xx/fd3_compiler.c +index dcdd2d9..5115411 100644 +--- a/src/gallium/drivers/freedreno/a3xx/fd3_compiler.c ++++ b/src/gallium/drivers/freedreno/a3xx/fd3_compiler.c +@@ -62,10 +62,16 @@ static unsigned regmask_idx(struct ir3_register *reg) + return num; + } + +-static void regmask_set(regmask_t regmask, struct ir3_register *reg) ++static void regmask_set(regmask_t regmask, struct ir3_register *reg, ++ unsigned wrmask) + { +- unsigned idx = regmask_idx(reg); +- regmask[idx / 8] |= 1 << (idx % 8); ++ unsigned i; ++ for (i = 0; i < 4; i++) { ++ if (wrmask & (1 << i)) { ++ unsigned idx = regmask_idx(reg) + i; ++ regmask[idx / 8] |= 1 << (idx % 8); ++ } ++ } + } + + static unsigned regmask_get(regmask_t regmask, struct ir3_register *reg) +@@ -216,6 +222,24 @@ struct instr_translater { + unsigned arg; + }; + ++static unsigned ++src_flags(struct fd3_compile_context *ctx, struct ir3_register *reg) ++{ ++ unsigned flags = 0; ++ ++ if (regmask_get(ctx->needs_ss, reg)) { ++ flags |= IR3_INSTR_SS; ++ memset(ctx->needs_ss, 0, sizeof(ctx->needs_ss)); ++ } ++ ++ if (regmask_get(ctx->needs_sy, reg)) { ++ flags |= IR3_INSTR_SY; ++ memset(ctx->needs_sy, 0, sizeof(ctx->needs_sy)); ++ } ++ ++ return flags; ++} ++ + static struct ir3_register * + add_dst_reg(struct fd3_compile_context *ctx, struct ir3_instruction *instr, + const struct tgsi_dst_register *dst, unsigned chan) +@@ -279,15 +303,7 @@ add_src_reg(struct fd3_compile_context *ctx, struct ir3_instruction *instr, + + reg = ir3_reg_create(instr, regid(num, chan), flags); + +- if (regmask_get(ctx->needs_ss, reg)) { +- instr->flags |= IR3_INSTR_SS; +- memset(ctx->needs_ss, 0, sizeof(ctx->needs_ss)); +- } +- +- if (regmask_get(ctx->needs_sy, reg)) { +- instr->flags |= 
IR3_INSTR_SY; +- memset(ctx->needs_sy, 0, sizeof(ctx->needs_sy)); +- } ++ instr->flags |= src_flags(ctx, reg); + + return reg; + } +@@ -567,6 +583,7 @@ vectorize(struct fd3_compile_context *ctx, struct ir3_instruction *instr, + cur->regs[j+1]->num = + regid(cur->regs[j+1]->num >> 2, + src_swiz(src, i)); ++ cur->flags |= src_flags(ctx, cur->regs[j+1]); + } + va_end(ap); + } +@@ -753,7 +770,7 @@ trans_pow(const struct instr_translater *t, + instr = ir3_instr_create(ctx->ir, 4, OPC_LOG2); + r = add_dst_reg(ctx, instr, &tmp_dst, 0); + add_src_reg(ctx, instr, src0, src0->SwizzleX); +- regmask_set(ctx->needs_ss, r); ++ regmask_set(ctx->needs_ss, r, TGSI_WRITEMASK_X); + + /* mul.f Rtmp, Rtmp, Rsrc1 */ + instr = ir3_instr_create(ctx->ir, 2, OPC_MUL_F); +@@ -771,7 +788,7 @@ trans_pow(const struct instr_translater *t, + instr = ir3_instr_create(ctx->ir, 4, OPC_EXP2); + r = add_dst_reg(ctx, instr, &tmp_dst, 0); + add_src_reg(ctx, instr, tmp_src, 0); +- regmask_set(ctx->needs_ss, r); ++ regmask_set(ctx->needs_ss, r, TGSI_WRITEMASK_X); + + create_mov(ctx, dst, tmp_src); + } +@@ -855,7 +872,7 @@ trans_samp(const struct instr_translater *t, + + add_src_reg(ctx, instr, coord, coord->SwizzleX); + +- regmask_set(ctx->needs_sy, r); ++ regmask_set(ctx->needs_sy, r, r->wrmask); + } + + /* +@@ -1236,7 +1253,8 @@ instr_cat4(const struct instr_translater *t, + + vectorize(ctx, instr, dst, 1, src, 0); + +- regmask_set(ctx->needs_ss, instr->regs[0]); ++ regmask_set(ctx->needs_ss, instr->regs[0], ++ inst->Dst[0].Register.WriteMask); + + put_dst(ctx, inst, dst); + } +-- +1.8.4.2 + diff --git a/0017-freedreno-updates-for-msm-drm-kms-driver.patch b/0017-freedreno-updates-for-msm-drm-kms-driver.patch new file mode 100644 index 0000000..2fe4f05 --- /dev/null +++ b/0017-freedreno-updates-for-msm-drm-kms-driver.patch @@ -0,0 +1,328 @@ +From 4fd03f26aa1c2ddef24b2c4f8d1a10c96fbf7f40 Mon Sep 17 00:00:00 2001 +From: Rob Clark +Date: Thu, 29 Aug 2013 17:24:33 -0400 +Subject: [PATCH 17/17] freedreno: 
updates for msm drm/kms driver + +There were some small API tweaks in libdrm_freedreno to enable support +for msm drm/kms driver. + +Signed-off-by: Rob Clark +--- + src/gallium/drivers/freedreno/a2xx/fd2_emit.c | 4 +-- + src/gallium/drivers/freedreno/a2xx/fd2_gmem.c | 6 ++--- + src/gallium/drivers/freedreno/a3xx/fd3_emit.c | 14 +++++------ + src/gallium/drivers/freedreno/a3xx/fd3_gmem.c | 8 +++--- + src/gallium/drivers/freedreno/a3xx/fd3_program.c | 4 +-- + src/gallium/drivers/freedreno/freedreno_draw.c | 2 +- + src/gallium/drivers/freedreno/freedreno_resource.c | 18 ++++++++++++-- + src/gallium/drivers/freedreno/freedreno_util.h | 29 +++++++++++++++------- + 8 files changed, 55 insertions(+), 30 deletions(-) + +diff --git a/src/gallium/drivers/freedreno/a2xx/fd2_emit.c b/src/gallium/drivers/freedreno/a2xx/fd2_emit.c +index b03390e..35511ba 100644 +--- a/src/gallium/drivers/freedreno/a2xx/fd2_emit.c ++++ b/src/gallium/drivers/freedreno/a2xx/fd2_emit.c +@@ -137,7 +137,7 @@ emit_texture(struct fd_ringbuffer *ring, struct fd_context *ctx, + OUT_RING(ring, 0x00010000 + (0x6 * const_idx)); + + OUT_RING(ring, sampler->tex0 | view->tex0); +- OUT_RELOC(ring, view->tex_resource->bo, 0, view->fmt); ++ OUT_RELOC(ring, view->tex_resource->bo, 0, view->fmt, 0); + OUT_RING(ring, view->tex2); + OUT_RING(ring, sampler->tex3 | view->tex3); + OUT_RING(ring, sampler->tex4); +@@ -171,7 +171,7 @@ fd2_emit_vertex_bufs(struct fd_ringbuffer *ring, uint32_t val, + OUT_RING(ring, (0x1 << 16) | (val & 0xffff)); + for (i = 0; i < n; i++) { + struct fd_resource *rsc = fd_resource(vbufs[i].prsc); +- OUT_RELOC(ring, rsc->bo, vbufs[i].offset, 3); ++ OUT_RELOC(ring, rsc->bo, vbufs[i].offset, 3, 0); + OUT_RING (ring, vbufs[i].size); + } + } +diff --git a/src/gallium/drivers/freedreno/a2xx/fd2_gmem.c b/src/gallium/drivers/freedreno/a2xx/fd2_gmem.c +index 93695bc..89f5a4d 100644 +--- a/src/gallium/drivers/freedreno/a2xx/fd2_gmem.c ++++ b/src/gallium/drivers/freedreno/a2xx/fd2_gmem.c +@@ -70,7 +70,7 
@@ emit_gmem2mem_surf(struct fd_ringbuffer *ring, uint32_t base, + OUT_PKT3(ring, CP_SET_CONSTANT, 5); + OUT_RING(ring, CP_REG(REG_A2XX_RB_COPY_CONTROL)); + OUT_RING(ring, 0x00000000); /* RB_COPY_CONTROL */ +- OUT_RELOC(ring, rsc->bo, 0, 0); /* RB_COPY_DEST_BASE */ ++ OUT_RELOCW(ring, rsc->bo, 0, 0, 0); /* RB_COPY_DEST_BASE */ + OUT_RING(ring, rsc->pitch >> 5); /* RB_COPY_DEST_PITCH */ + OUT_RING(ring, /* RB_COPY_DEST_INFO */ + A2XX_RB_COPY_DEST_INFO_FORMAT(fd2_pipe2color(psurf->format)) | +@@ -199,7 +199,7 @@ emit_mem2gmem_surf(struct fd_ringbuffer *ring, uint32_t base, + A2XX_SQ_TEX_0_CLAMP_Z(SQ_TEX_WRAP) | + A2XX_SQ_TEX_0_PITCH(rsc->pitch)); + OUT_RELOC(ring, rsc->bo, 0, +- fd2_pipe2surface(psurf->format) | 0x800); ++ fd2_pipe2surface(psurf->format) | 0x800, 0); + OUT_RING(ring, A2XX_SQ_TEX_2_WIDTH(psurf->width - 1) | + A2XX_SQ_TEX_2_HEIGHT(psurf->height - 1)); + OUT_RING(ring, 0x01000000 | // XXX +@@ -241,7 +241,7 @@ fd2_emit_tile_mem2gmem(struct fd_context *ctx, uint32_t xoff, uint32_t yoff, + y0 = ((float)yoff) / ((float)pfb->height); + y1 = ((float)yoff + bin_h) / ((float)pfb->height); + OUT_PKT3(ring, CP_MEM_WRITE, 9); +- OUT_RELOC(ring, fd_resource(fd2_ctx->solid_vertexbuf)->bo, 0x60, 0); ++ OUT_RELOC(ring, fd_resource(fd2_ctx->solid_vertexbuf)->bo, 0x60, 0, 0); + OUT_RING(ring, fui(x0)); + OUT_RING(ring, fui(y0)); + OUT_RING(ring, fui(x1)); +diff --git a/src/gallium/drivers/freedreno/a3xx/fd3_emit.c b/src/gallium/drivers/freedreno/a3xx/fd3_emit.c +index 5ffd561..5e58618 100644 +--- a/src/gallium/drivers/freedreno/a3xx/fd3_emit.c ++++ b/src/gallium/drivers/freedreno/a3xx/fd3_emit.c +@@ -81,7 +81,7 @@ fd3_emit_constant(struct fd_ringbuffer *ring, + if (prsc) { + struct fd_bo *bo = fd_resource(prsc)->bo; + OUT_RELOC(ring, bo, offset, +- CP_LOAD_STATE_1_STATE_TYPE(ST_CONSTANTS)); ++ CP_LOAD_STATE_1_STATE_TYPE(ST_CONSTANTS), 0); + } else { + OUT_RING(ring, CP_LOAD_STATE_1_EXT_SRC_ADDR(0) | + CP_LOAD_STATE_1_STATE_TYPE(ST_CONSTANTS)); +@@ -212,7 +212,7 @@ 
emit_textures(struct fd_ringbuffer *ring, + for (i = 0; i < tex->num_textures; i++) { + struct fd3_pipe_sampler_view *view = + fd3_pipe_sampler_view(tex->textures[i]); +- OUT_RELOC(ring, view->tex_resource->bo, 0, 0); ++ OUT_RELOC(ring, view->tex_resource->bo, 0, 0, 0); + /* I think each entry is a ptr to mipmap level.. for now, just + * pad w/ null's until I get around to actually implementing + * mipmap support.. +@@ -296,7 +296,7 @@ fd3_emit_gmem_restore_tex(struct fd_ringbuffer *ring, struct pipe_surface *psurf + CP_LOAD_STATE_0_NUM_UNIT(1)); + OUT_RING(ring, CP_LOAD_STATE_1_STATE_TYPE(ST_CONSTANTS) | + CP_LOAD_STATE_1_EXT_SRC_ADDR(0)); +- OUT_RELOC(ring, rsc->bo, 0, 0); ++ OUT_RELOC(ring, rsc->bo, 0, 0, 0); + } + + void +@@ -322,7 +322,7 @@ fd3_emit_vertex_bufs(struct fd_ringbuffer *ring, + COND(switchnext, A3XX_VFD_FETCH_INSTR_0_SWITCHNEXT) | + A3XX_VFD_FETCH_INSTR_0_INDEXCODE(i) | + A3XX_VFD_FETCH_INSTR_0_STEPRATE(1)); +- OUT_RELOC(ring, rsc->bo, vbufs[i].offset, 0); ++ OUT_RELOC(ring, rsc->bo, vbufs[i].offset, 0, 0); + + OUT_PKT0(ring, REG_A3XX_VFD_DECODE_INSTR(i), 1); + OUT_RING(ring, A3XX_VFD_DECODE_INSTR_CONSTFILL | +@@ -481,12 +481,12 @@ fd3_emit_restore(struct fd_context *ctx) + + OUT_PKT0(ring, REG_A3XX_SP_VS_PVT_MEM_CTRL_REG, 3); + OUT_RING(ring, 0x08000001); /* SP_VS_PVT_MEM_CTRL_REG */ +- OUT_RELOC(ring, fd3_ctx->vs_pvt_mem, 0, 0); /* SP_VS_PVT_MEM_ADDR_REG */ ++ OUT_RELOC(ring, fd3_ctx->vs_pvt_mem, 0,0,0); /* SP_VS_PVT_MEM_ADDR_REG */ + OUT_RING(ring, 0x00000000); /* SP_VS_PVT_MEM_SIZE_REG */ + + OUT_PKT0(ring, REG_A3XX_SP_FS_PVT_MEM_CTRL_REG, 3); + OUT_RING(ring, 0x08000001); /* SP_FS_PVT_MEM_CTRL_REG */ +- OUT_RELOC(ring, fd3_ctx->fs_pvt_mem, 0, 0); /* SP_FS_PVT_MEM_ADDR_REG */ ++ OUT_RELOC(ring, fd3_ctx->fs_pvt_mem, 0,0,0); /* SP_FS_PVT_MEM_ADDR_REG */ + OUT_RING(ring, 0x00000000); /* SP_FS_PVT_MEM_SIZE_REG */ + + OUT_PKT0(ring, REG_A3XX_PC_VERTEX_REUSE_BLOCK_CNTL, 1); +@@ -549,7 +549,7 @@ fd3_emit_restore(struct fd_context *ctx) + 
OUT_RING(ring, 0x00000001); /* UCHE_CACHE_MODE_CONTROL_REG */ + + OUT_PKT0(ring, REG_A3XX_VSC_SIZE_ADDRESS, 1); +- OUT_RELOC(ring, fd3_ctx->vsc_size_mem, 0, 0); /* VSC_SIZE_ADDRESS */ ++ OUT_RELOC(ring, fd3_ctx->vsc_size_mem, 0, 0, 0); /* VSC_SIZE_ADDRESS */ + + OUT_PKT0(ring, REG_A3XX_GRAS_CL_CLIP_CNTL, 1); + OUT_RING(ring, 0x00000000); /* GRAS_CL_CLIP_CNTL */ +diff --git a/src/gallium/drivers/freedreno/a3xx/fd3_gmem.c b/src/gallium/drivers/freedreno/a3xx/fd3_gmem.c +index b9d0580..8d2df47 100644 +--- a/src/gallium/drivers/freedreno/a3xx/fd3_gmem.c ++++ b/src/gallium/drivers/freedreno/a3xx/fd3_gmem.c +@@ -89,7 +89,7 @@ emit_mrt(struct fd_ringbuffer *ring, unsigned nr_bufs, + if (bin_w || (i >= nr_bufs)) { + OUT_RING(ring, A3XX_RB_MRT_BUF_BASE_COLOR_BUF_BASE(base)); + } else { +- OUT_RELOCS(ring, res->bo, 0, 0, -1); ++ OUT_RELOCW(ring, res->bo, 0, 0, -1); + } + + OUT_PKT0(ring, REG_A3XX_SP_FS_IMAGE_OUTPUT_REG(i), 1); +@@ -116,7 +116,7 @@ emit_gmem2mem_surf(struct fd_ringbuffer *ring, + OUT_RING(ring, A3XX_RB_COPY_CONTROL_MSAA_RESOLVE(MSAA_ONE) | + A3XX_RB_COPY_CONTROL_MODE(mode) | + A3XX_RB_COPY_CONTROL_GMEM_BASE(base)); +- OUT_RELOCS(ring, rsc->bo, 0, 0, -1); /* RB_COPY_DEST_BASE */ ++ OUT_RELOCW(ring, rsc->bo, 0, 0, -1); /* RB_COPY_DEST_BASE */ + OUT_RING(ring, A3XX_RB_COPY_DEST_PITCH_PITCH(rsc->pitch * rsc->cpp)); + OUT_RING(ring, A3XX_RB_COPY_DEST_INFO_TILE(LINEAR) | + A3XX_RB_COPY_DEST_INFO_FORMAT(fd3_pipe2color(psurf->format)) | +@@ -272,7 +272,7 @@ fd3_emit_tile_mem2gmem(struct fd_context *ctx, uint32_t xoff, uint32_t yoff, + y1 = ((float)yoff + bin_h) / ((float)pfb->height); + + OUT_PKT3(ring, CP_MEM_WRITE, 5); +- OUT_RELOC(ring, fd_resource(fd3_ctx->blit_texcoord_vbuf)->bo, 0, 0); ++ OUT_RELOC(ring, fd_resource(fd3_ctx->blit_texcoord_vbuf)->bo, 0, 0, 0); + OUT_RING(ring, fui(x0)); + OUT_RING(ring, fui(y0)); + OUT_RING(ring, fui(x1)); +@@ -395,7 +395,7 @@ update_vsc_pipe(struct fd_context *ctx) + A3XX_VSC_PIPE_CONFIG_Y(0) | + 
A3XX_VSC_PIPE_CONFIG_W(gmem->nbins_x) | + A3XX_VSC_PIPE_CONFIG_H(gmem->nbins_y)); +- OUT_RELOC(ring, bo, 0, 0); /* VSC_PIPE[0].DATA_ADDRESS */ ++ OUT_RELOC(ring, bo, 0, 0, 0); /* VSC_PIPE[0].DATA_ADDRESS */ + OUT_RING(ring, fd_bo_size(bo) - 32); /* VSC_PIPE[0].DATA_LENGTH */ + + for (i = 1; i < 8; i++) { +diff --git a/src/gallium/drivers/freedreno/a3xx/fd3_program.c b/src/gallium/drivers/freedreno/a3xx/fd3_program.c +index 259c2dd..c6c51b1 100644 +--- a/src/gallium/drivers/freedreno/a3xx/fd3_program.c ++++ b/src/gallium/drivers/freedreno/a3xx/fd3_program.c +@@ -320,7 +320,7 @@ fd3_program_emit(struct fd_ringbuffer *ring, + OUT_PKT0(ring, REG_A3XX_SP_VS_OBJ_OFFSET_REG, 2); + OUT_RING(ring, A3XX_SP_VS_OBJ_OFFSET_REG_CONSTOBJECTOFFSET(0) | + A3XX_SP_VS_OBJ_OFFSET_REG_SHADEROBJOFFSET(0)); +- OUT_RELOC(ring, vp->bo, 0, 0); /* SP_VS_OBJ_START_REG */ ++ OUT_RELOC(ring, vp->bo, 0, 0, 0); /* SP_VS_OBJ_START_REG */ + #endif + + OUT_PKT0(ring, REG_A3XX_SP_FS_LENGTH_REG, 1); +@@ -345,7 +345,7 @@ fd3_program_emit(struct fd_ringbuffer *ring, + OUT_PKT0(ring, REG_A3XX_SP_FS_OBJ_OFFSET_REG, 2); + OUT_RING(ring, A3XX_SP_FS_OBJ_OFFSET_REG_CONSTOBJECTOFFSET(128) | + A3XX_SP_FS_OBJ_OFFSET_REG_SHADEROBJOFFSET(128 - fp->instrlen)); +- OUT_RELOC(ring, fp->bo, 0, 0); /* SP_FS_OBJ_START_REG */ ++ OUT_RELOC(ring, fp->bo, 0, 0, 0); /* SP_FS_OBJ_START_REG */ + #endif + + OUT_PKT0(ring, REG_A3XX_SP_FS_FLAT_SHAD_MODE_REG_0, 2); +diff --git a/src/gallium/drivers/freedreno/freedreno_draw.c b/src/gallium/drivers/freedreno/freedreno_draw.c +index d4f8d34..4a98ab4 100644 +--- a/src/gallium/drivers/freedreno/freedreno_draw.c ++++ b/src/gallium/drivers/freedreno/freedreno_draw.c +@@ -104,7 +104,7 @@ fd_draw_emit(struct fd_context *ctx, const struct pipe_draw_info *info) + src_sel, idx_type, IGNORE_VISIBILITY)); + OUT_RING(ring, info->count); /* NumIndices */ + if (info->indexed) { +- OUT_RELOC(ring, idx_bo, idx_offset, 0); ++ OUT_RELOC(ring, idx_bo, idx_offset, 0, 0); + OUT_RING (ring, idx_size); + } 
+ } +diff --git a/src/gallium/drivers/freedreno/freedreno_resource.c b/src/gallium/drivers/freedreno/freedreno_resource.c +index 1b1eaa5..3e051ea 100644 +--- a/src/gallium/drivers/freedreno/freedreno_resource.c ++++ b/src/gallium/drivers/freedreno/freedreno_resource.c +@@ -59,6 +59,9 @@ fd_resource_transfer_unmap(struct pipe_context *pctx, + struct pipe_transfer *ptrans) + { + struct fd_context *ctx = fd_context(pctx); ++ struct fd_resource *rsc = fd_resource(ptrans->resource); ++ if (!(ptrans->usage & PIPE_TRANSFER_UNSYNCHRONIZED)) ++ fd_bo_cpu_fini(rsc->bo); + pipe_resource_reference(&ptrans->resource, NULL); + util_slab_free(&ctx->transfer_pool, ptrans); + } +@@ -74,12 +77,13 @@ fd_resource_transfer_map(struct pipe_context *pctx, + struct fd_resource *rsc = fd_resource(prsc); + struct pipe_transfer *ptrans = util_slab_alloc(&ctx->transfer_pool); + enum pipe_format format = prsc->format; ++ uint32_t op = 0; + char *buf; + + if (!ptrans) + return NULL; + +- /* util_slap_alloc() doesn't zero: */ ++ /* util_slab_alloc() doesn't zero: */ + memset(ptrans, 0, sizeof(*ptrans)); + + pipe_resource_reference(&ptrans->resource, prsc); +@@ -90,7 +94,8 @@ fd_resource_transfer_map(struct pipe_context *pctx, + ptrans->layer_stride = ptrans->stride; + + /* some state trackers (at least XA) don't do this.. 
*/ +- fd_resource_transfer_flush_region(pctx, ptrans, box); ++ if (!(usage & PIPE_TRANSFER_FLUSH_EXPLICIT)) ++ fd_resource_transfer_flush_region(pctx, ptrans, box); + + buf = fd_bo_map(rsc->bo); + if (!buf) { +@@ -98,6 +103,15 @@ fd_resource_transfer_map(struct pipe_context *pctx, + return NULL; + } + ++ if (usage & PIPE_TRANSFER_READ) ++ op |= DRM_FREEDRENO_PREP_READ; ++ ++ if (usage & PIPE_TRANSFER_WRITE) ++ op |= DRM_FREEDRENO_PREP_WRITE; ++ ++ if (!(usage & PIPE_TRANSFER_UNSYNCHRONIZED)) ++ fd_bo_cpu_prep(rsc->bo, ctx->screen->pipe, op); ++ + *pptrans = ptrans; + + return buf + +diff --git a/src/gallium/drivers/freedreno/freedreno_util.h b/src/gallium/drivers/freedreno/freedreno_util.h +index 9f10686..7bbbe80 100644 +--- a/src/gallium/drivers/freedreno/freedreno_util.h ++++ b/src/gallium/drivers/freedreno/freedreno_util.h +@@ -104,25 +104,36 @@ OUT_RING(struct fd_ringbuffer *ring, uint32_t data) + + static inline void + OUT_RELOC(struct fd_ringbuffer *ring, struct fd_bo *bo, +- uint32_t offset, uint32_t or) ++ uint32_t offset, uint32_t or, int32_t shift) + { + if (LOG_DWORDS) { +- DBG("ring[%p]: OUT_RELOC %04x: %p+%u", ring, +- (uint32_t)(ring->cur - ring->last_start), bo, offset); ++ DBG("ring[%p]: OUT_RELOC %04x: %p+%u << %d", ring, ++ (uint32_t)(ring->cur - ring->last_start), bo, offset, shift); + } +- fd_ringbuffer_emit_reloc(ring, bo, offset, or); ++ fd_ringbuffer_reloc(ring, &(struct fd_reloc){ ++ .bo = bo, ++ .flags = FD_RELOC_READ, ++ .offset = offset, ++ .or = or, ++ .shift = shift, ++ }); + } + +-/* shifted reloc: */ + static inline void +-OUT_RELOCS(struct fd_ringbuffer *ring, struct fd_bo *bo, ++OUT_RELOCW(struct fd_ringbuffer *ring, struct fd_bo *bo, + uint32_t offset, uint32_t or, int32_t shift) + { + if (LOG_DWORDS) { +- DBG("ring[%p]: OUT_RELOCS %04x: %p+%u << %d", ring, ++ DBG("ring[%p]: OUT_RELOC %04x: %p+%u << %d", ring, + (uint32_t)(ring->cur - ring->last_start), bo, offset, shift); + } +- fd_ringbuffer_emit_reloc_shift(ring, bo, offset, or, 
shift); ++ fd_ringbuffer_reloc(ring, &(struct fd_reloc){ ++ .bo = bo, ++ .flags = FD_RELOC_READ | FD_RELOC_WRITE, ++ .offset = offset, ++ .or = or, ++ .shift = shift, ++ }); + } + + static inline void BEGIN_RING(struct fd_ringbuffer *ring, uint32_t ndwords) +@@ -155,7 +166,7 @@ OUT_IB(struct fd_ringbuffer *ring, struct fd_ringmarker *start, + struct fd_ringmarker *end) + { + OUT_PKT3(ring, CP_INDIRECT_BUFFER_PFD, 2); +- fd_ringbuffer_emit_reloc_ring(ring, start); ++ fd_ringbuffer_emit_reloc_ring(ring, start, end); + OUT_RING(ring, fd_ringmarker_dwords(start, end)); + } + +-- +1.8.4.2 + diff --git a/mesa.spec b/mesa.spec index a8da90c..0619a6c 100644 --- a/mesa.spec +++ b/mesa.spec @@ -48,12 +48,12 @@ %define _default_patch_fuzz 2 -%define gitdate 20131113 +%define gitdate 20131114 #% define snapshot Summary: Mesa graphics libraries Name: mesa -Version: 9.2.2 +Version: 9.2.3 Release: 1.%{gitdate}%{?dist} License: MIT Group: System Environment/Libraries @@ -77,6 +77,25 @@ Patch15: mesa-9.2-hardware-float.patch Patch16: mesa-9.2-no-useless-vdpau.patch Patch20: mesa-9.2-evergreen-big-endian.patch +# https://bugs.freedesktop.org/show_bug.cgi?id=71573 +Patch21: 0001-freedreno-a3xx-fix-color-inversion-on-mem-gmem-resto.patch +Patch22: 0002-freedreno-a3xx-fix-viewport-on-gmem-mem-resolve.patch +Patch23: 0003-freedreno-add-debug-option-to-disable-scissor-optimi.patch +Patch24: 0004-freedreno-update-register-headers.patch +Patch25: 0005-freedreno-a3xx-some-texture-fixes.patch +Patch26: 0006-freedreno-a3xx-compiler-fix-CMP.patch +Patch27: 0007-freedreno-a3xx-compiler-handle-saturate-on-dst.patch +Patch28: 0008-freedreno-a3xx-compiler-use-max_reg-rather-than-file.patch +Patch29: 0009-freedreno-a3xx-compiler-cat4-cannot-use-const-reg-as.patch +Patch30: 0010-freedreno-fix-segfault-when-no-color-buffer-bound.patch +Patch31: 0011-freedreno-a3xx-compiler-make-compiler-errors-more-us.patch +Patch32: 0012-freedreno-a3xx-compiler-bit-of-re-arrange-cleanup.patch +Patch33: 
0013-freedreno-a3xx-compiler-fix-SGT-SLT-etc.patch +Patch34: 0014-freedreno-a3xx-don-t-leak-so-much.patch +Patch35: 0015-freedreno-a3xx-compiler-better-const-handling.patch +Patch36: 0016-freedreno-a3xx-compiler-handle-sync-flags-better.patch +Patch37: 0017-freedreno-updates-for-msm-drm-kms-driver.patch + BuildRequires: pkgconfig autoconf automake libtool %if %{with_hardware} BuildRequires: kernel-headers @@ -302,6 +321,24 @@ grep -q ^/ src/gallium/auxiliary/vl/vl_decoder.c && exit 1 %patch16 -p1 -b .vdpau %patch20 -p1 -b .egbe +%patch21 -p1 +%patch22 -p1 +%patch23 -p1 +%patch24 -p1 +%patch25 -p1 +%patch26 -p1 +%patch27 -p1 +%patch28 -p1 +%patch29 -p1 +%patch30 -p1 +%patch31 -p1 +%patch32 -p1 +%patch33 -p1 +%patch34 -p1 +%patch35 -p1 +%patch36 -p1 +%patch37 -p1 + %if 0%{with_private_llvm} sed -i 's/llvm-config/mesa-private-llvm-config-%{__isa_bits}/g' configure.ac sed -i 's/`$LLVM_CONFIG --version`/&-mesa/' configure.ac @@ -600,6 +637,9 @@ rm -rf $RPM_BUILD_ROOT %endif %changelog +* Thu Nov 14 2013 Igor Gnatenko - 9.2.3-1.20131114 +- 9.2.3 upstream release + * Wed Nov 13 2013 Igor Gnatenko - 9.2.2-1.20131113 - 9.2.2 upstream release + fixes from git 9.2 branch diff --git a/sources b/sources index e08cadd..caf10db 100644 --- a/sources +++ b/sources @@ -1 +1 @@ -eafb41bd8c3160972e192b1d502ad8b6 mesa-20131113.tar.xz +54f46fc070303e0d467779ab39103d58 mesa-20131114.tar.xz From bbef9c7b4e21e1f721e0274fce603cb72a5ef304 Mon Sep 17 00:00:00 2001 From: Igor Gnatenko Date: Thu, 28 Nov 2013 14:21:24 +0400 Subject: [PATCH 2/2] 9.2.4 upstream release Signed-off-by: Igor Gnatenko --- .gitignore | 1 + ...ix-color-inversion-on-mem-gmem-resto.patch | 31 - ...t-create-a-new-context-for-msaa-copy.patch | 313 ----- ...3xx-fix-viewport-on-gmem-mem-resolve.patch | 32 - ...bug-option-to-disable-scissor-optimi.patch | 113 -- 0004-freedreno-update-register-headers.patch | 1238 ----------------- 0005-freedreno-a3xx-some-texture-fixes.patch | 65 - 0006-freedreno-a3xx-compiler-fix-CMP.patch | 
45 - ...a3xx-compiler-handle-saturate-on-dst.patch | 98 -- ...ompiler-use-max_reg-rather-than-file.patch | 59 - ...ompiler-cat4-cannot-use-const-reg-as.patch | 104 -- ...-segfault-when-no-color-buffer-bound.patch | 216 --- ...ompiler-make-compiler-errors-more-us.patch | 172 --- ...x-compiler-bit-of-re-arrange-cleanup.patch | 420 ------ ...edreno-a3xx-compiler-fix-SGT-SLT-etc.patch | 231 --- 0014-freedreno-a3xx-don-t-leak-so-much.patch | 36 - ...-a3xx-compiler-better-const-handling.patch | 376 ----- ...xx-compiler-handle-sync-flags-better.patch | 128 -- ...dreno-updates-for-msm-drm-kms-driver.patch | 328 ----- mesa.spec | 44 +- sources | 2 +- 21 files changed, 7 insertions(+), 4045 deletions(-) delete mode 100644 0001-freedreno-a3xx-fix-color-inversion-on-mem-gmem-resto.patch delete mode 100644 0001-st-dri-do-not-create-a-new-context-for-msaa-copy.patch delete mode 100644 0002-freedreno-a3xx-fix-viewport-on-gmem-mem-resolve.patch delete mode 100644 0003-freedreno-add-debug-option-to-disable-scissor-optimi.patch delete mode 100644 0004-freedreno-update-register-headers.patch delete mode 100644 0005-freedreno-a3xx-some-texture-fixes.patch delete mode 100644 0006-freedreno-a3xx-compiler-fix-CMP.patch delete mode 100644 0007-freedreno-a3xx-compiler-handle-saturate-on-dst.patch delete mode 100644 0008-freedreno-a3xx-compiler-use-max_reg-rather-than-file.patch delete mode 100644 0009-freedreno-a3xx-compiler-cat4-cannot-use-const-reg-as.patch delete mode 100644 0010-freedreno-fix-segfault-when-no-color-buffer-bound.patch delete mode 100644 0011-freedreno-a3xx-compiler-make-compiler-errors-more-us.patch delete mode 100644 0012-freedreno-a3xx-compiler-bit-of-re-arrange-cleanup.patch delete mode 100644 0013-freedreno-a3xx-compiler-fix-SGT-SLT-etc.patch delete mode 100644 0014-freedreno-a3xx-don-t-leak-so-much.patch delete mode 100644 0015-freedreno-a3xx-compiler-better-const-handling.patch delete mode 100644 0016-freedreno-a3xx-compiler-handle-sync-flags-better.patch delete 
mode 100644 0017-freedreno-updates-for-msm-drm-kms-driver.patch diff --git a/.gitignore b/.gitignore index 089b161..378e862 100644 --- a/.gitignore +++ b/.gitignore @@ -58,3 +58,4 @@ mesa-20100720.tar.bz2 /mesa-20130919.tar.xz /mesa-20131113.tar.xz /mesa-20131114.tar.xz +/mesa-20131128.tar.xz diff --git a/0001-freedreno-a3xx-fix-color-inversion-on-mem-gmem-resto.patch b/0001-freedreno-a3xx-fix-color-inversion-on-mem-gmem-resto.patch deleted file mode 100644 index 23b9bf6..0000000 --- a/0001-freedreno-a3xx-fix-color-inversion-on-mem-gmem-resto.patch +++ /dev/null @@ -1,31 +0,0 @@ -From 2d844be97fd5b6b0f02a94d4bb194c0bd19de6f9 Mon Sep 17 00:00:00 2001 -From: Rob Clark -Date: Sat, 13 Jul 2013 13:07:46 -0400 -Subject: [PATCH 01/17] freedreno/a3xx: fix color inversion on mem->gmem - restore - -Signed-off-by: Rob Clark ---- - src/gallium/drivers/freedreno/a3xx/fd3_emit.c | 6 +++--- - 1 file changed, 3 insertions(+), 3 deletions(-) - -diff --git a/src/gallium/drivers/freedreno/a3xx/fd3_emit.c b/src/gallium/drivers/freedreno/a3xx/fd3_emit.c -index a7a4bf7..b8436c9 100644 ---- a/src/gallium/drivers/freedreno/a3xx/fd3_emit.c -+++ b/src/gallium/drivers/freedreno/a3xx/fd3_emit.c -@@ -279,9 +279,9 @@ fd3_emit_gmem_restore_tex(struct fd_ringbuffer *ring, struct pipe_surface *psurf - CP_LOAD_STATE_1_EXT_SRC_ADDR(0)); - OUT_RING(ring, A3XX_TEX_CONST_0_FMT(fd3_pipe2tex(psurf->format)) | - 0x40000000 | // XXX -- fd3_tex_swiz(psurf->format, PIPE_SWIZZLE_BLUE, PIPE_SWIZZLE_GREEN, -- PIPE_SWIZZLE_RED, PIPE_SWIZZLE_ALPHA)); -- OUT_RING(ring, A3XX_TEX_CONST_1_FETCHSIZE(fd3_pipe2fetchsize(psurf->format)) | -+ fd3_tex_swiz(psurf->format, PIPE_SWIZZLE_RED, PIPE_SWIZZLE_GREEN, -+ PIPE_SWIZZLE_BLUE, PIPE_SWIZZLE_ALPHA)); -+ OUT_RING(ring, A3XX_TEX_CONST_1_FETCHSIZE(TFETCH_DISABLE) | - A3XX_TEX_CONST_1_WIDTH(psurf->width) | - A3XX_TEX_CONST_1_HEIGHT(psurf->height)); - OUT_RING(ring, A3XX_TEX_CONST_2_PITCH(rsc->pitch * rsc->cpp) | --- -1.8.4.2 - diff --git 
a/0001-st-dri-do-not-create-a-new-context-for-msaa-copy.patch b/0001-st-dri-do-not-create-a-new-context-for-msaa-copy.patch deleted file mode 100644 index 8a722db..0000000 --- a/0001-st-dri-do-not-create-a-new-context-for-msaa-copy.patch +++ /dev/null @@ -1,313 +0,0 @@ -From b217d48364f368f541e53006af5dd56f664be24d Mon Sep 17 00:00:00 2001 -From: Maarten Lankhorst -Date: Mon, 9 Sep 2013 13:02:08 +0200 -Subject: [PATCH] st/dri: do not create a new context for msaa copy -MIME-Version: 1.0 -Content-Type: text/plain; charset=UTF-8 -Content-Transfer-Encoding: 8bit - -Commit b77316ad7594f - st/dri: always copy new DRI front and back buffers to corresponding MSAA buffers - -introduced creating a pipe_context for every call to validate, which is not required -because the callers have a context anyway. - -Only exception is egl_g3d_create_pbuffer_from_client_buffer, can someone test if it -still works with NULL passed as context for validate? From examining the code I -believe it does, but I didn't thoroughly test it. 
- -Signed-off-by: Maarten Lankhorst -Cc: 9.2 -Reviewed-by: Marek Olšák ---- - src/gallium/include/state_tracker/st_api.h | 3 ++- - .../state_trackers/dri/common/dri_drawable.c | 13 +++++++----- - .../state_trackers/dri/common/dri_drawable.h | 3 ++- - src/gallium/state_trackers/dri/drm/dri2.c | 23 ++++++---------------- - src/gallium/state_trackers/dri/sw/drisw.c | 3 ++- - .../state_trackers/egl/common/egl_g3d_api.c | 2 +- - src/gallium/state_trackers/egl/common/egl_g3d_st.c | 6 ++++-- - src/gallium/state_trackers/glx/xlib/xm_st.c | 3 ++- - src/gallium/state_trackers/osmesa/osmesa.c | 3 ++- - src/gallium/state_trackers/vega/vg_manager.c | 3 ++- - src/gallium/state_trackers/wgl/stw_st.c | 3 ++- - src/mesa/state_tracker/st_manager.c | 2 +- - 12 files changed, 34 insertions(+), 33 deletions(-) - -diff --git a/src/gallium/include/state_tracker/st_api.h b/src/gallium/include/state_tracker/st_api.h -index 3ecd12e..9dcb76f 100644 ---- a/src/gallium/include/state_tracker/st_api.h -+++ b/src/gallium/include/state_tracker/st_api.h -@@ -342,7 +342,8 @@ struct st_framebuffer_iface - * the last call might be destroyed. This behavior might change in the - * future. 
- */ -- boolean (*validate)(struct st_framebuffer_iface *stfbi, -+ boolean (*validate)(struct st_context_iface *stctx, -+ struct st_framebuffer_iface *stfbi, - const enum st_attachment_type *statts, - unsigned count, - struct pipe_resource **out); -diff --git a/src/gallium/state_trackers/dri/common/dri_drawable.c b/src/gallium/state_trackers/dri/common/dri_drawable.c -index 18d8d89..ddf9400 100644 ---- a/src/gallium/state_trackers/dri/common/dri_drawable.c -+++ b/src/gallium/state_trackers/dri/common/dri_drawable.c -@@ -42,11 +42,13 @@ static void - swap_fences_unref(struct dri_drawable *draw); - - static boolean --dri_st_framebuffer_validate(struct st_framebuffer_iface *stfbi, -+dri_st_framebuffer_validate(struct st_context_iface *stctx, -+ struct st_framebuffer_iface *stfbi, - const enum st_attachment_type *statts, - unsigned count, - struct pipe_resource **out) - { -+ struct dri_context *ctx = (struct dri_context *)stctx->st_manager_private; - struct dri_drawable *drawable = - (struct dri_drawable *) stfbi->st_manager_private; - struct dri_screen *screen = dri_screen(drawable->sPriv); -@@ -78,7 +80,7 @@ dri_st_framebuffer_validate(struct st_framebuffer_iface *stfbi, - if (new_stamp && drawable->update_drawable_info) - drawable->update_drawable_info(drawable); - -- drawable->allocate_textures(drawable, statts, count); -+ drawable->allocate_textures(ctx, drawable, statts, count); - - /* add existing textures */ - for (i = 0; i < ST_ATTACHMENT_COUNT; i++) { -@@ -183,7 +185,8 @@ dri_destroy_buffer(__DRIdrawable * dPriv) - * exist. Used by the TFP extension. 
- */ - static void --dri_drawable_validate_att(struct dri_drawable *drawable, -+dri_drawable_validate_att(struct dri_context *ctx, -+ struct dri_drawable *drawable, - enum st_attachment_type statt) - { - enum st_attachment_type statts[ST_ATTACHMENT_COUNT]; -@@ -203,7 +206,7 @@ dri_drawable_validate_att(struct dri_drawable *drawable, - - drawable->texture_stamp = drawable->dPriv->lastStamp - 1; - -- drawable->base.validate(&drawable->base, statts, count, NULL); -+ drawable->base.validate(ctx->st, &drawable->base, statts, count, NULL); - } - - /** -@@ -217,7 +220,7 @@ dri_set_tex_buffer2(__DRIcontext *pDRICtx, GLint target, - struct dri_drawable *drawable = dri_drawable(dPriv); - struct pipe_resource *pt; - -- dri_drawable_validate_att(drawable, ST_ATTACHMENT_FRONT_LEFT); -+ dri_drawable_validate_att(ctx, drawable, ST_ATTACHMENT_FRONT_LEFT); - - /* Use the pipe resource associated with the X drawable */ - pt = drawable->textures[ST_ATTACHMENT_FRONT_LEFT]; -diff --git a/src/gallium/state_trackers/dri/common/dri_drawable.h b/src/gallium/state_trackers/dri/common/dri_drawable.h -index 50e5cc4..c514218 100644 ---- a/src/gallium/state_trackers/dri/common/dri_drawable.h -+++ b/src/gallium/state_trackers/dri/common/dri_drawable.h -@@ -71,7 +71,8 @@ struct dri_drawable - struct pipe_surface *drisw_surface; - - /* hooks filled in by dri2 & drisw */ -- void (*allocate_textures)(struct dri_drawable *drawable, -+ void (*allocate_textures)(struct dri_context *ctx, -+ struct dri_drawable *drawable, - const enum st_attachment_type *statts, - unsigned count); - -diff --git a/src/gallium/state_trackers/dri/drm/dri2.c b/src/gallium/state_trackers/dri/drm/dri2.c -index e4477d6..fea1c8d 100644 ---- a/src/gallium/state_trackers/dri/drm/dri2.c -+++ b/src/gallium/state_trackers/dri/drm/dri2.c -@@ -169,7 +169,8 @@ dri2_drawable_get_buffers(struct dri_drawable *drawable, - * Process __DRIbuffer and convert them into pipe_resources. 
- */ - static void --dri2_drawable_process_buffers(struct dri_drawable *drawable, -+dri2_drawable_process_buffers(struct dri_context *ctx, -+ struct dri_drawable *drawable, - __DRIbuffer *buffers, unsigned buffer_count, - const enum st_attachment_type *atts, - unsigned att_count) -@@ -180,8 +181,6 @@ dri2_drawable_process_buffers(struct dri_drawable *drawable, - struct winsys_handle whandle; - boolean alloc_depthstencil = FALSE; - unsigned i, j, bind; -- struct pipe_screen *pscreen = screen->base.screen; -- struct pipe_context *pipe = NULL; - - if (drawable->old_num == buffer_count && - drawable->old_w == dri_drawable->w && -@@ -308,14 +307,8 @@ dri2_drawable_process_buffers(struct dri_drawable *drawable, - * The single-sample resources are not exposed - * to the state tracker. - * -- * We don't have a context here, so create one temporarily. -- * We may need to create a persistent context if creation and -- * destruction of the context becomes a bottleneck. - */ -- if (!pipe) -- pipe = pscreen->context_create(pscreen, NULL); -- -- dri_pipe_blit(pipe, -+ dri_pipe_blit(ctx->st->pipe, - drawable->msaa_textures[att], - drawable->textures[att]); - } -@@ -371,11 +364,6 @@ dri2_drawable_process_buffers(struct dri_drawable *drawable, - drawable->old_w = dri_drawable->w; - drawable->old_h = dri_drawable->h; - memcpy(drawable->old, buffers, sizeof(__DRIbuffer) * buffer_count); -- -- if (pipe) { -- pipe->flush(pipe, NULL, 0); -- pipe->destroy(pipe); -- } - } - - static __DRIbuffer * -@@ -470,7 +458,8 @@ dri2_release_buffer(__DRIscreen *sPriv, __DRIbuffer *bPriv) - */ - - static void --dri2_allocate_textures(struct dri_drawable *drawable, -+dri2_allocate_textures(struct dri_context *ctx, -+ struct dri_drawable *drawable, - const enum st_attachment_type *statts, - unsigned statts_count) - { -@@ -479,7 +468,7 @@ dri2_allocate_textures(struct dri_drawable *drawable, - - buffers = dri2_drawable_get_buffers(drawable, statts, &num_buffers); - if (buffers) -- 
dri2_drawable_process_buffers(drawable, buffers, num_buffers, -+ dri2_drawable_process_buffers(ctx, drawable, buffers, num_buffers, - statts, statts_count); - } - -diff --git a/src/gallium/state_trackers/dri/sw/drisw.c b/src/gallium/state_trackers/dri/sw/drisw.c -index 41f66d5..121a205 100644 ---- a/src/gallium/state_trackers/dri/sw/drisw.c -+++ b/src/gallium/state_trackers/dri/sw/drisw.c -@@ -182,7 +182,8 @@ drisw_flush_frontbuffer(struct dri_context *ctx, - * framebuffer is resized or destroyed. - */ - static void --drisw_allocate_textures(struct dri_drawable *drawable, -+drisw_allocate_textures(struct dri_context *stctx, -+ struct dri_drawable *drawable, - const enum st_attachment_type *statts, - unsigned count) - { -diff --git a/src/gallium/state_trackers/egl/common/egl_g3d_api.c b/src/gallium/state_trackers/egl/common/egl_g3d_api.c -index 3ee0d67..46a3245 100644 ---- a/src/gallium/state_trackers/egl/common/egl_g3d_api.c -+++ b/src/gallium/state_trackers/egl/common/egl_g3d_api.c -@@ -443,7 +443,7 @@ egl_g3d_create_pbuffer_from_client_buffer(_EGLDriver *drv, _EGLDisplay *dpy, - gsurf->client_buffer = buffer; - - /* validate now so that it fails if the client buffer is invalid */ -- if (!gsurf->stfbi->validate(gsurf->stfbi, -+ if (!gsurf->stfbi->validate(NULL, gsurf->stfbi, - &gsurf->stvis.render_buffer, 1, &ptex)) { - egl_g3d_destroy_st_framebuffer(gsurf->stfbi); - FREE(gsurf); -diff --git a/src/gallium/state_trackers/egl/common/egl_g3d_st.c b/src/gallium/state_trackers/egl/common/egl_g3d_st.c -index f2ee11c..cf1846c 100644 ---- a/src/gallium/state_trackers/egl/common/egl_g3d_st.c -+++ b/src/gallium/state_trackers/egl/common/egl_g3d_st.c -@@ -149,7 +149,8 @@ pbuffer_allocate_pbuffer_texture(struct egl_g3d_surface *gsurf) - } - - static boolean --egl_g3d_st_framebuffer_validate_pbuffer(struct st_framebuffer_iface *stfbi, -+egl_g3d_st_framebuffer_validate_pbuffer(struct st_context_iface *stctx, -+ struct st_framebuffer_iface *stfbi, - const enum st_attachment_type 
*statts, - unsigned count, - struct pipe_resource **out) -@@ -202,7 +203,8 @@ egl_g3d_st_framebuffer_flush_front(struct st_context_iface *stctx, - } - - static boolean --egl_g3d_st_framebuffer_validate(struct st_framebuffer_iface *stfbi, -+egl_g3d_st_framebuffer_validate(struct st_context_iface *stctx, -+ struct st_framebuffer_iface *stfbi, - const enum st_attachment_type *statts, - unsigned count, - struct pipe_resource **out) -diff --git a/src/gallium/state_trackers/glx/xlib/xm_st.c b/src/gallium/state_trackers/glx/xlib/xm_st.c -index 30e69ea..fb69998 100644 ---- a/src/gallium/state_trackers/glx/xlib/xm_st.c -+++ b/src/gallium/state_trackers/glx/xlib/xm_st.c -@@ -194,7 +194,8 @@ xmesa_st_framebuffer_validate_textures(struct st_framebuffer_iface *stfbi, - * \param out returns resources for each of the attachments - */ - static boolean --xmesa_st_framebuffer_validate(struct st_framebuffer_iface *stfbi, -+xmesa_st_framebuffer_validate(struct st_context_iface *stctx, -+ struct st_framebuffer_iface *stfbi, - const enum st_attachment_type *statts, - unsigned count, - struct pipe_resource **out) -diff --git a/src/gallium/state_trackers/osmesa/osmesa.c b/src/gallium/state_trackers/osmesa/osmesa.c -index bb85e5c..3546183 100644 ---- a/src/gallium/state_trackers/osmesa/osmesa.c -+++ b/src/gallium/state_trackers/osmesa/osmesa.c -@@ -342,7 +342,8 @@ osmesa_st_framebuffer_flush_front(struct st_context_iface *stctx, - * its resources). 
- */ - static boolean --osmesa_st_framebuffer_validate(struct st_framebuffer_iface *stfbi, -+osmesa_st_framebuffer_validate(struct st_context_iface *stctx, -+ struct st_framebuffer_iface *stfbi, - const enum st_attachment_type *statts, - unsigned count, - struct pipe_resource **out) -diff --git a/src/gallium/state_trackers/vega/vg_manager.c b/src/gallium/state_trackers/vega/vg_manager.c -index c079d90..2c43d76 100644 ---- a/src/gallium/state_trackers/vega/vg_manager.c -+++ b/src/gallium/state_trackers/vega/vg_manager.c -@@ -113,7 +113,8 @@ vg_manager_validate_framebuffer(struct vg_context *ctx) - if (stfb->iface_stamp != new_stamp) { - do { - /* validate the fb */ -- if (!stfb->iface->validate(stfb->iface, &stfb->strb_att, -+ if (!stfb->iface->validate((struct st_context_iface *)ctx, -+ stfb->iface, &stfb->strb_att, - 1, &pt) || !pt) - return; - -diff --git a/src/gallium/state_trackers/wgl/stw_st.c b/src/gallium/state_trackers/wgl/stw_st.c -index 9427398..e95c37f 100644 ---- a/src/gallium/state_trackers/wgl/stw_st.c -+++ b/src/gallium/state_trackers/wgl/stw_st.c -@@ -121,7 +121,8 @@ stw_st_framebuffer_validate_locked(struct st_framebuffer_iface *stfb, - } - - static boolean --stw_st_framebuffer_validate(struct st_framebuffer_iface *stfb, -+stw_st_framebuffer_validate(struct st_context_iface *stctx, -+ struct st_framebuffer_iface *stfb, - const enum st_attachment_type *statts, - unsigned count, - struct pipe_resource **out) -diff --git a/src/mesa/state_tracker/st_manager.c b/src/mesa/state_tracker/st_manager.c -index 098e6c0..b1fd91a 100644 ---- a/src/mesa/state_tracker/st_manager.c -+++ b/src/mesa/state_tracker/st_manager.c -@@ -189,7 +189,7 @@ st_framebuffer_validate(struct st_framebuffer *stfb, - - /* validate the fb */ - do { -- if (!stfb->iface->validate(stfb->iface, stfb->statts, -+ if (!stfb->iface->validate(&st->iface, stfb->iface, stfb->statts, - stfb->num_statts, textures)) - return; - --- -1.8.3.1 - diff --git 
a/0002-freedreno-a3xx-fix-viewport-on-gmem-mem-resolve.patch b/0002-freedreno-a3xx-fix-viewport-on-gmem-mem-resolve.patch deleted file mode 100644 index eede425..0000000 --- a/0002-freedreno-a3xx-fix-viewport-on-gmem-mem-resolve.patch +++ /dev/null @@ -1,32 +0,0 @@ -From b2a32254d65c356604bbffda6e771dca0509e9ed Mon Sep 17 00:00:00 2001 -From: Rob Clark -Date: Sat, 13 Jul 2013 13:08:22 -0400 -Subject: [PATCH 02/17] freedreno/a3xx: fix viewport on gmem->mem resolve - -Signed-off-by: Rob Clark ---- - src/gallium/drivers/freedreno/a3xx/fd3_gmem.c | 8 ++++++++ - 1 file changed, 8 insertions(+) - -diff --git a/src/gallium/drivers/freedreno/a3xx/fd3_gmem.c b/src/gallium/drivers/freedreno/a3xx/fd3_gmem.c -index 1cb170a..9050166 100644 ---- a/src/gallium/drivers/freedreno/a3xx/fd3_gmem.c -+++ b/src/gallium/drivers/freedreno/a3xx/fd3_gmem.c -@@ -168,6 +168,14 @@ fd3_emit_tile_gmem2mem(struct fd_context *ctx, uint32_t xoff, uint32_t yoff, - OUT_PKT0(ring, REG_A3XX_GRAS_CL_CLIP_CNTL, 1); - OUT_RING(ring, 0x00000000); /* GRAS_CL_CLIP_CNTL */ - -+ OUT_PKT0(ring, REG_A3XX_GRAS_CL_VPORT_XOFFSET, 6); -+ OUT_RING(ring, A3XX_GRAS_CL_VPORT_XOFFSET((float)pfb->width/2.0 - 0.5)); -+ OUT_RING(ring, A3XX_GRAS_CL_VPORT_XSCALE((float)pfb->width/2.0)); -+ OUT_RING(ring, A3XX_GRAS_CL_VPORT_YOFFSET((float)pfb->height/2.0 - 0.5)); -+ OUT_RING(ring, A3XX_GRAS_CL_VPORT_YSCALE(-(float)pfb->height/2.0)); -+ OUT_RING(ring, A3XX_GRAS_CL_VPORT_ZOFFSET(0.0)); -+ OUT_RING(ring, A3XX_GRAS_CL_VPORT_ZSCALE(1.0)); -+ - OUT_PKT0(ring, REG_A3XX_RB_MODE_CONTROL, 1); - OUT_RING(ring, A3XX_RB_MODE_CONTROL_RENDER_MODE(RB_RESOLVE_PASS) | - A3XX_RB_MODE_CONTROL_MARB_CACHE_SPLIT_MODE); --- -1.8.4.2 - diff --git a/0003-freedreno-add-debug-option-to-disable-scissor-optimi.patch b/0003-freedreno-add-debug-option-to-disable-scissor-optimi.patch deleted file mode 100644 index e0bb580..0000000 --- a/0003-freedreno-add-debug-option-to-disable-scissor-optimi.patch +++ /dev/null @@ -1,113 +0,0 @@ -From 
8b167d34bebcc9aaf67838be71cc3272728d4fe1 Mon Sep 17 00:00:00 2001 -From: Rob Clark -Date: Wed, 29 May 2013 10:16:33 -0400 -Subject: [PATCH 03/17] freedreno: add debug option to disable scissor - optimization - -Useful for testing and debugging. - -Signed-off-by: Rob Clark ---- - src/gallium/drivers/freedreno/freedreno_gmem.c | 26 +++++++++++++++--------- - src/gallium/drivers/freedreno/freedreno_screen.c | 1 + - src/gallium/drivers/freedreno/freedreno_util.h | 9 ++++---- - 3 files changed, 22 insertions(+), 14 deletions(-) - -diff --git a/src/gallium/drivers/freedreno/freedreno_gmem.c b/src/gallium/drivers/freedreno/freedreno_gmem.c -index 12633bd..197d1d9 100644 ---- a/src/gallium/drivers/freedreno/freedreno_gmem.c -+++ b/src/gallium/drivers/freedreno/freedreno_gmem.c -@@ -71,7 +71,8 @@ calculate_tiles(struct fd_context *ctx) - { - struct fd_gmem_stateobj *gmem = &ctx->gmem; - struct pipe_scissor_state *scissor = &ctx->max_scissor; -- uint32_t cpp = util_format_get_blocksize(ctx->framebuffer.cbufs[0]->format); -+ struct pipe_framebuffer_state *pfb = &ctx->framebuffer; -+ uint32_t cpp = util_format_get_blocksize(pfb->cbufs[0]->format); - uint32_t gmem_size = ctx->screen->gmemsize_bytes; - uint32_t minx, miny, width, height; - uint32_t nbins_x = 1, nbins_y = 1; -@@ -84,10 +85,17 @@ calculate_tiles(struct fd_context *ctx) - return; - } - -- minx = scissor->minx & ~31; /* round down to multiple of 32 */ -- miny = scissor->miny & ~31; -- width = scissor->maxx - minx; -- height = scissor->maxy - miny; -+ if (fd_mesa_debug & FD_DBG_DSCIS) { -+ minx = 0; -+ miny = 0; -+ width = pfb->width; -+ height = pfb->height; -+ } else { -+ minx = scissor->minx & ~31; /* round down to multiple of 32 */ -+ miny = scissor->miny & ~31; -+ width = scissor->maxx - minx; -+ height = scissor->maxy - miny; -+ } - - // TODO we probably could optimize this a bit if we know that - // Z or stencil is not enabled for any of the draw calls.. 
-@@ -132,9 +140,7 @@ static void - render_tiles(struct fd_context *ctx) - { - struct fd_gmem_stateobj *gmem = &ctx->gmem; -- uint32_t i, yoff = 0; -- -- yoff= gmem->miny; -+ uint32_t i, yoff = gmem->miny; - - ctx->emit_tile_init(ctx); - -@@ -143,13 +149,13 @@ render_tiles(struct fd_context *ctx) - uint32_t bh = gmem->bin_h; - - /* clip bin height: */ -- bh = MIN2(bh, gmem->height - yoff); -+ bh = MIN2(bh, gmem->miny + gmem->height - yoff); - - for (j = 0; j < gmem->nbins_x; j++) { - uint32_t bw = gmem->bin_w; - - /* clip bin width: */ -- bw = MIN2(bw, gmem->width - xoff); -+ bw = MIN2(bw, gmem->minx + gmem->width - xoff); - - DBG("bin_h=%d, yoff=%d, bin_w=%d, xoff=%d", - bh, yoff, bw, xoff); -diff --git a/src/gallium/drivers/freedreno/freedreno_screen.c b/src/gallium/drivers/freedreno/freedreno_screen.c -index 52d51c2..36ef8b0 100644 ---- a/src/gallium/drivers/freedreno/freedreno_screen.c -+++ b/src/gallium/drivers/freedreno/freedreno_screen.c -@@ -60,6 +60,7 @@ static const struct debug_named_value debug_options[] = { - {"disasm", FD_DBG_DISASM, "Dump TGSI and adreno shader disassembly"}, - {"dclear", FD_DBG_DCLEAR, "Mark all state dirty after clear"}, - {"dgmem", FD_DBG_DGMEM, "Mark all state dirty after GMEM tile pass"}, -+ {"dscis", FD_DBG_DSCIS, "Disable scissor optimization"}, - DEBUG_NAMED_VALUE_END - }; - -diff --git a/src/gallium/drivers/freedreno/freedreno_util.h b/src/gallium/drivers/freedreno/freedreno_util.h -index f18f0fe..b49cdfc 100644 ---- a/src/gallium/drivers/freedreno/freedreno_util.h -+++ b/src/gallium/drivers/freedreno/freedreno_util.h -@@ -47,10 +47,11 @@ enum adreno_pa_su_sc_draw fd_polygon_mode(unsigned mode); - enum adreno_stencil_op fd_stencil_op(unsigned op); - - --#define FD_DBG_MSGS 0x1 --#define FD_DBG_DISASM 0x2 --#define FD_DBG_DCLEAR 0x4 --#define FD_DBG_DGMEM 0x8 -+#define FD_DBG_MSGS 0x01 -+#define FD_DBG_DISASM 0x02 -+#define FD_DBG_DCLEAR 0x04 -+#define FD_DBG_DGMEM 0x08 -+#define FD_DBG_DSCIS 0x10 - extern int fd_mesa_debug; - 
- #define DBG(fmt, ...) \ --- -1.8.4.2 - diff --git a/0004-freedreno-update-register-headers.patch b/0004-freedreno-update-register-headers.patch deleted file mode 100644 index 4654619..0000000 --- a/0004-freedreno-update-register-headers.patch +++ /dev/null @@ -1,1238 +0,0 @@ -From e1e9f69d3c90803d3c0e2d9b9396c1a06b5f0bb2 Mon Sep 17 00:00:00 2001 -From: Rob Clark -Date: Mon, 5 Aug 2013 17:57:24 -0400 -Subject: [PATCH 04/17] freedreno: update register headers - -resync w/ rnndb database - -Signed-off-by: Rob Clark ---- - src/gallium/drivers/freedreno/a2xx/a2xx.xml.h | 93 ++--- - src/gallium/drivers/freedreno/a3xx/a3xx.xml.h | 439 +++++++++++++++++++--- - src/gallium/drivers/freedreno/a3xx/fd3_emit.c | 4 +- - src/gallium/drivers/freedreno/a3xx/fd3_program.c | 2 +- - src/gallium/drivers/freedreno/a3xx/fd3_util.c | 5 +- - src/gallium/drivers/freedreno/adreno_common.xml.h | 319 +++++++++++++++- - src/gallium/drivers/freedreno/adreno_pm4.xml.h | 6 +- - src/gallium/drivers/freedreno/freedreno_util.h | 1 + - 8 files changed, 758 insertions(+), 111 deletions(-) - -diff --git a/src/gallium/drivers/freedreno/a2xx/a2xx.xml.h b/src/gallium/drivers/freedreno/a2xx/a2xx.xml.h -index bee01f1..3546386 100644 ---- a/src/gallium/drivers/freedreno/a2xx/a2xx.xml.h -+++ b/src/gallium/drivers/freedreno/a2xx/a2xx.xml.h -@@ -8,10 +8,12 @@ http://0x04.net/cgit/index.cgi/rules-ng-ng - git clone git://0x04.net/rules-ng-ng - - The rules-ng-ng source files this header was generated from are: --- /home/robclark/src/freedreno/envytools/rnndb/a2xx.xml ( 30127 bytes, from 2013-05-05 18:29:35) -+- /home/robclark/src/freedreno/envytools/rnndb/adreno.xml ( 327 bytes, from 2013-07-05 19:21:12) - - /home/robclark/src/freedreno/envytools/rnndb/freedreno_copyright.xml ( 1453 bytes, from 2013-03-31 16:51:27) --- /home/robclark/src/freedreno/envytools/rnndb/adreno_common.xml ( 3094 bytes, from 2013-05-05 18:29:22) -+- /home/robclark/src/freedreno/envytools/rnndb/a2xx/a2xx.xml ( 30005 bytes, from 2013-07-19 
21:30:48) -+- /home/robclark/src/freedreno/envytools/rnndb/adreno_common.xml ( 8983 bytes, from 2013-07-24 01:38:36) - - /home/robclark/src/freedreno/envytools/rnndb/adreno_pm4.xml ( 9712 bytes, from 2013-05-26 15:22:37) -+- /home/robclark/src/freedreno/envytools/rnndb/a3xx/a3xx.xml ( 51415 bytes, from 2013-08-03 14:26:05) - - Copyright (C) 2013 by the following authors: - - Rob Clark (robclark) -@@ -236,56 +238,6 @@ enum sq_tex_filter { - - #define REG_A2XX_CP_PFP_UCODE_DATA 0x000000c1 - --#define REG_A2XX_CP_RB_BASE 0x000001c0 -- --#define REG_A2XX_CP_RB_CNTL 0x000001c1 -- --#define REG_A2XX_CP_RB_RPTR_ADDR 0x000001c3 -- --#define REG_A2XX_CP_RB_RPTR 0x000001c4 -- --#define REG_A2XX_CP_RB_WPTR 0x000001c5 -- --#define REG_A2XX_CP_RB_WPTR_DELAY 0x000001c6 -- --#define REG_A2XX_CP_RB_RPTR_WR 0x000001c7 -- --#define REG_A2XX_CP_RB_WPTR_BASE 0x000001c8 -- --#define REG_A2XX_CP_QUEUE_THRESHOLDS 0x000001d5 -- --#define REG_A2XX_SCRATCH_UMSK 0x000001dc -- --#define REG_A2XX_SCRATCH_ADDR 0x000001dd -- --#define REG_A2XX_CP_STATE_DEBUG_INDEX 0x000001ec -- --#define REG_A2XX_CP_STATE_DEBUG_DATA 0x000001ed -- --#define REG_A2XX_CP_INT_CNTL 0x000001f2 -- --#define REG_A2XX_CP_INT_STATUS 0x000001f3 -- --#define REG_A2XX_CP_INT_ACK 0x000001f4 -- --#define REG_A2XX_CP_ME_CNTL 0x000001f6 -- --#define REG_A2XX_CP_ME_STATUS 0x000001f7 -- --#define REG_A2XX_CP_ME_RAM_WADDR 0x000001f8 -- --#define REG_A2XX_CP_ME_RAM_RADDR 0x000001f9 -- --#define REG_A2XX_CP_ME_RAM_DATA 0x000001fa -- --#define REG_A2XX_CP_DEBUG 0x000001fc -- --#define REG_A2XX_CP_CSQ_RB_STAT 0x000001fd -- --#define REG_A2XX_CP_CSQ_IB1_STAT 0x000001fe -- --#define REG_A2XX_CP_CSQ_IB2_STAT 0x000001ff -- - #define REG_A2XX_RBBM_PERFCOUNTER1_SELECT 0x00000395 - - #define REG_A2XX_RBBM_PERFCOUNTER1_LO 0x00000397 -@@ -338,11 +290,32 @@ enum sq_tex_filter { - - #define REG_A2XX_CP_STAT 0x0000047f - --#define REG_A2XX_SCRATCH_REG0 0x00000578 -- --#define REG_A2XX_SCRATCH_REG2 0x0000057a -- - #define REG_A2XX_RBBM_STATUS 
0x000005d0 -+#define A2XX_RBBM_STATUS_CMDFIFO_AVAIL__MASK 0x0000001f -+#define A2XX_RBBM_STATUS_CMDFIFO_AVAIL__SHIFT 0 -+static inline uint32_t A2XX_RBBM_STATUS_CMDFIFO_AVAIL(uint32_t val) -+{ -+ return ((val) << A2XX_RBBM_STATUS_CMDFIFO_AVAIL__SHIFT) & A2XX_RBBM_STATUS_CMDFIFO_AVAIL__MASK; -+} -+#define A2XX_RBBM_STATUS_TC_BUSY 0x00000020 -+#define A2XX_RBBM_STATUS_HIRQ_PENDING 0x00000100 -+#define A2XX_RBBM_STATUS_CPRQ_PENDING 0x00000200 -+#define A2XX_RBBM_STATUS_CFRQ_PENDING 0x00000400 -+#define A2XX_RBBM_STATUS_PFRQ_PENDING 0x00000800 -+#define A2XX_RBBM_STATUS_VGT_BUSY_NO_DMA 0x00001000 -+#define A2XX_RBBM_STATUS_RBBM_WU_BUSY 0x00004000 -+#define A2XX_RBBM_STATUS_CP_NRT_BUSY 0x00010000 -+#define A2XX_RBBM_STATUS_MH_BUSY 0x00040000 -+#define A2XX_RBBM_STATUS_MH_COHERENCY_BUSY 0x00080000 -+#define A2XX_RBBM_STATUS_SX_BUSY 0x00200000 -+#define A2XX_RBBM_STATUS_TPC_BUSY 0x00400000 -+#define A2XX_RBBM_STATUS_SC_CNTX_BUSY 0x01000000 -+#define A2XX_RBBM_STATUS_PA_BUSY 0x02000000 -+#define A2XX_RBBM_STATUS_VGT_BUSY 0x04000000 -+#define A2XX_RBBM_STATUS_SQ_CNTX17_BUSY 0x08000000 -+#define A2XX_RBBM_STATUS_SQ_CNTX0_BUSY 0x10000000 -+#define A2XX_RBBM_STATUS_RB_CNTX_BUSY 0x40000000 -+#define A2XX_RBBM_STATUS_GUI_ACTIVE 0x80000000 - - #define REG_A2XX_A220_VSC_BIN_SIZE 0x00000c01 - #define A2XX_A220_VSC_BIN_SIZE_WIDTH__MASK 0x0000001f -@@ -358,13 +331,13 @@ static inline uint32_t A2XX_A220_VSC_BIN_SIZE_HEIGHT(uint32_t val) - return ((val >> 5) << A2XX_A220_VSC_BIN_SIZE_HEIGHT__SHIFT) & A2XX_A220_VSC_BIN_SIZE_HEIGHT__MASK; - } - --#define REG_A2XX_VSC_PIPE(i0) (0x00000c06 + 0x3*(i0)) -+static inline uint32_t REG_A2XX_VSC_PIPE(uint32_t i0) { return 0x00000c06 + 0x3*i0; } - --#define REG_A2XX_VSC_PIPE_CONFIG(i0) (0x00000c06 + 0x3*(i0)) -+static inline uint32_t REG_A2XX_VSC_PIPE_CONFIG(uint32_t i0) { return 0x00000c06 + 0x3*i0; } - --#define REG_A2XX_VSC_PIPE_DATA_ADDRESS(i0) (0x00000c07 + 0x3*(i0)) -+static inline uint32_t REG_A2XX_VSC_PIPE_DATA_ADDRESS(uint32_t i0) { 
return 0x00000c07 + 0x3*i0; } - --#define REG_A2XX_VSC_PIPE_DATA_LENGTH(i0) (0x00000c08 + 0x3*(i0)) -+static inline uint32_t REG_A2XX_VSC_PIPE_DATA_LENGTH(uint32_t i0) { return 0x00000c08 + 0x3*i0; } - - #define REG_A2XX_PC_DEBUG_CNTL 0x00000c38 - -diff --git a/src/gallium/drivers/freedreno/a3xx/a3xx.xml.h b/src/gallium/drivers/freedreno/a3xx/a3xx.xml.h -index c7f5085..d183516 100644 ---- a/src/gallium/drivers/freedreno/a3xx/a3xx.xml.h -+++ b/src/gallium/drivers/freedreno/a3xx/a3xx.xml.h -@@ -8,10 +8,12 @@ http://0x04.net/cgit/index.cgi/rules-ng-ng - git clone git://0x04.net/rules-ng-ng - - The rules-ng-ng source files this header was generated from are: --- /home/robclark/src/freedreno/envytools/rnndb/a3xx.xml ( 42578 bytes, from 2013-06-02 13:10:46) -+- /home/robclark/src/freedreno/envytools/rnndb/adreno.xml ( 327 bytes, from 2013-07-05 19:21:12) - - /home/robclark/src/freedreno/envytools/rnndb/freedreno_copyright.xml ( 1453 bytes, from 2013-03-31 16:51:27) --- /home/robclark/src/freedreno/envytools/rnndb/adreno_common.xml ( 3094 bytes, from 2013-05-05 18:29:22) -+- /home/robclark/src/freedreno/envytools/rnndb/a2xx/a2xx.xml ( 30005 bytes, from 2013-07-19 21:30:48) -+- /home/robclark/src/freedreno/envytools/rnndb/adreno_common.xml ( 8983 bytes, from 2013-07-24 01:38:36) - - /home/robclark/src/freedreno/envytools/rnndb/adreno_pm4.xml ( 9712 bytes, from 2013-05-26 15:22:37) -+- /home/robclark/src/freedreno/envytools/rnndb/a3xx/a3xx.xml ( 51415 bytes, from 2013-08-03 14:26:05) - - Copyright (C) 2013 by the following authors: - - Rob Clark (robclark) -@@ -130,6 +132,13 @@ enum a3xx_tex_fmt { - TFMT_NORM_USHORT_5551 = 6, - TFMT_NORM_USHORT_4444 = 7, - TFMT_NORM_UINT_X8Z24 = 10, -+ TFMT_NORM_UINT_NV12_UV_TILED = 17, -+ TFMT_NORM_UINT_NV12_Y_TILED = 19, -+ TFMT_NORM_UINT_NV12_UV = 21, -+ TFMT_NORM_UINT_NV12_Y = 23, -+ TFMT_NORM_UINT_I420_Y = 24, -+ TFMT_NORM_UINT_I420_U = 26, -+ TFMT_NORM_UINT_I420_V = 27, - TFMT_NORM_UINT_2_10_10_10 = 41, - TFMT_NORM_UINT_A8 = 44, - 
TFMT_NORM_UINT_L8_A8 = 47, -@@ -207,6 +216,37 @@ enum a3xx_tex_swiz { - A3XX_TEX_ONE = 5, - }; - -+enum a3xx_tex_type { -+ A3XX_TEX_1D = 0, -+ A3XX_TEX_2D = 1, -+ A3XX_TEX_CUBE = 2, -+ A3XX_TEX_3D = 3, -+}; -+ -+#define A3XX_INT0_RBBM_GPU_IDLE 0x00000001 -+#define A3XX_INT0_RBBM_AHB_ERROR 0x00000002 -+#define A3XX_INT0_RBBM_REG_TIMEOUT 0x00000004 -+#define A3XX_INT0_RBBM_ME_MS_TIMEOUT 0x00000008 -+#define A3XX_INT0_RBBM_PFP_MS_TIMEOUT 0x00000010 -+#define A3XX_INT0_RBBM_ATB_BUS_OVERFLOW 0x00000020 -+#define A3XX_INT0_VFD_ERROR 0x00000040 -+#define A3XX_INT0_CP_SW_INT 0x00000080 -+#define A3XX_INT0_CP_T0_PACKET_IN_IB 0x00000100 -+#define A3XX_INT0_CP_OPCODE_ERROR 0x00000200 -+#define A3XX_INT0_CP_RESERVED_BIT_ERROR 0x00000400 -+#define A3XX_INT0_CP_HW_FAULT 0x00000800 -+#define A3XX_INT0_CP_DMA 0x00001000 -+#define A3XX_INT0_CP_IB2_INT 0x00002000 -+#define A3XX_INT0_CP_IB1_INT 0x00004000 -+#define A3XX_INT0_CP_RB_INT 0x00008000 -+#define A3XX_INT0_CP_REG_PROTECT_FAULT 0x00010000 -+#define A3XX_INT0_CP_RB_DONE_TS 0x00020000 -+#define A3XX_INT0_CP_VS_DONE_TS 0x00040000 -+#define A3XX_INT0_CP_PS_DONE_TS 0x00080000 -+#define A3XX_INT0_CACHE_FLUSH_TS 0x00100000 -+#define A3XX_INT0_CP_AHB_ERROR_HALT 0x00200000 -+#define A3XX_INT0_MISC_HANG_DETECT 0x01000000 -+#define A3XX_INT0_UCHE_OOB_ACCESS 0x02000000 - #define REG_A3XX_RBBM_HW_VERSION 0x00000000 - - #define REG_A3XX_RBBM_HW_RELEASE 0x00000001 -@@ -230,6 +270,27 @@ enum a3xx_tex_swiz { - #define REG_A3XX_RBBM_GPR0_CTL 0x0000002e - - #define REG_A3XX_RBBM_STATUS 0x00000030 -+#define A3XX_RBBM_STATUS_HI_BUSY 0x00000001 -+#define A3XX_RBBM_STATUS_CP_ME_BUSY 0x00000002 -+#define A3XX_RBBM_STATUS_CP_PFP_BUSY 0x00000004 -+#define A3XX_RBBM_STATUS_CP_NRT_BUSY 0x00004000 -+#define A3XX_RBBM_STATUS_VBIF_BUSY 0x00008000 -+#define A3XX_RBBM_STATUS_TSE_BUSY 0x00010000 -+#define A3XX_RBBM_STATUS_RAS_BUSY 0x00020000 -+#define A3XX_RBBM_STATUS_RB_BUSY 0x00040000 -+#define A3XX_RBBM_STATUS_PC_DCALL_BUSY 0x00080000 -+#define 
A3XX_RBBM_STATUS_PC_VSD_BUSY 0x00100000 -+#define A3XX_RBBM_STATUS_VFD_BUSY 0x00200000 -+#define A3XX_RBBM_STATUS_VPC_BUSY 0x00400000 -+#define A3XX_RBBM_STATUS_UCHE_BUSY 0x00800000 -+#define A3XX_RBBM_STATUS_SP_BUSY 0x01000000 -+#define A3XX_RBBM_STATUS_TPL1_BUSY 0x02000000 -+#define A3XX_RBBM_STATUS_MARB_BUSY 0x04000000 -+#define A3XX_RBBM_STATUS_VSC_BUSY 0x08000000 -+#define A3XX_RBBM_STATUS_ARB_BUSY 0x10000000 -+#define A3XX_RBBM_STATUS_HLSQ_BUSY 0x20000000 -+#define A3XX_RBBM_STATUS_GPU_BUSY_NOHC 0x40000000 -+#define A3XX_RBBM_STATUS_GPU_BUSY 0x80000000 - - #define REG_A3XX_RBBM_WAIT_IDLE_CLOCKS_CTL 0x00000033 - -@@ -251,20 +312,202 @@ enum a3xx_tex_swiz { - - #define REG_A3XX_RBBM_PERFCTR_CTL 0x00000080 - -+#define REG_A3XX_RBBM_PERFCTR_LOAD_CMD0 0x00000081 -+ -+#define REG_A3XX_RBBM_PERFCTR_LOAD_CMD1 0x00000082 -+ -+#define REG_A3XX_RBBM_PERFCTR_LOAD_VALUE_LO 0x00000084 -+ -+#define REG_A3XX_RBBM_PERFCTR_LOAD_VALUE_HI 0x00000085 -+ -+#define REG_A3XX_RBBM_PERFCOUNTER0_SELECT 0x00000086 -+ -+#define REG_A3XX_RBBM_PERFCOUNTER1_SELECT 0x00000087 -+ - #define REG_A3XX_RBBM_GPU_BUSY_MASKED 0x00000088 - -+#define REG_A3XX_RBBM_PERFCTR_CP_0_LO 0x00000090 -+ -+#define REG_A3XX_RBBM_PERFCTR_CP_0_HI 0x00000091 -+ -+#define REG_A3XX_RBBM_PERFCTR_RBBM_0_LO 0x00000092 -+ -+#define REG_A3XX_RBBM_PERFCTR_RBBM_0_HI 0x00000093 -+ -+#define REG_A3XX_RBBM_PERFCTR_RBBM_1_LO 0x00000094 -+ -+#define REG_A3XX_RBBM_PERFCTR_RBBM_1_HI 0x00000095 -+ -+#define REG_A3XX_RBBM_PERFCTR_PC_0_LO 0x00000096 -+ -+#define REG_A3XX_RBBM_PERFCTR_PC_0_HI 0x00000097 -+ -+#define REG_A3XX_RBBM_PERFCTR_PC_1_LO 0x00000098 -+ -+#define REG_A3XX_RBBM_PERFCTR_PC_1_HI 0x00000099 -+ -+#define REG_A3XX_RBBM_PERFCTR_PC_2_LO 0x0000009a -+ -+#define REG_A3XX_RBBM_PERFCTR_PC_2_HI 0x0000009b -+ -+#define REG_A3XX_RBBM_PERFCTR_PC_3_LO 0x0000009c -+ -+#define REG_A3XX_RBBM_PERFCTR_PC_3_HI 0x0000009d -+ -+#define REG_A3XX_RBBM_PERFCTR_VFD_0_LO 0x0000009e -+ -+#define REG_A3XX_RBBM_PERFCTR_VFD_0_HI 0x0000009f -+ 
-+#define REG_A3XX_RBBM_PERFCTR_VFD_1_LO 0x000000a0 -+ -+#define REG_A3XX_RBBM_PERFCTR_VFD_1_HI 0x000000a1 -+ -+#define REG_A3XX_RBBM_PERFCTR_HLSQ_0_LO 0x000000a2 -+ -+#define REG_A3XX_RBBM_PERFCTR_HLSQ_0_HI 0x000000a3 -+ -+#define REG_A3XX_RBBM_PERFCTR_HLSQ_1_LO 0x000000a4 -+ -+#define REG_A3XX_RBBM_PERFCTR_HLSQ_1_HI 0x000000a5 -+ -+#define REG_A3XX_RBBM_PERFCTR_HLSQ_2_LO 0x000000a6 -+ -+#define REG_A3XX_RBBM_PERFCTR_HLSQ_2_HI 0x000000a7 -+ -+#define REG_A3XX_RBBM_PERFCTR_HLSQ_3_LO 0x000000a8 -+ -+#define REG_A3XX_RBBM_PERFCTR_HLSQ_3_HI 0x000000a9 -+ -+#define REG_A3XX_RBBM_PERFCTR_HLSQ_4_LO 0x000000aa -+ -+#define REG_A3XX_RBBM_PERFCTR_HLSQ_4_HI 0x000000ab -+ -+#define REG_A3XX_RBBM_PERFCTR_HLSQ_5_LO 0x000000ac -+ -+#define REG_A3XX_RBBM_PERFCTR_HLSQ_5_HI 0x000000ad -+ -+#define REG_A3XX_RBBM_PERFCTR_VPC_0_LO 0x000000ae -+ -+#define REG_A3XX_RBBM_PERFCTR_VPC_0_HI 0x000000af -+ -+#define REG_A3XX_RBBM_PERFCTR_VPC_1_LO 0x000000b0 -+ -+#define REG_A3XX_RBBM_PERFCTR_VPC_1_HI 0x000000b1 -+ -+#define REG_A3XX_RBBM_PERFCTR_TSE_0_LO 0x000000b2 -+ -+#define REG_A3XX_RBBM_PERFCTR_TSE_0_HI 0x000000b3 -+ -+#define REG_A3XX_RBBM_PERFCTR_TSE_1_LO 0x000000b4 -+ -+#define REG_A3XX_RBBM_PERFCTR_TSE_1_HI 0x000000b5 -+ -+#define REG_A3XX_RBBM_PERFCTR_RAS_0_LO 0x000000b6 -+ -+#define REG_A3XX_RBBM_PERFCTR_RAS_0_HI 0x000000b7 -+ -+#define REG_A3XX_RBBM_PERFCTR_RAS_1_LO 0x000000b8 -+ -+#define REG_A3XX_RBBM_PERFCTR_RAS_1_HI 0x000000b9 -+ -+#define REG_A3XX_RBBM_PERFCTR_UCHE_0_LO 0x000000ba -+ -+#define REG_A3XX_RBBM_PERFCTR_UCHE_0_HI 0x000000bb -+ -+#define REG_A3XX_RBBM_PERFCTR_UCHE_1_LO 0x000000bc -+ -+#define REG_A3XX_RBBM_PERFCTR_UCHE_1_HI 0x000000bd -+ -+#define REG_A3XX_RBBM_PERFCTR_UCHE_2_LO 0x000000be -+ -+#define REG_A3XX_RBBM_PERFCTR_UCHE_2_HI 0x000000bf -+ -+#define REG_A3XX_RBBM_PERFCTR_UCHE_3_LO 0x000000c0 -+ -+#define REG_A3XX_RBBM_PERFCTR_UCHE_3_HI 0x000000c1 -+ -+#define REG_A3XX_RBBM_PERFCTR_UCHE_4_LO 0x000000c2 -+ -+#define REG_A3XX_RBBM_PERFCTR_UCHE_4_HI 0x000000c3 
-+ -+#define REG_A3XX_RBBM_PERFCTR_UCHE_5_LO 0x000000c4 -+ -+#define REG_A3XX_RBBM_PERFCTR_UCHE_5_HI 0x000000c5 -+ -+#define REG_A3XX_RBBM_PERFCTR_TP_0_LO 0x000000c6 -+ -+#define REG_A3XX_RBBM_PERFCTR_TP_0_HI 0x000000c7 -+ -+#define REG_A3XX_RBBM_PERFCTR_TP_1_LO 0x000000c8 -+ -+#define REG_A3XX_RBBM_PERFCTR_TP_1_HI 0x000000c9 -+ -+#define REG_A3XX_RBBM_PERFCTR_TP_2_LO 0x000000ca -+ -+#define REG_A3XX_RBBM_PERFCTR_TP_2_HI 0x000000cb -+ -+#define REG_A3XX_RBBM_PERFCTR_TP_3_LO 0x000000cc -+ -+#define REG_A3XX_RBBM_PERFCTR_TP_3_HI 0x000000cd -+ -+#define REG_A3XX_RBBM_PERFCTR_TP_4_LO 0x000000ce -+ -+#define REG_A3XX_RBBM_PERFCTR_TP_4_HI 0x000000cf -+ -+#define REG_A3XX_RBBM_PERFCTR_TP_5_LO 0x000000d0 -+ -+#define REG_A3XX_RBBM_PERFCTR_TP_5_HI 0x000000d1 -+ -+#define REG_A3XX_RBBM_PERFCTR_SP_0_LO 0x000000d2 -+ -+#define REG_A3XX_RBBM_PERFCTR_SP_0_HI 0x000000d3 -+ -+#define REG_A3XX_RBBM_PERFCTR_SP_1_LO 0x000000d4 -+ -+#define REG_A3XX_RBBM_PERFCTR_SP_1_HI 0x000000d5 -+ -+#define REG_A3XX_RBBM_PERFCTR_SP_2_LO 0x000000d6 -+ -+#define REG_A3XX_RBBM_PERFCTR_SP_2_HI 0x000000d7 -+ -+#define REG_A3XX_RBBM_PERFCTR_SP_3_LO 0x000000d8 -+ -+#define REG_A3XX_RBBM_PERFCTR_SP_3_HI 0x000000d9 -+ -+#define REG_A3XX_RBBM_PERFCTR_SP_4_LO 0x000000da -+ -+#define REG_A3XX_RBBM_PERFCTR_SP_4_HI 0x000000db -+ -+#define REG_A3XX_RBBM_PERFCTR_SP_5_LO 0x000000dc -+ -+#define REG_A3XX_RBBM_PERFCTR_SP_5_HI 0x000000dd -+ -+#define REG_A3XX_RBBM_PERFCTR_SP_6_LO 0x000000de -+ -+#define REG_A3XX_RBBM_PERFCTR_SP_6_HI 0x000000df -+ - #define REG_A3XX_RBBM_PERFCTR_SP_7_LO 0x000000e0 - - #define REG_A3XX_RBBM_PERFCTR_SP_7_HI 0x000000e1 - -+#define REG_A3XX_RBBM_PERFCTR_RB_0_LO 0x000000e2 -+ -+#define REG_A3XX_RBBM_PERFCTR_RB_0_HI 0x000000e3 -+ -+#define REG_A3XX_RBBM_PERFCTR_RB_1_LO 0x000000e4 -+ -+#define REG_A3XX_RBBM_PERFCTR_RB_1_HI 0x000000e5 -+ -+#define REG_A3XX_RBBM_PERFCTR_PWR_0_LO 0x000000ea -+ -+#define REG_A3XX_RBBM_PERFCTR_PWR_0_HI 0x000000eb -+ - #define REG_A3XX_RBBM_PERFCTR_PWR_1_LO 
0x000000ec - - #define REG_A3XX_RBBM_PERFCTR_PWR_1_HI 0x000000ed - - #define REG_A3XX_RBBM_RBBM_CTL 0x00000100 - --#define REG_A3XX_RBBM_RBBM_CTL 0x00000100 -- - #define REG_A3XX_RBBM_DEBUG_BUS_CTL 0x00000111 - - #define REG_A3XX_RBBM_DEBUG_BUS_DATA_STATUS 0x00000112 -@@ -287,22 +530,20 @@ enum a3xx_tex_swiz { - - #define REG_A3XX_CP_MEQ_DATA 0x000001db - -+#define REG_A3XX_CP_PERFCOUNTER_SELECT 0x00000445 -+ - #define REG_A3XX_CP_HW_FAULT 0x0000045c - - #define REG_A3XX_CP_PROTECT_CTRL 0x0000045e - - #define REG_A3XX_CP_PROTECT_STATUS 0x0000045f - --#define REG_A3XX_CP_PROTECT(i0) (0x00000460 + 0x1*(i0)) -+static inline uint32_t REG_A3XX_CP_PROTECT(uint32_t i0) { return 0x00000460 + 0x1*i0; } - --#define REG_A3XX_CP_PROTECT_REG(i0) (0x00000460 + 0x1*(i0)) -+static inline uint32_t REG_A3XX_CP_PROTECT_REG(uint32_t i0) { return 0x00000460 + 0x1*i0; } - - #define REG_A3XX_CP_AHB_FAULT 0x0000054d - --#define REG_A3XX_CP_SCRATCH_REG2 0x0000057a -- --#define REG_A3XX_CP_SCRATCH_REG3 0x0000057b -- - #define REG_A3XX_GRAS_CL_CLIP_CNTL 0x00002040 - #define A3XX_GRAS_CL_CLIP_CNTL_IJ_PERSP_CENTER 0x00001000 - #define A3XX_GRAS_CL_CLIP_CNTL_CLIP_DISABLE 0x00010000 -@@ -528,9 +769,9 @@ static inline uint32_t A3XX_RB_MSAA_CONTROL_SAMPLE_MASK(uint32_t val) - - #define REG_A3XX_UNKNOWN_20C3 0x000020c3 - --#define REG_A3XX_RB_MRT(i0) (0x000020c4 + 0x4*(i0)) -+static inline uint32_t REG_A3XX_RB_MRT(uint32_t i0) { return 0x000020c4 + 0x4*i0; } - --#define REG_A3XX_RB_MRT_CONTROL(i0) (0x000020c4 + 0x4*(i0)) -+static inline uint32_t REG_A3XX_RB_MRT_CONTROL(uint32_t i0) { return 0x000020c4 + 0x4*i0; } - #define A3XX_RB_MRT_CONTROL_READ_DEST_ENABLE 0x00000008 - #define A3XX_RB_MRT_CONTROL_BLEND 0x00000010 - #define A3XX_RB_MRT_CONTROL_BLEND2 0x00000020 -@@ -553,7 +794,7 @@ static inline uint32_t A3XX_RB_MRT_CONTROL_COMPONENT_ENABLE(uint32_t val) - return ((val) << A3XX_RB_MRT_CONTROL_COMPONENT_ENABLE__SHIFT) & A3XX_RB_MRT_CONTROL_COMPONENT_ENABLE__MASK; - } - --#define 
REG_A3XX_RB_MRT_BUF_INFO(i0) (0x000020c5 + 0x4*(i0)) -+static inline uint32_t REG_A3XX_RB_MRT_BUF_INFO(uint32_t i0) { return 0x000020c5 + 0x4*i0; } - #define A3XX_RB_MRT_BUF_INFO_COLOR_FORMAT__MASK 0x0000003f - #define A3XX_RB_MRT_BUF_INFO_COLOR_FORMAT__SHIFT 0 - static inline uint32_t A3XX_RB_MRT_BUF_INFO_COLOR_FORMAT(enum a3xx_color_fmt val) -@@ -579,7 +820,7 @@ static inline uint32_t A3XX_RB_MRT_BUF_INFO_COLOR_BUF_PITCH(uint32_t val) - return ((val >> 5) << A3XX_RB_MRT_BUF_INFO_COLOR_BUF_PITCH__SHIFT) & A3XX_RB_MRT_BUF_INFO_COLOR_BUF_PITCH__MASK; - } - --#define REG_A3XX_RB_MRT_BUF_BASE(i0) (0x000020c6 + 0x4*(i0)) -+static inline uint32_t REG_A3XX_RB_MRT_BUF_BASE(uint32_t i0) { return 0x000020c6 + 0x4*i0; } - #define A3XX_RB_MRT_BUF_BASE_COLOR_BUF_BASE__MASK 0xfffffff0 - #define A3XX_RB_MRT_BUF_BASE_COLOR_BUF_BASE__SHIFT 4 - static inline uint32_t A3XX_RB_MRT_BUF_BASE_COLOR_BUF_BASE(uint32_t val) -@@ -587,7 +828,7 @@ static inline uint32_t A3XX_RB_MRT_BUF_BASE_COLOR_BUF_BASE(uint32_t val) - return ((val >> 5) << A3XX_RB_MRT_BUF_BASE_COLOR_BUF_BASE__SHIFT) & A3XX_RB_MRT_BUF_BASE_COLOR_BUF_BASE__MASK; - } - --#define REG_A3XX_RB_MRT_BLEND_CONTROL(i0) (0x000020c7 + 0x4*(i0)) -+static inline uint32_t REG_A3XX_RB_MRT_BLEND_CONTROL(uint32_t i0) { return 0x000020c7 + 0x4*i0; } - #define A3XX_RB_MRT_BLEND_CONTROL_RGB_SRC_FACTOR__MASK 0x0000001f - #define A3XX_RB_MRT_BLEND_CONTROL_RGB_SRC_FACTOR__SHIFT 0 - static inline uint32_t A3XX_RB_MRT_BLEND_CONTROL_RGB_SRC_FACTOR(enum adreno_rb_blend_factor val) -@@ -627,12 +868,60 @@ static inline uint32_t A3XX_RB_MRT_BLEND_CONTROL_ALPHA_DEST_FACTOR(enum adreno_r - #define A3XX_RB_MRT_BLEND_CONTROL_CLAMP_ENABLE 0x20000000 - - #define REG_A3XX_RB_BLEND_RED 0x000020e4 -+#define A3XX_RB_BLEND_RED_UINT__MASK 0x000000ff -+#define A3XX_RB_BLEND_RED_UINT__SHIFT 0 -+static inline uint32_t A3XX_RB_BLEND_RED_UINT(uint32_t val) -+{ -+ return ((val) << A3XX_RB_BLEND_RED_UINT__SHIFT) & A3XX_RB_BLEND_RED_UINT__MASK; -+} -+#define 
A3XX_RB_BLEND_RED_FLOAT__MASK 0xffff0000 -+#define A3XX_RB_BLEND_RED_FLOAT__SHIFT 16 -+static inline uint32_t A3XX_RB_BLEND_RED_FLOAT(float val) -+{ -+ return ((util_float_to_half(val)) << A3XX_RB_BLEND_RED_FLOAT__SHIFT) & A3XX_RB_BLEND_RED_FLOAT__MASK; -+} - - #define REG_A3XX_RB_BLEND_GREEN 0x000020e5 -+#define A3XX_RB_BLEND_GREEN_UINT__MASK 0x000000ff -+#define A3XX_RB_BLEND_GREEN_UINT__SHIFT 0 -+static inline uint32_t A3XX_RB_BLEND_GREEN_UINT(uint32_t val) -+{ -+ return ((val) << A3XX_RB_BLEND_GREEN_UINT__SHIFT) & A3XX_RB_BLEND_GREEN_UINT__MASK; -+} -+#define A3XX_RB_BLEND_GREEN_FLOAT__MASK 0xffff0000 -+#define A3XX_RB_BLEND_GREEN_FLOAT__SHIFT 16 -+static inline uint32_t A3XX_RB_BLEND_GREEN_FLOAT(float val) -+{ -+ return ((util_float_to_half(val)) << A3XX_RB_BLEND_GREEN_FLOAT__SHIFT) & A3XX_RB_BLEND_GREEN_FLOAT__MASK; -+} - - #define REG_A3XX_RB_BLEND_BLUE 0x000020e6 -+#define A3XX_RB_BLEND_BLUE_UINT__MASK 0x000000ff -+#define A3XX_RB_BLEND_BLUE_UINT__SHIFT 0 -+static inline uint32_t A3XX_RB_BLEND_BLUE_UINT(uint32_t val) -+{ -+ return ((val) << A3XX_RB_BLEND_BLUE_UINT__SHIFT) & A3XX_RB_BLEND_BLUE_UINT__MASK; -+} -+#define A3XX_RB_BLEND_BLUE_FLOAT__MASK 0xffff0000 -+#define A3XX_RB_BLEND_BLUE_FLOAT__SHIFT 16 -+static inline uint32_t A3XX_RB_BLEND_BLUE_FLOAT(float val) -+{ -+ return ((util_float_to_half(val)) << A3XX_RB_BLEND_BLUE_FLOAT__SHIFT) & A3XX_RB_BLEND_BLUE_FLOAT__MASK; -+} - - #define REG_A3XX_RB_BLEND_ALPHA 0x000020e7 -+#define A3XX_RB_BLEND_ALPHA_UINT__MASK 0x000000ff -+#define A3XX_RB_BLEND_ALPHA_UINT__SHIFT 0 -+static inline uint32_t A3XX_RB_BLEND_ALPHA_UINT(uint32_t val) -+{ -+ return ((val) << A3XX_RB_BLEND_ALPHA_UINT__SHIFT) & A3XX_RB_BLEND_ALPHA_UINT__MASK; -+} -+#define A3XX_RB_BLEND_ALPHA_FLOAT__MASK 0xffff0000 -+#define A3XX_RB_BLEND_ALPHA_FLOAT__SHIFT 16 -+static inline uint32_t A3XX_RB_BLEND_ALPHA_FLOAT(float val) -+{ -+ return ((util_float_to_half(val)) << A3XX_RB_BLEND_ALPHA_FLOAT__SHIFT) & A3XX_RB_BLEND_ALPHA_FLOAT__MASK; -+} - - #define 
REG_A3XX_UNKNOWN_20E8 0x000020e8 - -@@ -1063,9 +1352,9 @@ static inline uint32_t A3XX_VFD_CONTROL_1_REGID4INST(uint32_t val) - - #define REG_A3XX_VFD_INDEX_OFFSET 0x00002245 - --#define REG_A3XX_VFD_FETCH(i0) (0x00002246 + 0x2*(i0)) -+static inline uint32_t REG_A3XX_VFD_FETCH(uint32_t i0) { return 0x00002246 + 0x2*i0; } - --#define REG_A3XX_VFD_FETCH_INSTR_0(i0) (0x00002246 + 0x2*(i0)) -+static inline uint32_t REG_A3XX_VFD_FETCH_INSTR_0(uint32_t i0) { return 0x00002246 + 0x2*i0; } - #define A3XX_VFD_FETCH_INSTR_0_FETCHSIZE__MASK 0x0000007f - #define A3XX_VFD_FETCH_INSTR_0_FETCHSIZE__SHIFT 0 - static inline uint32_t A3XX_VFD_FETCH_INSTR_0_FETCHSIZE(uint32_t val) -@@ -1092,11 +1381,11 @@ static inline uint32_t A3XX_VFD_FETCH_INSTR_0_STEPRATE(uint32_t val) - return ((val) << A3XX_VFD_FETCH_INSTR_0_STEPRATE__SHIFT) & A3XX_VFD_FETCH_INSTR_0_STEPRATE__MASK; - } - --#define REG_A3XX_VFD_FETCH_INSTR_1(i0) (0x00002247 + 0x2*(i0)) -+static inline uint32_t REG_A3XX_VFD_FETCH_INSTR_1(uint32_t i0) { return 0x00002247 + 0x2*i0; } - --#define REG_A3XX_VFD_DECODE(i0) (0x00002266 + 0x1*(i0)) -+static inline uint32_t REG_A3XX_VFD_DECODE(uint32_t i0) { return 0x00002266 + 0x1*i0; } - --#define REG_A3XX_VFD_DECODE_INSTR(i0) (0x00002266 + 0x1*(i0)) -+static inline uint32_t REG_A3XX_VFD_DECODE_INSTR(uint32_t i0) { return 0x00002266 + 0x1*i0; } - #define A3XX_VFD_DECODE_INSTR_WRITEMASK__MASK 0x0000000f - #define A3XX_VFD_DECODE_INSTR_WRITEMASK__SHIFT 0 - static inline uint32_t A3XX_VFD_DECODE_INSTR_WRITEMASK(uint32_t val) -@@ -1173,13 +1462,13 @@ static inline uint32_t A3XX_VPC_PACK_NUMNONPOSVSVAR(uint32_t val) - return ((val) << A3XX_VPC_PACK_NUMNONPOSVSVAR__SHIFT) & A3XX_VPC_PACK_NUMNONPOSVSVAR__MASK; - } - --#define REG_A3XX_VPC_VARYING_INTERP(i0) (0x00002282 + 0x1*(i0)) -+static inline uint32_t REG_A3XX_VPC_VARYING_INTERP(uint32_t i0) { return 0x00002282 + 0x1*i0; } - --#define REG_A3XX_VPC_VARYING_INTERP_MODE(i0) (0x00002282 + 0x1*(i0)) -+static inline uint32_t 
REG_A3XX_VPC_VARYING_INTERP_MODE(uint32_t i0) { return 0x00002282 + 0x1*i0; } - --#define REG_A3XX_VPC_VARYING_PS_REPL(i0) (0x00002286 + 0x1*(i0)) -+static inline uint32_t REG_A3XX_VPC_VARYING_PS_REPL(uint32_t i0) { return 0x00002286 + 0x1*i0; } - --#define REG_A3XX_VPC_VARYING_PS_REPL_MODE(i0) (0x00002286 + 0x1*(i0)) -+static inline uint32_t REG_A3XX_VPC_VARYING_PS_REPL_MODE(uint32_t i0) { return 0x00002286 + 0x1*i0; } - - #define REG_A3XX_VPC_VARY_CYLWRAP_ENABLE_0 0x0000228a - -@@ -1293,9 +1582,9 @@ static inline uint32_t A3XX_SP_VS_PARAM_REG_TOTALVSOUTVAR(uint32_t val) - return ((val) << A3XX_SP_VS_PARAM_REG_TOTALVSOUTVAR__SHIFT) & A3XX_SP_VS_PARAM_REG_TOTALVSOUTVAR__MASK; - } - --#define REG_A3XX_SP_VS_OUT(i0) (0x000022c7 + 0x1*(i0)) -+static inline uint32_t REG_A3XX_SP_VS_OUT(uint32_t i0) { return 0x000022c7 + 0x1*i0; } - --#define REG_A3XX_SP_VS_OUT_REG(i0) (0x000022c7 + 0x1*(i0)) -+static inline uint32_t REG_A3XX_SP_VS_OUT_REG(uint32_t i0) { return 0x000022c7 + 0x1*i0; } - #define A3XX_SP_VS_OUT_REG_A_REGID__MASK 0x000001ff - #define A3XX_SP_VS_OUT_REG_A_REGID__SHIFT 0 - static inline uint32_t A3XX_SP_VS_OUT_REG_A_REGID(uint32_t val) -@@ -1321,9 +1610,9 @@ static inline uint32_t A3XX_SP_VS_OUT_REG_B_COMPMASK(uint32_t val) - return ((val) << A3XX_SP_VS_OUT_REG_B_COMPMASK__SHIFT) & A3XX_SP_VS_OUT_REG_B_COMPMASK__MASK; - } - --#define REG_A3XX_SP_VS_VPC_DST(i0) (0x000022d0 + 0x1*(i0)) -+static inline uint32_t REG_A3XX_SP_VS_VPC_DST(uint32_t i0) { return 0x000022d0 + 0x1*i0; } - --#define REG_A3XX_SP_VS_VPC_DST_REG(i0) (0x000022d0 + 0x1*(i0)) -+static inline uint32_t REG_A3XX_SP_VS_VPC_DST_REG(uint32_t i0) { return 0x000022d0 + 0x1*i0; } - #define A3XX_SP_VS_VPC_DST_REG_OUTLOC0__MASK 0x000000ff - #define A3XX_SP_VS_VPC_DST_REG_OUTLOC0__SHIFT 0 - static inline uint32_t A3XX_SP_VS_VPC_DST_REG_OUTLOC0(uint32_t val) -@@ -1480,9 +1769,9 @@ static inline uint32_t A3XX_SP_FS_OBJ_OFFSET_REG_SHADEROBJOFFSET(uint32_t val) - - #define REG_A3XX_SP_FS_OUTPUT_REG 0x000022ec - 
--#define REG_A3XX_SP_FS_MRT(i0) (0x000022f0 + 0x1*(i0)) -+static inline uint32_t REG_A3XX_SP_FS_MRT(uint32_t i0) { return 0x000022f0 + 0x1*i0; } - --#define REG_A3XX_SP_FS_MRT_REG(i0) (0x000022f0 + 0x1*(i0)) -+static inline uint32_t REG_A3XX_SP_FS_MRT_REG(uint32_t i0) { return 0x000022f0 + 0x1*i0; } - #define A3XX_SP_FS_MRT_REG_REGID__MASK 0x000000ff - #define A3XX_SP_FS_MRT_REG_REGID__SHIFT 0 - static inline uint32_t A3XX_SP_FS_MRT_REG_REGID(uint32_t val) -@@ -1491,9 +1780,9 @@ static inline uint32_t A3XX_SP_FS_MRT_REG_REGID(uint32_t val) - } - #define A3XX_SP_FS_MRT_REG_HALF_PRECISION 0x00000100 - --#define REG_A3XX_SP_FS_IMAGE_OUTPUT(i0) (0x000022f4 + 0x1*(i0)) -+static inline uint32_t REG_A3XX_SP_FS_IMAGE_OUTPUT(uint32_t i0) { return 0x000022f4 + 0x1*i0; } - --#define REG_A3XX_SP_FS_IMAGE_OUTPUT_REG(i0) (0x000022f4 + 0x1*(i0)) -+static inline uint32_t REG_A3XX_SP_FS_IMAGE_OUTPUT_REG(uint32_t i0) { return 0x000022f4 + 0x1*i0; } - #define A3XX_SP_FS_IMAGE_OUTPUT_REG_MRTFORMAT__MASK 0x0000003f - #define A3XX_SP_FS_IMAGE_OUTPUT_REG_MRTFORMAT__SHIFT 0 - static inline uint32_t A3XX_SP_FS_IMAGE_OUTPUT_REG_MRTFORMAT(enum a3xx_color_fmt val) -@@ -1607,9 +1896,9 @@ static inline uint32_t A3XX_VSC_BIN_SIZE_HEIGHT(uint32_t val) - - #define REG_A3XX_VSC_SIZE_ADDRESS 0x00000c02 - --#define REG_A3XX_VSC_PIPE(i0) (0x00000c06 + 0x3*(i0)) -+static inline uint32_t REG_A3XX_VSC_PIPE(uint32_t i0) { return 0x00000c06 + 0x3*i0; } - --#define REG_A3XX_VSC_PIPE_CONFIG(i0) (0x00000c06 + 0x3*(i0)) -+static inline uint32_t REG_A3XX_VSC_PIPE_CONFIG(uint32_t i0) { return 0x00000c06 + 0x3*i0; } - #define A3XX_VSC_PIPE_CONFIG_X__MASK 0x000003ff - #define A3XX_VSC_PIPE_CONFIG_X__SHIFT 0 - static inline uint32_t A3XX_VSC_PIPE_CONFIG_X(uint32_t val) -@@ -1635,26 +1924,46 @@ static inline uint32_t A3XX_VSC_PIPE_CONFIG_H(uint32_t val) - return ((val) << A3XX_VSC_PIPE_CONFIG_H__SHIFT) & A3XX_VSC_PIPE_CONFIG_H__MASK; - } - --#define REG_A3XX_VSC_PIPE_DATA_ADDRESS(i0) (0x00000c07 + 0x3*(i0)) 
-+static inline uint32_t REG_A3XX_VSC_PIPE_DATA_ADDRESS(uint32_t i0) { return 0x00000c07 + 0x3*i0; } - --#define REG_A3XX_VSC_PIPE_DATA_LENGTH(i0) (0x00000c08 + 0x3*(i0)) -+static inline uint32_t REG_A3XX_VSC_PIPE_DATA_LENGTH(uint32_t i0) { return 0x00000c08 + 0x3*i0; } - - #define REG_A3XX_UNKNOWN_0C3D 0x00000c3d - -+#define REG_A3XX_PC_PERFCOUNTER0_SELECT 0x00000c48 -+ -+#define REG_A3XX_PC_PERFCOUNTER1_SELECT 0x00000c49 -+ -+#define REG_A3XX_PC_PERFCOUNTER2_SELECT 0x00000c4a -+ -+#define REG_A3XX_PC_PERFCOUNTER3_SELECT 0x00000c4b -+ - #define REG_A3XX_UNKNOWN_0C81 0x00000c81 - --#define REG_A3XX_GRAS_CL_USER_PLANE(i0) (0x00000ca0 + 0x4*(i0)) -+#define REG_A3XX_GRAS_PERFCOUNTER0_SELECT 0x00000c88 -+ -+#define REG_A3XX_GRAS_PERFCOUNTER1_SELECT 0x00000c89 - --#define REG_A3XX_GRAS_CL_USER_PLANE_X(i0) (0x00000ca0 + 0x4*(i0)) -+#define REG_A3XX_GRAS_PERFCOUNTER2_SELECT 0x00000c8a - --#define REG_A3XX_GRAS_CL_USER_PLANE_Y(i0) (0x00000ca1 + 0x4*(i0)) -+#define REG_A3XX_GRAS_PERFCOUNTER3_SELECT 0x00000c8b - --#define REG_A3XX_GRAS_CL_USER_PLANE_Z(i0) (0x00000ca2 + 0x4*(i0)) -+static inline uint32_t REG_A3XX_GRAS_CL_USER_PLANE(uint32_t i0) { return 0x00000ca0 + 0x4*i0; } - --#define REG_A3XX_GRAS_CL_USER_PLANE_W(i0) (0x00000ca3 + 0x4*(i0)) -+static inline uint32_t REG_A3XX_GRAS_CL_USER_PLANE_X(uint32_t i0) { return 0x00000ca0 + 0x4*i0; } -+ -+static inline uint32_t REG_A3XX_GRAS_CL_USER_PLANE_Y(uint32_t i0) { return 0x00000ca1 + 0x4*i0; } -+ -+static inline uint32_t REG_A3XX_GRAS_CL_USER_PLANE_Z(uint32_t i0) { return 0x00000ca2 + 0x4*i0; } -+ -+static inline uint32_t REG_A3XX_GRAS_CL_USER_PLANE_W(uint32_t i0) { return 0x00000ca3 + 0x4*i0; } - - #define REG_A3XX_RB_GMEM_BASE_ADDR 0x00000cc0 - -+#define REG_A3XX_RB_PERFCOUNTER0_SELECT 0x00000cc6 -+ -+#define REG_A3XX_RB_PERFCOUNTER1_SELECT 0x00000cc7 -+ - #define REG_A3XX_RB_WINDOW_SIZE 0x00000ce0 - #define A3XX_RB_WINDOW_SIZE_WIDTH__MASK 0x00003fff - #define A3XX_RB_WINDOW_SIZE_WIDTH__SHIFT 0 -@@ -1669,18 +1978,46 @@ 
static inline uint32_t A3XX_RB_WINDOW_SIZE_HEIGHT(uint32_t val) - return ((val) << A3XX_RB_WINDOW_SIZE_HEIGHT__SHIFT) & A3XX_RB_WINDOW_SIZE_HEIGHT__MASK; - } - --#define REG_A3XX_UNKNOWN_0E00 0x00000e00 -+#define REG_A3XX_HLSQ_PERFCOUNTER0_SELECT 0x00000e00 -+ -+#define REG_A3XX_HLSQ_PERFCOUNTER1_SELECT 0x00000e01 -+ -+#define REG_A3XX_HLSQ_PERFCOUNTER2_SELECT 0x00000e02 -+ -+#define REG_A3XX_HLSQ_PERFCOUNTER3_SELECT 0x00000e03 -+ -+#define REG_A3XX_HLSQ_PERFCOUNTER4_SELECT 0x00000e04 -+ -+#define REG_A3XX_HLSQ_PERFCOUNTER5_SELECT 0x00000e05 - - #define REG_A3XX_UNKNOWN_0E43 0x00000e43 - - #define REG_A3XX_VFD_PERFCOUNTER0_SELECT 0x00000e44 - -+#define REG_A3XX_VFD_PERFCOUNTER1_SELECT 0x00000e45 -+ - #define REG_A3XX_VPC_VPC_DEBUG_RAM_SEL 0x00000e61 - - #define REG_A3XX_VPC_VPC_DEBUG_RAM_READ 0x00000e62 - -+#define REG_A3XX_VPC_PERFCOUNTER0_SELECT 0x00000e64 -+ -+#define REG_A3XX_VPC_PERFCOUNTER1_SELECT 0x00000e65 -+ - #define REG_A3XX_UCHE_CACHE_MODE_CONTROL_REG 0x00000e82 - -+#define REG_A3XX_UCHE_PERFCOUNTER0_SELECT 0x00000e84 -+ -+#define REG_A3XX_UCHE_PERFCOUNTER1_SELECT 0x00000e85 -+ -+#define REG_A3XX_UCHE_PERFCOUNTER2_SELECT 0x00000e86 -+ -+#define REG_A3XX_UCHE_PERFCOUNTER3_SELECT 0x00000e87 -+ -+#define REG_A3XX_UCHE_PERFCOUNTER4_SELECT 0x00000e88 -+ -+#define REG_A3XX_UCHE_PERFCOUNTER5_SELECT 0x00000e89 -+ - #define REG_A3XX_UCHE_CACHE_INVALIDATE0_REG 0x00000ea0 - #define A3XX_UCHE_CACHE_INVALIDATE0_REG_ADDR__MASK 0x0fffffff - #define A3XX_UCHE_CACHE_INVALIDATE0_REG_ADDR__SHIFT 0 -@@ -1724,6 +2061,18 @@ static inline uint32_t A3XX_UCHE_CACHE_INVALIDATE1_REG_OPCODE(enum a3xx_cache_op - - #define REG_A3XX_UNKNOWN_0F03 0x00000f03 - -+#define REG_A3XX_TP_PERFCOUNTER0_SELECT 0x00000f04 -+ -+#define REG_A3XX_TP_PERFCOUNTER1_SELECT 0x00000f05 -+ -+#define REG_A3XX_TP_PERFCOUNTER2_SELECT 0x00000f06 -+ -+#define REG_A3XX_TP_PERFCOUNTER3_SELECT 0x00000f07 -+ -+#define REG_A3XX_TP_PERFCOUNTER4_SELECT 0x00000f08 -+ -+#define REG_A3XX_TP_PERFCOUNTER5_SELECT 
0x00000f09 -+ - #define REG_A3XX_TEX_SAMP_0 0x00000000 - #define A3XX_TEX_SAMP_0_XY_MAG__MASK 0x0000000c - #define A3XX_TEX_SAMP_0_XY_MAG__SHIFT 2 -@@ -1791,6 +2140,12 @@ static inline uint32_t A3XX_TEX_CONST_0_FMT(enum a3xx_tex_fmt val) - { - return ((val) << A3XX_TEX_CONST_0_FMT__SHIFT) & A3XX_TEX_CONST_0_FMT__MASK; - } -+#define A3XX_TEX_CONST_0_TYPE__MASK 0xc0000000 -+#define A3XX_TEX_CONST_0_TYPE__SHIFT 30 -+static inline uint32_t A3XX_TEX_CONST_0_TYPE(enum a3xx_tex_type val) -+{ -+ return ((val) << A3XX_TEX_CONST_0_TYPE__SHIFT) & A3XX_TEX_CONST_0_TYPE__MASK; -+} - - #define REG_A3XX_TEX_CONST_1 0x00000001 - #define A3XX_TEX_CONST_1_HEIGHT__MASK 0x00003fff -diff --git a/src/gallium/drivers/freedreno/a3xx/fd3_emit.c b/src/gallium/drivers/freedreno/a3xx/fd3_emit.c -index b8436c9..5ffd561 100644 ---- a/src/gallium/drivers/freedreno/a3xx/fd3_emit.c -+++ b/src/gallium/drivers/freedreno/a3xx/fd3_emit.c -@@ -536,8 +536,8 @@ fd3_emit_restore(struct fd_context *ctx) - OUT_PKT0(ring, REG_A3XX_UNKNOWN_0C3D, 1); - OUT_RING(ring, 0x00000001); /* UNKNOWN_0C3D */ - -- OUT_PKT0(ring, REG_A3XX_UNKNOWN_0E00, 1); -- OUT_RING(ring, 0x00000000); /* UNKNOWN_0E00 */ -+ OUT_PKT0(ring, REG_A3XX_HLSQ_PERFCOUNTER0_SELECT, 1); -+ OUT_RING(ring, 0x00000000); /* HLSQ_PERFCOUNTER0_SELECT */ - - OUT_PKT0(ring, REG_A3XX_HLSQ_CONST_VSPRESV_RANGE_REG, 2); - OUT_RING(ring, A3XX_HLSQ_CONST_VSPRESV_RANGE_REG_STARTENTRY(0) | -diff --git a/src/gallium/drivers/freedreno/a3xx/fd3_program.c b/src/gallium/drivers/freedreno/a3xx/fd3_program.c -index b5a027e..259c2dd 100644 ---- a/src/gallium/drivers/freedreno/a3xx/fd3_program.c -+++ b/src/gallium/drivers/freedreno/a3xx/fd3_program.c -@@ -249,7 +249,7 @@ fd3_program_emit(struct fd_ringbuffer *ring, - */ - for (i = 0; i < 6; i++) { - OUT_PKT0(ring, REG_A3XX_SP_PERFCOUNTER0_SELECT, 1); -- OUT_RING(ring, 0x00000000); /* SP_PERFCOUNTER4_SELECT */ -+ OUT_RING(ring, 0x00000000); /* SP_PERFCOUNTER0_SELECT */ - - OUT_PKT0(ring, REG_A3XX_SP_PERFCOUNTER4_SELECT, 
1); - OUT_RING(ring, 0x00000000); /* SP_PERFCOUNTER4_SELECT */ -diff --git a/src/gallium/drivers/freedreno/a3xx/fd3_util.c b/src/gallium/drivers/freedreno/a3xx/fd3_util.c -index a08bc23..6537fb7 100644 ---- a/src/gallium/drivers/freedreno/a3xx/fd3_util.c -+++ b/src/gallium/drivers/freedreno/a3xx/fd3_util.c -@@ -306,10 +306,11 @@ fd3_pipe2swap(enum pipe_format format) - case PIPE_FORMAT_B8G8R8A8_UNORM: - case PIPE_FORMAT_B8G8R8X8_UNORM: - return WXYZ; -+ -+ case PIPE_FORMAT_R8G8B8A8_UNORM: -+ case PIPE_FORMAT_R8G8B8X8_UNORM: - case PIPE_FORMAT_Z24X8_UNORM: - case PIPE_FORMAT_Z24_UNORM_S8_UINT: -- return WZYX; -- - default: - return WZYX; - } -diff --git a/src/gallium/drivers/freedreno/adreno_common.xml.h b/src/gallium/drivers/freedreno/adreno_common.xml.h -index b119812..61979d4 100644 ---- a/src/gallium/drivers/freedreno/adreno_common.xml.h -+++ b/src/gallium/drivers/freedreno/adreno_common.xml.h -@@ -8,10 +8,12 @@ http://0x04.net/cgit/index.cgi/rules-ng-ng - git clone git://0x04.net/rules-ng-ng - - The rules-ng-ng source files this header was generated from are: --- /home/robclark/src/freedreno/envytools/rnndb/a3xx.xml ( 42578 bytes, from 2013-06-02 13:10:46) -+- /home/robclark/src/freedreno/envytools/rnndb/adreno.xml ( 327 bytes, from 2013-07-05 19:21:12) - - /home/robclark/src/freedreno/envytools/rnndb/freedreno_copyright.xml ( 1453 bytes, from 2013-03-31 16:51:27) --- /home/robclark/src/freedreno/envytools/rnndb/adreno_common.xml ( 3094 bytes, from 2013-05-05 18:29:22) -+- /home/robclark/src/freedreno/envytools/rnndb/a2xx/a2xx.xml ( 30005 bytes, from 2013-07-19 21:30:48) -+- /home/robclark/src/freedreno/envytools/rnndb/adreno_common.xml ( 8983 bytes, from 2013-07-24 01:38:36) - - /home/robclark/src/freedreno/envytools/rnndb/adreno_pm4.xml ( 9712 bytes, from 2013-05-26 15:22:37) -+- /home/robclark/src/freedreno/envytools/rnndb/a3xx/a3xx.xml ( 51415 bytes, from 2013-08-03 14:26:05) - - Copyright (C) 2013 by the following authors: - - Rob Clark (robclark) -@@ 
-113,5 +115,318 @@ enum adreno_rb_depth_format { - DEPTHX_24_8 = 1, - }; - -+enum adreno_mmu_clnt_beh { -+ BEH_NEVR = 0, -+ BEH_TRAN_RNG = 1, -+ BEH_TRAN_FLT = 2, -+}; -+ -+#define REG_AXXX_MH_MMU_CONFIG 0x00000040 -+#define AXXX_MH_MMU_CONFIG_MMU_ENABLE 0x00000001 -+#define AXXX_MH_MMU_CONFIG_SPLIT_MODE_ENABLE 0x00000002 -+#define AXXX_MH_MMU_CONFIG_RB_W_CLNT_BEHAVIOR__MASK 0x00000030 -+#define AXXX_MH_MMU_CONFIG_RB_W_CLNT_BEHAVIOR__SHIFT 4 -+static inline uint32_t AXXX_MH_MMU_CONFIG_RB_W_CLNT_BEHAVIOR(enum adreno_mmu_clnt_beh val) -+{ -+ return ((val) << AXXX_MH_MMU_CONFIG_RB_W_CLNT_BEHAVIOR__SHIFT) & AXXX_MH_MMU_CONFIG_RB_W_CLNT_BEHAVIOR__MASK; -+} -+#define AXXX_MH_MMU_CONFIG_CP_W_CLNT_BEHAVIOR__MASK 0x000000c0 -+#define AXXX_MH_MMU_CONFIG_CP_W_CLNT_BEHAVIOR__SHIFT 6 -+static inline uint32_t AXXX_MH_MMU_CONFIG_CP_W_CLNT_BEHAVIOR(enum adreno_mmu_clnt_beh val) -+{ -+ return ((val) << AXXX_MH_MMU_CONFIG_CP_W_CLNT_BEHAVIOR__SHIFT) & AXXX_MH_MMU_CONFIG_CP_W_CLNT_BEHAVIOR__MASK; -+} -+#define AXXX_MH_MMU_CONFIG_CP_R0_CLNT_BEHAVIOR__MASK 0x00000300 -+#define AXXX_MH_MMU_CONFIG_CP_R0_CLNT_BEHAVIOR__SHIFT 8 -+static inline uint32_t AXXX_MH_MMU_CONFIG_CP_R0_CLNT_BEHAVIOR(enum adreno_mmu_clnt_beh val) -+{ -+ return ((val) << AXXX_MH_MMU_CONFIG_CP_R0_CLNT_BEHAVIOR__SHIFT) & AXXX_MH_MMU_CONFIG_CP_R0_CLNT_BEHAVIOR__MASK; -+} -+#define AXXX_MH_MMU_CONFIG_CP_R1_CLNT_BEHAVIOR__MASK 0x00000c00 -+#define AXXX_MH_MMU_CONFIG_CP_R1_CLNT_BEHAVIOR__SHIFT 10 -+static inline uint32_t AXXX_MH_MMU_CONFIG_CP_R1_CLNT_BEHAVIOR(enum adreno_mmu_clnt_beh val) -+{ -+ return ((val) << AXXX_MH_MMU_CONFIG_CP_R1_CLNT_BEHAVIOR__SHIFT) & AXXX_MH_MMU_CONFIG_CP_R1_CLNT_BEHAVIOR__MASK; -+} -+#define AXXX_MH_MMU_CONFIG_CP_R2_CLNT_BEHAVIOR__MASK 0x00003000 -+#define AXXX_MH_MMU_CONFIG_CP_R2_CLNT_BEHAVIOR__SHIFT 12 -+static inline uint32_t AXXX_MH_MMU_CONFIG_CP_R2_CLNT_BEHAVIOR(enum adreno_mmu_clnt_beh val) -+{ -+ return ((val) << AXXX_MH_MMU_CONFIG_CP_R2_CLNT_BEHAVIOR__SHIFT) & 
AXXX_MH_MMU_CONFIG_CP_R2_CLNT_BEHAVIOR__MASK; -+} -+#define AXXX_MH_MMU_CONFIG_CP_R3_CLNT_BEHAVIOR__MASK 0x0000c000 -+#define AXXX_MH_MMU_CONFIG_CP_R3_CLNT_BEHAVIOR__SHIFT 14 -+static inline uint32_t AXXX_MH_MMU_CONFIG_CP_R3_CLNT_BEHAVIOR(enum adreno_mmu_clnt_beh val) -+{ -+ return ((val) << AXXX_MH_MMU_CONFIG_CP_R3_CLNT_BEHAVIOR__SHIFT) & AXXX_MH_MMU_CONFIG_CP_R3_CLNT_BEHAVIOR__MASK; -+} -+#define AXXX_MH_MMU_CONFIG_CP_R4_CLNT_BEHAVIOR__MASK 0x00030000 -+#define AXXX_MH_MMU_CONFIG_CP_R4_CLNT_BEHAVIOR__SHIFT 16 -+static inline uint32_t AXXX_MH_MMU_CONFIG_CP_R4_CLNT_BEHAVIOR(enum adreno_mmu_clnt_beh val) -+{ -+ return ((val) << AXXX_MH_MMU_CONFIG_CP_R4_CLNT_BEHAVIOR__SHIFT) & AXXX_MH_MMU_CONFIG_CP_R4_CLNT_BEHAVIOR__MASK; -+} -+#define AXXX_MH_MMU_CONFIG_VGT_R0_CLNT_BEHAVIOR__MASK 0x000c0000 -+#define AXXX_MH_MMU_CONFIG_VGT_R0_CLNT_BEHAVIOR__SHIFT 18 -+static inline uint32_t AXXX_MH_MMU_CONFIG_VGT_R0_CLNT_BEHAVIOR(enum adreno_mmu_clnt_beh val) -+{ -+ return ((val) << AXXX_MH_MMU_CONFIG_VGT_R0_CLNT_BEHAVIOR__SHIFT) & AXXX_MH_MMU_CONFIG_VGT_R0_CLNT_BEHAVIOR__MASK; -+} -+#define AXXX_MH_MMU_CONFIG_VGT_R1_CLNT_BEHAVIOR__MASK 0x00300000 -+#define AXXX_MH_MMU_CONFIG_VGT_R1_CLNT_BEHAVIOR__SHIFT 20 -+static inline uint32_t AXXX_MH_MMU_CONFIG_VGT_R1_CLNT_BEHAVIOR(enum adreno_mmu_clnt_beh val) -+{ -+ return ((val) << AXXX_MH_MMU_CONFIG_VGT_R1_CLNT_BEHAVIOR__SHIFT) & AXXX_MH_MMU_CONFIG_VGT_R1_CLNT_BEHAVIOR__MASK; -+} -+#define AXXX_MH_MMU_CONFIG_TC_R_CLNT_BEHAVIOR__MASK 0x00c00000 -+#define AXXX_MH_MMU_CONFIG_TC_R_CLNT_BEHAVIOR__SHIFT 22 -+static inline uint32_t AXXX_MH_MMU_CONFIG_TC_R_CLNT_BEHAVIOR(enum adreno_mmu_clnt_beh val) -+{ -+ return ((val) << AXXX_MH_MMU_CONFIG_TC_R_CLNT_BEHAVIOR__SHIFT) & AXXX_MH_MMU_CONFIG_TC_R_CLNT_BEHAVIOR__MASK; -+} -+#define AXXX_MH_MMU_CONFIG_PA_W_CLNT_BEHAVIOR__MASK 0x03000000 -+#define AXXX_MH_MMU_CONFIG_PA_W_CLNT_BEHAVIOR__SHIFT 24 -+static inline uint32_t AXXX_MH_MMU_CONFIG_PA_W_CLNT_BEHAVIOR(enum adreno_mmu_clnt_beh val) -+{ -+ return 
((val) << AXXX_MH_MMU_CONFIG_PA_W_CLNT_BEHAVIOR__SHIFT) & AXXX_MH_MMU_CONFIG_PA_W_CLNT_BEHAVIOR__MASK; -+} -+ -+#define REG_AXXX_MH_MMU_VA_RANGE 0x00000041 -+ -+#define REG_AXXX_MH_MMU_PT_BASE 0x00000042 -+ -+#define REG_AXXX_MH_MMU_PAGE_FAULT 0x00000043 -+ -+#define REG_AXXX_MH_MMU_TRAN_ERROR 0x00000044 -+ -+#define REG_AXXX_MH_MMU_INVALIDATE 0x00000045 -+ -+#define REG_AXXX_MH_MMU_MPU_BASE 0x00000046 -+ -+#define REG_AXXX_MH_MMU_MPU_END 0x00000047 -+ -+#define REG_AXXX_CP_RB_BASE 0x000001c0 -+ -+#define REG_AXXX_CP_RB_CNTL 0x000001c1 -+#define AXXX_CP_RB_CNTL_BUFSZ__MASK 0x0000003f -+#define AXXX_CP_RB_CNTL_BUFSZ__SHIFT 0 -+static inline uint32_t AXXX_CP_RB_CNTL_BUFSZ(uint32_t val) -+{ -+ return ((val) << AXXX_CP_RB_CNTL_BUFSZ__SHIFT) & AXXX_CP_RB_CNTL_BUFSZ__MASK; -+} -+#define AXXX_CP_RB_CNTL_BLKSZ__MASK 0x00003f00 -+#define AXXX_CP_RB_CNTL_BLKSZ__SHIFT 8 -+static inline uint32_t AXXX_CP_RB_CNTL_BLKSZ(uint32_t val) -+{ -+ return ((val) << AXXX_CP_RB_CNTL_BLKSZ__SHIFT) & AXXX_CP_RB_CNTL_BLKSZ__MASK; -+} -+#define AXXX_CP_RB_CNTL_BUF_SWAP__MASK 0x00030000 -+#define AXXX_CP_RB_CNTL_BUF_SWAP__SHIFT 16 -+static inline uint32_t AXXX_CP_RB_CNTL_BUF_SWAP(uint32_t val) -+{ -+ return ((val) << AXXX_CP_RB_CNTL_BUF_SWAP__SHIFT) & AXXX_CP_RB_CNTL_BUF_SWAP__MASK; -+} -+#define AXXX_CP_RB_CNTL_POLL_EN 0x00100000 -+#define AXXX_CP_RB_CNTL_NO_UPDATE 0x08000000 -+#define AXXX_CP_RB_CNTL_RPTR_WR_EN 0x80000000 -+ -+#define REG_AXXX_CP_RB_RPTR_ADDR 0x000001c3 -+#define AXXX_CP_RB_RPTR_ADDR_SWAP__MASK 0x00000003 -+#define AXXX_CP_RB_RPTR_ADDR_SWAP__SHIFT 0 -+static inline uint32_t AXXX_CP_RB_RPTR_ADDR_SWAP(uint32_t val) -+{ -+ return ((val) << AXXX_CP_RB_RPTR_ADDR_SWAP__SHIFT) & AXXX_CP_RB_RPTR_ADDR_SWAP__MASK; -+} -+#define AXXX_CP_RB_RPTR_ADDR_ADDR__MASK 0xfffffffc -+#define AXXX_CP_RB_RPTR_ADDR_ADDR__SHIFT 2 -+static inline uint32_t AXXX_CP_RB_RPTR_ADDR_ADDR(uint32_t val) -+{ -+ return ((val >> 2) << AXXX_CP_RB_RPTR_ADDR_ADDR__SHIFT) & AXXX_CP_RB_RPTR_ADDR_ADDR__MASK; -+} -+ 
-+#define REG_AXXX_CP_RB_RPTR 0x000001c4 -+ -+#define REG_AXXX_CP_RB_WPTR 0x000001c5 -+ -+#define REG_AXXX_CP_RB_WPTR_DELAY 0x000001c6 -+ -+#define REG_AXXX_CP_RB_RPTR_WR 0x000001c7 -+ -+#define REG_AXXX_CP_RB_WPTR_BASE 0x000001c8 -+ -+#define REG_AXXX_CP_QUEUE_THRESHOLDS 0x000001d5 -+#define AXXX_CP_QUEUE_THRESHOLDS_CSQ_IB1_START__MASK 0x0000000f -+#define AXXX_CP_QUEUE_THRESHOLDS_CSQ_IB1_START__SHIFT 0 -+static inline uint32_t AXXX_CP_QUEUE_THRESHOLDS_CSQ_IB1_START(uint32_t val) -+{ -+ return ((val) << AXXX_CP_QUEUE_THRESHOLDS_CSQ_IB1_START__SHIFT) & AXXX_CP_QUEUE_THRESHOLDS_CSQ_IB1_START__MASK; -+} -+#define AXXX_CP_QUEUE_THRESHOLDS_CSQ_IB2_START__MASK 0x00000f00 -+#define AXXX_CP_QUEUE_THRESHOLDS_CSQ_IB2_START__SHIFT 8 -+static inline uint32_t AXXX_CP_QUEUE_THRESHOLDS_CSQ_IB2_START(uint32_t val) -+{ -+ return ((val) << AXXX_CP_QUEUE_THRESHOLDS_CSQ_IB2_START__SHIFT) & AXXX_CP_QUEUE_THRESHOLDS_CSQ_IB2_START__MASK; -+} -+#define AXXX_CP_QUEUE_THRESHOLDS_CSQ_ST_START__MASK 0x000f0000 -+#define AXXX_CP_QUEUE_THRESHOLDS_CSQ_ST_START__SHIFT 16 -+static inline uint32_t AXXX_CP_QUEUE_THRESHOLDS_CSQ_ST_START(uint32_t val) -+{ -+ return ((val) << AXXX_CP_QUEUE_THRESHOLDS_CSQ_ST_START__SHIFT) & AXXX_CP_QUEUE_THRESHOLDS_CSQ_ST_START__MASK; -+} -+ -+#define REG_AXXX_CP_MEQ_THRESHOLDS 0x000001d6 -+ -+#define REG_AXXX_CP_CSQ_AVAIL 0x000001d7 -+#define AXXX_CP_CSQ_AVAIL_RING__MASK 0x0000007f -+#define AXXX_CP_CSQ_AVAIL_RING__SHIFT 0 -+static inline uint32_t AXXX_CP_CSQ_AVAIL_RING(uint32_t val) -+{ -+ return ((val) << AXXX_CP_CSQ_AVAIL_RING__SHIFT) & AXXX_CP_CSQ_AVAIL_RING__MASK; -+} -+#define AXXX_CP_CSQ_AVAIL_IB1__MASK 0x00007f00 -+#define AXXX_CP_CSQ_AVAIL_IB1__SHIFT 8 -+static inline uint32_t AXXX_CP_CSQ_AVAIL_IB1(uint32_t val) -+{ -+ return ((val) << AXXX_CP_CSQ_AVAIL_IB1__SHIFT) & AXXX_CP_CSQ_AVAIL_IB1__MASK; -+} -+#define AXXX_CP_CSQ_AVAIL_IB2__MASK 0x007f0000 -+#define AXXX_CP_CSQ_AVAIL_IB2__SHIFT 16 -+static inline uint32_t AXXX_CP_CSQ_AVAIL_IB2(uint32_t val) -+{ -+ 
return ((val) << AXXX_CP_CSQ_AVAIL_IB2__SHIFT) & AXXX_CP_CSQ_AVAIL_IB2__MASK; -+} -+ -+#define REG_AXXX_CP_STQ_AVAIL 0x000001d8 -+#define AXXX_CP_STQ_AVAIL_ST__MASK 0x0000007f -+#define AXXX_CP_STQ_AVAIL_ST__SHIFT 0 -+static inline uint32_t AXXX_CP_STQ_AVAIL_ST(uint32_t val) -+{ -+ return ((val) << AXXX_CP_STQ_AVAIL_ST__SHIFT) & AXXX_CP_STQ_AVAIL_ST__MASK; -+} -+ -+#define REG_AXXX_CP_MEQ_AVAIL 0x000001d9 -+#define AXXX_CP_MEQ_AVAIL_MEQ__MASK 0x0000001f -+#define AXXX_CP_MEQ_AVAIL_MEQ__SHIFT 0 -+static inline uint32_t AXXX_CP_MEQ_AVAIL_MEQ(uint32_t val) -+{ -+ return ((val) << AXXX_CP_MEQ_AVAIL_MEQ__SHIFT) & AXXX_CP_MEQ_AVAIL_MEQ__MASK; -+} -+ -+#define REG_AXXX_SCRATCH_UMSK 0x000001dc -+#define AXXX_SCRATCH_UMSK_UMSK__MASK 0x000000ff -+#define AXXX_SCRATCH_UMSK_UMSK__SHIFT 0 -+static inline uint32_t AXXX_SCRATCH_UMSK_UMSK(uint32_t val) -+{ -+ return ((val) << AXXX_SCRATCH_UMSK_UMSK__SHIFT) & AXXX_SCRATCH_UMSK_UMSK__MASK; -+} -+#define AXXX_SCRATCH_UMSK_SWAP__MASK 0x00030000 -+#define AXXX_SCRATCH_UMSK_SWAP__SHIFT 16 -+static inline uint32_t AXXX_SCRATCH_UMSK_SWAP(uint32_t val) -+{ -+ return ((val) << AXXX_SCRATCH_UMSK_SWAP__SHIFT) & AXXX_SCRATCH_UMSK_SWAP__MASK; -+} -+ -+#define REG_AXXX_SCRATCH_ADDR 0x000001dd -+ -+#define REG_AXXX_CP_ME_RDADDR 0x000001ea -+ -+#define REG_AXXX_CP_STATE_DEBUG_INDEX 0x000001ec -+ -+#define REG_AXXX_CP_STATE_DEBUG_DATA 0x000001ed -+ -+#define REG_AXXX_CP_INT_CNTL 0x000001f2 -+ -+#define REG_AXXX_CP_INT_STATUS 0x000001f3 -+ -+#define REG_AXXX_CP_INT_ACK 0x000001f4 -+ -+#define REG_AXXX_CP_ME_CNTL 0x000001f6 -+ -+#define REG_AXXX_CP_ME_STATUS 0x000001f7 -+ -+#define REG_AXXX_CP_ME_RAM_WADDR 0x000001f8 -+ -+#define REG_AXXX_CP_ME_RAM_RADDR 0x000001f9 -+ -+#define REG_AXXX_CP_ME_RAM_DATA 0x000001fa -+ -+#define REG_AXXX_CP_DEBUG 0x000001fc -+#define AXXX_CP_DEBUG_PREDICATE_DISABLE 0x00800000 -+#define AXXX_CP_DEBUG_PROG_END_PTR_ENABLE 0x01000000 -+#define AXXX_CP_DEBUG_MIU_128BIT_WRITE_ENABLE 0x02000000 -+#define 
AXXX_CP_DEBUG_PREFETCH_PASS_NOPS 0x04000000 -+#define AXXX_CP_DEBUG_DYNAMIC_CLK_DISABLE 0x08000000 -+#define AXXX_CP_DEBUG_PREFETCH_MATCH_DISABLE 0x10000000 -+#define AXXX_CP_DEBUG_SIMPLE_ME_FLOW_CONTROL 0x40000000 -+#define AXXX_CP_DEBUG_MIU_WRITE_PACK_DISABLE 0x80000000 -+ -+#define REG_AXXX_CP_CSQ_RB_STAT 0x000001fd -+#define AXXX_CP_CSQ_RB_STAT_RPTR__MASK 0x0000007f -+#define AXXX_CP_CSQ_RB_STAT_RPTR__SHIFT 0 -+static inline uint32_t AXXX_CP_CSQ_RB_STAT_RPTR(uint32_t val) -+{ -+ return ((val) << AXXX_CP_CSQ_RB_STAT_RPTR__SHIFT) & AXXX_CP_CSQ_RB_STAT_RPTR__MASK; -+} -+#define AXXX_CP_CSQ_RB_STAT_WPTR__MASK 0x007f0000 -+#define AXXX_CP_CSQ_RB_STAT_WPTR__SHIFT 16 -+static inline uint32_t AXXX_CP_CSQ_RB_STAT_WPTR(uint32_t val) -+{ -+ return ((val) << AXXX_CP_CSQ_RB_STAT_WPTR__SHIFT) & AXXX_CP_CSQ_RB_STAT_WPTR__MASK; -+} -+ -+#define REG_AXXX_CP_CSQ_IB1_STAT 0x000001fe -+#define AXXX_CP_CSQ_IB1_STAT_RPTR__MASK 0x0000007f -+#define AXXX_CP_CSQ_IB1_STAT_RPTR__SHIFT 0 -+static inline uint32_t AXXX_CP_CSQ_IB1_STAT_RPTR(uint32_t val) -+{ -+ return ((val) << AXXX_CP_CSQ_IB1_STAT_RPTR__SHIFT) & AXXX_CP_CSQ_IB1_STAT_RPTR__MASK; -+} -+#define AXXX_CP_CSQ_IB1_STAT_WPTR__MASK 0x007f0000 -+#define AXXX_CP_CSQ_IB1_STAT_WPTR__SHIFT 16 -+static inline uint32_t AXXX_CP_CSQ_IB1_STAT_WPTR(uint32_t val) -+{ -+ return ((val) << AXXX_CP_CSQ_IB1_STAT_WPTR__SHIFT) & AXXX_CP_CSQ_IB1_STAT_WPTR__MASK; -+} -+ -+#define REG_AXXX_CP_CSQ_IB2_STAT 0x000001ff -+#define AXXX_CP_CSQ_IB2_STAT_RPTR__MASK 0x0000007f -+#define AXXX_CP_CSQ_IB2_STAT_RPTR__SHIFT 0 -+static inline uint32_t AXXX_CP_CSQ_IB2_STAT_RPTR(uint32_t val) -+{ -+ return ((val) << AXXX_CP_CSQ_IB2_STAT_RPTR__SHIFT) & AXXX_CP_CSQ_IB2_STAT_RPTR__MASK; -+} -+#define AXXX_CP_CSQ_IB2_STAT_WPTR__MASK 0x007f0000 -+#define AXXX_CP_CSQ_IB2_STAT_WPTR__SHIFT 16 -+static inline uint32_t AXXX_CP_CSQ_IB2_STAT_WPTR(uint32_t val) -+{ -+ return ((val) << AXXX_CP_CSQ_IB2_STAT_WPTR__SHIFT) & AXXX_CP_CSQ_IB2_STAT_WPTR__MASK; -+} -+ -+#define 
REG_AXXX_CP_SCRATCH_REG0 0x00000578 -+ -+#define REG_AXXX_CP_SCRATCH_REG1 0x00000579 -+ -+#define REG_AXXX_CP_SCRATCH_REG2 0x0000057a -+ -+#define REG_AXXX_CP_SCRATCH_REG3 0x0000057b -+ -+#define REG_AXXX_CP_SCRATCH_REG4 0x0000057c -+ -+#define REG_AXXX_CP_SCRATCH_REG5 0x0000057d -+ -+#define REG_AXXX_CP_SCRATCH_REG6 0x0000057e -+ -+#define REG_AXXX_CP_SCRATCH_REG7 0x0000057f -+ -+#define REG_AXXX_CP_ME_CF_EVENT_SRC 0x0000060a -+ -+#define REG_AXXX_CP_ME_CF_EVENT_ADDR 0x0000060b -+ -+#define REG_AXXX_CP_ME_CF_EVENT_DATA 0x0000060c -+ -+#define REG_AXXX_CP_ME_NRT_ADDR 0x0000060d -+ -+#define REG_AXXX_CP_ME_NRT_DATA 0x0000060e -+ - - #endif /* ADRENO_COMMON_XML */ -diff --git a/src/gallium/drivers/freedreno/adreno_pm4.xml.h b/src/gallium/drivers/freedreno/adreno_pm4.xml.h -index d3a7bac..94c13f4 100644 ---- a/src/gallium/drivers/freedreno/adreno_pm4.xml.h -+++ b/src/gallium/drivers/freedreno/adreno_pm4.xml.h -@@ -8,10 +8,12 @@ http://0x04.net/cgit/index.cgi/rules-ng-ng - git clone git://0x04.net/rules-ng-ng - - The rules-ng-ng source files this header was generated from are: --- /home/robclark/src/freedreno/envytools/rnndb/a3xx.xml ( 42578 bytes, from 2013-06-02 13:10:46) -+- /home/robclark/src/freedreno/envytools/rnndb/adreno.xml ( 327 bytes, from 2013-07-05 19:21:12) - - /home/robclark/src/freedreno/envytools/rnndb/freedreno_copyright.xml ( 1453 bytes, from 2013-03-31 16:51:27) --- /home/robclark/src/freedreno/envytools/rnndb/adreno_common.xml ( 3094 bytes, from 2013-05-05 18:29:22) -+- /home/robclark/src/freedreno/envytools/rnndb/a2xx/a2xx.xml ( 30005 bytes, from 2013-07-19 21:30:48) -+- /home/robclark/src/freedreno/envytools/rnndb/adreno_common.xml ( 8983 bytes, from 2013-07-24 01:38:36) - - /home/robclark/src/freedreno/envytools/rnndb/adreno_pm4.xml ( 9712 bytes, from 2013-05-26 15:22:37) -+- /home/robclark/src/freedreno/envytools/rnndb/a3xx/a3xx.xml ( 51415 bytes, from 2013-08-03 14:26:05) - - Copyright (C) 2013 by the following authors: - - Rob Clark 
(robclark) -diff --git a/src/gallium/drivers/freedreno/freedreno_util.h b/src/gallium/drivers/freedreno/freedreno_util.h -index b49cdfc..22857d2 100644 ---- a/src/gallium/drivers/freedreno/freedreno_util.h -+++ b/src/gallium/drivers/freedreno/freedreno_util.h -@@ -35,6 +35,7 @@ - #include "pipe/p_format.h" - #include "util/u_debug.h" - #include "util/u_math.h" -+#include "util/u_half.h" - - #include "adreno_common.xml.h" - #include "adreno_pm4.xml.h" --- -1.8.4.2 - diff --git a/0005-freedreno-a3xx-some-texture-fixes.patch b/0005-freedreno-a3xx-some-texture-fixes.patch deleted file mode 100644 index 4fd4c68..0000000 --- a/0005-freedreno-a3xx-some-texture-fixes.patch +++ /dev/null @@ -1,65 +0,0 @@ -From 3da8868b5df98d8544091feeea7b6bb0f736324f Mon Sep 17 00:00:00 2001 -From: Rob Clark -Date: Mon, 5 Aug 2013 18:03:33 -0400 -Subject: [PATCH 05/17] freedreno/a3xx: some texture fixes - -Stop hard coding bits that indicate texture type (2d/3d/cube/etc). - -Signed-off-by: Rob Clark ---- - src/gallium/drivers/freedreno/a3xx/fd3_texture.c | 25 +++++++++++++++++++++++- - 1 file changed, 24 insertions(+), 1 deletion(-) - -diff --git a/src/gallium/drivers/freedreno/a3xx/fd3_texture.c b/src/gallium/drivers/freedreno/a3xx/fd3_texture.c -index ae08b8a..e56325b 100644 ---- a/src/gallium/drivers/freedreno/a3xx/fd3_texture.c -+++ b/src/gallium/drivers/freedreno/a3xx/fd3_texture.c -@@ -87,6 +87,7 @@ fd3_sampler_state_create(struct pipe_context *pctx, - so->base = *cso; - - so->texsamp0 = -+ COND(!cso->normalized_coords, A3XX_TEX_SAMP_0_UNNORM_COORDS) | - A3XX_TEX_SAMP_0_XY_MAG(tex_filter(cso->mag_img_filter)) | - A3XX_TEX_SAMP_0_XY_MIN(tex_filter(cso->min_img_filter)) | - A3XX_TEX_SAMP_0_WRAP_S(tex_clamp(cso->wrap_s)) | -@@ -97,6 +98,28 @@ fd3_sampler_state_create(struct pipe_context *pctx, - return so; - } - -+static enum a3xx_tex_type -+tex_type(unsigned target) -+{ -+ switch (target) { -+ default: -+ assert(0); -+ case PIPE_BUFFER: -+ case PIPE_TEXTURE_1D: -+ case 
PIPE_TEXTURE_1D_ARRAY: -+ return A3XX_TEX_1D; -+ case PIPE_TEXTURE_RECT: -+ case PIPE_TEXTURE_2D: -+ case PIPE_TEXTURE_2D_ARRAY: -+ return A3XX_TEX_2D; -+ case PIPE_TEXTURE_3D: -+ return A3XX_TEX_3D; -+ case PIPE_TEXTURE_CUBE: -+ case PIPE_TEXTURE_CUBE_ARRAY: -+ return A3XX_TEX_CUBE; -+ } -+} -+ - static struct pipe_sampler_view * - fd3_sampler_view_create(struct pipe_context *pctx, struct pipe_resource *prsc, - const struct pipe_sampler_view *cso) -@@ -116,7 +139,7 @@ fd3_sampler_view_create(struct pipe_context *pctx, struct pipe_resource *prsc, - so->tex_resource = rsc; - - so->texconst0 = -- 0x40000000 | /* ??? */ -+ A3XX_TEX_CONST_0_TYPE(tex_type(prsc->target)) | - A3XX_TEX_CONST_0_FMT(fd3_pipe2tex(cso->format)) | - fd3_tex_swiz(cso->format, cso->swizzle_r, cso->swizzle_g, - cso->swizzle_b, cso->swizzle_a); --- -1.8.4.2 - diff --git a/0006-freedreno-a3xx-compiler-fix-CMP.patch b/0006-freedreno-a3xx-compiler-fix-CMP.patch deleted file mode 100644 index ded5b9d..0000000 --- a/0006-freedreno-a3xx-compiler-fix-CMP.patch +++ /dev/null @@ -1,45 +0,0 @@ -From 83e65320012f327d2e8f1573443b2e20f059e76f Mon Sep 17 00:00:00 2001 -From: Rob Clark -Date: Tue, 20 Aug 2013 13:46:30 -0400 -Subject: [PATCH 06/17] freedreno/a3xx/compiler: fix CMP - -The 1st src to add.s needs (r) flag (repeat), otherwise it will end up: - - add.s dst.xyzw, tmp.xxxx -1 - -instead of: - - add.s dst.xyzw, tmp.xyzw, -1 - -Also, if we are using a temporary dst to avoid clobbering one of the src -registers, we actually need to use that as the dst for the sel -instruction. 
- -Signed-off-by: Rob Clark ---- - src/gallium/drivers/freedreno/a3xx/fd3_compiler.c | 4 ++-- - 1 file changed, 2 insertions(+), 2 deletions(-) - -diff --git a/src/gallium/drivers/freedreno/a3xx/fd3_compiler.c b/src/gallium/drivers/freedreno/a3xx/fd3_compiler.c -index eabe21c..07bede4 100644 ---- a/src/gallium/drivers/freedreno/a3xx/fd3_compiler.c -+++ b/src/gallium/drivers/freedreno/a3xx/fd3_compiler.c -@@ -790,13 +790,13 @@ trans_cmp(const struct instr_translater *t, - instr = ir3_instr_create(ctx->ir, 2, OPC_ADD_S); - instr->repeat = 3; - add_dst_reg(ctx, instr, &tmp_dst, 0); -- add_src_reg(ctx, instr, &tmp_src, 0); -+ add_src_reg(ctx, instr, &tmp_src, 0)->flags |= IR3_REG_R; - ir3_reg_create(instr, 0, IR3_REG_IMMED)->iim_val = -1; - - /* sel.{f32,f16} dst, src2, tmp, src1 */ - instr = ir3_instr_create(ctx->ir, 3, ctx->so->half_precision ? - OPC_SEL_F16 : OPC_SEL_F32); -- vectorize(ctx, instr, &inst->Dst[0].Register, 3, -+ vectorize(ctx, instr, dst, 3, - &inst->Src[2].Register, 0, - &tmp_src, 0, - &inst->Src[1].Register, 0); --- -1.8.4.2 - diff --git a/0007-freedreno-a3xx-compiler-handle-saturate-on-dst.patch b/0007-freedreno-a3xx-compiler-handle-saturate-on-dst.patch deleted file mode 100644 index 362c04f..0000000 --- a/0007-freedreno-a3xx-compiler-handle-saturate-on-dst.patch +++ /dev/null @@ -1,98 +0,0 @@ -From c83387438633233ae6bcc55e1f4eaa2793ce7449 Mon Sep 17 00:00:00 2001 -From: Rob Clark -Date: Tue, 20 Aug 2013 13:51:35 -0400 -Subject: [PATCH 07/17] freedreno/a3xx/compiler: handle saturate on dst - -Sometimes things other than color dst need saturating, like if there is -a 'clamp(foo, 0.0, 1.0)'. So for saturated dst add the extra -instructions to fix up dst. 
- -Signed-off-by: Rob Clark ---- - src/gallium/drivers/freedreno/a3xx/fd3_compiler.c | 49 +++++++++++++++++++++++ - 1 file changed, 49 insertions(+) - -diff --git a/src/gallium/drivers/freedreno/a3xx/fd3_compiler.c b/src/gallium/drivers/freedreno/a3xx/fd3_compiler.c -index 07bede4..e2c7853 100644 ---- a/src/gallium/drivers/freedreno/a3xx/fd3_compiler.c -+++ b/src/gallium/drivers/freedreno/a3xx/fd3_compiler.c -@@ -131,6 +131,11 @@ struct fd3_compile_context { - struct tgsi_src_register tmp_src; - }; - -+ -+static void vectorize(struct fd3_compile_context *ctx, -+ struct ir3_instruction *instr, struct tgsi_dst_register *dst, -+ int nsrcs, ...); -+ - static unsigned - compile_init(struct fd3_compile_context *ctx, struct fd3_shader_stateobj *so, - const struct tgsi_token *tokens) -@@ -234,6 +239,10 @@ add_src_reg(struct fd3_compile_context *ctx, struct ir3_instruction *instr, - flags |= IR3_REG_CONST; - num = src->Index + ctx->base_reg[src->File]; - break; -+ case TGSI_FILE_OUTPUT: -+ /* NOTE: we should only end up w/ OUTPUT file for things like -+ * clamp()'ing saturated dst instructions -+ */ - case TGSI_FILE_INPUT: - case TGSI_FILE_TEMPORARY: - num = src->Index + ctx->base_reg[src->File]; -@@ -407,6 +416,35 @@ create_mov(struct fd3_compile_context *ctx, struct tgsi_dst_register *dst, - - } - -+static void -+create_clamp(struct fd3_compile_context *ctx, struct tgsi_dst_register *dst, -+ struct tgsi_src_register *minval, struct tgsi_src_register *maxval) -+{ -+ struct ir3_instruction *instr; -+ struct tgsi_src_register src; -+ -+ src_from_dst(&src, dst); -+ -+ instr = ir3_instr_create(ctx->ir, 2, OPC_MAX_F); -+ vectorize(ctx, instr, dst, 2, &src, 0, minval, 0); -+ -+ instr = ir3_instr_create(ctx->ir, 2, OPC_MIN_F); -+ vectorize(ctx, instr, dst, 2, &src, 0, maxval, 0); -+} -+ -+static void -+create_clamp_imm(struct fd3_compile_context *ctx, -+ struct tgsi_dst_register *dst, -+ uint32_t minval, uint32_t maxval) -+{ -+ struct tgsi_src_register minconst, maxconst; -+ -+ 
get_immediate(ctx, &minconst, minval); -+ get_immediate(ctx, &maxconst, maxval); -+ -+ create_clamp(ctx, dst, &minconst, &maxconst); -+} -+ - static struct tgsi_dst_register * - get_dst(struct fd3_compile_context *ctx, struct tgsi_full_instruction *inst) - { -@@ -1229,6 +1267,17 @@ compile_instructions(struct fd3_compile_context *ctx) - assert(0); - } - -+ switch (inst->Instruction.Saturate) { -+ case TGSI_SAT_ZERO_ONE: -+ create_clamp_imm(ctx, &inst->Dst[0].Register, -+ fui(0.0), fui(1.0)); -+ break; -+ case TGSI_SAT_MINUS_PLUS_ONE: -+ create_clamp_imm(ctx, &inst->Dst[0].Register, -+ fui(-1.0), fui(1.0)); -+ break; -+ } -+ - break; - } - default: --- -1.8.4.2 - diff --git a/0008-freedreno-a3xx-compiler-use-max_reg-rather-than-file.patch b/0008-freedreno-a3xx-compiler-use-max_reg-rather-than-file.patch deleted file mode 100644 index c1f76c3..0000000 --- a/0008-freedreno-a3xx-compiler-use-max_reg-rather-than-file.patch +++ /dev/null @@ -1,59 +0,0 @@ -From 5394a872f30022f64e6b2b58ef983b1fe5f6c08d Mon Sep 17 00:00:00 2001 -From: Rob Clark -Date: Tue, 20 Aug 2013 13:54:01 -0400 -Subject: [PATCH 08/17] freedreno/a3xx/compiler: use max_reg rather than - file_count - -Our current (rather naive) register assignment is based on mapping -different register files (INPUT, OUTPUT, TEMP, CONST, etc) based on the -max register index of the preceding file. But in some cases, the lowest -used register in a file might not be zero. In which case -file_count[file] != file_max[file] + 1. 
- -Signed-off-by: Rob Clark ---- - src/gallium/drivers/freedreno/a3xx/fd3_compiler.c | 14 +++++++------- - 1 file changed, 7 insertions(+), 7 deletions(-) - -diff --git a/src/gallium/drivers/freedreno/a3xx/fd3_compiler.c b/src/gallium/drivers/freedreno/a3xx/fd3_compiler.c -index e2c7853..dc5c873 100644 ---- a/src/gallium/drivers/freedreno/a3xx/fd3_compiler.c -+++ b/src/gallium/drivers/freedreno/a3xx/fd3_compiler.c -@@ -159,19 +159,19 @@ compile_init(struct fd3_compile_context *ctx, struct fd3_shader_stateobj *so, - /* Immediates go after constants: */ - ctx->base_reg[TGSI_FILE_CONSTANT] = 0; - ctx->base_reg[TGSI_FILE_IMMEDIATE] = -- ctx->info.file_count[TGSI_FILE_CONSTANT]; -+ ctx->info.file_max[TGSI_FILE_CONSTANT] + 1; - - /* Temporaries after outputs after inputs: */ - ctx->base_reg[TGSI_FILE_INPUT] = 0; - ctx->base_reg[TGSI_FILE_OUTPUT] = -- ctx->info.file_count[TGSI_FILE_INPUT]; -+ ctx->info.file_max[TGSI_FILE_INPUT] + 1; - ctx->base_reg[TGSI_FILE_TEMPORARY] = -- ctx->info.file_count[TGSI_FILE_INPUT] + -- ctx->info.file_count[TGSI_FILE_OUTPUT]; -+ ctx->info.file_max[TGSI_FILE_INPUT] + 1 + -+ ctx->info.file_max[TGSI_FILE_OUTPUT] + 1; - - so->first_immediate = ctx->base_reg[TGSI_FILE_IMMEDIATE]; -- ctx->immediate_idx = 4 * (ctx->info.file_count[TGSI_FILE_CONSTANT] + -- ctx->info.file_count[TGSI_FILE_IMMEDIATE]); -+ ctx->immediate_idx = 4 * (ctx->info.file_max[TGSI_FILE_CONSTANT] + 1 + -+ ctx->info.file_max[TGSI_FILE_IMMEDIATE] + 1); - - ret = tgsi_parse_init(&ctx->parser, tokens); - if (ret != TGSI_PARSE_OK) -@@ -309,7 +309,7 @@ get_internal_temp(struct fd3_compile_context *ctx, - /* assign next temporary: */ - n = ctx->num_internal_temps++; - -- tmp_dst->Index = ctx->info.file_count[TGSI_FILE_TEMPORARY] + n; -+ tmp_dst->Index = ctx->info.file_max[TGSI_FILE_TEMPORARY] + n + 1; - - src_from_dst(tmp_src, tmp_dst); - } --- -1.8.4.2 - diff --git a/0009-freedreno-a3xx-compiler-cat4-cannot-use-const-reg-as.patch 
b/0009-freedreno-a3xx-compiler-cat4-cannot-use-const-reg-as.patch deleted file mode 100644 index a43dde8..0000000 --- a/0009-freedreno-a3xx-compiler-cat4-cannot-use-const-reg-as.patch +++ /dev/null @@ -1,104 +0,0 @@ -From f3a7e28fe47ec547c1c9b561b04af208ae2f0f04 Mon Sep 17 00:00:00 2001 -From: Rob Clark -Date: Tue, 20 Aug 2013 13:57:22 -0400 -Subject: [PATCH 09/17] freedreno/a3xx/compiler: cat4 cannot use const reg as - src - -Category 4 instructions (rsq, rcp, sqrt, etc) seem to be unable to take -a const register as src. In these cases we need to move the src to a -temporary gpr first. - -This is the second case of such a restriction, where the instruction -encoding appears to support a const src, but in fact the hw appears to -ignore that bit. So split things out into a helper that can be re-used -for any instructions which have this limitation. - -Signed-off-by: Rob Clark ---- - src/gallium/drivers/freedreno/a3xx/fd3_compiler.c | 37 +++++++++++++++++------ - 1 file changed, 27 insertions(+), 10 deletions(-) - -diff --git a/src/gallium/drivers/freedreno/a3xx/fd3_compiler.c b/src/gallium/drivers/freedreno/a3xx/fd3_compiler.c -index dc5c873..772c7d2 100644 ---- a/src/gallium/drivers/freedreno/a3xx/fd3_compiler.c -+++ b/src/gallium/drivers/freedreno/a3xx/fd3_compiler.c -@@ -135,6 +135,8 @@ struct fd3_compile_context { - static void vectorize(struct fd3_compile_context *ctx, - struct ir3_instruction *instr, struct tgsi_dst_register *dst, - int nsrcs, ...); -+static void create_mov(struct fd3_compile_context *ctx, -+ struct tgsi_dst_register *dst, struct tgsi_src_register *src); - - static unsigned - compile_init(struct fd3_compile_context *ctx, struct fd3_shader_stateobj *so, -@@ -374,6 +376,23 @@ get_immediate(struct fd3_compile_context *ctx, - reg->SwizzleW = swiz2tgsi[swiz]; - } - -+/* for instructions that cannot take a const register as src, if needed -+ * generate a move to temporary gpr: -+ */ -+static struct tgsi_src_register * -+get_unconst(struct 
fd3_compile_context *ctx, struct tgsi_src_register *src, -+ struct tgsi_src_register *tmp_src) -+{ -+ static struct tgsi_dst_register tmp_dst; -+ if ((src->File == TGSI_FILE_CONSTANT) || -+ (src->File == TGSI_FILE_IMMEDIATE)) { -+ get_internal_temp(ctx, &tmp_dst, tmp_src); -+ create_mov(ctx, &tmp_dst, src); -+ src = tmp_src; -+ } -+ return src; -+} -+ - static type_t - get_type(struct fd3_compile_context *ctx) - { -@@ -1027,8 +1046,7 @@ instr_cat3(const struct instr_translater *t, - struct tgsi_full_instruction *inst) - { - struct tgsi_dst_register *dst = get_dst(ctx, inst); -- struct tgsi_src_register *src1 = &inst->Src[1].Register; -- struct tgsi_dst_register tmp_dst; -+ struct tgsi_src_register *src1; - struct tgsi_src_register tmp_src; - struct ir3_instruction *instr; - -@@ -1038,12 +1056,7 @@ instr_cat3(const struct instr_translater *t, - * const. Not sure if this is a hw bug, or simply that the - * disassembler lies. - */ -- if ((src1->File == TGSI_FILE_CONSTANT) || -- (src1->File == TGSI_FILE_IMMEDIATE)) { -- get_internal_temp(ctx, &tmp_dst, &tmp_src); -- create_mov(ctx, &tmp_dst, src1); -- src1 = &tmp_src; -- } -+ src1 = get_unconst(ctx, &inst->Src[1].Register, &tmp_src); - - instr = ir3_instr_create(ctx->ir, 3, - ctx->so->half_precision ? t->hopc : t->opc); -@@ -1060,13 +1073,17 @@ instr_cat4(const struct instr_translater *t, - struct tgsi_full_instruction *inst) - { - struct tgsi_dst_register *dst = get_dst(ctx, inst); -+ struct tgsi_src_register *src; -+ struct tgsi_src_register tmp_src; - struct ir3_instruction *instr; - -+ /* seems like blob compiler avoids const as src.. 
*/ -+ src = get_unconst(ctx, &inst->Src[0].Register, &tmp_src); -+ - ir3_instr_create(ctx->ir, 0, OPC_NOP)->repeat = 5; - instr = ir3_instr_create(ctx->ir, 4, t->opc); - -- vectorize(ctx, instr, dst, 1, -- &inst->Src[0].Register, 0); -+ vectorize(ctx, instr, dst, 1, src, 0); - - regmask_set(ctx->needs_ss, instr->regs[0]); - --- -1.8.4.2 - diff --git a/0010-freedreno-fix-segfault-when-no-color-buffer-bound.patch b/0010-freedreno-fix-segfault-when-no-color-buffer-bound.patch deleted file mode 100644 index 8978dd6..0000000 --- a/0010-freedreno-fix-segfault-when-no-color-buffer-bound.patch +++ /dev/null @@ -1,216 +0,0 @@ -From 12da4c1a6aa4b2a9cc337f669986a63c59fc3095 Mon Sep 17 00:00:00 2001 -From: Rob Clark -Date: Wed, 21 Aug 2013 13:20:05 -0400 -Subject: [PATCH 10/17] freedreno: fix segfault when no color buffer bound - -Don't crash when no color buffer bound. Something caught when starting -to run piglit, fixes a hanful of piglit tests. - -Signed-off-by: Rob Clark ---- - src/gallium/drivers/freedreno/a2xx/fd2_gmem.c | 6 +++--- - src/gallium/drivers/freedreno/a3xx/fd3_gmem.c | 15 +++++++++++---- - src/gallium/drivers/freedreno/freedreno_context.c | 3 ++- - src/gallium/drivers/freedreno/freedreno_draw.c | 4 ++-- - src/gallium/drivers/freedreno/freedreno_gmem.c | 18 +++++++++++------- - src/gallium/drivers/freedreno/freedreno_state.c | 2 +- - src/gallium/drivers/freedreno/freedreno_util.h | 10 ++++++++++ - 7 files changed, 40 insertions(+), 18 deletions(-) - -diff --git a/src/gallium/drivers/freedreno/a2xx/fd2_gmem.c b/src/gallium/drivers/freedreno/a2xx/fd2_gmem.c -index e239eed..93695bc 100644 ---- a/src/gallium/drivers/freedreno/a2xx/fd2_gmem.c -+++ b/src/gallium/drivers/freedreno/a2xx/fd2_gmem.c -@@ -337,7 +337,7 @@ fd2_emit_tile_init(struct fd_context *ctx) - struct fd_ringbuffer *ring = ctx->ring; - struct pipe_framebuffer_state *pfb = &ctx->framebuffer; - struct fd_gmem_stateobj *gmem = &ctx->gmem; -- enum pipe_format format = pfb->cbufs[0]->format; -+ enum 
pipe_format format = pipe_surface_format(pfb->cbufs[0]); - uint32_t reg; - - OUT_PKT3(ring, CP_SET_CONSTANT, 4); -@@ -358,7 +358,7 @@ fd2_emit_tile_prep(struct fd_context *ctx, uint32_t xoff, uint32_t yoff, - { - struct fd_ringbuffer *ring = ctx->ring; - struct pipe_framebuffer_state *pfb = &ctx->framebuffer; -- enum pipe_format format = pfb->cbufs[0]->format; -+ enum pipe_format format = pipe_surface_format(pfb->cbufs[0]); - - OUT_PKT3(ring, CP_SET_CONSTANT, 2); - OUT_RING(ring, CP_REG(REG_A2XX_RB_COLOR_INFO)); -@@ -379,7 +379,7 @@ fd2_emit_tile_renderprep(struct fd_context *ctx, uint32_t xoff, uint32_t yoff, - { - struct fd_ringbuffer *ring = ctx->ring; - struct pipe_framebuffer_state *pfb = &ctx->framebuffer; -- enum pipe_format format = pfb->cbufs[0]->format; -+ enum pipe_format format = pipe_surface_format(pfb->cbufs[0]); - - OUT_PKT3(ring, CP_SET_CONSTANT, 2); - OUT_RING(ring, CP_REG(REG_A2XX_RB_COLOR_INFO)); -diff --git a/src/gallium/drivers/freedreno/a3xx/fd3_gmem.c b/src/gallium/drivers/freedreno/a3xx/fd3_gmem.c -index 9050166..b9d0580 100644 ---- a/src/gallium/drivers/freedreno/a3xx/fd3_gmem.c -+++ b/src/gallium/drivers/freedreno/a3xx/fd3_gmem.c -@@ -214,8 +214,12 @@ fd3_emit_tile_gmem2mem(struct fd_context *ctx, uint32_t xoff, uint32_t yoff, - }, 1); - - if (ctx->resolve & (FD_BUFFER_DEPTH | FD_BUFFER_STENCIL)) { -- uint32_t base = depth_base(&ctx->gmem) * -- fd_resource(pfb->cbufs[0]->texture)->cpp; -+ uint32_t base = 0; -+ if (pfb->cbufs[0]) { -+ struct fd_resource *rsc = -+ fd_resource(pfb->cbufs[0]->texture); -+ base = depth_base(&ctx->gmem) * rsc->cpp; -+ } - emit_gmem2mem_surf(ring, RB_COPY_DEPTH_STENCIL, base, pfb->zsbuf); - } - -@@ -410,8 +414,11 @@ static void - fd3_emit_sysmem_prep(struct fd_context *ctx) - { - struct pipe_framebuffer_state *pfb = &ctx->framebuffer; -- struct fd_resource *rsc = fd_resource(pfb->cbufs[0]->texture); - struct fd_ringbuffer *ring = ctx->ring; -+ uint32_t pitch = 0; -+ -+ if (pfb->cbufs[0]) -+ pitch = 
fd_resource(pfb->cbufs[0]->texture)->pitch; - - fd3_emit_restore(ctx); - -@@ -422,7 +429,7 @@ fd3_emit_sysmem_prep(struct fd_context *ctx) - emit_mrt(ring, pfb->nr_cbufs, pfb->cbufs, NULL, 0); - - fd3_emit_rbrc_tile_state(ring, -- A3XX_RB_RENDER_CONTROL_BIN_WIDTH(rsc->pitch)); -+ A3XX_RB_RENDER_CONTROL_BIN_WIDTH(pitch)); - - /* setup scissor/offset for current tile: */ - OUT_PKT0(ring, REG_A3XX_PA_SC_WINDOW_OFFSET, 1); -diff --git a/src/gallium/drivers/freedreno/freedreno_context.c b/src/gallium/drivers/freedreno/freedreno_context.c -index 44d525b..1d03351 100644 ---- a/src/gallium/drivers/freedreno/freedreno_context.c -+++ b/src/gallium/drivers/freedreno/freedreno_context.c -@@ -86,7 +86,8 @@ fd_context_render(struct pipe_context *pctx) - ctx->gmem_reason = 0; - ctx->num_draws = 0; - -- fd_resource(pfb->cbufs[0]->texture)->dirty = false; -+ if (pfb->cbufs[0]) -+ fd_resource(pfb->cbufs[0]->texture)->dirty = false; - if (pfb->zsbuf) - fd_resource(pfb->zsbuf->texture)->dirty = false; - } -diff --git a/src/gallium/drivers/freedreno/freedreno_draw.c b/src/gallium/drivers/freedreno/freedreno_draw.c -index b02b8b9..d4f8d34 100644 ---- a/src/gallium/drivers/freedreno/freedreno_draw.c -+++ b/src/gallium/drivers/freedreno/freedreno_draw.c -@@ -193,8 +193,8 @@ fd_clear(struct pipe_context *pctx, unsigned buffers, - } - - DBG("%x depth=%f, stencil=%u (%s/%s)", buffers, depth, stencil, -- util_format_name(pfb->cbufs[0]->format), -- pfb->zsbuf ? 
util_format_name(pfb->zsbuf->format) : "none"); -+ util_format_short_name(pipe_surface_format(pfb->cbufs[0])), -+ util_format_short_name(pipe_surface_format(pfb->zsbuf))); - - ctx->clear(ctx, buffers, color, depth, stencil); - -diff --git a/src/gallium/drivers/freedreno/freedreno_gmem.c b/src/gallium/drivers/freedreno/freedreno_gmem.c -index 197d1d9..3d959c6 100644 ---- a/src/gallium/drivers/freedreno/freedreno_gmem.c -+++ b/src/gallium/drivers/freedreno/freedreno_gmem.c -@@ -72,12 +72,15 @@ calculate_tiles(struct fd_context *ctx) - struct fd_gmem_stateobj *gmem = &ctx->gmem; - struct pipe_scissor_state *scissor = &ctx->max_scissor; - struct pipe_framebuffer_state *pfb = &ctx->framebuffer; -- uint32_t cpp = util_format_get_blocksize(pfb->cbufs[0]->format); - uint32_t gmem_size = ctx->screen->gmemsize_bytes; - uint32_t minx, miny, width, height; - uint32_t nbins_x = 1, nbins_y = 1; - uint32_t bin_w, bin_h; - uint32_t max_width = 992; -+ uint32_t cpp = 4; -+ -+ if (pfb->cbufs[0]) -+ cpp = util_format_get_blocksize(pfb->cbufs[0]->format); - - if ((gmem->cpp == cpp) && - !memcmp(&gmem->scissor, scissor, sizeof(gmem->scissor))) { -@@ -211,15 +214,15 @@ fd_gmem_render_tiles(struct pipe_context *pctx) - - if (sysmem) { - DBG("rendering sysmem (%s/%s)", -- util_format_name(pfb->cbufs[0]->format), -- pfb->zsbuf ? util_format_name(pfb->zsbuf->format) : "none"); -+ util_format_short_name(pipe_surface_format(pfb->cbufs[0])), -+ util_format_short_name(pipe_surface_format(pfb->zsbuf))); - render_sysmem(ctx); - } else { - struct fd_gmem_stateobj *gmem = &ctx->gmem; -- DBG("rendering %dx%d tiles (%s/%s)", gmem->nbins_x, gmem->nbins_y, -- util_format_name(pfb->cbufs[0]->format), -- pfb->zsbuf ? 
util_format_name(pfb->zsbuf->format) : "none"); - calculate_tiles(ctx); -+ DBG("rendering %dx%d tiles (%s/%s)", gmem->nbins_x, gmem->nbins_y, -+ util_format_short_name(pipe_surface_format(pfb->cbufs[0])), -+ util_format_short_name(pipe_surface_format(pfb->zsbuf))); - render_tiles(ctx); - } - -@@ -231,7 +234,8 @@ fd_gmem_render_tiles(struct pipe_context *pctx) - - /* update timestamps on render targets: */ - timestamp = fd_ringbuffer_timestamp(ctx->ring); -- fd_resource(pfb->cbufs[0]->texture)->timestamp = timestamp; -+ if (pfb->cbufs[0]) -+ fd_resource(pfb->cbufs[0]->texture)->timestamp = timestamp; - if (pfb->zsbuf) - fd_resource(pfb->zsbuf->texture)->timestamp = timestamp; - -diff --git a/src/gallium/drivers/freedreno/freedreno_state.c b/src/gallium/drivers/freedreno/freedreno_state.c -index 2f5d52c..f5290a9 100644 ---- a/src/gallium/drivers/freedreno/freedreno_state.c -+++ b/src/gallium/drivers/freedreno/freedreno_state.c -@@ -120,7 +120,7 @@ fd_set_framebuffer_state(struct pipe_context *pctx, - unsigned i; - - DBG("%d: cbufs[0]=%p, zsbuf=%p", ctx->needs_flush, -- cso->cbufs[0], cso->zsbuf); -+ framebuffer->cbufs[0], framebuffer->zsbuf); - - fd_context_render(pctx); - -diff --git a/src/gallium/drivers/freedreno/freedreno_util.h b/src/gallium/drivers/freedreno/freedreno_util.h -index 22857d2..9f10686 100644 ---- a/src/gallium/drivers/freedreno/freedreno_util.h -+++ b/src/gallium/drivers/freedreno/freedreno_util.h -@@ -33,6 +33,7 @@ - #include - - #include "pipe/p_format.h" -+#include "pipe/p_state.h" - #include "util/u_debug.h" - #include "util/u_math.h" - #include "util/u_half.h" -@@ -79,6 +80,15 @@ static inline uint32_t DRAW(enum pc_di_primtype prim_type, - (1 << 14); - } - -+ -+static inline enum pipe_format -+pipe_surface_format(struct pipe_surface *psurf) -+{ -+ if (!psurf) -+ return PIPE_FORMAT_NONE; -+ return psurf->format; -+} -+ - #define LOG_DWORDS 0 - - --- -1.8.4.2 - diff --git a/0011-freedreno-a3xx-compiler-make-compiler-errors-more-us.patch 
b/0011-freedreno-a3xx-compiler-make-compiler-errors-more-us.patch deleted file mode 100644 index 53aecaa..0000000 --- a/0011-freedreno-a3xx-compiler-make-compiler-errors-more-us.patch +++ /dev/null @@ -1,172 +0,0 @@ -From c726a6a907f119dfc4fb1c26fef7babf51dc1dea Mon Sep 17 00:00:00 2001 -From: Rob Clark -Date: Sat, 24 Aug 2013 12:56:22 -0400 -Subject: [PATCH 11/17] freedreno/a3xx/compiler: make compiler errors more - useful - -We probably should get rid of assert() entirely, but at this stage it is -more useful for things to crash where we can catch it in a debugger. -With compile_error() we have a single place to set an error flag (to -bail out and return an error on the next instruction) so that will be a -small change later when enough of the compiler bugs are sorted. - -But re-arrange/cleanup the error/assert stuff so we at least get a dump -of the TGSI that triggered it. So we see some useful output in piglit -logs. - -Signed-off-by: Rob Clark ---- - src/gallium/drivers/freedreno/a3xx/fd3_compiler.c | 47 +++++++++++++++-------- - src/gallium/drivers/freedreno/a3xx/ir-a3xx.h | 3 +- - 2 files changed, 33 insertions(+), 17 deletions(-) - -diff --git a/src/gallium/drivers/freedreno/a3xx/fd3_compiler.c b/src/gallium/drivers/freedreno/a3xx/fd3_compiler.c -index 772c7d2..e6c5bb7 100644 ---- a/src/gallium/drivers/freedreno/a3xx/fd3_compiler.c -+++ b/src/gallium/drivers/freedreno/a3xx/fd3_compiler.c -@@ -185,6 +185,21 @@ compile_init(struct fd3_compile_context *ctx, struct fd3_shader_stateobj *so, - } - - static void -+compile_error(struct fd3_compile_context *ctx, const char *format, ...) 
-+{ -+ va_list ap; -+ va_start(ap, format); -+ _debug_vprintf(format, ap); -+ va_end(ap); -+ tgsi_dump(ctx->tokens, 0); -+ assert(0); -+} -+ -+#define compile_assert(ctx, cond) do { \ -+ if (!(cond)) compile_error((ctx), "failed assert: "#cond"\n"); \ -+ } while (0) -+ -+static void - compile_free(struct fd3_compile_context *ctx) - { - tgsi_parse_free(&ctx->parser); -@@ -212,9 +227,8 @@ add_dst_reg(struct fd3_compile_context *ctx, struct ir3_instruction *instr, - num = dst->Index + ctx->base_reg[dst->File]; - break; - default: -- DBG("unsupported dst register file: %s", -+ compile_error(ctx, "unsupported dst register file: %s\n", - tgsi_file_name(dst->File)); -- assert(0); - break; - } - -@@ -250,9 +264,8 @@ add_src_reg(struct fd3_compile_context *ctx, struct ir3_instruction *instr, - num = src->Index + ctx->base_reg[src->File]; - break; - default: -- DBG("unsupported src register file: %s", -+ compile_error(ctx, "unsupported src register file: %s\n", - tgsi_file_name(src->File)); -- assert(0); - break; - } - -@@ -329,6 +342,13 @@ get_internal_temp_repl(struct fd3_compile_context *ctx, - tmp_src->SwizzleZ = tmp_src->SwizzleW = TGSI_SWIZZLE_X; - } - -+static inline bool -+is_const(struct tgsi_src_register *src) -+{ -+ return (src->File == TGSI_FILE_CONSTANT) || -+ (src->File == TGSI_FILE_IMMEDIATE); -+} -+ - static void - get_immediate(struct fd3_compile_context *ctx, - struct tgsi_src_register *reg, uint32_t val) -@@ -578,8 +598,7 @@ trans_dotp(const struct instr_translater *t, - * is a const. Not sure if this is a hw bug, or simply that the - * disassembler lies. 
- */ -- if ((src1->File == TGSI_FILE_IMMEDIATE) || -- (src1->File == TGSI_FILE_CONSTANT)) { -+ if (is_const(src1)) { - - /* the mov to tmp unswizzles src1, so now we have tmp.xyzw: - */ -@@ -768,7 +787,7 @@ trans_samp(const struct instr_translater *t, - flags |= IR3_INSTR_P; - break; - default: -- assert(0); -+ compile_assert(ctx, 0); - break; - } - -@@ -1187,7 +1206,7 @@ decl_out(struct fd3_compile_context *ctx, struct tgsi_full_declaration *decl) - unsigned name = decl->Semantic.Name; - unsigned i; - -- assert(decl->Declaration.Semantic); // TODO is this ever not true? -+ compile_assert(ctx, decl->Declaration.Semantic); // TODO is this ever not true? - - DBG("decl out[%d] -> r%d", name, decl->Range.First + base); // XXX - -@@ -1207,9 +1226,8 @@ decl_out(struct fd3_compile_context *ctx, struct tgsi_full_declaration *decl) - so->outputs[so->outputs_count++].regid = regid(i + base, 0); - break; - default: -- DBG("unknown VS semantic name: %s", -+ compile_error(ctx, "unknown VS semantic name: %s\n", - tgsi_semantic_names[name]); -- assert(0); - } - } else { - switch (name) { -@@ -1217,9 +1235,8 @@ decl_out(struct fd3_compile_context *ctx, struct tgsi_full_declaration *decl) - so->color_regid = regid(decl->Range.First + base, 0); - break; - default: -- DBG("unknown VS semantic name: %s", -+ compile_error(ctx, "unknown VS semantic name: %s\n", - tgsi_semantic_names[name]); -- assert(0); - } - } - } -@@ -1278,10 +1295,8 @@ compile_instructions(struct fd3_compile_context *ctx) - t->fxn(t, ctx, inst); - ctx->num_internal_temps = 0; - } else { -- debug_printf("unknown TGSI opc: %s\n", -+ compile_error(ctx, "unknown TGSI opc: %s\n", - tgsi_get_opcode_name(opc)); -- tgsi_dump(ctx->tokens, 0); -- assert(0); - } - - switch (inst->Instruction.Saturate) { -@@ -1319,6 +1334,8 @@ fd3_compile_shader(struct fd3_shader_stateobj *so, - - so->ir = ir3_shader_create(); - -+ assert(so->ir); -+ - so->color_regid = regid(63,0); - so->pos_regid = regid(63,0); - so->psize_regid = 
regid(63,0); -diff --git a/src/gallium/drivers/freedreno/a3xx/ir-a3xx.h b/src/gallium/drivers/freedreno/a3xx/ir-a3xx.h -index 2fedc7b..61c01a7 100644 ---- a/src/gallium/drivers/freedreno/a3xx/ir-a3xx.h -+++ b/src/gallium/drivers/freedreno/a3xx/ir-a3xx.h -@@ -166,8 +166,7 @@ struct ir3_instruction { - }; - }; - --/* this is just large to cope w/ the large test *.asm: */ --#define MAX_INSTRS 10240 -+#define MAX_INSTRS 1024 - - struct ir3_shader { - unsigned instrs_count; --- -1.8.4.2 - diff --git a/0012-freedreno-a3xx-compiler-bit-of-re-arrange-cleanup.patch b/0012-freedreno-a3xx-compiler-bit-of-re-arrange-cleanup.patch deleted file mode 100644 index a96fc8d..0000000 --- a/0012-freedreno-a3xx-compiler-bit-of-re-arrange-cleanup.patch +++ /dev/null @@ -1,420 +0,0 @@ -From ca5514b85161d480fb711ac26d74fc447e1e9bda Mon Sep 17 00:00:00 2001 -From: Rob Clark -Date: Sat, 24 Aug 2013 13:00:07 -0400 -Subject: [PATCH 12/17] freedreno/a3xx/compiler: bit of re-arrange/cleanup - -It seems there are a number of cases where instructions have limitations -about taking reading src's from const register file, so make -get_unconst() a bit easier to use. 
- -Signed-off-by: Rob Clark ---- - src/gallium/drivers/freedreno/a3xx/fd3_compiler.c | 132 ++++++++++++---------- - 1 file changed, 71 insertions(+), 61 deletions(-) - -diff --git a/src/gallium/drivers/freedreno/a3xx/fd3_compiler.c b/src/gallium/drivers/freedreno/a3xx/fd3_compiler.c -index e6c5bb7..b5cdda8 100644 ---- a/src/gallium/drivers/freedreno/a3xx/fd3_compiler.c -+++ b/src/gallium/drivers/freedreno/a3xx/fd3_compiler.c -@@ -91,6 +91,7 @@ struct fd3_compile_context { - - unsigned next_inloc; - unsigned num_internal_temps; -+ struct tgsi_src_register internal_temps[6]; - - /* track registers which need to synchronize w/ "complex alu" cat3 - * instruction pipeline: -@@ -128,7 +129,7 @@ struct fd3_compile_context { - * up the vector operation - */ - struct tgsi_dst_register tmp_dst; -- struct tgsi_src_register tmp_src; -+ struct tgsi_src_register *tmp_src; - }; - - -@@ -309,11 +310,11 @@ src_from_dst(struct tgsi_src_register *src, struct tgsi_dst_register *dst) - /* Get internal-temp src/dst to use for a sequence of instructions - * generated by a single TGSI op. 
- */ --static void -+static struct tgsi_src_register * - get_internal_temp(struct fd3_compile_context *ctx, -- struct tgsi_dst_register *tmp_dst, -- struct tgsi_src_register *tmp_src) -+ struct tgsi_dst_register *tmp_dst) - { -+ struct tgsi_src_register *tmp_src; - int n; - - tmp_dst->File = TGSI_FILE_TEMPORARY; -@@ -323,23 +324,28 @@ get_internal_temp(struct fd3_compile_context *ctx, - - /* assign next temporary: */ - n = ctx->num_internal_temps++; -+ compile_assert(ctx, n < ARRAY_SIZE(ctx->internal_temps)); -+ tmp_src = &ctx->internal_temps[n]; - - tmp_dst->Index = ctx->info.file_max[TGSI_FILE_TEMPORARY] + n + 1; - - src_from_dst(tmp_src, tmp_dst); -+ -+ return tmp_src; - } - - /* same as get_internal_temp, but w/ src.xxxx (for instructions that - * replicate their results) - */ --static void -+static struct tgsi_src_register * - get_internal_temp_repl(struct fd3_compile_context *ctx, -- struct tgsi_dst_register *tmp_dst, -- struct tgsi_src_register *tmp_src) -+ struct tgsi_dst_register *tmp_dst) - { -- get_internal_temp(ctx, tmp_dst, tmp_src); -+ struct tgsi_src_register *tmp_src = -+ get_internal_temp(ctx, tmp_dst); - tmp_src->SwizzleX = tmp_src->SwizzleY = - tmp_src->SwizzleZ = tmp_src->SwizzleW = TGSI_SWIZZLE_X; -+ return tmp_src; - } - - static inline bool -@@ -349,6 +355,22 @@ is_const(struct tgsi_src_register *src) - (src->File == TGSI_FILE_IMMEDIATE); - } - -+/* for instructions that cannot take a const register as src, if needed -+ * generate a move to temporary gpr: -+ */ -+static struct tgsi_src_register * -+get_unconst(struct fd3_compile_context *ctx, struct tgsi_src_register *src) -+{ -+ if (is_const(src)) { -+ static struct tgsi_dst_register tmp_dst; -+ struct tgsi_src_register *tmp_src = -+ get_internal_temp(ctx, &tmp_dst); -+ create_mov(ctx, &tmp_dst, src); -+ src = tmp_src; -+ } -+ return src; -+} -+ - static void - get_immediate(struct fd3_compile_context *ctx, - struct tgsi_src_register *reg, uint32_t val) -@@ -396,27 +418,16 @@ 
get_immediate(struct fd3_compile_context *ctx, - reg->SwizzleW = swiz2tgsi[swiz]; - } - --/* for instructions that cannot take a const register as src, if needed -- * generate a move to temporary gpr: -- */ --static struct tgsi_src_register * --get_unconst(struct fd3_compile_context *ctx, struct tgsi_src_register *src, -- struct tgsi_src_register *tmp_src) -+static type_t -+get_ftype(struct fd3_compile_context *ctx) - { -- static struct tgsi_dst_register tmp_dst; -- if ((src->File == TGSI_FILE_CONSTANT) || -- (src->File == TGSI_FILE_IMMEDIATE)) { -- get_internal_temp(ctx, &tmp_dst, tmp_src); -- create_mov(ctx, &tmp_dst, src); -- src = tmp_src; -- } -- return src; -+ return ctx->so->half_precision ? TYPE_F16 : TYPE_F32; - } - - static type_t --get_type(struct fd3_compile_context *ctx) -+get_utype(struct fd3_compile_context *ctx) - { -- return ctx->so->half_precision ? TYPE_F16 : TYPE_F32; -+ return ctx->so->half_precision ? TYPE_U16 : TYPE_U32; - } - - static unsigned -@@ -436,7 +447,7 @@ static void - create_mov(struct fd3_compile_context *ctx, struct tgsi_dst_register *dst, - struct tgsi_src_register *src) - { -- type_t type_mov = get_type(ctx); -+ type_t type_mov = get_ftype(ctx); - unsigned i; - - for (i = 0; i < 4; i++) { -@@ -492,7 +503,7 @@ get_dst(struct fd3_compile_context *ctx, struct tgsi_full_instruction *inst) - for (i = 0; i < inst->Instruction.NumSrcRegs; i++) { - struct tgsi_src_register *src = &inst->Src[i].Register; - if ((src->File == dst->File) && (src->Index == dst->Index)) { -- get_internal_temp(ctx, &ctx->tmp_dst, &ctx->tmp_src); -+ ctx->tmp_src = get_internal_temp(ctx, &ctx->tmp_dst); - ctx->tmp_dst.WriteMask = dst->WriteMask; - dst = &ctx->tmp_dst; - break; -@@ -507,7 +518,7 @@ put_dst(struct fd3_compile_context *ctx, struct tgsi_full_instruction *inst, - { - /* if necessary, add mov back into original dst: */ - if (dst != &inst->Dst[0].Register) { -- create_mov(ctx, &inst->Dst[0].Register, &ctx->tmp_src); -+ create_mov(ctx, 
&inst->Dst[0].Register, ctx->tmp_src); - } - } - -@@ -580,7 +591,7 @@ trans_dotp(const struct instr_translater *t, - { - struct ir3_instruction *instr; - struct tgsi_dst_register tmp_dst; -- struct tgsi_src_register tmp_src; -+ struct tgsi_src_register *tmp_src; - struct tgsi_dst_register *dst = &inst->Dst[0].Register; - struct tgsi_src_register *src0 = &inst->Src[0].Register; - struct tgsi_src_register *src1 = &inst->Src[1].Register; -@@ -590,7 +601,7 @@ trans_dotp(const struct instr_translater *t, - unsigned n = t->arg; /* number of components */ - unsigned i; - -- get_internal_temp_repl(ctx, &tmp_dst, &tmp_src); -+ tmp_src = get_internal_temp_repl(ctx, &tmp_dst); - - /* Blob compiler never seems to use a const in src1 position for - * mad.*, although there does seem (according to disassembler -@@ -609,7 +620,7 @@ trans_dotp(const struct instr_translater *t, - * because after that point we no longer need tmp.x: - */ - create_mov(ctx, &tmp_dst, src1); -- src1 = &tmp_src; -+ src1 = tmp_src; - } - - instr = ir3_instr_create(ctx->ir, 2, OPC_MUL_F); -@@ -624,7 +635,7 @@ trans_dotp(const struct instr_translater *t, - add_dst_reg(ctx, instr, &tmp_dst, 0); - add_src_reg(ctx, instr, src0, swiz0[i]); - add_src_reg(ctx, instr, src1, swiz1[i]); -- add_src_reg(ctx, instr, &tmp_src, 0); -+ add_src_reg(ctx, instr, tmp_src, 0); - } - - /* DPH(a,b) = (a.x * b.x) + (a.y * b.y) + (a.z * b.z) + b.w */ -@@ -634,7 +645,7 @@ trans_dotp(const struct instr_translater *t, - instr = ir3_instr_create(ctx->ir, 2, OPC_ADD_F); - add_dst_reg(ctx, instr, &tmp_dst, 0); - add_src_reg(ctx, instr, src1, swiz1[i]); -- add_src_reg(ctx, instr, &tmp_src, 0); -+ add_src_reg(ctx, instr, tmp_src, 0); - - n++; - } -@@ -646,7 +657,7 @@ trans_dotp(const struct instr_translater *t, - ir3_instr_create(ctx->ir, 0, OPC_NOP); - } - -- create_mov(ctx, dst, &tmp_src); -+ create_mov(ctx, dst, tmp_src); - } - - /* LRP(a,b,c) = (a * b) + ((1 - a) * c) */ -@@ -657,11 +668,11 @@ trans_lrp(const struct instr_translater 
*t, - { - struct ir3_instruction *instr; - struct tgsi_dst_register tmp_dst1, tmp_dst2; -- struct tgsi_src_register tmp_src1, tmp_src2; -+ struct tgsi_src_register *tmp_src1, *tmp_src2; - struct tgsi_src_register tmp_const; - -- get_internal_temp(ctx, &tmp_dst1, &tmp_src1); -- get_internal_temp(ctx, &tmp_dst2, &tmp_src2); -+ tmp_src1 = get_internal_temp(ctx, &tmp_dst1); -+ tmp_src2 = get_internal_temp(ctx, &tmp_dst2); - - get_immediate(ctx, &tmp_const, fui(1.0)); - -@@ -680,14 +691,14 @@ trans_lrp(const struct instr_translater *t, - /* tmp2 = tmp2 * c */ - instr = ir3_instr_create(ctx->ir, 2, OPC_MUL_F); - vectorize(ctx, instr, &tmp_dst2, 2, -- &tmp_src2, 0, -+ tmp_src2, 0, - &inst->Src[2].Register, 0); - - /* dst = tmp1 + tmp2 */ - instr = ir3_instr_create(ctx->ir, 2, OPC_ADD_F); - vectorize(ctx, instr, &inst->Dst[0].Register, 2, -- &tmp_src1, 0, -- &tmp_src2, 0); -+ tmp_src1, 0, -+ tmp_src2, 0); - } - - /* FRC(x) = x - FLOOR(x) */ -@@ -698,9 +709,9 @@ trans_frac(const struct instr_translater *t, - { - struct ir3_instruction *instr; - struct tgsi_dst_register tmp_dst; -- struct tgsi_src_register tmp_src; -+ struct tgsi_src_register *tmp_src; - -- get_internal_temp(ctx, &tmp_dst, &tmp_src); -+ tmp_src = get_internal_temp(ctx, &tmp_dst); - - /* tmp = FLOOR(x) */ - instr = ir3_instr_create(ctx->ir, 2, OPC_FLOOR_F); -@@ -711,7 +722,7 @@ trans_frac(const struct instr_translater *t, - instr = ir3_instr_create(ctx->ir, 2, OPC_ADD_F); - vectorize(ctx, instr, &inst->Dst[0].Register, 2, - &inst->Src[0].Register, 0, -- &tmp_src, IR3_REG_NEGATE); -+ tmp_src, IR3_REG_NEGATE); - } - - /* POW(a,b) = EXP2(b * LOG2(a)) */ -@@ -723,12 +734,12 @@ trans_pow(const struct instr_translater *t, - struct ir3_instruction *instr; - struct ir3_register *r; - struct tgsi_dst_register tmp_dst; -- struct tgsi_src_register tmp_src; -+ struct tgsi_src_register *tmp_src; - struct tgsi_dst_register *dst = &inst->Dst[0].Register; - struct tgsi_src_register *src0 = &inst->Src[0].Register; - struct 
tgsi_src_register *src1 = &inst->Src[1].Register; - -- get_internal_temp_repl(ctx, &tmp_dst, &tmp_src); -+ tmp_src = get_internal_temp_repl(ctx, &tmp_dst); - - /* log2 Rtmp, Rsrc0 */ - ir3_instr_create(ctx->ir, 0, OPC_NOP)->repeat = 5; -@@ -740,7 +751,7 @@ trans_pow(const struct instr_translater *t, - /* mul.f Rtmp, Rtmp, Rsrc1 */ - instr = ir3_instr_create(ctx->ir, 2, OPC_MUL_F); - add_dst_reg(ctx, instr, &tmp_dst, 0); -- add_src_reg(ctx, instr, &tmp_src, 0); -+ add_src_reg(ctx, instr, tmp_src, 0); - add_src_reg(ctx, instr, src1, src1->SwizzleX); - - /* blob compiler seems to ensure there are at least 6 instructions -@@ -752,10 +763,10 @@ trans_pow(const struct instr_translater *t, - /* exp2 Rdst, Rtmp */ - instr = ir3_instr_create(ctx->ir, 4, OPC_EXP2); - r = add_dst_reg(ctx, instr, &tmp_dst, 0); -- add_src_reg(ctx, instr, &tmp_src, 0); -+ add_src_reg(ctx, instr, tmp_src, 0); - regmask_set(ctx->needs_ss, r); - -- create_mov(ctx, dst, &tmp_src); -+ create_mov(ctx, dst, tmp_src); - } - - /* texture fetch/sample instructions: */ -@@ -766,8 +777,6 @@ trans_samp(const struct instr_translater *t, - { - struct ir3_register *r; - struct ir3_instruction *instr; -- struct tgsi_dst_register tmp_dst; -- struct tgsi_src_register tmp_src; - struct tgsi_src_register *coord = &inst->Src[0].Register; - struct tgsi_src_register *samp = &inst->Src[1].Register; - unsigned tex = inst->Texture.Texture; -@@ -802,10 +811,13 @@ trans_samp(const struct instr_translater *t, - */ - for (i = 1; (i < 4) && (order[i] >= 0); i++) { - if (src_swiz(coord, i) != (src_swiz(coord, 0) + order[i])) { -- type_t type_mov = get_type(ctx); -+ struct tgsi_dst_register tmp_dst; -+ struct tgsi_src_register *tmp_src; -+ -+ type_t type_mov = get_ftype(ctx); - - /* need to move things around: */ -- get_internal_temp(ctx, &tmp_dst, &tmp_src); -+ tmp_src = get_internal_temp(ctx, &tmp_dst); - - for (j = 0; (j < 4) && (order[j] >= 0); j++) { - instr = ir3_instr_create(ctx->ir, 1, 0); -@@ -816,7 +828,7 @@ 
trans_samp(const struct instr_translater *t, - src_swiz(coord, order[j])); - } - -- coord = &tmp_src; -+ coord = tmp_src; - - if (j < 4) - ir3_instr_create(ctx->ir, 0, OPC_NOP)->repeat = 4 - j - 1; -@@ -826,7 +838,7 @@ trans_samp(const struct instr_translater *t, - } - - instr = ir3_instr_create(ctx->ir, 5, t->opc); -- instr->cat5.type = get_type(ctx); -+ instr->cat5.type = get_ftype(ctx); - instr->cat5.samp = samp->Index; - instr->cat5.tex = samp->Index; - instr->flags |= flags; -@@ -847,12 +859,12 @@ trans_cmp(const struct instr_translater *t, - { - struct ir3_instruction *instr; - struct tgsi_dst_register tmp_dst; -- struct tgsi_src_register tmp_src; -+ struct tgsi_src_register *tmp_src; - struct tgsi_src_register constval; - /* final instruction uses original src1 and src2, so we need get_dst() */ - struct tgsi_dst_register *dst = get_dst(ctx, inst); - -- get_internal_temp(ctx, &tmp_dst, &tmp_src); -+ tmp_src = get_internal_temp(ctx, &tmp_dst); - - /* cmps.f.ge tmp, src0, 0.0 */ - instr = ir3_instr_create(ctx->ir, 2, OPC_CMPS_F); -@@ -866,7 +878,7 @@ trans_cmp(const struct instr_translater *t, - instr = ir3_instr_create(ctx->ir, 2, OPC_ADD_S); - instr->repeat = 3; - add_dst_reg(ctx, instr, &tmp_dst, 0); -- add_src_reg(ctx, instr, &tmp_src, 0)->flags |= IR3_REG_R; -+ add_src_reg(ctx, instr, tmp_src, 0)->flags |= IR3_REG_R; - ir3_reg_create(instr, 0, IR3_REG_IMMED)->iim_val = -1; - - /* sel.{f32,f16} dst, src2, tmp, src1 */ -@@ -874,7 +886,7 @@ trans_cmp(const struct instr_translater *t, - OPC_SEL_F16 : OPC_SEL_F32); - vectorize(ctx, instr, dst, 3, - &inst->Src[2].Register, 0, -- &tmp_src, 0, -+ tmp_src, 0, - &inst->Src[1].Register, 0); - - put_dst(ctx, inst, dst); -@@ -1066,7 +1078,6 @@ instr_cat3(const struct instr_translater *t, - { - struct tgsi_dst_register *dst = get_dst(ctx, inst); - struct tgsi_src_register *src1; -- struct tgsi_src_register tmp_src; - struct ir3_instruction *instr; - - /* Blob compiler never seems to use a const in src1 position.. 
-@@ -1075,7 +1086,7 @@ instr_cat3(const struct instr_translater *t, - * const. Not sure if this is a hw bug, or simply that the - * disassembler lies. - */ -- src1 = get_unconst(ctx, &inst->Src[1].Register, &tmp_src); -+ src1 = get_unconst(ctx, &inst->Src[1].Register); - - instr = ir3_instr_create(ctx->ir, 3, - ctx->so->half_precision ? t->hopc : t->opc); -@@ -1093,11 +1104,10 @@ instr_cat4(const struct instr_translater *t, - { - struct tgsi_dst_register *dst = get_dst(ctx, inst); - struct tgsi_src_register *src; -- struct tgsi_src_register tmp_src; - struct ir3_instruction *instr; - - /* seems like blob compiler avoids const as src.. */ -- src = get_unconst(ctx, &inst->Src[0].Register, &tmp_src); -+ src = get_unconst(ctx, &inst->Src[0].Register); - - ir3_instr_create(ctx->ir, 0, OPC_NOP)->repeat = 5; - instr = ir3_instr_create(ctx->ir, 4, t->opc); --- -1.8.4.2 - diff --git a/0013-freedreno-a3xx-compiler-fix-SGT-SLT-etc.patch b/0013-freedreno-a3xx-compiler-fix-SGT-SLT-etc.patch deleted file mode 100644 index 621070a..0000000 --- a/0013-freedreno-a3xx-compiler-fix-SGT-SLT-etc.patch +++ /dev/null @@ -1,231 +0,0 @@ -From c20aa295ec0e1f7b70986a32ef2d74e5097cf640 Mon Sep 17 00:00:00 2001 -From: Rob Clark -Date: Sat, 24 Aug 2013 13:02:53 -0400 -Subject: [PATCH 13/17] freedreno/a3xx/compiler: fix SGT/SLT/etc - -The cmps.f.* instruction doesn't actually seem to give a float 1.0 or -0.0 output. It either needs a cov.u16f16 or add.s + sel.f16. This -makes SGT/SLT/etc more similar to CMP, so handle them in trans_cmp(). - -This fixes a bunch of piglit tests. 
- -Signed-off-by: Rob Clark ---- - src/gallium/drivers/freedreno/a3xx/fd3_compiler.c | 154 ++++++++++++++++++---- - 1 file changed, 125 insertions(+), 29 deletions(-) - -diff --git a/src/gallium/drivers/freedreno/a3xx/fd3_compiler.c b/src/gallium/drivers/freedreno/a3xx/fd3_compiler.c -index b5cdda8..477053b 100644 ---- a/src/gallium/drivers/freedreno/a3xx/fd3_compiler.c -+++ b/src/gallium/drivers/freedreno/a3xx/fd3_compiler.c -@@ -851,7 +851,39 @@ trans_samp(const struct instr_translater *t, - regmask_set(ctx->needs_sy, r); - } - --/* CMP(a,b,c) = (a < 0) ? b : c */ -+/* -+ * SEQ(a,b) = (a == b) ? 1.0 : 0.0 -+ * cmps.f.eq tmp0, b, a -+ * cov.u16f16 dst, tmp0 -+ * -+ * SNE(a,b) = (a != b) ? 1.0 : 0.0 -+ * cmps.f.eq tmp0, b, a -+ * add.s tmp0, tmp0, -1 -+ * sel.f16 dst, {0.0}, tmp0, {1.0} -+ * -+ * SGE(a,b) = (a >= b) ? 1.0 : 0.0 -+ * cmps.f.ge tmp0, a, b -+ * cov.u16f16 dst, tmp0 -+ * -+ * SLE(a,b) = (a <= b) ? 1.0 : 0.0 -+ * cmps.f.ge tmp0, b, a -+ * cov.u16f16 dst, tmp0 -+ * -+ * SGT(a,b) = (a > b) ? 1.0 : 0.0 -+ * cmps.f.ge tmp0, b, a -+ * add.s tmp0, tmp0, -1 -+ * sel.f16 dst, {0.0}, tmp0, {1.0} -+ * -+ * SLT(a,b) = (a < b) ? 1.0 : 0.0 -+ * cmps.f.ge tmp0, a, b -+ * add.s tmp0, tmp0, -1 -+ * sel.f16 dst, {0.0}, tmp0, {1.0} -+ * -+ * CMP(a,b,c) = (a < 0.0) ? 
b : c -+ * cmps.f.ge tmp0, a, {0.0} -+ * add.s tmp0, tmp0, -1 -+ * sel.f16 dst, c, tmp0, b -+ */ - static void - trans_cmp(const struct instr_translater *t, - struct fd3_compile_context *ctx, -@@ -860,34 +892,97 @@ trans_cmp(const struct instr_translater *t, - struct ir3_instruction *instr; - struct tgsi_dst_register tmp_dst; - struct tgsi_src_register *tmp_src; -- struct tgsi_src_register constval; -- /* final instruction uses original src1 and src2, so we need get_dst() */ -+ struct tgsi_src_register constval0, constval1; -+ /* final instruction for CMP() uses orig src1 and src2: */ - struct tgsi_dst_register *dst = get_dst(ctx, inst); -+ struct tgsi_src_register *a0, *a1; -+ unsigned condition; - - tmp_src = get_internal_temp(ctx, &tmp_dst); - -- /* cmps.f.ge tmp, src0, 0.0 */ -+ switch (t->tgsi_opc) { -+ case TGSI_OPCODE_SEQ: -+ case TGSI_OPCODE_SNE: -+ a0 = &inst->Src[1].Register; /* b */ -+ a1 = &inst->Src[0].Register; /* a */ -+ condition = IR3_COND_EQ; -+ break; -+ case TGSI_OPCODE_SGE: -+ case TGSI_OPCODE_SLT: -+ a0 = &inst->Src[0].Register; /* a */ -+ a1 = &inst->Src[1].Register; /* b */ -+ condition = IR3_COND_GE; -+ break; -+ case TGSI_OPCODE_SLE: -+ case TGSI_OPCODE_SGT: -+ a0 = &inst->Src[1].Register; /* b */ -+ a1 = &inst->Src[0].Register; /* a */ -+ condition = IR3_COND_GE; -+ break; -+ case TGSI_OPCODE_CMP: -+ get_immediate(ctx, &constval0, fui(0.0)); -+ a0 = &inst->Src[0].Register; /* a */ -+ a1 = &constval0; /* {0.0} */ -+ condition = IR3_COND_GE; -+ break; -+ default: -+ compile_assert(ctx, 0); -+ return; -+ } -+ -+ /* NOTE: seems blob compiler will move a const to a gpr if both -+ * src args to cmps.f are const. Need to check if this applies -+ * to other instructions.. 
-+ */ -+ if (is_const(a0) && is_const(a1)) -+ a0 = get_unconst(ctx, a0); -+ -+ /* cmps.f.ge tmp, a0, a1 */ - instr = ir3_instr_create(ctx->ir, 2, OPC_CMPS_F); -- instr->cat2.condition = IR3_COND_GE; -- get_immediate(ctx, &constval, fui(0.0)); -- vectorize(ctx, instr, &tmp_dst, 2, -- &inst->Src[0].Register, 0, -- &constval, 0); -+ instr->cat2.condition = condition; -+ vectorize(ctx, instr, &tmp_dst, 2, a0, 0, a1, 0); - -- /* add.s tmp, tmp, -1 */ -- instr = ir3_instr_create(ctx->ir, 2, OPC_ADD_S); -- instr->repeat = 3; -- add_dst_reg(ctx, instr, &tmp_dst, 0); -- add_src_reg(ctx, instr, tmp_src, 0)->flags |= IR3_REG_R; -- ir3_reg_create(instr, 0, IR3_REG_IMMED)->iim_val = -1; -+ switch (t->tgsi_opc) { -+ case TGSI_OPCODE_SEQ: -+ case TGSI_OPCODE_SGE: -+ case TGSI_OPCODE_SLE: -+ /* cov.u16f16 dst, tmp0 */ -+ instr = ir3_instr_create(ctx->ir, 1, 0); -+ instr->cat1.src_type = get_utype(ctx); -+ instr->cat1.dst_type = get_ftype(ctx); -+ vectorize(ctx, instr, dst, 1, tmp_src, 0); -+ break; -+ case TGSI_OPCODE_SNE: -+ case TGSI_OPCODE_SGT: -+ case TGSI_OPCODE_SLT: -+ case TGSI_OPCODE_CMP: -+ /* add.s tmp, tmp, -1 */ -+ instr = ir3_instr_create(ctx->ir, 2, OPC_ADD_S); -+ instr->repeat = 3; -+ add_dst_reg(ctx, instr, &tmp_dst, 0); -+ add_src_reg(ctx, instr, tmp_src, 0)->flags |= IR3_REG_R; -+ ir3_reg_create(instr, 0, IR3_REG_IMMED)->iim_val = -1; -+ -+ if (t->tgsi_opc == TGSI_OPCODE_CMP) { -+ /* sel.{f32,f16} dst, src2, tmp, src1 */ -+ instr = ir3_instr_create(ctx->ir, 3, -+ ctx->so->half_precision ? OPC_SEL_F16 : OPC_SEL_F32); -+ vectorize(ctx, instr, dst, 3, -+ &inst->Src[2].Register, 0, -+ tmp_src, 0, -+ &inst->Src[1].Register, 0); -+ } else { -+ get_immediate(ctx, &constval0, fui(0.0)); -+ get_immediate(ctx, &constval1, fui(1.0)); -+ /* sel.{f32,f16} dst, {0.0}, tmp0, {1.0} */ -+ instr = ir3_instr_create(ctx->ir, 3, -+ ctx->so->half_precision ? 
OPC_SEL_F16 : OPC_SEL_F32); -+ vectorize(ctx, instr, dst, 3, -+ &constval0, 0, tmp_src, 0, &constval1, 0); -+ } - -- /* sel.{f32,f16} dst, src2, tmp, src1 */ -- instr = ir3_instr_create(ctx->ir, 3, ctx->so->half_precision ? -- OPC_SEL_F16 : OPC_SEL_F32); -- vectorize(ctx, instr, dst, 3, -- &inst->Src[2].Register, 0, -- tmp_src, 0, -- &inst->Src[1].Register, 0); -+ break; -+ } - - put_dst(ctx, inst, dst); - } -@@ -948,8 +1043,8 @@ trans_if(const struct instr_translater *t, - - instr = ir3_instr_create(ctx->ir, 2, OPC_CMPS_F); - ir3_reg_create(instr, regid(REG_P0, 0), 0); -- add_src_reg(ctx, instr, &constval, constval.SwizzleX); - add_src_reg(ctx, instr, src, src->SwizzleX); -+ add_src_reg(ctx, instr, &constval, constval.SwizzleX); - instr->cat2.condition = IR3_COND_EQ; - - instr = ir3_instr_create(ctx->ir, 0, OPC_BR); -@@ -1033,10 +1128,6 @@ instr_cat2(const struct instr_translater *t, - instr = ir3_instr_create(ctx->ir, 2, t->opc); - - switch (t->tgsi_opc) { -- case TGSI_OPCODE_SLT: -- case TGSI_OPCODE_SGE: -- instr->cat2.condition = t->arg; -- break; - case TGSI_OPCODE_ABS: - src0_flags = IR3_REG_ABS; - break; -@@ -1135,12 +1226,11 @@ static const struct instr_translater translaters[TGSI_OPCODE_LAST] = { - INSTR(DPH, trans_dotp, .arg = 3), /* almost like DP3 */ - INSTR(MIN, instr_cat2, .opc = OPC_MIN_F), - INSTR(MAX, instr_cat2, .opc = OPC_MAX_F), -- INSTR(SLT, instr_cat2, .opc = OPC_CMPS_F, .arg = IR3_COND_LT), -- INSTR(SGE, instr_cat2, .opc = OPC_CMPS_F, .arg = IR3_COND_GE), - INSTR(MAD, instr_cat3, .opc = OPC_MAD_F32, .hopc = OPC_MAD_F16), - INSTR(LRP, trans_lrp), - INSTR(FRC, trans_frac), - INSTR(FLR, instr_cat2, .opc = OPC_FLOOR_F), -+ INSTR(ARL, instr_cat2, .opc = OPC_FLOOR_F), - INSTR(EX2, instr_cat4, .opc = OPC_EXP2), - INSTR(LG2, instr_cat4, .opc = OPC_LOG2), - INSTR(POW, trans_pow), -@@ -1149,6 +1239,12 @@ static const struct instr_translater translaters[TGSI_OPCODE_LAST] = { - INSTR(SIN, instr_cat4, .opc = OPC_COS), - INSTR(TEX, trans_samp, .opc = 
OPC_SAM, .arg = TGSI_OPCODE_TEX), - INSTR(TXP, trans_samp, .opc = OPC_SAM, .arg = TGSI_OPCODE_TXP), -+ INSTR(SGT, trans_cmp), -+ INSTR(SLT, trans_cmp), -+ INSTR(SGE, trans_cmp), -+ INSTR(SLE, trans_cmp), -+ INSTR(SNE, trans_cmp), -+ INSTR(SEQ, trans_cmp), - INSTR(CMP, trans_cmp), - INSTR(IF, trans_if), - INSTR(ELSE, trans_else), --- -1.8.4.2 - diff --git a/0014-freedreno-a3xx-don-t-leak-so-much.patch b/0014-freedreno-a3xx-don-t-leak-so-much.patch deleted file mode 100644 index 0ac9110..0000000 --- a/0014-freedreno-a3xx-don-t-leak-so-much.patch +++ /dev/null @@ -1,36 +0,0 @@ -From 0b2c5119cb772751edb3c42c9c0545443e26fd7f Mon Sep 17 00:00:00 2001 -From: Rob Clark -Date: Mon, 17 Jun 2013 20:11:54 -0400 -Subject: [PATCH 14/17] freedreno/a3xx: don't leak so much - -Signed-off-by: Rob Clark ---- - src/gallium/drivers/freedreno/a3xx/fd3_context.c | 11 +++++++++++ - 1 file changed, 11 insertions(+) - -diff --git a/src/gallium/drivers/freedreno/a3xx/fd3_context.c b/src/gallium/drivers/freedreno/a3xx/fd3_context.c -index 3ae9b29..589aeed 100644 ---- a/src/gallium/drivers/freedreno/a3xx/fd3_context.c -+++ b/src/gallium/drivers/freedreno/a3xx/fd3_context.c -@@ -40,7 +40,18 @@ - static void - fd3_context_destroy(struct pipe_context *pctx) - { -+ struct fd3_context *fd3_ctx = fd3_context(fd_context(pctx)); -+ - fd3_prog_fini(pctx); -+ -+ fd_bo_del(fd3_ctx->vs_pvt_mem); -+ fd_bo_del(fd3_ctx->fs_pvt_mem); -+ fd_bo_del(fd3_ctx->vsc_size_mem); -+ fd_bo_del(fd3_ctx->vsc_pipe_mem); -+ -+ pipe_resource_reference(&fd3_ctx->solid_vbuf, NULL); -+ pipe_resource_reference(&fd3_ctx->blit_texcoord_vbuf, NULL); -+ - fd_context_destroy(pctx); - } - --- -1.8.4.2 - diff --git a/0015-freedreno-a3xx-compiler-better-const-handling.patch b/0015-freedreno-a3xx-compiler-better-const-handling.patch deleted file mode 100644 index 221e083..0000000 --- a/0015-freedreno-a3xx-compiler-better-const-handling.patch +++ /dev/null @@ -1,376 +0,0 @@ -From f1998c8aa7d82006f9ef7e6710a0f68f30bfc109 Mon Sep 17 
00:00:00 2001 -From: Rob Clark -Date: Sat, 24 Aug 2013 17:30:50 -0400 -Subject: [PATCH 15/17] freedreno/a3xx/compiler: better const handling - -Seems like most/all instructions have some restrictions about const src -registers. In seems like the 2 src (cat2) instructions can take at most -one const, and the 3 src (cat3) instructions can take at most one const -in the first 2 arguments. And so on. Handle this properly now. - -Signed-off-by: Rob Clark ---- - src/gallium/drivers/freedreno/a3xx/fd3_compiler.c | 211 +++++++++++++--------- - 1 file changed, 121 insertions(+), 90 deletions(-) - -diff --git a/src/gallium/drivers/freedreno/a3xx/fd3_compiler.c b/src/gallium/drivers/freedreno/a3xx/fd3_compiler.c -index 477053b..dcdd2d9 100644 ---- a/src/gallium/drivers/freedreno/a3xx/fd3_compiler.c -+++ b/src/gallium/drivers/freedreno/a3xx/fd3_compiler.c -@@ -355,20 +355,47 @@ is_const(struct tgsi_src_register *src) - (src->File == TGSI_FILE_IMMEDIATE); - } - -+static type_t -+get_ftype(struct fd3_compile_context *ctx) -+{ -+ return ctx->so->half_precision ? TYPE_F16 : TYPE_F32; -+} -+ -+static type_t -+get_utype(struct fd3_compile_context *ctx) -+{ -+ return ctx->so->half_precision ? 
TYPE_U16 : TYPE_U32; -+} -+ -+static unsigned -+src_swiz(struct tgsi_src_register *src, int chan) -+{ -+ switch (chan) { -+ case 0: return src->SwizzleX; -+ case 1: return src->SwizzleY; -+ case 2: return src->SwizzleZ; -+ case 3: return src->SwizzleW; -+ } -+ assert(0); -+ return 0; -+} -+ - /* for instructions that cannot take a const register as src, if needed - * generate a move to temporary gpr: - */ - static struct tgsi_src_register * - get_unconst(struct fd3_compile_context *ctx, struct tgsi_src_register *src) - { -- if (is_const(src)) { -- static struct tgsi_dst_register tmp_dst; -- struct tgsi_src_register *tmp_src = -- get_internal_temp(ctx, &tmp_dst); -- create_mov(ctx, &tmp_dst, src); -- src = tmp_src; -- } -- return src; -+ struct tgsi_dst_register tmp_dst; -+ struct tgsi_src_register *tmp_src; -+ -+ compile_assert(ctx, is_const(src)); -+ -+ tmp_src = get_internal_temp(ctx, &tmp_dst); -+ -+ create_mov(ctx, &tmp_dst, src); -+ -+ return tmp_src; - } - - static void -@@ -418,31 +445,6 @@ get_immediate(struct fd3_compile_context *ctx, - reg->SwizzleW = swiz2tgsi[swiz]; - } - --static type_t --get_ftype(struct fd3_compile_context *ctx) --{ -- return ctx->so->half_precision ? TYPE_F16 : TYPE_F32; --} -- --static type_t --get_utype(struct fd3_compile_context *ctx) --{ -- return ctx->so->half_precision ? 
TYPE_U16 : TYPE_U32; --} -- --static unsigned --src_swiz(struct tgsi_src_register *src, int chan) --{ -- switch (chan) { -- case 0: return src->SwizzleX; -- case 1: return src->SwizzleY; -- case 2: return src->SwizzleZ; -- case 3: return src->SwizzleW; -- } -- assert(0); -- return 0; --} -- - static void - create_mov(struct fd3_compile_context *ctx, struct tgsi_dst_register *dst, - struct tgsi_src_register *src) -@@ -463,7 +465,6 @@ create_mov(struct fd3_compile_context *ctx, struct tgsi_dst_register *dst, - ir3_instr_create(ctx->ir, 0, OPC_NOP); - } - } -- - } - - static void -@@ -584,6 +585,15 @@ vectorize(struct fd3_compile_context *ctx, struct ir3_instruction *instr, - * native instructions: - */ - -+static inline void -+get_swiz(unsigned *swiz, struct tgsi_src_register *src) -+{ -+ swiz[0] = src->SwizzleX; -+ swiz[1] = src->SwizzleY; -+ swiz[2] = src->SwizzleZ; -+ swiz[3] = src->SwizzleW; -+} -+ - static void - trans_dotp(const struct instr_translater *t, - struct fd3_compile_context *ctx, -@@ -595,34 +605,31 @@ trans_dotp(const struct instr_translater *t, - struct tgsi_dst_register *dst = &inst->Dst[0].Register; - struct tgsi_src_register *src0 = &inst->Src[0].Register; - struct tgsi_src_register *src1 = &inst->Src[1].Register; -- unsigned swiz0[] = { src0->SwizzleX, src0->SwizzleY, src0->SwizzleZ, src0->SwizzleW }; -- unsigned swiz1[] = { src1->SwizzleX, src1->SwizzleY, src1->SwizzleZ, src1->SwizzleW }; -+ unsigned swiz0[4]; -+ unsigned swiz1[4]; - opc_t opc_mad = ctx->so->half_precision ? OPC_MAD_F16 : OPC_MAD_F32; - unsigned n = t->arg; /* number of components */ -- unsigned i; -+ unsigned i, swapped = 0; - - tmp_src = get_internal_temp_repl(ctx, &tmp_dst); - -- /* Blob compiler never seems to use a const in src1 position for -- * mad.*, although there does seem (according to disassembler -- * hidden in libllvm-a3xx.so) to be a bit to indicate that src1 -- * is a const. Not sure if this is a hw bug, or simply that the -- * disassembler lies. 
-+ /* in particular, can't handle const for src1 for cat3/mad: - */ - if (is_const(src1)) { -- -- /* the mov to tmp unswizzles src1, so now we have tmp.xyzw: -- */ -- for (i = 0; i < 4; i++) -- swiz1[i] = i; -- -- /* the first mul.f will clobber tmp.x, but that is ok -- * because after that point we no longer need tmp.x: -- */ -- create_mov(ctx, &tmp_dst, src1); -- src1 = tmp_src; -+ if (!is_const(src0)) { -+ struct tgsi_src_register *tmp; -+ tmp = src0; -+ src0 = src1; -+ src1 = tmp; -+ swapped = 1; -+ } else { -+ src0 = get_unconst(ctx, src0); -+ } - } - -+ get_swiz(swiz0, src0); -+ get_swiz(swiz1, src1); -+ - instr = ir3_instr_create(ctx->ir, 2, OPC_MUL_F); - add_dst_reg(ctx, instr, &tmp_dst, 0); - add_src_reg(ctx, instr, src0, swiz0[0]); -@@ -640,22 +647,20 @@ trans_dotp(const struct instr_translater *t, - - /* DPH(a,b) = (a.x * b.x) + (a.y * b.y) + (a.z * b.z) + b.w */ - if (t->tgsi_opc == TGSI_OPCODE_DPH) { -- ir3_instr_create(ctx->ir, 0, OPC_NOP); -+ ir3_instr_create(ctx->ir, 0, OPC_NOP)->repeat = 1; - - instr = ir3_instr_create(ctx->ir, 2, OPC_ADD_F); - add_dst_reg(ctx, instr, &tmp_dst, 0); -- add_src_reg(ctx, instr, src1, swiz1[i]); -+ if (swapped) -+ add_src_reg(ctx, instr, src0, swiz0[i]); -+ else -+ add_src_reg(ctx, instr, src1, swiz1[i]); - add_src_reg(ctx, instr, tmp_src, 0); - - n++; - } - -- ir3_instr_create(ctx->ir, 0, OPC_NOP); -- -- /* pad out to multiple of 4 scalar instructions: */ -- for (i = 2 * n; i % 4; i++) { -- ir3_instr_create(ctx->ir, 0, OPC_NOP); -- } -+ ir3_instr_create(ctx->ir, 0, OPC_NOP)->repeat = 2; - - create_mov(ctx, dst, tmp_src); - } -@@ -670,6 +675,11 @@ trans_lrp(const struct instr_translater *t, - struct tgsi_dst_register tmp_dst1, tmp_dst2; - struct tgsi_src_register *tmp_src1, *tmp_src2; - struct tgsi_src_register tmp_const; -+ struct tgsi_src_register *src0 = &inst->Src[0].Register; -+ struct tgsi_src_register *src1 = &inst->Src[1].Register; -+ -+ if (is_const(src0) && is_const(src1)) -+ src0 = get_unconst(ctx, src0); - 
- tmp_src1 = get_internal_temp(ctx, &tmp_dst1); - tmp_src2 = get_internal_temp(ctx, &tmp_dst2); -@@ -678,15 +688,12 @@ trans_lrp(const struct instr_translater *t, - - /* tmp1 = (a * b) */ - instr = ir3_instr_create(ctx->ir, 2, OPC_MUL_F); -- vectorize(ctx, instr, &tmp_dst1, 2, -- &inst->Src[0].Register, 0, -- &inst->Src[1].Register, 0); -+ vectorize(ctx, instr, &tmp_dst1, 2, src0, 0, src1, 0); - - /* tmp2 = (1 - a) */ - instr = ir3_instr_create(ctx->ir, 2, OPC_ADD_F); -- vectorize(ctx, instr, &tmp_dst2, 2, -- &tmp_const, 0, -- &inst->Src[0].Register, IR3_REG_NEGATE); -+ vectorize(ctx, instr, &tmp_dst2, 2, &tmp_const, 0, -+ src0, IR3_REG_NEGATE); - - /* tmp2 = tmp2 * c */ - instr = ir3_instr_create(ctx->ir, 2, OPC_MUL_F); -@@ -930,10 +937,6 @@ trans_cmp(const struct instr_translater *t, - return; - } - -- /* NOTE: seems blob compiler will move a const to a gpr if both -- * src args to cmps.f are const. Need to check if this applies -- * to other instructions.. -- */ - if (is_const(a0) && is_const(a1)) - a0 = get_unconst(ctx, a0); - -@@ -1041,6 +1044,9 @@ trans_if(const struct instr_translater *t, - - get_immediate(ctx, &constval, fui(0.0)); - -+ if (is_const(src)) -+ src = get_unconst(ctx, src); -+ - instr = ir3_instr_create(ctx->ir, 2, OPC_CMPS_F); - ir3_reg_create(instr, regid(REG_P0, 0), 0); - add_src_reg(ctx, instr, src, src->SwizzleX); -@@ -1122,11 +1128,11 @@ instr_cat2(const struct instr_translater *t, - struct tgsi_full_instruction *inst) - { - struct tgsi_dst_register *dst = get_dst(ctx, inst); -+ struct tgsi_src_register *src0 = &inst->Src[0].Register; -+ struct tgsi_src_register *src1 = &inst->Src[1].Register; - struct ir3_instruction *instr; - unsigned src0_flags = 0; - -- instr = ir3_instr_create(ctx->ir, 2, t->opc); -- - switch (t->tgsi_opc) { - case TGSI_OPCODE_ABS: - src0_flags = IR3_REG_ABS; -@@ -1149,41 +1155,65 @@ instr_cat2(const struct instr_translater *t, - case OPC_SETRM: - case OPC_CBITS_B: - /* these only have one src reg */ -- 
vectorize(ctx, instr, dst, 1, -- &inst->Src[0].Register, src0_flags); -+ instr = ir3_instr_create(ctx->ir, 2, t->opc); -+ vectorize(ctx, instr, dst, 1, src0, src0_flags); - break; - default: -- vectorize(ctx, instr, dst, 2, -- &inst->Src[0].Register, src0_flags, -- &inst->Src[1].Register, 0); -+ if (is_const(src0) && is_const(src1)) -+ src0 = get_unconst(ctx, src0); -+ -+ instr = ir3_instr_create(ctx->ir, 2, t->opc); -+ vectorize(ctx, instr, dst, 2, src0, src0_flags, src1, 0); - break; - } - - put_dst(ctx, inst, dst); - } - -+static bool is_mad(opc_t opc) -+{ -+ switch (opc) { -+ case OPC_MAD_U16: -+ case OPC_MADSH_U16: -+ case OPC_MAD_S16: -+ case OPC_MADSH_M16: -+ case OPC_MAD_U24: -+ case OPC_MAD_S24: -+ case OPC_MAD_F16: -+ case OPC_MAD_F32: -+ return true; -+ default: -+ return false; -+ } -+} -+ - static void - instr_cat3(const struct instr_translater *t, - struct fd3_compile_context *ctx, - struct tgsi_full_instruction *inst) - { - struct tgsi_dst_register *dst = get_dst(ctx, inst); -- struct tgsi_src_register *src1; -+ struct tgsi_src_register *src0 = &inst->Src[0].Register; -+ struct tgsi_src_register *src1 = &inst->Src[1].Register; - struct ir3_instruction *instr; - -- /* Blob compiler never seems to use a const in src1 position.. -- * although there does seem (according to disassembler hidden -- * in libllvm-a3xx.so) to be a bit to indicate that src1 is a -- * const. Not sure if this is a hw bug, or simply that the -- * disassembler lies. -+ /* in particular, can't handle const for src1 for cat3.. -+ * for mad, we can swap first two src's if needed: - */ -- src1 = get_unconst(ctx, &inst->Src[1].Register); -+ if (is_const(src1)) { -+ if (is_mad(t->opc) && !is_const(src0)) { -+ struct tgsi_src_register *tmp; -+ tmp = src0; -+ src0 = src1; -+ src1 = tmp; -+ } else { -+ src0 = get_unconst(ctx, src0); -+ } -+ } - - instr = ir3_instr_create(ctx->ir, 3, - ctx->so->half_precision ? 
t->hopc : t->opc); -- vectorize(ctx, instr, dst, 3, -- &inst->Src[0].Register, 0, -- src1, 0, -+ vectorize(ctx, instr, dst, 3, src0, 0, src1, 0, - &inst->Src[2].Register, 0); - put_dst(ctx, inst, dst); - } -@@ -1194,11 +1224,12 @@ instr_cat4(const struct instr_translater *t, - struct tgsi_full_instruction *inst) - { - struct tgsi_dst_register *dst = get_dst(ctx, inst); -- struct tgsi_src_register *src; -+ struct tgsi_src_register *src = &inst->Src[0].Register; - struct ir3_instruction *instr; - - /* seems like blob compiler avoids const as src.. */ -- src = get_unconst(ctx, &inst->Src[0].Register); -+ if (is_const(src)) -+ src = get_unconst(ctx, src); - - ir3_instr_create(ctx->ir, 0, OPC_NOP)->repeat = 5; - instr = ir3_instr_create(ctx->ir, 4, t->opc); --- -1.8.4.2 - diff --git a/0016-freedreno-a3xx-compiler-handle-sync-flags-better.patch b/0016-freedreno-a3xx-compiler-handle-sync-flags-better.patch deleted file mode 100644 index 279b118..0000000 --- a/0016-freedreno-a3xx-compiler-handle-sync-flags-better.patch +++ /dev/null @@ -1,128 +0,0 @@ -From 4f0be333e7ee93fbb006c5570a594e49b4441731 Mon Sep 17 00:00:00 2001 -From: Rob Clark -Date: Tue, 27 Aug 2013 19:24:53 -0400 -Subject: [PATCH 16/17] freedreno/a3xx/compiler: handle sync flags better - -We need to set the flag on all the .xyzw components that are written by -the instruction, not just on .x. Otherwise a later use of rN.y (for -example) will not trigger the appropriate sync bit to be set. 
- -Signed-off-by: Rob Clark ---- - src/gallium/drivers/freedreno/a3xx/fd3_compiler.c | 50 +++++++++++++++-------- - 1 file changed, 34 insertions(+), 16 deletions(-) - -diff --git a/src/gallium/drivers/freedreno/a3xx/fd3_compiler.c b/src/gallium/drivers/freedreno/a3xx/fd3_compiler.c -index dcdd2d9..5115411 100644 ---- a/src/gallium/drivers/freedreno/a3xx/fd3_compiler.c -+++ b/src/gallium/drivers/freedreno/a3xx/fd3_compiler.c -@@ -62,10 +62,16 @@ static unsigned regmask_idx(struct ir3_register *reg) - return num; - } - --static void regmask_set(regmask_t regmask, struct ir3_register *reg) -+static void regmask_set(regmask_t regmask, struct ir3_register *reg, -+ unsigned wrmask) - { -- unsigned idx = regmask_idx(reg); -- regmask[idx / 8] |= 1 << (idx % 8); -+ unsigned i; -+ for (i = 0; i < 4; i++) { -+ if (wrmask & (1 << i)) { -+ unsigned idx = regmask_idx(reg) + i; -+ regmask[idx / 8] |= 1 << (idx % 8); -+ } -+ } - } - - static unsigned regmask_get(regmask_t regmask, struct ir3_register *reg) -@@ -216,6 +222,24 @@ struct instr_translater { - unsigned arg; - }; - -+static unsigned -+src_flags(struct fd3_compile_context *ctx, struct ir3_register *reg) -+{ -+ unsigned flags = 0; -+ -+ if (regmask_get(ctx->needs_ss, reg)) { -+ flags |= IR3_INSTR_SS; -+ memset(ctx->needs_ss, 0, sizeof(ctx->needs_ss)); -+ } -+ -+ if (regmask_get(ctx->needs_sy, reg)) { -+ flags |= IR3_INSTR_SY; -+ memset(ctx->needs_sy, 0, sizeof(ctx->needs_sy)); -+ } -+ -+ return flags; -+} -+ - static struct ir3_register * - add_dst_reg(struct fd3_compile_context *ctx, struct ir3_instruction *instr, - const struct tgsi_dst_register *dst, unsigned chan) -@@ -279,15 +303,7 @@ add_src_reg(struct fd3_compile_context *ctx, struct ir3_instruction *instr, - - reg = ir3_reg_create(instr, regid(num, chan), flags); - -- if (regmask_get(ctx->needs_ss, reg)) { -- instr->flags |= IR3_INSTR_SS; -- memset(ctx->needs_ss, 0, sizeof(ctx->needs_ss)); -- } -- -- if (regmask_get(ctx->needs_sy, reg)) { -- instr->flags |= 
IR3_INSTR_SY; -- memset(ctx->needs_sy, 0, sizeof(ctx->needs_sy)); -- } -+ instr->flags |= src_flags(ctx, reg); - - return reg; - } -@@ -567,6 +583,7 @@ vectorize(struct fd3_compile_context *ctx, struct ir3_instruction *instr, - cur->regs[j+1]->num = - regid(cur->regs[j+1]->num >> 2, - src_swiz(src, i)); -+ cur->flags |= src_flags(ctx, cur->regs[j+1]); - } - va_end(ap); - } -@@ -753,7 +770,7 @@ trans_pow(const struct instr_translater *t, - instr = ir3_instr_create(ctx->ir, 4, OPC_LOG2); - r = add_dst_reg(ctx, instr, &tmp_dst, 0); - add_src_reg(ctx, instr, src0, src0->SwizzleX); -- regmask_set(ctx->needs_ss, r); -+ regmask_set(ctx->needs_ss, r, TGSI_WRITEMASK_X); - - /* mul.f Rtmp, Rtmp, Rsrc1 */ - instr = ir3_instr_create(ctx->ir, 2, OPC_MUL_F); -@@ -771,7 +788,7 @@ trans_pow(const struct instr_translater *t, - instr = ir3_instr_create(ctx->ir, 4, OPC_EXP2); - r = add_dst_reg(ctx, instr, &tmp_dst, 0); - add_src_reg(ctx, instr, tmp_src, 0); -- regmask_set(ctx->needs_ss, r); -+ regmask_set(ctx->needs_ss, r, TGSI_WRITEMASK_X); - - create_mov(ctx, dst, tmp_src); - } -@@ -855,7 +872,7 @@ trans_samp(const struct instr_translater *t, - - add_src_reg(ctx, instr, coord, coord->SwizzleX); - -- regmask_set(ctx->needs_sy, r); -+ regmask_set(ctx->needs_sy, r, r->wrmask); - } - - /* -@@ -1236,7 +1253,8 @@ instr_cat4(const struct instr_translater *t, - - vectorize(ctx, instr, dst, 1, src, 0); - -- regmask_set(ctx->needs_ss, instr->regs[0]); -+ regmask_set(ctx->needs_ss, instr->regs[0], -+ inst->Dst[0].Register.WriteMask); - - put_dst(ctx, inst, dst); - } --- -1.8.4.2 - diff --git a/0017-freedreno-updates-for-msm-drm-kms-driver.patch b/0017-freedreno-updates-for-msm-drm-kms-driver.patch deleted file mode 100644 index 2fe4f05..0000000 --- a/0017-freedreno-updates-for-msm-drm-kms-driver.patch +++ /dev/null @@ -1,328 +0,0 @@ -From 4fd03f26aa1c2ddef24b2c4f8d1a10c96fbf7f40 Mon Sep 17 00:00:00 2001 -From: Rob Clark -Date: Thu, 29 Aug 2013 17:24:33 -0400 -Subject: [PATCH 17/17] freedreno: 
updates for msm drm/kms driver - -There where some small API tweaks in libdrm_freedreno to enable support -for msm drm/kms driver. - -Signed-off-by: Rob Clark ---- - src/gallium/drivers/freedreno/a2xx/fd2_emit.c | 4 +-- - src/gallium/drivers/freedreno/a2xx/fd2_gmem.c | 6 ++--- - src/gallium/drivers/freedreno/a3xx/fd3_emit.c | 14 +++++------ - src/gallium/drivers/freedreno/a3xx/fd3_gmem.c | 8 +++--- - src/gallium/drivers/freedreno/a3xx/fd3_program.c | 4 +-- - src/gallium/drivers/freedreno/freedreno_draw.c | 2 +- - src/gallium/drivers/freedreno/freedreno_resource.c | 18 ++++++++++++-- - src/gallium/drivers/freedreno/freedreno_util.h | 29 +++++++++++++++------- - 8 files changed, 55 insertions(+), 30 deletions(-) - -diff --git a/src/gallium/drivers/freedreno/a2xx/fd2_emit.c b/src/gallium/drivers/freedreno/a2xx/fd2_emit.c -index b03390e..35511ba 100644 ---- a/src/gallium/drivers/freedreno/a2xx/fd2_emit.c -+++ b/src/gallium/drivers/freedreno/a2xx/fd2_emit.c -@@ -137,7 +137,7 @@ emit_texture(struct fd_ringbuffer *ring, struct fd_context *ctx, - OUT_RING(ring, 0x00010000 + (0x6 * const_idx)); - - OUT_RING(ring, sampler->tex0 | view->tex0); -- OUT_RELOC(ring, view->tex_resource->bo, 0, view->fmt); -+ OUT_RELOC(ring, view->tex_resource->bo, 0, view->fmt, 0); - OUT_RING(ring, view->tex2); - OUT_RING(ring, sampler->tex3 | view->tex3); - OUT_RING(ring, sampler->tex4); -@@ -171,7 +171,7 @@ fd2_emit_vertex_bufs(struct fd_ringbuffer *ring, uint32_t val, - OUT_RING(ring, (0x1 << 16) | (val & 0xffff)); - for (i = 0; i < n; i++) { - struct fd_resource *rsc = fd_resource(vbufs[i].prsc); -- OUT_RELOC(ring, rsc->bo, vbufs[i].offset, 3); -+ OUT_RELOC(ring, rsc->bo, vbufs[i].offset, 3, 0); - OUT_RING (ring, vbufs[i].size); - } - } -diff --git a/src/gallium/drivers/freedreno/a2xx/fd2_gmem.c b/src/gallium/drivers/freedreno/a2xx/fd2_gmem.c -index 93695bc..89f5a4d 100644 ---- a/src/gallium/drivers/freedreno/a2xx/fd2_gmem.c -+++ b/src/gallium/drivers/freedreno/a2xx/fd2_gmem.c -@@ -70,7 +70,7 
@@ emit_gmem2mem_surf(struct fd_ringbuffer *ring, uint32_t base, - OUT_PKT3(ring, CP_SET_CONSTANT, 5); - OUT_RING(ring, CP_REG(REG_A2XX_RB_COPY_CONTROL)); - OUT_RING(ring, 0x00000000); /* RB_COPY_CONTROL */ -- OUT_RELOC(ring, rsc->bo, 0, 0); /* RB_COPY_DEST_BASE */ -+ OUT_RELOCW(ring, rsc->bo, 0, 0, 0); /* RB_COPY_DEST_BASE */ - OUT_RING(ring, rsc->pitch >> 5); /* RB_COPY_DEST_PITCH */ - OUT_RING(ring, /* RB_COPY_DEST_INFO */ - A2XX_RB_COPY_DEST_INFO_FORMAT(fd2_pipe2color(psurf->format)) | -@@ -199,7 +199,7 @@ emit_mem2gmem_surf(struct fd_ringbuffer *ring, uint32_t base, - A2XX_SQ_TEX_0_CLAMP_Z(SQ_TEX_WRAP) | - A2XX_SQ_TEX_0_PITCH(rsc->pitch)); - OUT_RELOC(ring, rsc->bo, 0, -- fd2_pipe2surface(psurf->format) | 0x800); -+ fd2_pipe2surface(psurf->format) | 0x800, 0); - OUT_RING(ring, A2XX_SQ_TEX_2_WIDTH(psurf->width - 1) | - A2XX_SQ_TEX_2_HEIGHT(psurf->height - 1)); - OUT_RING(ring, 0x01000000 | // XXX -@@ -241,7 +241,7 @@ fd2_emit_tile_mem2gmem(struct fd_context *ctx, uint32_t xoff, uint32_t yoff, - y0 = ((float)yoff) / ((float)pfb->height); - y1 = ((float)yoff + bin_h) / ((float)pfb->height); - OUT_PKT3(ring, CP_MEM_WRITE, 9); -- OUT_RELOC(ring, fd_resource(fd2_ctx->solid_vertexbuf)->bo, 0x60, 0); -+ OUT_RELOC(ring, fd_resource(fd2_ctx->solid_vertexbuf)->bo, 0x60, 0, 0); - OUT_RING(ring, fui(x0)); - OUT_RING(ring, fui(y0)); - OUT_RING(ring, fui(x1)); -diff --git a/src/gallium/drivers/freedreno/a3xx/fd3_emit.c b/src/gallium/drivers/freedreno/a3xx/fd3_emit.c -index 5ffd561..5e58618 100644 ---- a/src/gallium/drivers/freedreno/a3xx/fd3_emit.c -+++ b/src/gallium/drivers/freedreno/a3xx/fd3_emit.c -@@ -81,7 +81,7 @@ fd3_emit_constant(struct fd_ringbuffer *ring, - if (prsc) { - struct fd_bo *bo = fd_resource(prsc)->bo; - OUT_RELOC(ring, bo, offset, -- CP_LOAD_STATE_1_STATE_TYPE(ST_CONSTANTS)); -+ CP_LOAD_STATE_1_STATE_TYPE(ST_CONSTANTS), 0); - } else { - OUT_RING(ring, CP_LOAD_STATE_1_EXT_SRC_ADDR(0) | - CP_LOAD_STATE_1_STATE_TYPE(ST_CONSTANTS)); -@@ -212,7 +212,7 @@ 
emit_textures(struct fd_ringbuffer *ring, - for (i = 0; i < tex->num_textures; i++) { - struct fd3_pipe_sampler_view *view = - fd3_pipe_sampler_view(tex->textures[i]); -- OUT_RELOC(ring, view->tex_resource->bo, 0, 0); -+ OUT_RELOC(ring, view->tex_resource->bo, 0, 0, 0); - /* I think each entry is a ptr to mipmap level.. for now, just - * pad w/ null's until I get around to actually implementing - * mipmap support.. -@@ -296,7 +296,7 @@ fd3_emit_gmem_restore_tex(struct fd_ringbuffer *ring, struct pipe_surface *psurf - CP_LOAD_STATE_0_NUM_UNIT(1)); - OUT_RING(ring, CP_LOAD_STATE_1_STATE_TYPE(ST_CONSTANTS) | - CP_LOAD_STATE_1_EXT_SRC_ADDR(0)); -- OUT_RELOC(ring, rsc->bo, 0, 0); -+ OUT_RELOC(ring, rsc->bo, 0, 0, 0); - } - - void -@@ -322,7 +322,7 @@ fd3_emit_vertex_bufs(struct fd_ringbuffer *ring, - COND(switchnext, A3XX_VFD_FETCH_INSTR_0_SWITCHNEXT) | - A3XX_VFD_FETCH_INSTR_0_INDEXCODE(i) | - A3XX_VFD_FETCH_INSTR_0_STEPRATE(1)); -- OUT_RELOC(ring, rsc->bo, vbufs[i].offset, 0); -+ OUT_RELOC(ring, rsc->bo, vbufs[i].offset, 0, 0); - - OUT_PKT0(ring, REG_A3XX_VFD_DECODE_INSTR(i), 1); - OUT_RING(ring, A3XX_VFD_DECODE_INSTR_CONSTFILL | -@@ -481,12 +481,12 @@ fd3_emit_restore(struct fd_context *ctx) - - OUT_PKT0(ring, REG_A3XX_SP_VS_PVT_MEM_CTRL_REG, 3); - OUT_RING(ring, 0x08000001); /* SP_VS_PVT_MEM_CTRL_REG */ -- OUT_RELOC(ring, fd3_ctx->vs_pvt_mem, 0, 0); /* SP_VS_PVT_MEM_ADDR_REG */ -+ OUT_RELOC(ring, fd3_ctx->vs_pvt_mem, 0,0,0); /* SP_VS_PVT_MEM_ADDR_REG */ - OUT_RING(ring, 0x00000000); /* SP_VS_PVT_MEM_SIZE_REG */ - - OUT_PKT0(ring, REG_A3XX_SP_FS_PVT_MEM_CTRL_REG, 3); - OUT_RING(ring, 0x08000001); /* SP_FS_PVT_MEM_CTRL_REG */ -- OUT_RELOC(ring, fd3_ctx->fs_pvt_mem, 0, 0); /* SP_FS_PVT_MEM_ADDR_REG */ -+ OUT_RELOC(ring, fd3_ctx->fs_pvt_mem, 0,0,0); /* SP_FS_PVT_MEM_ADDR_REG */ - OUT_RING(ring, 0x00000000); /* SP_FS_PVT_MEM_SIZE_REG */ - - OUT_PKT0(ring, REG_A3XX_PC_VERTEX_REUSE_BLOCK_CNTL, 1); -@@ -549,7 +549,7 @@ fd3_emit_restore(struct fd_context *ctx) - 
OUT_RING(ring, 0x00000001); /* UCHE_CACHE_MODE_CONTROL_REG */ - - OUT_PKT0(ring, REG_A3XX_VSC_SIZE_ADDRESS, 1); -- OUT_RELOC(ring, fd3_ctx->vsc_size_mem, 0, 0); /* VSC_SIZE_ADDRESS */ -+ OUT_RELOC(ring, fd3_ctx->vsc_size_mem, 0, 0, 0); /* VSC_SIZE_ADDRESS */ - - OUT_PKT0(ring, REG_A3XX_GRAS_CL_CLIP_CNTL, 1); - OUT_RING(ring, 0x00000000); /* GRAS_CL_CLIP_CNTL */ -diff --git a/src/gallium/drivers/freedreno/a3xx/fd3_gmem.c b/src/gallium/drivers/freedreno/a3xx/fd3_gmem.c -index b9d0580..8d2df47 100644 ---- a/src/gallium/drivers/freedreno/a3xx/fd3_gmem.c -+++ b/src/gallium/drivers/freedreno/a3xx/fd3_gmem.c -@@ -89,7 +89,7 @@ emit_mrt(struct fd_ringbuffer *ring, unsigned nr_bufs, - if (bin_w || (i >= nr_bufs)) { - OUT_RING(ring, A3XX_RB_MRT_BUF_BASE_COLOR_BUF_BASE(base)); - } else { -- OUT_RELOCS(ring, res->bo, 0, 0, -1); -+ OUT_RELOCW(ring, res->bo, 0, 0, -1); - } - - OUT_PKT0(ring, REG_A3XX_SP_FS_IMAGE_OUTPUT_REG(i), 1); -@@ -116,7 +116,7 @@ emit_gmem2mem_surf(struct fd_ringbuffer *ring, - OUT_RING(ring, A3XX_RB_COPY_CONTROL_MSAA_RESOLVE(MSAA_ONE) | - A3XX_RB_COPY_CONTROL_MODE(mode) | - A3XX_RB_COPY_CONTROL_GMEM_BASE(base)); -- OUT_RELOCS(ring, rsc->bo, 0, 0, -1); /* RB_COPY_DEST_BASE */ -+ OUT_RELOCW(ring, rsc->bo, 0, 0, -1); /* RB_COPY_DEST_BASE */ - OUT_RING(ring, A3XX_RB_COPY_DEST_PITCH_PITCH(rsc->pitch * rsc->cpp)); - OUT_RING(ring, A3XX_RB_COPY_DEST_INFO_TILE(LINEAR) | - A3XX_RB_COPY_DEST_INFO_FORMAT(fd3_pipe2color(psurf->format)) | -@@ -272,7 +272,7 @@ fd3_emit_tile_mem2gmem(struct fd_context *ctx, uint32_t xoff, uint32_t yoff, - y1 = ((float)yoff + bin_h) / ((float)pfb->height); - - OUT_PKT3(ring, CP_MEM_WRITE, 5); -- OUT_RELOC(ring, fd_resource(fd3_ctx->blit_texcoord_vbuf)->bo, 0, 0); -+ OUT_RELOC(ring, fd_resource(fd3_ctx->blit_texcoord_vbuf)->bo, 0, 0, 0); - OUT_RING(ring, fui(x0)); - OUT_RING(ring, fui(y0)); - OUT_RING(ring, fui(x1)); -@@ -395,7 +395,7 @@ update_vsc_pipe(struct fd_context *ctx) - A3XX_VSC_PIPE_CONFIG_Y(0) | - 
A3XX_VSC_PIPE_CONFIG_W(gmem->nbins_x) | - A3XX_VSC_PIPE_CONFIG_H(gmem->nbins_y)); -- OUT_RELOC(ring, bo, 0, 0); /* VSC_PIPE[0].DATA_ADDRESS */ -+ OUT_RELOC(ring, bo, 0, 0, 0); /* VSC_PIPE[0].DATA_ADDRESS */ - OUT_RING(ring, fd_bo_size(bo) - 32); /* VSC_PIPE[0].DATA_LENGTH */ - - for (i = 1; i < 8; i++) { -diff --git a/src/gallium/drivers/freedreno/a3xx/fd3_program.c b/src/gallium/drivers/freedreno/a3xx/fd3_program.c -index 259c2dd..c6c51b1 100644 ---- a/src/gallium/drivers/freedreno/a3xx/fd3_program.c -+++ b/src/gallium/drivers/freedreno/a3xx/fd3_program.c -@@ -320,7 +320,7 @@ fd3_program_emit(struct fd_ringbuffer *ring, - OUT_PKT0(ring, REG_A3XX_SP_VS_OBJ_OFFSET_REG, 2); - OUT_RING(ring, A3XX_SP_VS_OBJ_OFFSET_REG_CONSTOBJECTOFFSET(0) | - A3XX_SP_VS_OBJ_OFFSET_REG_SHADEROBJOFFSET(0)); -- OUT_RELOC(ring, vp->bo, 0, 0); /* SP_VS_OBJ_START_REG */ -+ OUT_RELOC(ring, vp->bo, 0, 0, 0); /* SP_VS_OBJ_START_REG */ - #endif - - OUT_PKT0(ring, REG_A3XX_SP_FS_LENGTH_REG, 1); -@@ -345,7 +345,7 @@ fd3_program_emit(struct fd_ringbuffer *ring, - OUT_PKT0(ring, REG_A3XX_SP_FS_OBJ_OFFSET_REG, 2); - OUT_RING(ring, A3XX_SP_FS_OBJ_OFFSET_REG_CONSTOBJECTOFFSET(128) | - A3XX_SP_FS_OBJ_OFFSET_REG_SHADEROBJOFFSET(128 - fp->instrlen)); -- OUT_RELOC(ring, fp->bo, 0, 0); /* SP_FS_OBJ_START_REG */ -+ OUT_RELOC(ring, fp->bo, 0, 0, 0); /* SP_FS_OBJ_START_REG */ - #endif - - OUT_PKT0(ring, REG_A3XX_SP_FS_FLAT_SHAD_MODE_REG_0, 2); -diff --git a/src/gallium/drivers/freedreno/freedreno_draw.c b/src/gallium/drivers/freedreno/freedreno_draw.c -index d4f8d34..4a98ab4 100644 ---- a/src/gallium/drivers/freedreno/freedreno_draw.c -+++ b/src/gallium/drivers/freedreno/freedreno_draw.c -@@ -104,7 +104,7 @@ fd_draw_emit(struct fd_context *ctx, const struct pipe_draw_info *info) - src_sel, idx_type, IGNORE_VISIBILITY)); - OUT_RING(ring, info->count); /* NumIndices */ - if (info->indexed) { -- OUT_RELOC(ring, idx_bo, idx_offset, 0); -+ OUT_RELOC(ring, idx_bo, idx_offset, 0, 0); - OUT_RING (ring, idx_size); - } 
- } -diff --git a/src/gallium/drivers/freedreno/freedreno_resource.c b/src/gallium/drivers/freedreno/freedreno_resource.c -index 1b1eaa5..3e051ea 100644 ---- a/src/gallium/drivers/freedreno/freedreno_resource.c -+++ b/src/gallium/drivers/freedreno/freedreno_resource.c -@@ -59,6 +59,9 @@ fd_resource_transfer_unmap(struct pipe_context *pctx, - struct pipe_transfer *ptrans) - { - struct fd_context *ctx = fd_context(pctx); -+ struct fd_resource *rsc = fd_resource(ptrans->resource); -+ if (!(ptrans->usage & PIPE_TRANSFER_UNSYNCHRONIZED)) -+ fd_bo_cpu_fini(rsc->bo); - pipe_resource_reference(&ptrans->resource, NULL); - util_slab_free(&ctx->transfer_pool, ptrans); - } -@@ -74,12 +77,13 @@ fd_resource_transfer_map(struct pipe_context *pctx, - struct fd_resource *rsc = fd_resource(prsc); - struct pipe_transfer *ptrans = util_slab_alloc(&ctx->transfer_pool); - enum pipe_format format = prsc->format; -+ uint32_t op = 0; - char *buf; - - if (!ptrans) - return NULL; - -- /* util_slap_alloc() doesn't zero: */ -+ /* util_slab_alloc() doesn't zero: */ - memset(ptrans, 0, sizeof(*ptrans)); - - pipe_resource_reference(&ptrans->resource, prsc); -@@ -90,7 +94,8 @@ fd_resource_transfer_map(struct pipe_context *pctx, - ptrans->layer_stride = ptrans->stride; - - /* some state trackers (at least XA) don't do this.. 
*/ -- fd_resource_transfer_flush_region(pctx, ptrans, box); -+ if (!(usage & PIPE_TRANSFER_FLUSH_EXPLICIT)) -+ fd_resource_transfer_flush_region(pctx, ptrans, box); - - buf = fd_bo_map(rsc->bo); - if (!buf) { -@@ -98,6 +103,15 @@ fd_resource_transfer_map(struct pipe_context *pctx, - return NULL; - } - -+ if (usage & PIPE_TRANSFER_READ) -+ op |= DRM_FREEDRENO_PREP_READ; -+ -+ if (usage & PIPE_TRANSFER_WRITE) -+ op |= DRM_FREEDRENO_PREP_WRITE; -+ -+ if (!(usage & PIPE_TRANSFER_UNSYNCHRONIZED)) -+ fd_bo_cpu_prep(rsc->bo, ctx->screen->pipe, op); -+ - *pptrans = ptrans; - - return buf + -diff --git a/src/gallium/drivers/freedreno/freedreno_util.h b/src/gallium/drivers/freedreno/freedreno_util.h -index 9f10686..7bbbe80 100644 ---- a/src/gallium/drivers/freedreno/freedreno_util.h -+++ b/src/gallium/drivers/freedreno/freedreno_util.h -@@ -104,25 +104,36 @@ OUT_RING(struct fd_ringbuffer *ring, uint32_t data) - - static inline void - OUT_RELOC(struct fd_ringbuffer *ring, struct fd_bo *bo, -- uint32_t offset, uint32_t or) -+ uint32_t offset, uint32_t or, int32_t shift) - { - if (LOG_DWORDS) { -- DBG("ring[%p]: OUT_RELOC %04x: %p+%u", ring, -- (uint32_t)(ring->cur - ring->last_start), bo, offset); -+ DBG("ring[%p]: OUT_RELOC %04x: %p+%u << %d", ring, -+ (uint32_t)(ring->cur - ring->last_start), bo, offset, shift); - } -- fd_ringbuffer_emit_reloc(ring, bo, offset, or); -+ fd_ringbuffer_reloc(ring, &(struct fd_reloc){ -+ .bo = bo, -+ .flags = FD_RELOC_READ, -+ .offset = offset, -+ .or = or, -+ .shift = shift, -+ }); - } - --/* shifted reloc: */ - static inline void --OUT_RELOCS(struct fd_ringbuffer *ring, struct fd_bo *bo, -+OUT_RELOCW(struct fd_ringbuffer *ring, struct fd_bo *bo, - uint32_t offset, uint32_t or, int32_t shift) - { - if (LOG_DWORDS) { -- DBG("ring[%p]: OUT_RELOCS %04x: %p+%u << %d", ring, -+ DBG("ring[%p]: OUT_RELOC %04x: %p+%u << %d", ring, - (uint32_t)(ring->cur - ring->last_start), bo, offset, shift); - } -- fd_ringbuffer_emit_reloc_shift(ring, bo, offset, or, 
shift); -+ fd_ringbuffer_reloc(ring, &(struct fd_reloc){ -+ .bo = bo, -+ .flags = FD_RELOC_READ | FD_RELOC_WRITE, -+ .offset = offset, -+ .or = or, -+ .shift = shift, -+ }); - } - - static inline void BEGIN_RING(struct fd_ringbuffer *ring, uint32_t ndwords) -@@ -155,7 +166,7 @@ OUT_IB(struct fd_ringbuffer *ring, struct fd_ringmarker *start, - struct fd_ringmarker *end) - { - OUT_PKT3(ring, CP_INDIRECT_BUFFER_PFD, 2); -- fd_ringbuffer_emit_reloc_ring(ring, start); -+ fd_ringbuffer_emit_reloc_ring(ring, start, end); - OUT_RING(ring, fd_ringmarker_dwords(start, end)); - } - --- -1.8.4.2 - diff --git a/mesa.spec b/mesa.spec index 0619a6c..e941de1 100644 --- a/mesa.spec +++ b/mesa.spec @@ -48,12 +48,12 @@ %define _default_patch_fuzz 2 -%define gitdate 20131114 +%define gitdate 20131128 #% define snapshot Summary: Mesa graphics libraries Name: mesa -Version: 9.2.3 +Version: 9.2.4 Release: 1.%{gitdate}%{?dist} License: MIT Group: System Environment/Libraries @@ -77,25 +77,6 @@ Patch15: mesa-9.2-hardware-float.patch Patch16: mesa-9.2-no-useless-vdpau.patch Patch20: mesa-9.2-evergreen-big-endian.patch -# https://bugs.freedesktop.org/show_bug.cgi?id=71573 -Patch21: 0001-freedreno-a3xx-fix-color-inversion-on-mem-gmem-resto.patch -Patch22: 0002-freedreno-a3xx-fix-viewport-on-gmem-mem-resolve.patch -Patch23: 0003-freedreno-add-debug-option-to-disable-scissor-optimi.patch -Patch24: 0004-freedreno-update-register-headers.patch -Patch25: 0005-freedreno-a3xx-some-texture-fixes.patch -Patch26: 0006-freedreno-a3xx-compiler-fix-CMP.patch -Patch27: 0007-freedreno-a3xx-compiler-handle-saturate-on-dst.patch -Patch28: 0008-freedreno-a3xx-compiler-use-max_reg-rather-than-file.patch -Patch29: 0009-freedreno-a3xx-compiler-cat4-cannot-use-const-reg-as.patch -Patch30: 0010-freedreno-fix-segfault-when-no-color-buffer-bound.patch -Patch31: 0011-freedreno-a3xx-compiler-make-compiler-errors-more-us.patch -Patch32: 0012-freedreno-a3xx-compiler-bit-of-re-arrange-cleanup.patch -Patch33: 
0013-freedreno-a3xx-compiler-fix-SGT-SLT-etc.patch -Patch34: 0014-freedreno-a3xx-don-t-leak-so-much.patch -Patch35: 0015-freedreno-a3xx-compiler-better-const-handling.patch -Patch36: 0016-freedreno-a3xx-compiler-handle-sync-flags-better.patch -Patch37: 0017-freedreno-updates-for-msm-drm-kms-driver.patch - BuildRequires: pkgconfig autoconf automake libtool %if %{with_hardware} BuildRequires: kernel-headers @@ -321,24 +302,6 @@ grep -q ^/ src/gallium/auxiliary/vl/vl_decoder.c && exit 1 %patch16 -p1 -b .vdpau %patch20 -p1 -b .egbe -%patch21 -p1 -%patch22 -p1 -%patch23 -p1 -%patch24 -p1 -%patch25 -p1 -%patch26 -p1 -%patch27 -p1 -%patch28 -p1 -%patch29 -p1 -%patch30 -p1 -%patch31 -p1 -%patch32 -p1 -%patch33 -p1 -%patch34 -p1 -%patch35 -p1 -%patch36 -p1 -%patch37 -p1 - %if 0%{with_private_llvm} sed -i 's/llvm-config/mesa-private-llvm-config-%{__isa_bits}/g' configure.ac sed -i 's/`$LLVM_CONFIG --version`/&-mesa/' configure.ac @@ -637,6 +600,9 @@ rm -rf $RPM_BUILD_ROOT %endif %changelog +* Thu Nov 28 2013 Igor Gnatenko - 9.2.4-1.20131128 +- 9.2.4 upstream release + * Thu Nov 14 2013 Igor Gnatenko - 9.2.3-1.20131114 - 9.2.3 upstream release diff --git a/sources b/sources index caf10db..f4bcce4 100644 --- a/sources +++ b/sources @@ -1 +1 @@ -54f46fc070303e0d467779ab39103d58 mesa-20131114.tar.xz +0f501dfd50b6094774b0d564d745a285 mesa-20131128.tar.xz