1
0
forked from rpms/mesa

9.2.3 upstream release

Signed-off-by: Igor Gnatenko <i.gnatenko.brain@gmail.com>
This commit is contained in:
Igor Gnatenko 2013-11-14 15:41:45 +04:00
parent ea1c6e950a
commit 8002493cec
20 changed files with 3736 additions and 3 deletions

1
.gitignore vendored
View File

@ -57,3 +57,4 @@ mesa-20100720.tar.bz2
/mesa-20130902.tar.xz
/mesa-20130919.tar.xz
/mesa-20131113.tar.xz
/mesa-20131114.tar.xz

View File

@ -0,0 +1,31 @@
From 2d844be97fd5b6b0f02a94d4bb194c0bd19de6f9 Mon Sep 17 00:00:00 2001
From: Rob Clark <robclark@freedesktop.org>
Date: Sat, 13 Jul 2013 13:07:46 -0400
Subject: [PATCH 01/17] freedreno/a3xx: fix color inversion on mem->gmem
restore
Signed-off-by: Rob Clark <robclark@freedesktop.org>
---
src/gallium/drivers/freedreno/a3xx/fd3_emit.c | 6 +++---
1 file changed, 3 insertions(+), 3 deletions(-)
diff --git a/src/gallium/drivers/freedreno/a3xx/fd3_emit.c b/src/gallium/drivers/freedreno/a3xx/fd3_emit.c
index a7a4bf7..b8436c9 100644
--- a/src/gallium/drivers/freedreno/a3xx/fd3_emit.c
+++ b/src/gallium/drivers/freedreno/a3xx/fd3_emit.c
@@ -279,9 +279,9 @@ fd3_emit_gmem_restore_tex(struct fd_ringbuffer *ring, struct pipe_surface *psurf
CP_LOAD_STATE_1_EXT_SRC_ADDR(0));
OUT_RING(ring, A3XX_TEX_CONST_0_FMT(fd3_pipe2tex(psurf->format)) |
0x40000000 | // XXX
- fd3_tex_swiz(psurf->format, PIPE_SWIZZLE_BLUE, PIPE_SWIZZLE_GREEN,
- PIPE_SWIZZLE_RED, PIPE_SWIZZLE_ALPHA));
- OUT_RING(ring, A3XX_TEX_CONST_1_FETCHSIZE(fd3_pipe2fetchsize(psurf->format)) |
+ fd3_tex_swiz(psurf->format, PIPE_SWIZZLE_RED, PIPE_SWIZZLE_GREEN,
+ PIPE_SWIZZLE_BLUE, PIPE_SWIZZLE_ALPHA));
+ OUT_RING(ring, A3XX_TEX_CONST_1_FETCHSIZE(TFETCH_DISABLE) |
A3XX_TEX_CONST_1_WIDTH(psurf->width) |
A3XX_TEX_CONST_1_HEIGHT(psurf->height));
OUT_RING(ring, A3XX_TEX_CONST_2_PITCH(rsc->pitch * rsc->cpp) |
--
1.8.4.2

View File

@ -0,0 +1,32 @@
From b2a32254d65c356604bbffda6e771dca0509e9ed Mon Sep 17 00:00:00 2001
From: Rob Clark <robclark@freedesktop.org>
Date: Sat, 13 Jul 2013 13:08:22 -0400
Subject: [PATCH 02/17] freedreno/a3xx: fix viewport on gmem->mem resolve
Signed-off-by: Rob Clark <robclark@freedesktop.org>
---
src/gallium/drivers/freedreno/a3xx/fd3_gmem.c | 8 ++++++++
1 file changed, 8 insertions(+)
diff --git a/src/gallium/drivers/freedreno/a3xx/fd3_gmem.c b/src/gallium/drivers/freedreno/a3xx/fd3_gmem.c
index 1cb170a..9050166 100644
--- a/src/gallium/drivers/freedreno/a3xx/fd3_gmem.c
+++ b/src/gallium/drivers/freedreno/a3xx/fd3_gmem.c
@@ -168,6 +168,14 @@ fd3_emit_tile_gmem2mem(struct fd_context *ctx, uint32_t xoff, uint32_t yoff,
OUT_PKT0(ring, REG_A3XX_GRAS_CL_CLIP_CNTL, 1);
OUT_RING(ring, 0x00000000); /* GRAS_CL_CLIP_CNTL */
+ OUT_PKT0(ring, REG_A3XX_GRAS_CL_VPORT_XOFFSET, 6);
+ OUT_RING(ring, A3XX_GRAS_CL_VPORT_XOFFSET((float)pfb->width/2.0 - 0.5));
+ OUT_RING(ring, A3XX_GRAS_CL_VPORT_XSCALE((float)pfb->width/2.0));
+ OUT_RING(ring, A3XX_GRAS_CL_VPORT_YOFFSET((float)pfb->height/2.0 - 0.5));
+ OUT_RING(ring, A3XX_GRAS_CL_VPORT_YSCALE(-(float)pfb->height/2.0));
+ OUT_RING(ring, A3XX_GRAS_CL_VPORT_ZOFFSET(0.0));
+ OUT_RING(ring, A3XX_GRAS_CL_VPORT_ZSCALE(1.0));
+
OUT_PKT0(ring, REG_A3XX_RB_MODE_CONTROL, 1);
OUT_RING(ring, A3XX_RB_MODE_CONTROL_RENDER_MODE(RB_RESOLVE_PASS) |
A3XX_RB_MODE_CONTROL_MARB_CACHE_SPLIT_MODE);
--
1.8.4.2

View File

@ -0,0 +1,113 @@
From 8b167d34bebcc9aaf67838be71cc3272728d4fe1 Mon Sep 17 00:00:00 2001
From: Rob Clark <robclark@freedesktop.org>
Date: Wed, 29 May 2013 10:16:33 -0400
Subject: [PATCH 03/17] freedreno: add debug option to disable scissor
optimization
Useful for testing and debugging.
Signed-off-by: Rob Clark <robclark@freedesktop.org>
---
src/gallium/drivers/freedreno/freedreno_gmem.c | 26 +++++++++++++++---------
src/gallium/drivers/freedreno/freedreno_screen.c | 1 +
src/gallium/drivers/freedreno/freedreno_util.h | 9 ++++----
3 files changed, 22 insertions(+), 14 deletions(-)
diff --git a/src/gallium/drivers/freedreno/freedreno_gmem.c b/src/gallium/drivers/freedreno/freedreno_gmem.c
index 12633bd..197d1d9 100644
--- a/src/gallium/drivers/freedreno/freedreno_gmem.c
+++ b/src/gallium/drivers/freedreno/freedreno_gmem.c
@@ -71,7 +71,8 @@ calculate_tiles(struct fd_context *ctx)
{
struct fd_gmem_stateobj *gmem = &ctx->gmem;
struct pipe_scissor_state *scissor = &ctx->max_scissor;
- uint32_t cpp = util_format_get_blocksize(ctx->framebuffer.cbufs[0]->format);
+ struct pipe_framebuffer_state *pfb = &ctx->framebuffer;
+ uint32_t cpp = util_format_get_blocksize(pfb->cbufs[0]->format);
uint32_t gmem_size = ctx->screen->gmemsize_bytes;
uint32_t minx, miny, width, height;
uint32_t nbins_x = 1, nbins_y = 1;
@@ -84,10 +85,17 @@ calculate_tiles(struct fd_context *ctx)
return;
}
- minx = scissor->minx & ~31; /* round down to multiple of 32 */
- miny = scissor->miny & ~31;
- width = scissor->maxx - minx;
- height = scissor->maxy - miny;
+ if (fd_mesa_debug & FD_DBG_DSCIS) {
+ minx = 0;
+ miny = 0;
+ width = pfb->width;
+ height = pfb->height;
+ } else {
+ minx = scissor->minx & ~31; /* round down to multiple of 32 */
+ miny = scissor->miny & ~31;
+ width = scissor->maxx - minx;
+ height = scissor->maxy - miny;
+ }
// TODO we probably could optimize this a bit if we know that
// Z or stencil is not enabled for any of the draw calls..
@@ -132,9 +140,7 @@ static void
render_tiles(struct fd_context *ctx)
{
struct fd_gmem_stateobj *gmem = &ctx->gmem;
- uint32_t i, yoff = 0;
-
- yoff= gmem->miny;
+ uint32_t i, yoff = gmem->miny;
ctx->emit_tile_init(ctx);
@@ -143,13 +149,13 @@ render_tiles(struct fd_context *ctx)
uint32_t bh = gmem->bin_h;
/* clip bin height: */
- bh = MIN2(bh, gmem->height - yoff);
+ bh = MIN2(bh, gmem->miny + gmem->height - yoff);
for (j = 0; j < gmem->nbins_x; j++) {
uint32_t bw = gmem->bin_w;
/* clip bin width: */
- bw = MIN2(bw, gmem->width - xoff);
+ bw = MIN2(bw, gmem->minx + gmem->width - xoff);
DBG("bin_h=%d, yoff=%d, bin_w=%d, xoff=%d",
bh, yoff, bw, xoff);
diff --git a/src/gallium/drivers/freedreno/freedreno_screen.c b/src/gallium/drivers/freedreno/freedreno_screen.c
index 52d51c2..36ef8b0 100644
--- a/src/gallium/drivers/freedreno/freedreno_screen.c
+++ b/src/gallium/drivers/freedreno/freedreno_screen.c
@@ -60,6 +60,7 @@ static const struct debug_named_value debug_options[] = {
{"disasm", FD_DBG_DISASM, "Dump TGSI and adreno shader disassembly"},
{"dclear", FD_DBG_DCLEAR, "Mark all state dirty after clear"},
{"dgmem", FD_DBG_DGMEM, "Mark all state dirty after GMEM tile pass"},
+ {"dscis", FD_DBG_DSCIS, "Disable scissor optimization"},
DEBUG_NAMED_VALUE_END
};
diff --git a/src/gallium/drivers/freedreno/freedreno_util.h b/src/gallium/drivers/freedreno/freedreno_util.h
index f18f0fe..b49cdfc 100644
--- a/src/gallium/drivers/freedreno/freedreno_util.h
+++ b/src/gallium/drivers/freedreno/freedreno_util.h
@@ -47,10 +47,11 @@ enum adreno_pa_su_sc_draw fd_polygon_mode(unsigned mode);
enum adreno_stencil_op fd_stencil_op(unsigned op);
-#define FD_DBG_MSGS 0x1
-#define FD_DBG_DISASM 0x2
-#define FD_DBG_DCLEAR 0x4
-#define FD_DBG_DGMEM 0x8
+#define FD_DBG_MSGS 0x01
+#define FD_DBG_DISASM 0x02
+#define FD_DBG_DCLEAR 0x04
+#define FD_DBG_DGMEM 0x08
+#define FD_DBG_DSCIS 0x10
extern int fd_mesa_debug;
#define DBG(fmt, ...) \
--
1.8.4.2

File diff suppressed because it is too large Load Diff

View File

@ -0,0 +1,65 @@
From 3da8868b5df98d8544091feeea7b6bb0f736324f Mon Sep 17 00:00:00 2001
From: Rob Clark <robclark@freedesktop.org>
Date: Mon, 5 Aug 2013 18:03:33 -0400
Subject: [PATCH 05/17] freedreno/a3xx: some texture fixes
Stop hard coding bits that indicate texture type (2d/3d/cube/etc).
Signed-off-by: Rob Clark <robclark@freedesktop.org>
---
src/gallium/drivers/freedreno/a3xx/fd3_texture.c | 25 +++++++++++++++++++++++-
1 file changed, 24 insertions(+), 1 deletion(-)
diff --git a/src/gallium/drivers/freedreno/a3xx/fd3_texture.c b/src/gallium/drivers/freedreno/a3xx/fd3_texture.c
index ae08b8a..e56325b 100644
--- a/src/gallium/drivers/freedreno/a3xx/fd3_texture.c
+++ b/src/gallium/drivers/freedreno/a3xx/fd3_texture.c
@@ -87,6 +87,7 @@ fd3_sampler_state_create(struct pipe_context *pctx,
so->base = *cso;
so->texsamp0 =
+ COND(!cso->normalized_coords, A3XX_TEX_SAMP_0_UNNORM_COORDS) |
A3XX_TEX_SAMP_0_XY_MAG(tex_filter(cso->mag_img_filter)) |
A3XX_TEX_SAMP_0_XY_MIN(tex_filter(cso->min_img_filter)) |
A3XX_TEX_SAMP_0_WRAP_S(tex_clamp(cso->wrap_s)) |
@@ -97,6 +98,28 @@ fd3_sampler_state_create(struct pipe_context *pctx,
return so;
}
+static enum a3xx_tex_type
+tex_type(unsigned target)
+{
+ switch (target) {
+ default:
+ assert(0);
+ case PIPE_BUFFER:
+ case PIPE_TEXTURE_1D:
+ case PIPE_TEXTURE_1D_ARRAY:
+ return A3XX_TEX_1D;
+ case PIPE_TEXTURE_RECT:
+ case PIPE_TEXTURE_2D:
+ case PIPE_TEXTURE_2D_ARRAY:
+ return A3XX_TEX_2D;
+ case PIPE_TEXTURE_3D:
+ return A3XX_TEX_3D;
+ case PIPE_TEXTURE_CUBE:
+ case PIPE_TEXTURE_CUBE_ARRAY:
+ return A3XX_TEX_CUBE;
+ }
+}
+
static struct pipe_sampler_view *
fd3_sampler_view_create(struct pipe_context *pctx, struct pipe_resource *prsc,
const struct pipe_sampler_view *cso)
@@ -116,7 +139,7 @@ fd3_sampler_view_create(struct pipe_context *pctx, struct pipe_resource *prsc,
so->tex_resource = rsc;
so->texconst0 =
- 0x40000000 | /* ??? */
+ A3XX_TEX_CONST_0_TYPE(tex_type(prsc->target)) |
A3XX_TEX_CONST_0_FMT(fd3_pipe2tex(cso->format)) |
fd3_tex_swiz(cso->format, cso->swizzle_r, cso->swizzle_g,
cso->swizzle_b, cso->swizzle_a);
--
1.8.4.2

View File

@ -0,0 +1,45 @@
From 83e65320012f327d2e8f1573443b2e20f059e76f Mon Sep 17 00:00:00 2001
From: Rob Clark <robclark@freedesktop.org>
Date: Tue, 20 Aug 2013 13:46:30 -0400
Subject: [PATCH 06/17] freedreno/a3xx/compiler: fix CMP
The 1st src to add.s needs (r) flag (repeat), otherwise it will end up:
add.s dst.xyzw, tmp.xxxx -1
instead of:
add.s dst.xyzw, tmp.xyzw, -1
Also, if we are using a temporary dst to avoid clobbering one of the src
registers, we actually need to use that as the dst for the sel
instruction.
Signed-off-by: Rob Clark <robclark@freedesktop.org>
---
src/gallium/drivers/freedreno/a3xx/fd3_compiler.c | 4 ++--
1 file changed, 2 insertions(+), 2 deletions(-)
diff --git a/src/gallium/drivers/freedreno/a3xx/fd3_compiler.c b/src/gallium/drivers/freedreno/a3xx/fd3_compiler.c
index eabe21c..07bede4 100644
--- a/src/gallium/drivers/freedreno/a3xx/fd3_compiler.c
+++ b/src/gallium/drivers/freedreno/a3xx/fd3_compiler.c
@@ -790,13 +790,13 @@ trans_cmp(const struct instr_translater *t,
instr = ir3_instr_create(ctx->ir, 2, OPC_ADD_S);
instr->repeat = 3;
add_dst_reg(ctx, instr, &tmp_dst, 0);
- add_src_reg(ctx, instr, &tmp_src, 0);
+ add_src_reg(ctx, instr, &tmp_src, 0)->flags |= IR3_REG_R;
ir3_reg_create(instr, 0, IR3_REG_IMMED)->iim_val = -1;
/* sel.{f32,f16} dst, src2, tmp, src1 */
instr = ir3_instr_create(ctx->ir, 3, ctx->so->half_precision ?
OPC_SEL_F16 : OPC_SEL_F32);
- vectorize(ctx, instr, &inst->Dst[0].Register, 3,
+ vectorize(ctx, instr, dst, 3,
&inst->Src[2].Register, 0,
&tmp_src, 0,
&inst->Src[1].Register, 0);
--
1.8.4.2

View File

@ -0,0 +1,98 @@
From c83387438633233ae6bcc55e1f4eaa2793ce7449 Mon Sep 17 00:00:00 2001
From: Rob Clark <robclark@freedesktop.org>
Date: Tue, 20 Aug 2013 13:51:35 -0400
Subject: [PATCH 07/17] freedreno/a3xx/compiler: handle saturate on dst
Sometimes things other than color dst need saturating, like if there is
a 'clamp(foo, 0.0, 1.0)'. So for saturated dst add the extra
instructions to fix up dst.
Signed-off-by: Rob Clark <robclark@freedesktop.org>
---
src/gallium/drivers/freedreno/a3xx/fd3_compiler.c | 49 +++++++++++++++++++++++
1 file changed, 49 insertions(+)
diff --git a/src/gallium/drivers/freedreno/a3xx/fd3_compiler.c b/src/gallium/drivers/freedreno/a3xx/fd3_compiler.c
index 07bede4..e2c7853 100644
--- a/src/gallium/drivers/freedreno/a3xx/fd3_compiler.c
+++ b/src/gallium/drivers/freedreno/a3xx/fd3_compiler.c
@@ -131,6 +131,11 @@ struct fd3_compile_context {
struct tgsi_src_register tmp_src;
};
+
+static void vectorize(struct fd3_compile_context *ctx,
+ struct ir3_instruction *instr, struct tgsi_dst_register *dst,
+ int nsrcs, ...);
+
static unsigned
compile_init(struct fd3_compile_context *ctx, struct fd3_shader_stateobj *so,
const struct tgsi_token *tokens)
@@ -234,6 +239,10 @@ add_src_reg(struct fd3_compile_context *ctx, struct ir3_instruction *instr,
flags |= IR3_REG_CONST;
num = src->Index + ctx->base_reg[src->File];
break;
+ case TGSI_FILE_OUTPUT:
+ /* NOTE: we should only end up w/ OUTPUT file for things like
+ * clamp()'ing saturated dst instructions
+ */
case TGSI_FILE_INPUT:
case TGSI_FILE_TEMPORARY:
num = src->Index + ctx->base_reg[src->File];
@@ -407,6 +416,35 @@ create_mov(struct fd3_compile_context *ctx, struct tgsi_dst_register *dst,
}
+static void
+create_clamp(struct fd3_compile_context *ctx, struct tgsi_dst_register *dst,
+ struct tgsi_src_register *minval, struct tgsi_src_register *maxval)
+{
+ struct ir3_instruction *instr;
+ struct tgsi_src_register src;
+
+ src_from_dst(&src, dst);
+
+ instr = ir3_instr_create(ctx->ir, 2, OPC_MAX_F);
+ vectorize(ctx, instr, dst, 2, &src, 0, minval, 0);
+
+ instr = ir3_instr_create(ctx->ir, 2, OPC_MIN_F);
+ vectorize(ctx, instr, dst, 2, &src, 0, maxval, 0);
+}
+
+static void
+create_clamp_imm(struct fd3_compile_context *ctx,
+ struct tgsi_dst_register *dst,
+ uint32_t minval, uint32_t maxval)
+{
+ struct tgsi_src_register minconst, maxconst;
+
+ get_immediate(ctx, &minconst, minval);
+ get_immediate(ctx, &maxconst, maxval);
+
+ create_clamp(ctx, dst, &minconst, &maxconst);
+}
+
static struct tgsi_dst_register *
get_dst(struct fd3_compile_context *ctx, struct tgsi_full_instruction *inst)
{
@@ -1229,6 +1267,17 @@ compile_instructions(struct fd3_compile_context *ctx)
assert(0);
}
+ switch (inst->Instruction.Saturate) {
+ case TGSI_SAT_ZERO_ONE:
+ create_clamp_imm(ctx, &inst->Dst[0].Register,
+ fui(0.0), fui(1.0));
+ break;
+ case TGSI_SAT_MINUS_PLUS_ONE:
+ create_clamp_imm(ctx, &inst->Dst[0].Register,
+ fui(-1.0), fui(1.0));
+ break;
+ }
+
break;
}
default:
--
1.8.4.2

View File

@ -0,0 +1,59 @@
From 5394a872f30022f64e6b2b58ef983b1fe5f6c08d Mon Sep 17 00:00:00 2001
From: Rob Clark <robclark@freedesktop.org>
Date: Tue, 20 Aug 2013 13:54:01 -0400
Subject: [PATCH 08/17] freedreno/a3xx/compiler: use max_reg rather than
file_count
Our current (rather naive) register assignment is based on mapping
different register files (INPUT, OUTPUT, TEMP, CONST, etc) based on the
max register index of the preceding file. But in some cases, the lowest
used register in a file might not be zero. In which case
file_count[file] != file_max[file] + 1.
Signed-off-by: Rob Clark <robclark@freedesktop.org>
---
src/gallium/drivers/freedreno/a3xx/fd3_compiler.c | 14 +++++++-------
1 file changed, 7 insertions(+), 7 deletions(-)
diff --git a/src/gallium/drivers/freedreno/a3xx/fd3_compiler.c b/src/gallium/drivers/freedreno/a3xx/fd3_compiler.c
index e2c7853..dc5c873 100644
--- a/src/gallium/drivers/freedreno/a3xx/fd3_compiler.c
+++ b/src/gallium/drivers/freedreno/a3xx/fd3_compiler.c
@@ -159,19 +159,19 @@ compile_init(struct fd3_compile_context *ctx, struct fd3_shader_stateobj *so,
/* Immediates go after constants: */
ctx->base_reg[TGSI_FILE_CONSTANT] = 0;
ctx->base_reg[TGSI_FILE_IMMEDIATE] =
- ctx->info.file_count[TGSI_FILE_CONSTANT];
+ ctx->info.file_max[TGSI_FILE_CONSTANT] + 1;
/* Temporaries after outputs after inputs: */
ctx->base_reg[TGSI_FILE_INPUT] = 0;
ctx->base_reg[TGSI_FILE_OUTPUT] =
- ctx->info.file_count[TGSI_FILE_INPUT];
+ ctx->info.file_max[TGSI_FILE_INPUT] + 1;
ctx->base_reg[TGSI_FILE_TEMPORARY] =
- ctx->info.file_count[TGSI_FILE_INPUT] +
- ctx->info.file_count[TGSI_FILE_OUTPUT];
+ ctx->info.file_max[TGSI_FILE_INPUT] + 1 +
+ ctx->info.file_max[TGSI_FILE_OUTPUT] + 1;
so->first_immediate = ctx->base_reg[TGSI_FILE_IMMEDIATE];
- ctx->immediate_idx = 4 * (ctx->info.file_count[TGSI_FILE_CONSTANT] +
- ctx->info.file_count[TGSI_FILE_IMMEDIATE]);
+ ctx->immediate_idx = 4 * (ctx->info.file_max[TGSI_FILE_CONSTANT] + 1 +
+ ctx->info.file_max[TGSI_FILE_IMMEDIATE] + 1);
ret = tgsi_parse_init(&ctx->parser, tokens);
if (ret != TGSI_PARSE_OK)
@@ -309,7 +309,7 @@ get_internal_temp(struct fd3_compile_context *ctx,
/* assign next temporary: */
n = ctx->num_internal_temps++;
- tmp_dst->Index = ctx->info.file_count[TGSI_FILE_TEMPORARY] + n;
+ tmp_dst->Index = ctx->info.file_max[TGSI_FILE_TEMPORARY] + n + 1;
src_from_dst(tmp_src, tmp_dst);
}
--
1.8.4.2

View File

@ -0,0 +1,104 @@
From f3a7e28fe47ec547c1c9b561b04af208ae2f0f04 Mon Sep 17 00:00:00 2001
From: Rob Clark <robclark@freedesktop.org>
Date: Tue, 20 Aug 2013 13:57:22 -0400
Subject: [PATCH 09/17] freedreno/a3xx/compiler: cat4 cannot use const reg as
src
Category 4 instructions (rsq, rcp, sqrt, etc) seem to be unable to take
a const register as src. In these cases we need to move the src to a
temporary gpr first.
This is the second case of such a restriction, where the instruction
encoding appears to support a const src, but in fact the hw appears to
ignore that bit. So split things out into a helper that can be re-used
for any instructions which have this limitation.
Signed-off-by: Rob Clark <robclark@freedesktop.org>
---
src/gallium/drivers/freedreno/a3xx/fd3_compiler.c | 37 +++++++++++++++++------
1 file changed, 27 insertions(+), 10 deletions(-)
diff --git a/src/gallium/drivers/freedreno/a3xx/fd3_compiler.c b/src/gallium/drivers/freedreno/a3xx/fd3_compiler.c
index dc5c873..772c7d2 100644
--- a/src/gallium/drivers/freedreno/a3xx/fd3_compiler.c
+++ b/src/gallium/drivers/freedreno/a3xx/fd3_compiler.c
@@ -135,6 +135,8 @@ struct fd3_compile_context {
static void vectorize(struct fd3_compile_context *ctx,
struct ir3_instruction *instr, struct tgsi_dst_register *dst,
int nsrcs, ...);
+static void create_mov(struct fd3_compile_context *ctx,
+ struct tgsi_dst_register *dst, struct tgsi_src_register *src);
static unsigned
compile_init(struct fd3_compile_context *ctx, struct fd3_shader_stateobj *so,
@@ -374,6 +376,23 @@ get_immediate(struct fd3_compile_context *ctx,
reg->SwizzleW = swiz2tgsi[swiz];
}
+/* for instructions that cannot take a const register as src, if needed
+ * generate a move to temporary gpr:
+ */
+static struct tgsi_src_register *
+get_unconst(struct fd3_compile_context *ctx, struct tgsi_src_register *src,
+ struct tgsi_src_register *tmp_src)
+{
+ static struct tgsi_dst_register tmp_dst;
+ if ((src->File == TGSI_FILE_CONSTANT) ||
+ (src->File == TGSI_FILE_IMMEDIATE)) {
+ get_internal_temp(ctx, &tmp_dst, tmp_src);
+ create_mov(ctx, &tmp_dst, src);
+ src = tmp_src;
+ }
+ return src;
+}
+
static type_t
get_type(struct fd3_compile_context *ctx)
{
@@ -1027,8 +1046,7 @@ instr_cat3(const struct instr_translater *t,
struct tgsi_full_instruction *inst)
{
struct tgsi_dst_register *dst = get_dst(ctx, inst);
- struct tgsi_src_register *src1 = &inst->Src[1].Register;
- struct tgsi_dst_register tmp_dst;
+ struct tgsi_src_register *src1;
struct tgsi_src_register tmp_src;
struct ir3_instruction *instr;
@@ -1038,12 +1056,7 @@ instr_cat3(const struct instr_translater *t,
* const. Not sure if this is a hw bug, or simply that the
* disassembler lies.
*/
- if ((src1->File == TGSI_FILE_CONSTANT) ||
- (src1->File == TGSI_FILE_IMMEDIATE)) {
- get_internal_temp(ctx, &tmp_dst, &tmp_src);
- create_mov(ctx, &tmp_dst, src1);
- src1 = &tmp_src;
- }
+ src1 = get_unconst(ctx, &inst->Src[1].Register, &tmp_src);
instr = ir3_instr_create(ctx->ir, 3,
ctx->so->half_precision ? t->hopc : t->opc);
@@ -1060,13 +1073,17 @@ instr_cat4(const struct instr_translater *t,
struct tgsi_full_instruction *inst)
{
struct tgsi_dst_register *dst = get_dst(ctx, inst);
+ struct tgsi_src_register *src;
+ struct tgsi_src_register tmp_src;
struct ir3_instruction *instr;
+ /* seems like blob compiler avoids const as src.. */
+ src = get_unconst(ctx, &inst->Src[0].Register, &tmp_src);
+
ir3_instr_create(ctx->ir, 0, OPC_NOP)->repeat = 5;
instr = ir3_instr_create(ctx->ir, 4, t->opc);
- vectorize(ctx, instr, dst, 1,
- &inst->Src[0].Register, 0);
+ vectorize(ctx, instr, dst, 1, src, 0);
regmask_set(ctx->needs_ss, instr->regs[0]);
--
1.8.4.2

View File

@ -0,0 +1,216 @@
From 12da4c1a6aa4b2a9cc337f669986a63c59fc3095 Mon Sep 17 00:00:00 2001
From: Rob Clark <robclark@freedesktop.org>
Date: Wed, 21 Aug 2013 13:20:05 -0400
Subject: [PATCH 10/17] freedreno: fix segfault when no color buffer bound
Don't crash when no color buffer bound. Something caught when starting
to run piglit, fixes a hanful of piglit tests.
Signed-off-by: Rob Clark <robclark@freedesktop.org>
---
src/gallium/drivers/freedreno/a2xx/fd2_gmem.c | 6 +++---
src/gallium/drivers/freedreno/a3xx/fd3_gmem.c | 15 +++++++++++----
src/gallium/drivers/freedreno/freedreno_context.c | 3 ++-
src/gallium/drivers/freedreno/freedreno_draw.c | 4 ++--
src/gallium/drivers/freedreno/freedreno_gmem.c | 18 +++++++++++-------
src/gallium/drivers/freedreno/freedreno_state.c | 2 +-
src/gallium/drivers/freedreno/freedreno_util.h | 10 ++++++++++
7 files changed, 40 insertions(+), 18 deletions(-)
diff --git a/src/gallium/drivers/freedreno/a2xx/fd2_gmem.c b/src/gallium/drivers/freedreno/a2xx/fd2_gmem.c
index e239eed..93695bc 100644
--- a/src/gallium/drivers/freedreno/a2xx/fd2_gmem.c
+++ b/src/gallium/drivers/freedreno/a2xx/fd2_gmem.c
@@ -337,7 +337,7 @@ fd2_emit_tile_init(struct fd_context *ctx)
struct fd_ringbuffer *ring = ctx->ring;
struct pipe_framebuffer_state *pfb = &ctx->framebuffer;
struct fd_gmem_stateobj *gmem = &ctx->gmem;
- enum pipe_format format = pfb->cbufs[0]->format;
+ enum pipe_format format = pipe_surface_format(pfb->cbufs[0]);
uint32_t reg;
OUT_PKT3(ring, CP_SET_CONSTANT, 4);
@@ -358,7 +358,7 @@ fd2_emit_tile_prep(struct fd_context *ctx, uint32_t xoff, uint32_t yoff,
{
struct fd_ringbuffer *ring = ctx->ring;
struct pipe_framebuffer_state *pfb = &ctx->framebuffer;
- enum pipe_format format = pfb->cbufs[0]->format;
+ enum pipe_format format = pipe_surface_format(pfb->cbufs[0]);
OUT_PKT3(ring, CP_SET_CONSTANT, 2);
OUT_RING(ring, CP_REG(REG_A2XX_RB_COLOR_INFO));
@@ -379,7 +379,7 @@ fd2_emit_tile_renderprep(struct fd_context *ctx, uint32_t xoff, uint32_t yoff,
{
struct fd_ringbuffer *ring = ctx->ring;
struct pipe_framebuffer_state *pfb = &ctx->framebuffer;
- enum pipe_format format = pfb->cbufs[0]->format;
+ enum pipe_format format = pipe_surface_format(pfb->cbufs[0]);
OUT_PKT3(ring, CP_SET_CONSTANT, 2);
OUT_RING(ring, CP_REG(REG_A2XX_RB_COLOR_INFO));
diff --git a/src/gallium/drivers/freedreno/a3xx/fd3_gmem.c b/src/gallium/drivers/freedreno/a3xx/fd3_gmem.c
index 9050166..b9d0580 100644
--- a/src/gallium/drivers/freedreno/a3xx/fd3_gmem.c
+++ b/src/gallium/drivers/freedreno/a3xx/fd3_gmem.c
@@ -214,8 +214,12 @@ fd3_emit_tile_gmem2mem(struct fd_context *ctx, uint32_t xoff, uint32_t yoff,
}, 1);
if (ctx->resolve & (FD_BUFFER_DEPTH | FD_BUFFER_STENCIL)) {
- uint32_t base = depth_base(&ctx->gmem) *
- fd_resource(pfb->cbufs[0]->texture)->cpp;
+ uint32_t base = 0;
+ if (pfb->cbufs[0]) {
+ struct fd_resource *rsc =
+ fd_resource(pfb->cbufs[0]->texture);
+ base = depth_base(&ctx->gmem) * rsc->cpp;
+ }
emit_gmem2mem_surf(ring, RB_COPY_DEPTH_STENCIL, base, pfb->zsbuf);
}
@@ -410,8 +414,11 @@ static void
fd3_emit_sysmem_prep(struct fd_context *ctx)
{
struct pipe_framebuffer_state *pfb = &ctx->framebuffer;
- struct fd_resource *rsc = fd_resource(pfb->cbufs[0]->texture);
struct fd_ringbuffer *ring = ctx->ring;
+ uint32_t pitch = 0;
+
+ if (pfb->cbufs[0])
+ pitch = fd_resource(pfb->cbufs[0]->texture)->pitch;
fd3_emit_restore(ctx);
@@ -422,7 +429,7 @@ fd3_emit_sysmem_prep(struct fd_context *ctx)
emit_mrt(ring, pfb->nr_cbufs, pfb->cbufs, NULL, 0);
fd3_emit_rbrc_tile_state(ring,
- A3XX_RB_RENDER_CONTROL_BIN_WIDTH(rsc->pitch));
+ A3XX_RB_RENDER_CONTROL_BIN_WIDTH(pitch));
/* setup scissor/offset for current tile: */
OUT_PKT0(ring, REG_A3XX_PA_SC_WINDOW_OFFSET, 1);
diff --git a/src/gallium/drivers/freedreno/freedreno_context.c b/src/gallium/drivers/freedreno/freedreno_context.c
index 44d525b..1d03351 100644
--- a/src/gallium/drivers/freedreno/freedreno_context.c
+++ b/src/gallium/drivers/freedreno/freedreno_context.c
@@ -86,7 +86,8 @@ fd_context_render(struct pipe_context *pctx)
ctx->gmem_reason = 0;
ctx->num_draws = 0;
- fd_resource(pfb->cbufs[0]->texture)->dirty = false;
+ if (pfb->cbufs[0])
+ fd_resource(pfb->cbufs[0]->texture)->dirty = false;
if (pfb->zsbuf)
fd_resource(pfb->zsbuf->texture)->dirty = false;
}
diff --git a/src/gallium/drivers/freedreno/freedreno_draw.c b/src/gallium/drivers/freedreno/freedreno_draw.c
index b02b8b9..d4f8d34 100644
--- a/src/gallium/drivers/freedreno/freedreno_draw.c
+++ b/src/gallium/drivers/freedreno/freedreno_draw.c
@@ -193,8 +193,8 @@ fd_clear(struct pipe_context *pctx, unsigned buffers,
}
DBG("%x depth=%f, stencil=%u (%s/%s)", buffers, depth, stencil,
- util_format_name(pfb->cbufs[0]->format),
- pfb->zsbuf ? util_format_name(pfb->zsbuf->format) : "none");
+ util_format_short_name(pipe_surface_format(pfb->cbufs[0])),
+ util_format_short_name(pipe_surface_format(pfb->zsbuf)));
ctx->clear(ctx, buffers, color, depth, stencil);
diff --git a/src/gallium/drivers/freedreno/freedreno_gmem.c b/src/gallium/drivers/freedreno/freedreno_gmem.c
index 197d1d9..3d959c6 100644
--- a/src/gallium/drivers/freedreno/freedreno_gmem.c
+++ b/src/gallium/drivers/freedreno/freedreno_gmem.c
@@ -72,12 +72,15 @@ calculate_tiles(struct fd_context *ctx)
struct fd_gmem_stateobj *gmem = &ctx->gmem;
struct pipe_scissor_state *scissor = &ctx->max_scissor;
struct pipe_framebuffer_state *pfb = &ctx->framebuffer;
- uint32_t cpp = util_format_get_blocksize(pfb->cbufs[0]->format);
uint32_t gmem_size = ctx->screen->gmemsize_bytes;
uint32_t minx, miny, width, height;
uint32_t nbins_x = 1, nbins_y = 1;
uint32_t bin_w, bin_h;
uint32_t max_width = 992;
+ uint32_t cpp = 4;
+
+ if (pfb->cbufs[0])
+ cpp = util_format_get_blocksize(pfb->cbufs[0]->format);
if ((gmem->cpp == cpp) &&
!memcmp(&gmem->scissor, scissor, sizeof(gmem->scissor))) {
@@ -211,15 +214,15 @@ fd_gmem_render_tiles(struct pipe_context *pctx)
if (sysmem) {
DBG("rendering sysmem (%s/%s)",
- util_format_name(pfb->cbufs[0]->format),
- pfb->zsbuf ? util_format_name(pfb->zsbuf->format) : "none");
+ util_format_short_name(pipe_surface_format(pfb->cbufs[0])),
+ util_format_short_name(pipe_surface_format(pfb->zsbuf)));
render_sysmem(ctx);
} else {
struct fd_gmem_stateobj *gmem = &ctx->gmem;
- DBG("rendering %dx%d tiles (%s/%s)", gmem->nbins_x, gmem->nbins_y,
- util_format_name(pfb->cbufs[0]->format),
- pfb->zsbuf ? util_format_name(pfb->zsbuf->format) : "none");
calculate_tiles(ctx);
+ DBG("rendering %dx%d tiles (%s/%s)", gmem->nbins_x, gmem->nbins_y,
+ util_format_short_name(pipe_surface_format(pfb->cbufs[0])),
+ util_format_short_name(pipe_surface_format(pfb->zsbuf)));
render_tiles(ctx);
}
@@ -231,7 +234,8 @@ fd_gmem_render_tiles(struct pipe_context *pctx)
/* update timestamps on render targets: */
timestamp = fd_ringbuffer_timestamp(ctx->ring);
- fd_resource(pfb->cbufs[0]->texture)->timestamp = timestamp;
+ if (pfb->cbufs[0])
+ fd_resource(pfb->cbufs[0]->texture)->timestamp = timestamp;
if (pfb->zsbuf)
fd_resource(pfb->zsbuf->texture)->timestamp = timestamp;
diff --git a/src/gallium/drivers/freedreno/freedreno_state.c b/src/gallium/drivers/freedreno/freedreno_state.c
index 2f5d52c..f5290a9 100644
--- a/src/gallium/drivers/freedreno/freedreno_state.c
+++ b/src/gallium/drivers/freedreno/freedreno_state.c
@@ -120,7 +120,7 @@ fd_set_framebuffer_state(struct pipe_context *pctx,
unsigned i;
DBG("%d: cbufs[0]=%p, zsbuf=%p", ctx->needs_flush,
- cso->cbufs[0], cso->zsbuf);
+ framebuffer->cbufs[0], framebuffer->zsbuf);
fd_context_render(pctx);
diff --git a/src/gallium/drivers/freedreno/freedreno_util.h b/src/gallium/drivers/freedreno/freedreno_util.h
index 22857d2..9f10686 100644
--- a/src/gallium/drivers/freedreno/freedreno_util.h
+++ b/src/gallium/drivers/freedreno/freedreno_util.h
@@ -33,6 +33,7 @@
#include <freedreno_ringbuffer.h>
#include "pipe/p_format.h"
+#include "pipe/p_state.h"
#include "util/u_debug.h"
#include "util/u_math.h"
#include "util/u_half.h"
@@ -79,6 +80,15 @@ static inline uint32_t DRAW(enum pc_di_primtype prim_type,
(1 << 14);
}
+
+static inline enum pipe_format
+pipe_surface_format(struct pipe_surface *psurf)
+{
+ if (!psurf)
+ return PIPE_FORMAT_NONE;
+ return psurf->format;
+}
+
#define LOG_DWORDS 0
--
1.8.4.2

View File

@ -0,0 +1,172 @@
From c726a6a907f119dfc4fb1c26fef7babf51dc1dea Mon Sep 17 00:00:00 2001
From: Rob Clark <robclark@freedesktop.org>
Date: Sat, 24 Aug 2013 12:56:22 -0400
Subject: [PATCH 11/17] freedreno/a3xx/compiler: make compiler errors more
useful
We probably should get rid of assert() entirely, but at this stage it is
more useful for things to crash where we can catch it in a debugger.
With compile_error() we have a single place to set an error flag (to
bail out and return an error on the next instruction) so that will be a
small change later when enough of the compiler bugs are sorted.
But re-arrange/cleanup the error/assert stuff so we at least get a dump
of the TGSI that triggered it. So we see some useful output in piglit
logs.
Signed-off-by: Rob Clark <robclark@freedesktop.org>
---
src/gallium/drivers/freedreno/a3xx/fd3_compiler.c | 47 +++++++++++++++--------
src/gallium/drivers/freedreno/a3xx/ir-a3xx.h | 3 +-
2 files changed, 33 insertions(+), 17 deletions(-)
diff --git a/src/gallium/drivers/freedreno/a3xx/fd3_compiler.c b/src/gallium/drivers/freedreno/a3xx/fd3_compiler.c
index 772c7d2..e6c5bb7 100644
--- a/src/gallium/drivers/freedreno/a3xx/fd3_compiler.c
+++ b/src/gallium/drivers/freedreno/a3xx/fd3_compiler.c
@@ -185,6 +185,21 @@ compile_init(struct fd3_compile_context *ctx, struct fd3_shader_stateobj *so,
}
static void
+compile_error(struct fd3_compile_context *ctx, const char *format, ...)
+{
+ va_list ap;
+ va_start(ap, format);
+ _debug_vprintf(format, ap);
+ va_end(ap);
+ tgsi_dump(ctx->tokens, 0);
+ assert(0);
+}
+
+#define compile_assert(ctx, cond) do { \
+ if (!(cond)) compile_error((ctx), "failed assert: "#cond"\n"); \
+ } while (0)
+
+static void
compile_free(struct fd3_compile_context *ctx)
{
tgsi_parse_free(&ctx->parser);
@@ -212,9 +227,8 @@ add_dst_reg(struct fd3_compile_context *ctx, struct ir3_instruction *instr,
num = dst->Index + ctx->base_reg[dst->File];
break;
default:
- DBG("unsupported dst register file: %s",
+ compile_error(ctx, "unsupported dst register file: %s\n",
tgsi_file_name(dst->File));
- assert(0);
break;
}
@@ -250,9 +264,8 @@ add_src_reg(struct fd3_compile_context *ctx, struct ir3_instruction *instr,
num = src->Index + ctx->base_reg[src->File];
break;
default:
- DBG("unsupported src register file: %s",
+ compile_error(ctx, "unsupported src register file: %s\n",
tgsi_file_name(src->File));
- assert(0);
break;
}
@@ -329,6 +342,13 @@ get_internal_temp_repl(struct fd3_compile_context *ctx,
tmp_src->SwizzleZ = tmp_src->SwizzleW = TGSI_SWIZZLE_X;
}
+static inline bool
+is_const(struct tgsi_src_register *src)
+{
+ return (src->File == TGSI_FILE_CONSTANT) ||
+ (src->File == TGSI_FILE_IMMEDIATE);
+}
+
static void
get_immediate(struct fd3_compile_context *ctx,
struct tgsi_src_register *reg, uint32_t val)
@@ -578,8 +598,7 @@ trans_dotp(const struct instr_translater *t,
* is a const. Not sure if this is a hw bug, or simply that the
* disassembler lies.
*/
- if ((src1->File == TGSI_FILE_IMMEDIATE) ||
- (src1->File == TGSI_FILE_CONSTANT)) {
+ if (is_const(src1)) {
/* the mov to tmp unswizzles src1, so now we have tmp.xyzw:
*/
@@ -768,7 +787,7 @@ trans_samp(const struct instr_translater *t,
flags |= IR3_INSTR_P;
break;
default:
- assert(0);
+ compile_assert(ctx, 0);
break;
}
@@ -1187,7 +1206,7 @@ decl_out(struct fd3_compile_context *ctx, struct tgsi_full_declaration *decl)
unsigned name = decl->Semantic.Name;
unsigned i;
- assert(decl->Declaration.Semantic); // TODO is this ever not true?
+ compile_assert(ctx, decl->Declaration.Semantic); // TODO is this ever not true?
DBG("decl out[%d] -> r%d", name, decl->Range.First + base); // XXX
@@ -1207,9 +1226,8 @@ decl_out(struct fd3_compile_context *ctx, struct tgsi_full_declaration *decl)
so->outputs[so->outputs_count++].regid = regid(i + base, 0);
break;
default:
- DBG("unknown VS semantic name: %s",
+ compile_error(ctx, "unknown VS semantic name: %s\n",
tgsi_semantic_names[name]);
- assert(0);
}
} else {
switch (name) {
@@ -1217,9 +1235,8 @@ decl_out(struct fd3_compile_context *ctx, struct tgsi_full_declaration *decl)
so->color_regid = regid(decl->Range.First + base, 0);
break;
default:
- DBG("unknown VS semantic name: %s",
+ compile_error(ctx, "unknown VS semantic name: %s\n",
tgsi_semantic_names[name]);
- assert(0);
}
}
}
@@ -1278,10 +1295,8 @@ compile_instructions(struct fd3_compile_context *ctx)
t->fxn(t, ctx, inst);
ctx->num_internal_temps = 0;
} else {
- debug_printf("unknown TGSI opc: %s\n",
+ compile_error(ctx, "unknown TGSI opc: %s\n",
tgsi_get_opcode_name(opc));
- tgsi_dump(ctx->tokens, 0);
- assert(0);
}
switch (inst->Instruction.Saturate) {
@@ -1319,6 +1334,8 @@ fd3_compile_shader(struct fd3_shader_stateobj *so,
so->ir = ir3_shader_create();
+ assert(so->ir);
+
so->color_regid = regid(63,0);
so->pos_regid = regid(63,0);
so->psize_regid = regid(63,0);
diff --git a/src/gallium/drivers/freedreno/a3xx/ir-a3xx.h b/src/gallium/drivers/freedreno/a3xx/ir-a3xx.h
index 2fedc7b..61c01a7 100644
--- a/src/gallium/drivers/freedreno/a3xx/ir-a3xx.h
+++ b/src/gallium/drivers/freedreno/a3xx/ir-a3xx.h
@@ -166,8 +166,7 @@ struct ir3_instruction {
};
};
-/* this is just large to cope w/ the large test *.asm: */
-#define MAX_INSTRS 10240
+#define MAX_INSTRS 1024
struct ir3_shader {
unsigned instrs_count;
--
1.8.4.2

View File

@ -0,0 +1,420 @@
From ca5514b85161d480fb711ac26d74fc447e1e9bda Mon Sep 17 00:00:00 2001
From: Rob Clark <robclark@freedesktop.org>
Date: Sat, 24 Aug 2013 13:00:07 -0400
Subject: [PATCH 12/17] freedreno/a3xx/compiler: bit of re-arrange/cleanup
It seems there are a number of cases where instructions have limitations
about taking reading src's from const register file, so make
get_unconst() a bit easier to use.
Signed-off-by: Rob Clark <robclark@freedesktop.org>
---
src/gallium/drivers/freedreno/a3xx/fd3_compiler.c | 132 ++++++++++++----------
1 file changed, 71 insertions(+), 61 deletions(-)
diff --git a/src/gallium/drivers/freedreno/a3xx/fd3_compiler.c b/src/gallium/drivers/freedreno/a3xx/fd3_compiler.c
index e6c5bb7..b5cdda8 100644
--- a/src/gallium/drivers/freedreno/a3xx/fd3_compiler.c
+++ b/src/gallium/drivers/freedreno/a3xx/fd3_compiler.c
@@ -91,6 +91,7 @@ struct fd3_compile_context {
unsigned next_inloc;
unsigned num_internal_temps;
+ struct tgsi_src_register internal_temps[6];
/* track registers which need to synchronize w/ "complex alu" cat3
* instruction pipeline:
@@ -128,7 +129,7 @@ struct fd3_compile_context {
* up the vector operation
*/
struct tgsi_dst_register tmp_dst;
- struct tgsi_src_register tmp_src;
+ struct tgsi_src_register *tmp_src;
};
@@ -309,11 +310,11 @@ src_from_dst(struct tgsi_src_register *src, struct tgsi_dst_register *dst)
/* Get internal-temp src/dst to use for a sequence of instructions
* generated by a single TGSI op.
*/
-static void
+static struct tgsi_src_register *
get_internal_temp(struct fd3_compile_context *ctx,
- struct tgsi_dst_register *tmp_dst,
- struct tgsi_src_register *tmp_src)
+ struct tgsi_dst_register *tmp_dst)
{
+ struct tgsi_src_register *tmp_src;
int n;
tmp_dst->File = TGSI_FILE_TEMPORARY;
@@ -323,23 +324,28 @@ get_internal_temp(struct fd3_compile_context *ctx,
/* assign next temporary: */
n = ctx->num_internal_temps++;
+ compile_assert(ctx, n < ARRAY_SIZE(ctx->internal_temps));
+ tmp_src = &ctx->internal_temps[n];
tmp_dst->Index = ctx->info.file_max[TGSI_FILE_TEMPORARY] + n + 1;
src_from_dst(tmp_src, tmp_dst);
+
+ return tmp_src;
}
/* same as get_internal_temp, but w/ src.xxxx (for instructions that
* replicate their results)
*/
-static void
+static struct tgsi_src_register *
get_internal_temp_repl(struct fd3_compile_context *ctx,
- struct tgsi_dst_register *tmp_dst,
- struct tgsi_src_register *tmp_src)
+ struct tgsi_dst_register *tmp_dst)
{
- get_internal_temp(ctx, tmp_dst, tmp_src);
+ struct tgsi_src_register *tmp_src =
+ get_internal_temp(ctx, tmp_dst);
tmp_src->SwizzleX = tmp_src->SwizzleY =
tmp_src->SwizzleZ = tmp_src->SwizzleW = TGSI_SWIZZLE_X;
+ return tmp_src;
}
static inline bool
@@ -349,6 +355,22 @@ is_const(struct tgsi_src_register *src)
(src->File == TGSI_FILE_IMMEDIATE);
}
+/* for instructions that cannot take a const register as src, if needed
+ * generate a move to temporary gpr:
+ */
+static struct tgsi_src_register *
+get_unconst(struct fd3_compile_context *ctx, struct tgsi_src_register *src)
+{
+ if (is_const(src)) {
+ static struct tgsi_dst_register tmp_dst;
+ struct tgsi_src_register *tmp_src =
+ get_internal_temp(ctx, &tmp_dst);
+ create_mov(ctx, &tmp_dst, src);
+ src = tmp_src;
+ }
+ return src;
+}
+
static void
get_immediate(struct fd3_compile_context *ctx,
struct tgsi_src_register *reg, uint32_t val)
@@ -396,27 +418,16 @@ get_immediate(struct fd3_compile_context *ctx,
reg->SwizzleW = swiz2tgsi[swiz];
}
-/* for instructions that cannot take a const register as src, if needed
- * generate a move to temporary gpr:
- */
-static struct tgsi_src_register *
-get_unconst(struct fd3_compile_context *ctx, struct tgsi_src_register *src,
- struct tgsi_src_register *tmp_src)
+static type_t
+get_ftype(struct fd3_compile_context *ctx)
{
- static struct tgsi_dst_register tmp_dst;
- if ((src->File == TGSI_FILE_CONSTANT) ||
- (src->File == TGSI_FILE_IMMEDIATE)) {
- get_internal_temp(ctx, &tmp_dst, tmp_src);
- create_mov(ctx, &tmp_dst, src);
- src = tmp_src;
- }
- return src;
+ return ctx->so->half_precision ? TYPE_F16 : TYPE_F32;
}
static type_t
-get_type(struct fd3_compile_context *ctx)
+get_utype(struct fd3_compile_context *ctx)
{
- return ctx->so->half_precision ? TYPE_F16 : TYPE_F32;
+ return ctx->so->half_precision ? TYPE_U16 : TYPE_U32;
}
static unsigned
@@ -436,7 +447,7 @@ static void
create_mov(struct fd3_compile_context *ctx, struct tgsi_dst_register *dst,
struct tgsi_src_register *src)
{
- type_t type_mov = get_type(ctx);
+ type_t type_mov = get_ftype(ctx);
unsigned i;
for (i = 0; i < 4; i++) {
@@ -492,7 +503,7 @@ get_dst(struct fd3_compile_context *ctx, struct tgsi_full_instruction *inst)
for (i = 0; i < inst->Instruction.NumSrcRegs; i++) {
struct tgsi_src_register *src = &inst->Src[i].Register;
if ((src->File == dst->File) && (src->Index == dst->Index)) {
- get_internal_temp(ctx, &ctx->tmp_dst, &ctx->tmp_src);
+ ctx->tmp_src = get_internal_temp(ctx, &ctx->tmp_dst);
ctx->tmp_dst.WriteMask = dst->WriteMask;
dst = &ctx->tmp_dst;
break;
@@ -507,7 +518,7 @@ put_dst(struct fd3_compile_context *ctx, struct tgsi_full_instruction *inst,
{
/* if necessary, add mov back into original dst: */
if (dst != &inst->Dst[0].Register) {
- create_mov(ctx, &inst->Dst[0].Register, &ctx->tmp_src);
+ create_mov(ctx, &inst->Dst[0].Register, ctx->tmp_src);
}
}
@@ -580,7 +591,7 @@ trans_dotp(const struct instr_translater *t,
{
struct ir3_instruction *instr;
struct tgsi_dst_register tmp_dst;
- struct tgsi_src_register tmp_src;
+ struct tgsi_src_register *tmp_src;
struct tgsi_dst_register *dst = &inst->Dst[0].Register;
struct tgsi_src_register *src0 = &inst->Src[0].Register;
struct tgsi_src_register *src1 = &inst->Src[1].Register;
@@ -590,7 +601,7 @@ trans_dotp(const struct instr_translater *t,
unsigned n = t->arg; /* number of components */
unsigned i;
- get_internal_temp_repl(ctx, &tmp_dst, &tmp_src);
+ tmp_src = get_internal_temp_repl(ctx, &tmp_dst);
/* Blob compiler never seems to use a const in src1 position for
* mad.*, although there does seem (according to disassembler
@@ -609,7 +620,7 @@ trans_dotp(const struct instr_translater *t,
* because after that point we no longer need tmp.x:
*/
create_mov(ctx, &tmp_dst, src1);
- src1 = &tmp_src;
+ src1 = tmp_src;
}
instr = ir3_instr_create(ctx->ir, 2, OPC_MUL_F);
@@ -624,7 +635,7 @@ trans_dotp(const struct instr_translater *t,
add_dst_reg(ctx, instr, &tmp_dst, 0);
add_src_reg(ctx, instr, src0, swiz0[i]);
add_src_reg(ctx, instr, src1, swiz1[i]);
- add_src_reg(ctx, instr, &tmp_src, 0);
+ add_src_reg(ctx, instr, tmp_src, 0);
}
/* DPH(a,b) = (a.x * b.x) + (a.y * b.y) + (a.z * b.z) + b.w */
@@ -634,7 +645,7 @@ trans_dotp(const struct instr_translater *t,
instr = ir3_instr_create(ctx->ir, 2, OPC_ADD_F);
add_dst_reg(ctx, instr, &tmp_dst, 0);
add_src_reg(ctx, instr, src1, swiz1[i]);
- add_src_reg(ctx, instr, &tmp_src, 0);
+ add_src_reg(ctx, instr, tmp_src, 0);
n++;
}
@@ -646,7 +657,7 @@ trans_dotp(const struct instr_translater *t,
ir3_instr_create(ctx->ir, 0, OPC_NOP);
}
- create_mov(ctx, dst, &tmp_src);
+ create_mov(ctx, dst, tmp_src);
}
/* LRP(a,b,c) = (a * b) + ((1 - a) * c) */
@@ -657,11 +668,11 @@ trans_lrp(const struct instr_translater *t,
{
struct ir3_instruction *instr;
struct tgsi_dst_register tmp_dst1, tmp_dst2;
- struct tgsi_src_register tmp_src1, tmp_src2;
+ struct tgsi_src_register *tmp_src1, *tmp_src2;
struct tgsi_src_register tmp_const;
- get_internal_temp(ctx, &tmp_dst1, &tmp_src1);
- get_internal_temp(ctx, &tmp_dst2, &tmp_src2);
+ tmp_src1 = get_internal_temp(ctx, &tmp_dst1);
+ tmp_src2 = get_internal_temp(ctx, &tmp_dst2);
get_immediate(ctx, &tmp_const, fui(1.0));
@@ -680,14 +691,14 @@ trans_lrp(const struct instr_translater *t,
/* tmp2 = tmp2 * c */
instr = ir3_instr_create(ctx->ir, 2, OPC_MUL_F);
vectorize(ctx, instr, &tmp_dst2, 2,
- &tmp_src2, 0,
+ tmp_src2, 0,
&inst->Src[2].Register, 0);
/* dst = tmp1 + tmp2 */
instr = ir3_instr_create(ctx->ir, 2, OPC_ADD_F);
vectorize(ctx, instr, &inst->Dst[0].Register, 2,
- &tmp_src1, 0,
- &tmp_src2, 0);
+ tmp_src1, 0,
+ tmp_src2, 0);
}
/* FRC(x) = x - FLOOR(x) */
@@ -698,9 +709,9 @@ trans_frac(const struct instr_translater *t,
{
struct ir3_instruction *instr;
struct tgsi_dst_register tmp_dst;
- struct tgsi_src_register tmp_src;
+ struct tgsi_src_register *tmp_src;
- get_internal_temp(ctx, &tmp_dst, &tmp_src);
+ tmp_src = get_internal_temp(ctx, &tmp_dst);
/* tmp = FLOOR(x) */
instr = ir3_instr_create(ctx->ir, 2, OPC_FLOOR_F);
@@ -711,7 +722,7 @@ trans_frac(const struct instr_translater *t,
instr = ir3_instr_create(ctx->ir, 2, OPC_ADD_F);
vectorize(ctx, instr, &inst->Dst[0].Register, 2,
&inst->Src[0].Register, 0,
- &tmp_src, IR3_REG_NEGATE);
+ tmp_src, IR3_REG_NEGATE);
}
/* POW(a,b) = EXP2(b * LOG2(a)) */
@@ -723,12 +734,12 @@ trans_pow(const struct instr_translater *t,
struct ir3_instruction *instr;
struct ir3_register *r;
struct tgsi_dst_register tmp_dst;
- struct tgsi_src_register tmp_src;
+ struct tgsi_src_register *tmp_src;
struct tgsi_dst_register *dst = &inst->Dst[0].Register;
struct tgsi_src_register *src0 = &inst->Src[0].Register;
struct tgsi_src_register *src1 = &inst->Src[1].Register;
- get_internal_temp_repl(ctx, &tmp_dst, &tmp_src);
+ tmp_src = get_internal_temp_repl(ctx, &tmp_dst);
/* log2 Rtmp, Rsrc0 */
ir3_instr_create(ctx->ir, 0, OPC_NOP)->repeat = 5;
@@ -740,7 +751,7 @@ trans_pow(const struct instr_translater *t,
/* mul.f Rtmp, Rtmp, Rsrc1 */
instr = ir3_instr_create(ctx->ir, 2, OPC_MUL_F);
add_dst_reg(ctx, instr, &tmp_dst, 0);
- add_src_reg(ctx, instr, &tmp_src, 0);
+ add_src_reg(ctx, instr, tmp_src, 0);
add_src_reg(ctx, instr, src1, src1->SwizzleX);
/* blob compiler seems to ensure there are at least 6 instructions
@@ -752,10 +763,10 @@ trans_pow(const struct instr_translater *t,
/* exp2 Rdst, Rtmp */
instr = ir3_instr_create(ctx->ir, 4, OPC_EXP2);
r = add_dst_reg(ctx, instr, &tmp_dst, 0);
- add_src_reg(ctx, instr, &tmp_src, 0);
+ add_src_reg(ctx, instr, tmp_src, 0);
regmask_set(ctx->needs_ss, r);
- create_mov(ctx, dst, &tmp_src);
+ create_mov(ctx, dst, tmp_src);
}
/* texture fetch/sample instructions: */
@@ -766,8 +777,6 @@ trans_samp(const struct instr_translater *t,
{
struct ir3_register *r;
struct ir3_instruction *instr;
- struct tgsi_dst_register tmp_dst;
- struct tgsi_src_register tmp_src;
struct tgsi_src_register *coord = &inst->Src[0].Register;
struct tgsi_src_register *samp = &inst->Src[1].Register;
unsigned tex = inst->Texture.Texture;
@@ -802,10 +811,13 @@ trans_samp(const struct instr_translater *t,
*/
for (i = 1; (i < 4) && (order[i] >= 0); i++) {
if (src_swiz(coord, i) != (src_swiz(coord, 0) + order[i])) {
- type_t type_mov = get_type(ctx);
+ struct tgsi_dst_register tmp_dst;
+ struct tgsi_src_register *tmp_src;
+
+ type_t type_mov = get_ftype(ctx);
/* need to move things around: */
- get_internal_temp(ctx, &tmp_dst, &tmp_src);
+ tmp_src = get_internal_temp(ctx, &tmp_dst);
for (j = 0; (j < 4) && (order[j] >= 0); j++) {
instr = ir3_instr_create(ctx->ir, 1, 0);
@@ -816,7 +828,7 @@ trans_samp(const struct instr_translater *t,
src_swiz(coord, order[j]));
}
- coord = &tmp_src;
+ coord = tmp_src;
if (j < 4)
ir3_instr_create(ctx->ir, 0, OPC_NOP)->repeat = 4 - j - 1;
@@ -826,7 +838,7 @@ trans_samp(const struct instr_translater *t,
}
instr = ir3_instr_create(ctx->ir, 5, t->opc);
- instr->cat5.type = get_type(ctx);
+ instr->cat5.type = get_ftype(ctx);
instr->cat5.samp = samp->Index;
instr->cat5.tex = samp->Index;
instr->flags |= flags;
@@ -847,12 +859,12 @@ trans_cmp(const struct instr_translater *t,
{
struct ir3_instruction *instr;
struct tgsi_dst_register tmp_dst;
- struct tgsi_src_register tmp_src;
+ struct tgsi_src_register *tmp_src;
struct tgsi_src_register constval;
/* final instruction uses original src1 and src2, so we need get_dst() */
struct tgsi_dst_register *dst = get_dst(ctx, inst);
- get_internal_temp(ctx, &tmp_dst, &tmp_src);
+ tmp_src = get_internal_temp(ctx, &tmp_dst);
/* cmps.f.ge tmp, src0, 0.0 */
instr = ir3_instr_create(ctx->ir, 2, OPC_CMPS_F);
@@ -866,7 +878,7 @@ trans_cmp(const struct instr_translater *t,
instr = ir3_instr_create(ctx->ir, 2, OPC_ADD_S);
instr->repeat = 3;
add_dst_reg(ctx, instr, &tmp_dst, 0);
- add_src_reg(ctx, instr, &tmp_src, 0)->flags |= IR3_REG_R;
+ add_src_reg(ctx, instr, tmp_src, 0)->flags |= IR3_REG_R;
ir3_reg_create(instr, 0, IR3_REG_IMMED)->iim_val = -1;
/* sel.{f32,f16} dst, src2, tmp, src1 */
@@ -874,7 +886,7 @@ trans_cmp(const struct instr_translater *t,
OPC_SEL_F16 : OPC_SEL_F32);
vectorize(ctx, instr, dst, 3,
&inst->Src[2].Register, 0,
- &tmp_src, 0,
+ tmp_src, 0,
&inst->Src[1].Register, 0);
put_dst(ctx, inst, dst);
@@ -1066,7 +1078,6 @@ instr_cat3(const struct instr_translater *t,
{
struct tgsi_dst_register *dst = get_dst(ctx, inst);
struct tgsi_src_register *src1;
- struct tgsi_src_register tmp_src;
struct ir3_instruction *instr;
/* Blob compiler never seems to use a const in src1 position..
@@ -1075,7 +1086,7 @@ instr_cat3(const struct instr_translater *t,
* const. Not sure if this is a hw bug, or simply that the
* disassembler lies.
*/
- src1 = get_unconst(ctx, &inst->Src[1].Register, &tmp_src);
+ src1 = get_unconst(ctx, &inst->Src[1].Register);
instr = ir3_instr_create(ctx->ir, 3,
ctx->so->half_precision ? t->hopc : t->opc);
@@ -1093,11 +1104,10 @@ instr_cat4(const struct instr_translater *t,
{
struct tgsi_dst_register *dst = get_dst(ctx, inst);
struct tgsi_src_register *src;
- struct tgsi_src_register tmp_src;
struct ir3_instruction *instr;
/* seems like blob compiler avoids const as src.. */
- src = get_unconst(ctx, &inst->Src[0].Register, &tmp_src);
+ src = get_unconst(ctx, &inst->Src[0].Register);
ir3_instr_create(ctx->ir, 0, OPC_NOP)->repeat = 5;
instr = ir3_instr_create(ctx->ir, 4, t->opc);
--
1.8.4.2

View File

@ -0,0 +1,231 @@
From c20aa295ec0e1f7b70986a32ef2d74e5097cf640 Mon Sep 17 00:00:00 2001
From: Rob Clark <robclark@freedesktop.org>
Date: Sat, 24 Aug 2013 13:02:53 -0400
Subject: [PATCH 13/17] freedreno/a3xx/compiler: fix SGT/SLT/etc
The cmps.f.* instruction doesn't actually seem to give a float 1.0 or
0.0 output. It either needs a cov.u16f16 or add.s + sel.f16. This
makes SGT/SLT/etc more similar to CMP, so handle them in trans_cmp().
This fixes a bunch of piglit tests.
Signed-off-by: Rob Clark <robclark@freedesktop.org>
---
src/gallium/drivers/freedreno/a3xx/fd3_compiler.c | 154 ++++++++++++++++++----
1 file changed, 125 insertions(+), 29 deletions(-)
diff --git a/src/gallium/drivers/freedreno/a3xx/fd3_compiler.c b/src/gallium/drivers/freedreno/a3xx/fd3_compiler.c
index b5cdda8..477053b 100644
--- a/src/gallium/drivers/freedreno/a3xx/fd3_compiler.c
+++ b/src/gallium/drivers/freedreno/a3xx/fd3_compiler.c
@@ -851,7 +851,39 @@ trans_samp(const struct instr_translater *t,
regmask_set(ctx->needs_sy, r);
}
-/* CMP(a,b,c) = (a < 0) ? b : c */
+/*
+ * SEQ(a,b) = (a == b) ? 1.0 : 0.0
+ * cmps.f.eq tmp0, b, a
+ * cov.u16f16 dst, tmp0
+ *
+ * SNE(a,b) = (a != b) ? 1.0 : 0.0
+ * cmps.f.eq tmp0, b, a
+ * add.s tmp0, tmp0, -1
+ * sel.f16 dst, {0.0}, tmp0, {1.0}
+ *
+ * SGE(a,b) = (a >= b) ? 1.0 : 0.0
+ * cmps.f.ge tmp0, a, b
+ * cov.u16f16 dst, tmp0
+ *
+ * SLE(a,b) = (a <= b) ? 1.0 : 0.0
+ * cmps.f.ge tmp0, b, a
+ * cov.u16f16 dst, tmp0
+ *
+ * SGT(a,b) = (a > b) ? 1.0 : 0.0
+ * cmps.f.ge tmp0, b, a
+ * add.s tmp0, tmp0, -1
+ * sel.f16 dst, {0.0}, tmp0, {1.0}
+ *
+ * SLT(a,b) = (a < b) ? 1.0 : 0.0
+ * cmps.f.ge tmp0, a, b
+ * add.s tmp0, tmp0, -1
+ * sel.f16 dst, {0.0}, tmp0, {1.0}
+ *
+ * CMP(a,b,c) = (a < 0.0) ? b : c
+ * cmps.f.ge tmp0, a, {0.0}
+ * add.s tmp0, tmp0, -1
+ * sel.f16 dst, c, tmp0, b
+ */
static void
trans_cmp(const struct instr_translater *t,
struct fd3_compile_context *ctx,
@@ -860,34 +892,97 @@ trans_cmp(const struct instr_translater *t,
struct ir3_instruction *instr;
struct tgsi_dst_register tmp_dst;
struct tgsi_src_register *tmp_src;
- struct tgsi_src_register constval;
- /* final instruction uses original src1 and src2, so we need get_dst() */
+ struct tgsi_src_register constval0, constval1;
+ /* final instruction for CMP() uses orig src1 and src2: */
struct tgsi_dst_register *dst = get_dst(ctx, inst);
+ struct tgsi_src_register *a0, *a1;
+ unsigned condition;
tmp_src = get_internal_temp(ctx, &tmp_dst);
- /* cmps.f.ge tmp, src0, 0.0 */
+ switch (t->tgsi_opc) {
+ case TGSI_OPCODE_SEQ:
+ case TGSI_OPCODE_SNE:
+ a0 = &inst->Src[1].Register; /* b */
+ a1 = &inst->Src[0].Register; /* a */
+ condition = IR3_COND_EQ;
+ break;
+ case TGSI_OPCODE_SGE:
+ case TGSI_OPCODE_SLT:
+ a0 = &inst->Src[0].Register; /* a */
+ a1 = &inst->Src[1].Register; /* b */
+ condition = IR3_COND_GE;
+ break;
+ case TGSI_OPCODE_SLE:
+ case TGSI_OPCODE_SGT:
+ a0 = &inst->Src[1].Register; /* b */
+ a1 = &inst->Src[0].Register; /* a */
+ condition = IR3_COND_GE;
+ break;
+ case TGSI_OPCODE_CMP:
+ get_immediate(ctx, &constval0, fui(0.0));
+ a0 = &inst->Src[0].Register; /* a */
+ a1 = &constval0; /* {0.0} */
+ condition = IR3_COND_GE;
+ break;
+ default:
+ compile_assert(ctx, 0);
+ return;
+ }
+
+ /* NOTE: seems blob compiler will move a const to a gpr if both
+ * src args to cmps.f are const. Need to check if this applies
+ * to other instructions..
+ */
+ if (is_const(a0) && is_const(a1))
+ a0 = get_unconst(ctx, a0);
+
+ /* cmps.f.ge tmp, a0, a1 */
instr = ir3_instr_create(ctx->ir, 2, OPC_CMPS_F);
- instr->cat2.condition = IR3_COND_GE;
- get_immediate(ctx, &constval, fui(0.0));
- vectorize(ctx, instr, &tmp_dst, 2,
- &inst->Src[0].Register, 0,
- &constval, 0);
+ instr->cat2.condition = condition;
+ vectorize(ctx, instr, &tmp_dst, 2, a0, 0, a1, 0);
- /* add.s tmp, tmp, -1 */
- instr = ir3_instr_create(ctx->ir, 2, OPC_ADD_S);
- instr->repeat = 3;
- add_dst_reg(ctx, instr, &tmp_dst, 0);
- add_src_reg(ctx, instr, tmp_src, 0)->flags |= IR3_REG_R;
- ir3_reg_create(instr, 0, IR3_REG_IMMED)->iim_val = -1;
+ switch (t->tgsi_opc) {
+ case TGSI_OPCODE_SEQ:
+ case TGSI_OPCODE_SGE:
+ case TGSI_OPCODE_SLE:
+ /* cov.u16f16 dst, tmp0 */
+ instr = ir3_instr_create(ctx->ir, 1, 0);
+ instr->cat1.src_type = get_utype(ctx);
+ instr->cat1.dst_type = get_ftype(ctx);
+ vectorize(ctx, instr, dst, 1, tmp_src, 0);
+ break;
+ case TGSI_OPCODE_SNE:
+ case TGSI_OPCODE_SGT:
+ case TGSI_OPCODE_SLT:
+ case TGSI_OPCODE_CMP:
+ /* add.s tmp, tmp, -1 */
+ instr = ir3_instr_create(ctx->ir, 2, OPC_ADD_S);
+ instr->repeat = 3;
+ add_dst_reg(ctx, instr, &tmp_dst, 0);
+ add_src_reg(ctx, instr, tmp_src, 0)->flags |= IR3_REG_R;
+ ir3_reg_create(instr, 0, IR3_REG_IMMED)->iim_val = -1;
+
+ if (t->tgsi_opc == TGSI_OPCODE_CMP) {
+ /* sel.{f32,f16} dst, src2, tmp, src1 */
+ instr = ir3_instr_create(ctx->ir, 3,
+ ctx->so->half_precision ? OPC_SEL_F16 : OPC_SEL_F32);
+ vectorize(ctx, instr, dst, 3,
+ &inst->Src[2].Register, 0,
+ tmp_src, 0,
+ &inst->Src[1].Register, 0);
+ } else {
+ get_immediate(ctx, &constval0, fui(0.0));
+ get_immediate(ctx, &constval1, fui(1.0));
+ /* sel.{f32,f16} dst, {0.0}, tmp0, {1.0} */
+ instr = ir3_instr_create(ctx->ir, 3,
+ ctx->so->half_precision ? OPC_SEL_F16 : OPC_SEL_F32);
+ vectorize(ctx, instr, dst, 3,
+ &constval0, 0, tmp_src, 0, &constval1, 0);
+ }
- /* sel.{f32,f16} dst, src2, tmp, src1 */
- instr = ir3_instr_create(ctx->ir, 3, ctx->so->half_precision ?
- OPC_SEL_F16 : OPC_SEL_F32);
- vectorize(ctx, instr, dst, 3,
- &inst->Src[2].Register, 0,
- tmp_src, 0,
- &inst->Src[1].Register, 0);
+ break;
+ }
put_dst(ctx, inst, dst);
}
@@ -948,8 +1043,8 @@ trans_if(const struct instr_translater *t,
instr = ir3_instr_create(ctx->ir, 2, OPC_CMPS_F);
ir3_reg_create(instr, regid(REG_P0, 0), 0);
- add_src_reg(ctx, instr, &constval, constval.SwizzleX);
add_src_reg(ctx, instr, src, src->SwizzleX);
+ add_src_reg(ctx, instr, &constval, constval.SwizzleX);
instr->cat2.condition = IR3_COND_EQ;
instr = ir3_instr_create(ctx->ir, 0, OPC_BR);
@@ -1033,10 +1128,6 @@ instr_cat2(const struct instr_translater *t,
instr = ir3_instr_create(ctx->ir, 2, t->opc);
switch (t->tgsi_opc) {
- case TGSI_OPCODE_SLT:
- case TGSI_OPCODE_SGE:
- instr->cat2.condition = t->arg;
- break;
case TGSI_OPCODE_ABS:
src0_flags = IR3_REG_ABS;
break;
@@ -1135,12 +1226,11 @@ static const struct instr_translater translaters[TGSI_OPCODE_LAST] = {
INSTR(DPH, trans_dotp, .arg = 3), /* almost like DP3 */
INSTR(MIN, instr_cat2, .opc = OPC_MIN_F),
INSTR(MAX, instr_cat2, .opc = OPC_MAX_F),
- INSTR(SLT, instr_cat2, .opc = OPC_CMPS_F, .arg = IR3_COND_LT),
- INSTR(SGE, instr_cat2, .opc = OPC_CMPS_F, .arg = IR3_COND_GE),
INSTR(MAD, instr_cat3, .opc = OPC_MAD_F32, .hopc = OPC_MAD_F16),
INSTR(LRP, trans_lrp),
INSTR(FRC, trans_frac),
INSTR(FLR, instr_cat2, .opc = OPC_FLOOR_F),
+ INSTR(ARL, instr_cat2, .opc = OPC_FLOOR_F),
INSTR(EX2, instr_cat4, .opc = OPC_EXP2),
INSTR(LG2, instr_cat4, .opc = OPC_LOG2),
INSTR(POW, trans_pow),
@@ -1149,6 +1239,12 @@ static const struct instr_translater translaters[TGSI_OPCODE_LAST] = {
INSTR(SIN, instr_cat4, .opc = OPC_COS),
INSTR(TEX, trans_samp, .opc = OPC_SAM, .arg = TGSI_OPCODE_TEX),
INSTR(TXP, trans_samp, .opc = OPC_SAM, .arg = TGSI_OPCODE_TXP),
+ INSTR(SGT, trans_cmp),
+ INSTR(SLT, trans_cmp),
+ INSTR(SGE, trans_cmp),
+ INSTR(SLE, trans_cmp),
+ INSTR(SNE, trans_cmp),
+ INSTR(SEQ, trans_cmp),
INSTR(CMP, trans_cmp),
INSTR(IF, trans_if),
INSTR(ELSE, trans_else),
--
1.8.4.2

View File

@ -0,0 +1,36 @@
From 0b2c5119cb772751edb3c42c9c0545443e26fd7f Mon Sep 17 00:00:00 2001
From: Rob Clark <robclark@freedesktop.org>
Date: Mon, 17 Jun 2013 20:11:54 -0400
Subject: [PATCH 14/17] freedreno/a3xx: don't leak so much
Signed-off-by: Rob Clark <robclark@freedesktop.org>
---
src/gallium/drivers/freedreno/a3xx/fd3_context.c | 11 +++++++++++
1 file changed, 11 insertions(+)
diff --git a/src/gallium/drivers/freedreno/a3xx/fd3_context.c b/src/gallium/drivers/freedreno/a3xx/fd3_context.c
index 3ae9b29..589aeed 100644
--- a/src/gallium/drivers/freedreno/a3xx/fd3_context.c
+++ b/src/gallium/drivers/freedreno/a3xx/fd3_context.c
@@ -40,7 +40,18 @@
static void
fd3_context_destroy(struct pipe_context *pctx)
{
+ struct fd3_context *fd3_ctx = fd3_context(fd_context(pctx));
+
fd3_prog_fini(pctx);
+
+ fd_bo_del(fd3_ctx->vs_pvt_mem);
+ fd_bo_del(fd3_ctx->fs_pvt_mem);
+ fd_bo_del(fd3_ctx->vsc_size_mem);
+ fd_bo_del(fd3_ctx->vsc_pipe_mem);
+
+ pipe_resource_reference(&fd3_ctx->solid_vbuf, NULL);
+ pipe_resource_reference(&fd3_ctx->blit_texcoord_vbuf, NULL);
+
fd_context_destroy(pctx);
}
--
1.8.4.2

View File

@ -0,0 +1,376 @@
From f1998c8aa7d82006f9ef7e6710a0f68f30bfc109 Mon Sep 17 00:00:00 2001
From: Rob Clark <robclark@freedesktop.org>
Date: Sat, 24 Aug 2013 17:30:50 -0400
Subject: [PATCH 15/17] freedreno/a3xx/compiler: better const handling
Seems like most/all instructions have some restrictions about const src
registers. In seems like the 2 src (cat2) instructions can take at most
one const, and the 3 src (cat3) instructions can take at most one const
in the first 2 arguments. And so on. Handle this properly now.
Signed-off-by: Rob Clark <robclark@freedesktop.org>
---
src/gallium/drivers/freedreno/a3xx/fd3_compiler.c | 211 +++++++++++++---------
1 file changed, 121 insertions(+), 90 deletions(-)
diff --git a/src/gallium/drivers/freedreno/a3xx/fd3_compiler.c b/src/gallium/drivers/freedreno/a3xx/fd3_compiler.c
index 477053b..dcdd2d9 100644
--- a/src/gallium/drivers/freedreno/a3xx/fd3_compiler.c
+++ b/src/gallium/drivers/freedreno/a3xx/fd3_compiler.c
@@ -355,20 +355,47 @@ is_const(struct tgsi_src_register *src)
(src->File == TGSI_FILE_IMMEDIATE);
}
+static type_t
+get_ftype(struct fd3_compile_context *ctx)
+{
+ return ctx->so->half_precision ? TYPE_F16 : TYPE_F32;
+}
+
+static type_t
+get_utype(struct fd3_compile_context *ctx)
+{
+ return ctx->so->half_precision ? TYPE_U16 : TYPE_U32;
+}
+
+static unsigned
+src_swiz(struct tgsi_src_register *src, int chan)
+{
+ switch (chan) {
+ case 0: return src->SwizzleX;
+ case 1: return src->SwizzleY;
+ case 2: return src->SwizzleZ;
+ case 3: return src->SwizzleW;
+ }
+ assert(0);
+ return 0;
+}
+
/* for instructions that cannot take a const register as src, if needed
* generate a move to temporary gpr:
*/
static struct tgsi_src_register *
get_unconst(struct fd3_compile_context *ctx, struct tgsi_src_register *src)
{
- if (is_const(src)) {
- static struct tgsi_dst_register tmp_dst;
- struct tgsi_src_register *tmp_src =
- get_internal_temp(ctx, &tmp_dst);
- create_mov(ctx, &tmp_dst, src);
- src = tmp_src;
- }
- return src;
+ struct tgsi_dst_register tmp_dst;
+ struct tgsi_src_register *tmp_src;
+
+ compile_assert(ctx, is_const(src));
+
+ tmp_src = get_internal_temp(ctx, &tmp_dst);
+
+ create_mov(ctx, &tmp_dst, src);
+
+ return tmp_src;
}
static void
@@ -418,31 +445,6 @@ get_immediate(struct fd3_compile_context *ctx,
reg->SwizzleW = swiz2tgsi[swiz];
}
-static type_t
-get_ftype(struct fd3_compile_context *ctx)
-{
- return ctx->so->half_precision ? TYPE_F16 : TYPE_F32;
-}
-
-static type_t
-get_utype(struct fd3_compile_context *ctx)
-{
- return ctx->so->half_precision ? TYPE_U16 : TYPE_U32;
-}
-
-static unsigned
-src_swiz(struct tgsi_src_register *src, int chan)
-{
- switch (chan) {
- case 0: return src->SwizzleX;
- case 1: return src->SwizzleY;
- case 2: return src->SwizzleZ;
- case 3: return src->SwizzleW;
- }
- assert(0);
- return 0;
-}
-
static void
create_mov(struct fd3_compile_context *ctx, struct tgsi_dst_register *dst,
struct tgsi_src_register *src)
@@ -463,7 +465,6 @@ create_mov(struct fd3_compile_context *ctx, struct tgsi_dst_register *dst,
ir3_instr_create(ctx->ir, 0, OPC_NOP);
}
}
-
}
static void
@@ -584,6 +585,15 @@ vectorize(struct fd3_compile_context *ctx, struct ir3_instruction *instr,
* native instructions:
*/
+static inline void
+get_swiz(unsigned *swiz, struct tgsi_src_register *src)
+{
+ swiz[0] = src->SwizzleX;
+ swiz[1] = src->SwizzleY;
+ swiz[2] = src->SwizzleZ;
+ swiz[3] = src->SwizzleW;
+}
+
static void
trans_dotp(const struct instr_translater *t,
struct fd3_compile_context *ctx,
@@ -595,34 +605,31 @@ trans_dotp(const struct instr_translater *t,
struct tgsi_dst_register *dst = &inst->Dst[0].Register;
struct tgsi_src_register *src0 = &inst->Src[0].Register;
struct tgsi_src_register *src1 = &inst->Src[1].Register;
- unsigned swiz0[] = { src0->SwizzleX, src0->SwizzleY, src0->SwizzleZ, src0->SwizzleW };
- unsigned swiz1[] = { src1->SwizzleX, src1->SwizzleY, src1->SwizzleZ, src1->SwizzleW };
+ unsigned swiz0[4];
+ unsigned swiz1[4];
opc_t opc_mad = ctx->so->half_precision ? OPC_MAD_F16 : OPC_MAD_F32;
unsigned n = t->arg; /* number of components */
- unsigned i;
+ unsigned i, swapped = 0;
tmp_src = get_internal_temp_repl(ctx, &tmp_dst);
- /* Blob compiler never seems to use a const in src1 position for
- * mad.*, although there does seem (according to disassembler
- * hidden in libllvm-a3xx.so) to be a bit to indicate that src1
- * is a const. Not sure if this is a hw bug, or simply that the
- * disassembler lies.
+ /* in particular, can't handle const for src1 for cat3/mad:
*/
if (is_const(src1)) {
-
- /* the mov to tmp unswizzles src1, so now we have tmp.xyzw:
- */
- for (i = 0; i < 4; i++)
- swiz1[i] = i;
-
- /* the first mul.f will clobber tmp.x, but that is ok
- * because after that point we no longer need tmp.x:
- */
- create_mov(ctx, &tmp_dst, src1);
- src1 = tmp_src;
+ if (!is_const(src0)) {
+ struct tgsi_src_register *tmp;
+ tmp = src0;
+ src0 = src1;
+ src1 = tmp;
+ swapped = 1;
+ } else {
+ src0 = get_unconst(ctx, src0);
+ }
}
+ get_swiz(swiz0, src0);
+ get_swiz(swiz1, src1);
+
instr = ir3_instr_create(ctx->ir, 2, OPC_MUL_F);
add_dst_reg(ctx, instr, &tmp_dst, 0);
add_src_reg(ctx, instr, src0, swiz0[0]);
@@ -640,22 +647,20 @@ trans_dotp(const struct instr_translater *t,
/* DPH(a,b) = (a.x * b.x) + (a.y * b.y) + (a.z * b.z) + b.w */
if (t->tgsi_opc == TGSI_OPCODE_DPH) {
- ir3_instr_create(ctx->ir, 0, OPC_NOP);
+ ir3_instr_create(ctx->ir, 0, OPC_NOP)->repeat = 1;
instr = ir3_instr_create(ctx->ir, 2, OPC_ADD_F);
add_dst_reg(ctx, instr, &tmp_dst, 0);
- add_src_reg(ctx, instr, src1, swiz1[i]);
+ if (swapped)
+ add_src_reg(ctx, instr, src0, swiz0[i]);
+ else
+ add_src_reg(ctx, instr, src1, swiz1[i]);
add_src_reg(ctx, instr, tmp_src, 0);
n++;
}
- ir3_instr_create(ctx->ir, 0, OPC_NOP);
-
- /* pad out to multiple of 4 scalar instructions: */
- for (i = 2 * n; i % 4; i++) {
- ir3_instr_create(ctx->ir, 0, OPC_NOP);
- }
+ ir3_instr_create(ctx->ir, 0, OPC_NOP)->repeat = 2;
create_mov(ctx, dst, tmp_src);
}
@@ -670,6 +675,11 @@ trans_lrp(const struct instr_translater *t,
struct tgsi_dst_register tmp_dst1, tmp_dst2;
struct tgsi_src_register *tmp_src1, *tmp_src2;
struct tgsi_src_register tmp_const;
+ struct tgsi_src_register *src0 = &inst->Src[0].Register;
+ struct tgsi_src_register *src1 = &inst->Src[1].Register;
+
+ if (is_const(src0) && is_const(src1))
+ src0 = get_unconst(ctx, src0);
tmp_src1 = get_internal_temp(ctx, &tmp_dst1);
tmp_src2 = get_internal_temp(ctx, &tmp_dst2);
@@ -678,15 +688,12 @@ trans_lrp(const struct instr_translater *t,
/* tmp1 = (a * b) */
instr = ir3_instr_create(ctx->ir, 2, OPC_MUL_F);
- vectorize(ctx, instr, &tmp_dst1, 2,
- &inst->Src[0].Register, 0,
- &inst->Src[1].Register, 0);
+ vectorize(ctx, instr, &tmp_dst1, 2, src0, 0, src1, 0);
/* tmp2 = (1 - a) */
instr = ir3_instr_create(ctx->ir, 2, OPC_ADD_F);
- vectorize(ctx, instr, &tmp_dst2, 2,
- &tmp_const, 0,
- &inst->Src[0].Register, IR3_REG_NEGATE);
+ vectorize(ctx, instr, &tmp_dst2, 2, &tmp_const, 0,
+ src0, IR3_REG_NEGATE);
/* tmp2 = tmp2 * c */
instr = ir3_instr_create(ctx->ir, 2, OPC_MUL_F);
@@ -930,10 +937,6 @@ trans_cmp(const struct instr_translater *t,
return;
}
- /* NOTE: seems blob compiler will move a const to a gpr if both
- * src args to cmps.f are const. Need to check if this applies
- * to other instructions..
- */
if (is_const(a0) && is_const(a1))
a0 = get_unconst(ctx, a0);
@@ -1041,6 +1044,9 @@ trans_if(const struct instr_translater *t,
get_immediate(ctx, &constval, fui(0.0));
+ if (is_const(src))
+ src = get_unconst(ctx, src);
+
instr = ir3_instr_create(ctx->ir, 2, OPC_CMPS_F);
ir3_reg_create(instr, regid(REG_P0, 0), 0);
add_src_reg(ctx, instr, src, src->SwizzleX);
@@ -1122,11 +1128,11 @@ instr_cat2(const struct instr_translater *t,
struct tgsi_full_instruction *inst)
{
struct tgsi_dst_register *dst = get_dst(ctx, inst);
+ struct tgsi_src_register *src0 = &inst->Src[0].Register;
+ struct tgsi_src_register *src1 = &inst->Src[1].Register;
struct ir3_instruction *instr;
unsigned src0_flags = 0;
- instr = ir3_instr_create(ctx->ir, 2, t->opc);
-
switch (t->tgsi_opc) {
case TGSI_OPCODE_ABS:
src0_flags = IR3_REG_ABS;
@@ -1149,41 +1155,65 @@ instr_cat2(const struct instr_translater *t,
case OPC_SETRM:
case OPC_CBITS_B:
/* these only have one src reg */
- vectorize(ctx, instr, dst, 1,
- &inst->Src[0].Register, src0_flags);
+ instr = ir3_instr_create(ctx->ir, 2, t->opc);
+ vectorize(ctx, instr, dst, 1, src0, src0_flags);
break;
default:
- vectorize(ctx, instr, dst, 2,
- &inst->Src[0].Register, src0_flags,
- &inst->Src[1].Register, 0);
+ if (is_const(src0) && is_const(src1))
+ src0 = get_unconst(ctx, src0);
+
+ instr = ir3_instr_create(ctx->ir, 2, t->opc);
+ vectorize(ctx, instr, dst, 2, src0, src0_flags, src1, 0);
break;
}
put_dst(ctx, inst, dst);
}
+static bool is_mad(opc_t opc)
+{
+ switch (opc) {
+ case OPC_MAD_U16:
+ case OPC_MADSH_U16:
+ case OPC_MAD_S16:
+ case OPC_MADSH_M16:
+ case OPC_MAD_U24:
+ case OPC_MAD_S24:
+ case OPC_MAD_F16:
+ case OPC_MAD_F32:
+ return true;
+ default:
+ return false;
+ }
+}
+
static void
instr_cat3(const struct instr_translater *t,
struct fd3_compile_context *ctx,
struct tgsi_full_instruction *inst)
{
struct tgsi_dst_register *dst = get_dst(ctx, inst);
- struct tgsi_src_register *src1;
+ struct tgsi_src_register *src0 = &inst->Src[0].Register;
+ struct tgsi_src_register *src1 = &inst->Src[1].Register;
struct ir3_instruction *instr;
- /* Blob compiler never seems to use a const in src1 position..
- * although there does seem (according to disassembler hidden
- * in libllvm-a3xx.so) to be a bit to indicate that src1 is a
- * const. Not sure if this is a hw bug, or simply that the
- * disassembler lies.
+ /* in particular, can't handle const for src1 for cat3..
+ * for mad, we can swap first two src's if needed:
*/
- src1 = get_unconst(ctx, &inst->Src[1].Register);
+ if (is_const(src1)) {
+ if (is_mad(t->opc) && !is_const(src0)) {
+ struct tgsi_src_register *tmp;
+ tmp = src0;
+ src0 = src1;
+ src1 = tmp;
+ } else {
+ src0 = get_unconst(ctx, src0);
+ }
+ }
instr = ir3_instr_create(ctx->ir, 3,
ctx->so->half_precision ? t->hopc : t->opc);
- vectorize(ctx, instr, dst, 3,
- &inst->Src[0].Register, 0,
- src1, 0,
+ vectorize(ctx, instr, dst, 3, src0, 0, src1, 0,
&inst->Src[2].Register, 0);
put_dst(ctx, inst, dst);
}
@@ -1194,11 +1224,12 @@ instr_cat4(const struct instr_translater *t,
struct tgsi_full_instruction *inst)
{
struct tgsi_dst_register *dst = get_dst(ctx, inst);
- struct tgsi_src_register *src;
+ struct tgsi_src_register *src = &inst->Src[0].Register;
struct ir3_instruction *instr;
/* seems like blob compiler avoids const as src.. */
- src = get_unconst(ctx, &inst->Src[0].Register);
+ if (is_const(src))
+ src = get_unconst(ctx, src);
ir3_instr_create(ctx->ir, 0, OPC_NOP)->repeat = 5;
instr = ir3_instr_create(ctx->ir, 4, t->opc);
--
1.8.4.2

View File

@ -0,0 +1,128 @@
From 4f0be333e7ee93fbb006c5570a594e49b4441731 Mon Sep 17 00:00:00 2001
From: Rob Clark <robclark@freedesktop.org>
Date: Tue, 27 Aug 2013 19:24:53 -0400
Subject: [PATCH 16/17] freedreno/a3xx/compiler: handle sync flags better
We need to set the flag on all the .xyzw components that are written by
the instruction, not just on .x. Otherwise a later use of rN.y (for
example) will not trigger the appropriate sync bit to be set.
Signed-off-by: Rob Clark <robclark@freedesktop.org>
---
src/gallium/drivers/freedreno/a3xx/fd3_compiler.c | 50 +++++++++++++++--------
1 file changed, 34 insertions(+), 16 deletions(-)
diff --git a/src/gallium/drivers/freedreno/a3xx/fd3_compiler.c b/src/gallium/drivers/freedreno/a3xx/fd3_compiler.c
index dcdd2d9..5115411 100644
--- a/src/gallium/drivers/freedreno/a3xx/fd3_compiler.c
+++ b/src/gallium/drivers/freedreno/a3xx/fd3_compiler.c
@@ -62,10 +62,16 @@ static unsigned regmask_idx(struct ir3_register *reg)
return num;
}
-static void regmask_set(regmask_t regmask, struct ir3_register *reg)
+static void regmask_set(regmask_t regmask, struct ir3_register *reg,
+ unsigned wrmask)
{
- unsigned idx = regmask_idx(reg);
- regmask[idx / 8] |= 1 << (idx % 8);
+ unsigned i;
+ for (i = 0; i < 4; i++) {
+ if (wrmask & (1 << i)) {
+ unsigned idx = regmask_idx(reg) + i;
+ regmask[idx / 8] |= 1 << (idx % 8);
+ }
+ }
}
static unsigned regmask_get(regmask_t regmask, struct ir3_register *reg)
@@ -216,6 +222,24 @@ struct instr_translater {
unsigned arg;
};
+static unsigned
+src_flags(struct fd3_compile_context *ctx, struct ir3_register *reg)
+{
+ unsigned flags = 0;
+
+ if (regmask_get(ctx->needs_ss, reg)) {
+ flags |= IR3_INSTR_SS;
+ memset(ctx->needs_ss, 0, sizeof(ctx->needs_ss));
+ }
+
+ if (regmask_get(ctx->needs_sy, reg)) {
+ flags |= IR3_INSTR_SY;
+ memset(ctx->needs_sy, 0, sizeof(ctx->needs_sy));
+ }
+
+ return flags;
+}
+
static struct ir3_register *
add_dst_reg(struct fd3_compile_context *ctx, struct ir3_instruction *instr,
const struct tgsi_dst_register *dst, unsigned chan)
@@ -279,15 +303,7 @@ add_src_reg(struct fd3_compile_context *ctx, struct ir3_instruction *instr,
reg = ir3_reg_create(instr, regid(num, chan), flags);
- if (regmask_get(ctx->needs_ss, reg)) {
- instr->flags |= IR3_INSTR_SS;
- memset(ctx->needs_ss, 0, sizeof(ctx->needs_ss));
- }
-
- if (regmask_get(ctx->needs_sy, reg)) {
- instr->flags |= IR3_INSTR_SY;
- memset(ctx->needs_sy, 0, sizeof(ctx->needs_sy));
- }
+ instr->flags |= src_flags(ctx, reg);
return reg;
}
@@ -567,6 +583,7 @@ vectorize(struct fd3_compile_context *ctx, struct ir3_instruction *instr,
cur->regs[j+1]->num =
regid(cur->regs[j+1]->num >> 2,
src_swiz(src, i));
+ cur->flags |= src_flags(ctx, cur->regs[j+1]);
}
va_end(ap);
}
@@ -753,7 +770,7 @@ trans_pow(const struct instr_translater *t,
instr = ir3_instr_create(ctx->ir, 4, OPC_LOG2);
r = add_dst_reg(ctx, instr, &tmp_dst, 0);
add_src_reg(ctx, instr, src0, src0->SwizzleX);
- regmask_set(ctx->needs_ss, r);
+ regmask_set(ctx->needs_ss, r, TGSI_WRITEMASK_X);
/* mul.f Rtmp, Rtmp, Rsrc1 */
instr = ir3_instr_create(ctx->ir, 2, OPC_MUL_F);
@@ -771,7 +788,7 @@ trans_pow(const struct instr_translater *t,
instr = ir3_instr_create(ctx->ir, 4, OPC_EXP2);
r = add_dst_reg(ctx, instr, &tmp_dst, 0);
add_src_reg(ctx, instr, tmp_src, 0);
- regmask_set(ctx->needs_ss, r);
+ regmask_set(ctx->needs_ss, r, TGSI_WRITEMASK_X);
create_mov(ctx, dst, tmp_src);
}
@@ -855,7 +872,7 @@ trans_samp(const struct instr_translater *t,
add_src_reg(ctx, instr, coord, coord->SwizzleX);
- regmask_set(ctx->needs_sy, r);
+ regmask_set(ctx->needs_sy, r, r->wrmask);
}
/*
@@ -1236,7 +1253,8 @@ instr_cat4(const struct instr_translater *t,
vectorize(ctx, instr, dst, 1, src, 0);
- regmask_set(ctx->needs_ss, instr->regs[0]);
+ regmask_set(ctx->needs_ss, instr->regs[0],
+ inst->Dst[0].Register.WriteMask);
put_dst(ctx, inst, dst);
}
--
1.8.4.2

View File

@ -0,0 +1,328 @@
From 4fd03f26aa1c2ddef24b2c4f8d1a10c96fbf7f40 Mon Sep 17 00:00:00 2001
From: Rob Clark <robclark@freedesktop.org>
Date: Thu, 29 Aug 2013 17:24:33 -0400
Subject: [PATCH 17/17] freedreno: updates for msm drm/kms driver
There where some small API tweaks in libdrm_freedreno to enable support
for msm drm/kms driver.
Signed-off-by: Rob Clark <robclark@freedesktop.org>
---
src/gallium/drivers/freedreno/a2xx/fd2_emit.c | 4 +--
src/gallium/drivers/freedreno/a2xx/fd2_gmem.c | 6 ++---
src/gallium/drivers/freedreno/a3xx/fd3_emit.c | 14 +++++------
src/gallium/drivers/freedreno/a3xx/fd3_gmem.c | 8 +++---
src/gallium/drivers/freedreno/a3xx/fd3_program.c | 4 +--
src/gallium/drivers/freedreno/freedreno_draw.c | 2 +-
src/gallium/drivers/freedreno/freedreno_resource.c | 18 ++++++++++++--
src/gallium/drivers/freedreno/freedreno_util.h | 29 +++++++++++++++-------
8 files changed, 55 insertions(+), 30 deletions(-)
diff --git a/src/gallium/drivers/freedreno/a2xx/fd2_emit.c b/src/gallium/drivers/freedreno/a2xx/fd2_emit.c
index b03390e..35511ba 100644
--- a/src/gallium/drivers/freedreno/a2xx/fd2_emit.c
+++ b/src/gallium/drivers/freedreno/a2xx/fd2_emit.c
@@ -137,7 +137,7 @@ emit_texture(struct fd_ringbuffer *ring, struct fd_context *ctx,
OUT_RING(ring, 0x00010000 + (0x6 * const_idx));
OUT_RING(ring, sampler->tex0 | view->tex0);
- OUT_RELOC(ring, view->tex_resource->bo, 0, view->fmt);
+ OUT_RELOC(ring, view->tex_resource->bo, 0, view->fmt, 0);
OUT_RING(ring, view->tex2);
OUT_RING(ring, sampler->tex3 | view->tex3);
OUT_RING(ring, sampler->tex4);
@@ -171,7 +171,7 @@ fd2_emit_vertex_bufs(struct fd_ringbuffer *ring, uint32_t val,
OUT_RING(ring, (0x1 << 16) | (val & 0xffff));
for (i = 0; i < n; i++) {
struct fd_resource *rsc = fd_resource(vbufs[i].prsc);
- OUT_RELOC(ring, rsc->bo, vbufs[i].offset, 3);
+ OUT_RELOC(ring, rsc->bo, vbufs[i].offset, 3, 0);
OUT_RING (ring, vbufs[i].size);
}
}
diff --git a/src/gallium/drivers/freedreno/a2xx/fd2_gmem.c b/src/gallium/drivers/freedreno/a2xx/fd2_gmem.c
index 93695bc..89f5a4d 100644
--- a/src/gallium/drivers/freedreno/a2xx/fd2_gmem.c
+++ b/src/gallium/drivers/freedreno/a2xx/fd2_gmem.c
@@ -70,7 +70,7 @@ emit_gmem2mem_surf(struct fd_ringbuffer *ring, uint32_t base,
OUT_PKT3(ring, CP_SET_CONSTANT, 5);
OUT_RING(ring, CP_REG(REG_A2XX_RB_COPY_CONTROL));
OUT_RING(ring, 0x00000000); /* RB_COPY_CONTROL */
- OUT_RELOC(ring, rsc->bo, 0, 0); /* RB_COPY_DEST_BASE */
+ OUT_RELOCW(ring, rsc->bo, 0, 0, 0); /* RB_COPY_DEST_BASE */
OUT_RING(ring, rsc->pitch >> 5); /* RB_COPY_DEST_PITCH */
OUT_RING(ring, /* RB_COPY_DEST_INFO */
A2XX_RB_COPY_DEST_INFO_FORMAT(fd2_pipe2color(psurf->format)) |
@@ -199,7 +199,7 @@ emit_mem2gmem_surf(struct fd_ringbuffer *ring, uint32_t base,
A2XX_SQ_TEX_0_CLAMP_Z(SQ_TEX_WRAP) |
A2XX_SQ_TEX_0_PITCH(rsc->pitch));
OUT_RELOC(ring, rsc->bo, 0,
- fd2_pipe2surface(psurf->format) | 0x800);
+ fd2_pipe2surface(psurf->format) | 0x800, 0);
OUT_RING(ring, A2XX_SQ_TEX_2_WIDTH(psurf->width - 1) |
A2XX_SQ_TEX_2_HEIGHT(psurf->height - 1));
OUT_RING(ring, 0x01000000 | // XXX
@@ -241,7 +241,7 @@ fd2_emit_tile_mem2gmem(struct fd_context *ctx, uint32_t xoff, uint32_t yoff,
y0 = ((float)yoff) / ((float)pfb->height);
y1 = ((float)yoff + bin_h) / ((float)pfb->height);
OUT_PKT3(ring, CP_MEM_WRITE, 9);
- OUT_RELOC(ring, fd_resource(fd2_ctx->solid_vertexbuf)->bo, 0x60, 0);
+ OUT_RELOC(ring, fd_resource(fd2_ctx->solid_vertexbuf)->bo, 0x60, 0, 0);
OUT_RING(ring, fui(x0));
OUT_RING(ring, fui(y0));
OUT_RING(ring, fui(x1));
diff --git a/src/gallium/drivers/freedreno/a3xx/fd3_emit.c b/src/gallium/drivers/freedreno/a3xx/fd3_emit.c
index 5ffd561..5e58618 100644
--- a/src/gallium/drivers/freedreno/a3xx/fd3_emit.c
+++ b/src/gallium/drivers/freedreno/a3xx/fd3_emit.c
@@ -81,7 +81,7 @@ fd3_emit_constant(struct fd_ringbuffer *ring,
if (prsc) {
struct fd_bo *bo = fd_resource(prsc)->bo;
OUT_RELOC(ring, bo, offset,
- CP_LOAD_STATE_1_STATE_TYPE(ST_CONSTANTS));
+ CP_LOAD_STATE_1_STATE_TYPE(ST_CONSTANTS), 0);
} else {
OUT_RING(ring, CP_LOAD_STATE_1_EXT_SRC_ADDR(0) |
CP_LOAD_STATE_1_STATE_TYPE(ST_CONSTANTS));
@@ -212,7 +212,7 @@ emit_textures(struct fd_ringbuffer *ring,
for (i = 0; i < tex->num_textures; i++) {
struct fd3_pipe_sampler_view *view =
fd3_pipe_sampler_view(tex->textures[i]);
- OUT_RELOC(ring, view->tex_resource->bo, 0, 0);
+ OUT_RELOC(ring, view->tex_resource->bo, 0, 0, 0);
/* I think each entry is a ptr to mipmap level.. for now, just
* pad w/ null's until I get around to actually implementing
* mipmap support..
@@ -296,7 +296,7 @@ fd3_emit_gmem_restore_tex(struct fd_ringbuffer *ring, struct pipe_surface *psurf
CP_LOAD_STATE_0_NUM_UNIT(1));
OUT_RING(ring, CP_LOAD_STATE_1_STATE_TYPE(ST_CONSTANTS) |
CP_LOAD_STATE_1_EXT_SRC_ADDR(0));
- OUT_RELOC(ring, rsc->bo, 0, 0);
+ OUT_RELOC(ring, rsc->bo, 0, 0, 0);
}
void
@@ -322,7 +322,7 @@ fd3_emit_vertex_bufs(struct fd_ringbuffer *ring,
COND(switchnext, A3XX_VFD_FETCH_INSTR_0_SWITCHNEXT) |
A3XX_VFD_FETCH_INSTR_0_INDEXCODE(i) |
A3XX_VFD_FETCH_INSTR_0_STEPRATE(1));
- OUT_RELOC(ring, rsc->bo, vbufs[i].offset, 0);
+ OUT_RELOC(ring, rsc->bo, vbufs[i].offset, 0, 0);
OUT_PKT0(ring, REG_A3XX_VFD_DECODE_INSTR(i), 1);
OUT_RING(ring, A3XX_VFD_DECODE_INSTR_CONSTFILL |
@@ -481,12 +481,12 @@ fd3_emit_restore(struct fd_context *ctx)
OUT_PKT0(ring, REG_A3XX_SP_VS_PVT_MEM_CTRL_REG, 3);
OUT_RING(ring, 0x08000001); /* SP_VS_PVT_MEM_CTRL_REG */
- OUT_RELOC(ring, fd3_ctx->vs_pvt_mem, 0, 0); /* SP_VS_PVT_MEM_ADDR_REG */
+ OUT_RELOC(ring, fd3_ctx->vs_pvt_mem, 0,0,0); /* SP_VS_PVT_MEM_ADDR_REG */
OUT_RING(ring, 0x00000000); /* SP_VS_PVT_MEM_SIZE_REG */
OUT_PKT0(ring, REG_A3XX_SP_FS_PVT_MEM_CTRL_REG, 3);
OUT_RING(ring, 0x08000001); /* SP_FS_PVT_MEM_CTRL_REG */
- OUT_RELOC(ring, fd3_ctx->fs_pvt_mem, 0, 0); /* SP_FS_PVT_MEM_ADDR_REG */
+ OUT_RELOC(ring, fd3_ctx->fs_pvt_mem, 0,0,0); /* SP_FS_PVT_MEM_ADDR_REG */
OUT_RING(ring, 0x00000000); /* SP_FS_PVT_MEM_SIZE_REG */
OUT_PKT0(ring, REG_A3XX_PC_VERTEX_REUSE_BLOCK_CNTL, 1);
@@ -549,7 +549,7 @@ fd3_emit_restore(struct fd_context *ctx)
OUT_RING(ring, 0x00000001); /* UCHE_CACHE_MODE_CONTROL_REG */
OUT_PKT0(ring, REG_A3XX_VSC_SIZE_ADDRESS, 1);
- OUT_RELOC(ring, fd3_ctx->vsc_size_mem, 0, 0); /* VSC_SIZE_ADDRESS */
+ OUT_RELOC(ring, fd3_ctx->vsc_size_mem, 0, 0, 0); /* VSC_SIZE_ADDRESS */
OUT_PKT0(ring, REG_A3XX_GRAS_CL_CLIP_CNTL, 1);
OUT_RING(ring, 0x00000000); /* GRAS_CL_CLIP_CNTL */
diff --git a/src/gallium/drivers/freedreno/a3xx/fd3_gmem.c b/src/gallium/drivers/freedreno/a3xx/fd3_gmem.c
index b9d0580..8d2df47 100644
--- a/src/gallium/drivers/freedreno/a3xx/fd3_gmem.c
+++ b/src/gallium/drivers/freedreno/a3xx/fd3_gmem.c
@@ -89,7 +89,7 @@ emit_mrt(struct fd_ringbuffer *ring, unsigned nr_bufs,
if (bin_w || (i >= nr_bufs)) {
OUT_RING(ring, A3XX_RB_MRT_BUF_BASE_COLOR_BUF_BASE(base));
} else {
- OUT_RELOCS(ring, res->bo, 0, 0, -1);
+ OUT_RELOCW(ring, res->bo, 0, 0, -1);
}
OUT_PKT0(ring, REG_A3XX_SP_FS_IMAGE_OUTPUT_REG(i), 1);
@@ -116,7 +116,7 @@ emit_gmem2mem_surf(struct fd_ringbuffer *ring,
OUT_RING(ring, A3XX_RB_COPY_CONTROL_MSAA_RESOLVE(MSAA_ONE) |
A3XX_RB_COPY_CONTROL_MODE(mode) |
A3XX_RB_COPY_CONTROL_GMEM_BASE(base));
- OUT_RELOCS(ring, rsc->bo, 0, 0, -1); /* RB_COPY_DEST_BASE */
+ OUT_RELOCW(ring, rsc->bo, 0, 0, -1); /* RB_COPY_DEST_BASE */
OUT_RING(ring, A3XX_RB_COPY_DEST_PITCH_PITCH(rsc->pitch * rsc->cpp));
OUT_RING(ring, A3XX_RB_COPY_DEST_INFO_TILE(LINEAR) |
A3XX_RB_COPY_DEST_INFO_FORMAT(fd3_pipe2color(psurf->format)) |
@@ -272,7 +272,7 @@ fd3_emit_tile_mem2gmem(struct fd_context *ctx, uint32_t xoff, uint32_t yoff,
y1 = ((float)yoff + bin_h) / ((float)pfb->height);
OUT_PKT3(ring, CP_MEM_WRITE, 5);
- OUT_RELOC(ring, fd_resource(fd3_ctx->blit_texcoord_vbuf)->bo, 0, 0);
+ OUT_RELOC(ring, fd_resource(fd3_ctx->blit_texcoord_vbuf)->bo, 0, 0, 0);
OUT_RING(ring, fui(x0));
OUT_RING(ring, fui(y0));
OUT_RING(ring, fui(x1));
@@ -395,7 +395,7 @@ update_vsc_pipe(struct fd_context *ctx)
A3XX_VSC_PIPE_CONFIG_Y(0) |
A3XX_VSC_PIPE_CONFIG_W(gmem->nbins_x) |
A3XX_VSC_PIPE_CONFIG_H(gmem->nbins_y));
- OUT_RELOC(ring, bo, 0, 0); /* VSC_PIPE[0].DATA_ADDRESS */
+ OUT_RELOC(ring, bo, 0, 0, 0); /* VSC_PIPE[0].DATA_ADDRESS */
OUT_RING(ring, fd_bo_size(bo) - 32); /* VSC_PIPE[0].DATA_LENGTH */
for (i = 1; i < 8; i++) {
diff --git a/src/gallium/drivers/freedreno/a3xx/fd3_program.c b/src/gallium/drivers/freedreno/a3xx/fd3_program.c
index 259c2dd..c6c51b1 100644
--- a/src/gallium/drivers/freedreno/a3xx/fd3_program.c
+++ b/src/gallium/drivers/freedreno/a3xx/fd3_program.c
@@ -320,7 +320,7 @@ fd3_program_emit(struct fd_ringbuffer *ring,
OUT_PKT0(ring, REG_A3XX_SP_VS_OBJ_OFFSET_REG, 2);
OUT_RING(ring, A3XX_SP_VS_OBJ_OFFSET_REG_CONSTOBJECTOFFSET(0) |
A3XX_SP_VS_OBJ_OFFSET_REG_SHADEROBJOFFSET(0));
- OUT_RELOC(ring, vp->bo, 0, 0); /* SP_VS_OBJ_START_REG */
+ OUT_RELOC(ring, vp->bo, 0, 0, 0); /* SP_VS_OBJ_START_REG */
#endif
OUT_PKT0(ring, REG_A3XX_SP_FS_LENGTH_REG, 1);
@@ -345,7 +345,7 @@ fd3_program_emit(struct fd_ringbuffer *ring,
OUT_PKT0(ring, REG_A3XX_SP_FS_OBJ_OFFSET_REG, 2);
OUT_RING(ring, A3XX_SP_FS_OBJ_OFFSET_REG_CONSTOBJECTOFFSET(128) |
A3XX_SP_FS_OBJ_OFFSET_REG_SHADEROBJOFFSET(128 - fp->instrlen));
- OUT_RELOC(ring, fp->bo, 0, 0); /* SP_FS_OBJ_START_REG */
+ OUT_RELOC(ring, fp->bo, 0, 0, 0); /* SP_FS_OBJ_START_REG */
#endif
OUT_PKT0(ring, REG_A3XX_SP_FS_FLAT_SHAD_MODE_REG_0, 2);
diff --git a/src/gallium/drivers/freedreno/freedreno_draw.c b/src/gallium/drivers/freedreno/freedreno_draw.c
index d4f8d34..4a98ab4 100644
--- a/src/gallium/drivers/freedreno/freedreno_draw.c
+++ b/src/gallium/drivers/freedreno/freedreno_draw.c
@@ -104,7 +104,7 @@ fd_draw_emit(struct fd_context *ctx, const struct pipe_draw_info *info)
src_sel, idx_type, IGNORE_VISIBILITY));
OUT_RING(ring, info->count); /* NumIndices */
if (info->indexed) {
- OUT_RELOC(ring, idx_bo, idx_offset, 0);
+ OUT_RELOC(ring, idx_bo, idx_offset, 0, 0);
OUT_RING (ring, idx_size);
}
}
diff --git a/src/gallium/drivers/freedreno/freedreno_resource.c b/src/gallium/drivers/freedreno/freedreno_resource.c
index 1b1eaa5..3e051ea 100644
--- a/src/gallium/drivers/freedreno/freedreno_resource.c
+++ b/src/gallium/drivers/freedreno/freedreno_resource.c
@@ -59,6 +59,9 @@ fd_resource_transfer_unmap(struct pipe_context *pctx,
struct pipe_transfer *ptrans)
{
struct fd_context *ctx = fd_context(pctx);
+ struct fd_resource *rsc = fd_resource(ptrans->resource);
+ if (!(ptrans->usage & PIPE_TRANSFER_UNSYNCHRONIZED))
+ fd_bo_cpu_fini(rsc->bo);
pipe_resource_reference(&ptrans->resource, NULL);
util_slab_free(&ctx->transfer_pool, ptrans);
}
@@ -74,12 +77,13 @@ fd_resource_transfer_map(struct pipe_context *pctx,
struct fd_resource *rsc = fd_resource(prsc);
struct pipe_transfer *ptrans = util_slab_alloc(&ctx->transfer_pool);
enum pipe_format format = prsc->format;
+ uint32_t op = 0;
char *buf;
if (!ptrans)
return NULL;
- /* util_slap_alloc() doesn't zero: */
+ /* util_slab_alloc() doesn't zero: */
memset(ptrans, 0, sizeof(*ptrans));
pipe_resource_reference(&ptrans->resource, prsc);
@@ -90,7 +94,8 @@ fd_resource_transfer_map(struct pipe_context *pctx,
ptrans->layer_stride = ptrans->stride;
/* some state trackers (at least XA) don't do this.. */
- fd_resource_transfer_flush_region(pctx, ptrans, box);
+ if (!(usage & PIPE_TRANSFER_FLUSH_EXPLICIT))
+ fd_resource_transfer_flush_region(pctx, ptrans, box);
buf = fd_bo_map(rsc->bo);
if (!buf) {
@@ -98,6 +103,15 @@ fd_resource_transfer_map(struct pipe_context *pctx,
return NULL;
}
+ if (usage & PIPE_TRANSFER_READ)
+ op |= DRM_FREEDRENO_PREP_READ;
+
+ if (usage & PIPE_TRANSFER_WRITE)
+ op |= DRM_FREEDRENO_PREP_WRITE;
+
+ if (!(usage & PIPE_TRANSFER_UNSYNCHRONIZED))
+ fd_bo_cpu_prep(rsc->bo, ctx->screen->pipe, op);
+
*pptrans = ptrans;
return buf +
diff --git a/src/gallium/drivers/freedreno/freedreno_util.h b/src/gallium/drivers/freedreno/freedreno_util.h
index 9f10686..7bbbe80 100644
--- a/src/gallium/drivers/freedreno/freedreno_util.h
+++ b/src/gallium/drivers/freedreno/freedreno_util.h
@@ -104,25 +104,36 @@ OUT_RING(struct fd_ringbuffer *ring, uint32_t data)
static inline void
OUT_RELOC(struct fd_ringbuffer *ring, struct fd_bo *bo,
- uint32_t offset, uint32_t or)
+ uint32_t offset, uint32_t or, int32_t shift)
{
if (LOG_DWORDS) {
- DBG("ring[%p]: OUT_RELOC %04x: %p+%u", ring,
- (uint32_t)(ring->cur - ring->last_start), bo, offset);
+ DBG("ring[%p]: OUT_RELOC %04x: %p+%u << %d", ring,
+ (uint32_t)(ring->cur - ring->last_start), bo, offset, shift);
}
- fd_ringbuffer_emit_reloc(ring, bo, offset, or);
+ fd_ringbuffer_reloc(ring, &(struct fd_reloc){
+ .bo = bo,
+ .flags = FD_RELOC_READ,
+ .offset = offset,
+ .or = or,
+ .shift = shift,
+ });
}
-/* shifted reloc: */
static inline void
-OUT_RELOCS(struct fd_ringbuffer *ring, struct fd_bo *bo,
+OUT_RELOCW(struct fd_ringbuffer *ring, struct fd_bo *bo,
uint32_t offset, uint32_t or, int32_t shift)
{
if (LOG_DWORDS) {
- DBG("ring[%p]: OUT_RELOCS %04x: %p+%u << %d", ring,
+ DBG("ring[%p]: OUT_RELOC %04x: %p+%u << %d", ring,
(uint32_t)(ring->cur - ring->last_start), bo, offset, shift);
}
- fd_ringbuffer_emit_reloc_shift(ring, bo, offset, or, shift);
+ fd_ringbuffer_reloc(ring, &(struct fd_reloc){
+ .bo = bo,
+ .flags = FD_RELOC_READ | FD_RELOC_WRITE,
+ .offset = offset,
+ .or = or,
+ .shift = shift,
+ });
}
static inline void BEGIN_RING(struct fd_ringbuffer *ring, uint32_t ndwords)
@@ -155,7 +166,7 @@ OUT_IB(struct fd_ringbuffer *ring, struct fd_ringmarker *start,
struct fd_ringmarker *end)
{
OUT_PKT3(ring, CP_INDIRECT_BUFFER_PFD, 2);
- fd_ringbuffer_emit_reloc_ring(ring, start);
+ fd_ringbuffer_emit_reloc_ring(ring, start, end);
OUT_RING(ring, fd_ringmarker_dwords(start, end));
}
--
1.8.4.2

View File

@ -48,12 +48,12 @@
%define _default_patch_fuzz 2
%define gitdate 20131113
%define gitdate 20131114
#% define snapshot
Summary: Mesa graphics libraries
Name: mesa
Version: 9.2.2
Version: 9.2.3
Release: 1.%{gitdate}%{?dist}
License: MIT
Group: System Environment/Libraries
@ -77,6 +77,25 @@ Patch15: mesa-9.2-hardware-float.patch
Patch16: mesa-9.2-no-useless-vdpau.patch
Patch20: mesa-9.2-evergreen-big-endian.patch
# https://bugs.freedesktop.org/show_bug.cgi?id=71573
Patch21: 0001-freedreno-a3xx-fix-color-inversion-on-mem-gmem-resto.patch
Patch22: 0002-freedreno-a3xx-fix-viewport-on-gmem-mem-resolve.patch
Patch23: 0003-freedreno-add-debug-option-to-disable-scissor-optimi.patch
Patch24: 0004-freedreno-update-register-headers.patch
Patch25: 0005-freedreno-a3xx-some-texture-fixes.patch
Patch26: 0006-freedreno-a3xx-compiler-fix-CMP.patch
Patch27: 0007-freedreno-a3xx-compiler-handle-saturate-on-dst.patch
Patch28: 0008-freedreno-a3xx-compiler-use-max_reg-rather-than-file.patch
Patch29: 0009-freedreno-a3xx-compiler-cat4-cannot-use-const-reg-as.patch
Patch30: 0010-freedreno-fix-segfault-when-no-color-buffer-bound.patch
Patch31: 0011-freedreno-a3xx-compiler-make-compiler-errors-more-us.patch
Patch32: 0012-freedreno-a3xx-compiler-bit-of-re-arrange-cleanup.patch
Patch33: 0013-freedreno-a3xx-compiler-fix-SGT-SLT-etc.patch
Patch34: 0014-freedreno-a3xx-don-t-leak-so-much.patch
Patch35: 0015-freedreno-a3xx-compiler-better-const-handling.patch
Patch36: 0016-freedreno-a3xx-compiler-handle-sync-flags-better.patch
Patch37: 0017-freedreno-updates-for-msm-drm-kms-driver.patch
BuildRequires: pkgconfig autoconf automake libtool
%if %{with_hardware}
BuildRequires: kernel-headers
@ -302,6 +321,24 @@ grep -q ^/ src/gallium/auxiliary/vl/vl_decoder.c && exit 1
%patch16 -p1 -b .vdpau
%patch20 -p1 -b .egbe
%patch21 -p1
%patch22 -p1
%patch23 -p1
%patch24 -p1
%patch25 -p1
%patch26 -p1
%patch27 -p1
%patch28 -p1
%patch29 -p1
%patch30 -p1
%patch31 -p1
%patch32 -p1
%patch33 -p1
%patch34 -p1
%patch35 -p1
%patch36 -p1
%patch37 -p1
%if 0%{with_private_llvm}
sed -i 's/llvm-config/mesa-private-llvm-config-%{__isa_bits}/g' configure.ac
sed -i 's/`$LLVM_CONFIG --version`/&-mesa/' configure.ac
@ -600,6 +637,9 @@ rm -rf $RPM_BUILD_ROOT
%endif
%changelog
* Thu Nov 14 2013 Igor Gnatenko <i.gnatenko.brain@gmail.com> - 9.2.3-1.20131114
- 9.2.3 upstream release
* Wed Nov 13 2013 Igor Gnatenko <i.gnatenko.brain@gmail.com> - 9.2.2-1.20131113
- 9.2.2 upstream release + fixes from git 9.2 branch

View File

@ -1 +1 @@
eafb41bd8c3160972e192b1d502ad8b6 mesa-20131113.tar.xz
54f46fc070303e0d467779ab39103d58 mesa-20131114.tar.xz