mesa/0016-freedreno-a3xx-compiler-handle-sync-flags-better.patch
Igor Gnatenko 8002493cec 9.2.3 upstream release
Signed-off-by: Igor Gnatenko <i.gnatenko.brain@gmail.com>
2013-11-14 17:11:00 +04:00

129 lines
3.9 KiB
Diff

From 4f0be333e7ee93fbb006c5570a594e49b4441731 Mon Sep 17 00:00:00 2001
From: Rob Clark <robclark@freedesktop.org>
Date: Tue, 27 Aug 2013 19:24:53 -0400
Subject: [PATCH 16/17] freedreno/a3xx/compiler: handle sync flags better
We need to set the flag on all the .xyzw components that are written by
the instruction, not just on .x. Otherwise a later use of rN.y (for
example) will not trigger the appropriate sync bit to be set.
Signed-off-by: Rob Clark <robclark@freedesktop.org>
---
src/gallium/drivers/freedreno/a3xx/fd3_compiler.c | 50 +++++++++++++++--------
1 file changed, 34 insertions(+), 16 deletions(-)
diff --git a/src/gallium/drivers/freedreno/a3xx/fd3_compiler.c b/src/gallium/drivers/freedreno/a3xx/fd3_compiler.c
index dcdd2d9..5115411 100644
--- a/src/gallium/drivers/freedreno/a3xx/fd3_compiler.c
+++ b/src/gallium/drivers/freedreno/a3xx/fd3_compiler.c
@@ -62,10 +62,16 @@ static unsigned regmask_idx(struct ir3_register *reg)
return num;
}
-static void regmask_set(regmask_t regmask, struct ir3_register *reg)
+static void regmask_set(regmask_t regmask, struct ir3_register *reg,
+ unsigned wrmask)
{
- unsigned idx = regmask_idx(reg);
- regmask[idx / 8] |= 1 << (idx % 8);
+ unsigned i;
+ for (i = 0; i < 4; i++) {
+ if (wrmask & (1 << i)) {
+ unsigned idx = regmask_idx(reg) + i;
+ regmask[idx / 8] |= 1 << (idx % 8);
+ }
+ }
}
static unsigned regmask_get(regmask_t regmask, struct ir3_register *reg)
@@ -216,6 +222,24 @@ struct instr_translater {
unsigned arg;
};
+static unsigned
+src_flags(struct fd3_compile_context *ctx, struct ir3_register *reg)
+{
+ unsigned flags = 0;
+
+ if (regmask_get(ctx->needs_ss, reg)) {
+ flags |= IR3_INSTR_SS;
+ memset(ctx->needs_ss, 0, sizeof(ctx->needs_ss));
+ }
+
+ if (regmask_get(ctx->needs_sy, reg)) {
+ flags |= IR3_INSTR_SY;
+ memset(ctx->needs_sy, 0, sizeof(ctx->needs_sy));
+ }
+
+ return flags;
+}
+
static struct ir3_register *
add_dst_reg(struct fd3_compile_context *ctx, struct ir3_instruction *instr,
const struct tgsi_dst_register *dst, unsigned chan)
@@ -279,15 +303,7 @@ add_src_reg(struct fd3_compile_context *ctx, struct ir3_instruction *instr,
reg = ir3_reg_create(instr, regid(num, chan), flags);
- if (regmask_get(ctx->needs_ss, reg)) {
- instr->flags |= IR3_INSTR_SS;
- memset(ctx->needs_ss, 0, sizeof(ctx->needs_ss));
- }
-
- if (regmask_get(ctx->needs_sy, reg)) {
- instr->flags |= IR3_INSTR_SY;
- memset(ctx->needs_sy, 0, sizeof(ctx->needs_sy));
- }
+ instr->flags |= src_flags(ctx, reg);
return reg;
}
@@ -567,6 +583,7 @@ vectorize(struct fd3_compile_context *ctx, struct ir3_instruction *instr,
cur->regs[j+1]->num =
regid(cur->regs[j+1]->num >> 2,
src_swiz(src, i));
+ cur->flags |= src_flags(ctx, cur->regs[j+1]);
}
va_end(ap);
}
@@ -753,7 +770,7 @@ trans_pow(const struct instr_translater *t,
instr = ir3_instr_create(ctx->ir, 4, OPC_LOG2);
r = add_dst_reg(ctx, instr, &tmp_dst, 0);
add_src_reg(ctx, instr, src0, src0->SwizzleX);
- regmask_set(ctx->needs_ss, r);
+ regmask_set(ctx->needs_ss, r, TGSI_WRITEMASK_X);
/* mul.f Rtmp, Rtmp, Rsrc1 */
instr = ir3_instr_create(ctx->ir, 2, OPC_MUL_F);
@@ -771,7 +788,7 @@ trans_pow(const struct instr_translater *t,
instr = ir3_instr_create(ctx->ir, 4, OPC_EXP2);
r = add_dst_reg(ctx, instr, &tmp_dst, 0);
add_src_reg(ctx, instr, tmp_src, 0);
- regmask_set(ctx->needs_ss, r);
+ regmask_set(ctx->needs_ss, r, TGSI_WRITEMASK_X);
create_mov(ctx, dst, tmp_src);
}
@@ -855,7 +872,7 @@ trans_samp(const struct instr_translater *t,
add_src_reg(ctx, instr, coord, coord->SwizzleX);
- regmask_set(ctx->needs_sy, r);
+ regmask_set(ctx->needs_sy, r, r->wrmask);
}
/*
@@ -1236,7 +1253,8 @@ instr_cat4(const struct instr_translater *t,
vectorize(ctx, instr, dst, 1, src, 0);
- regmask_set(ctx->needs_ss, instr->regs[0]);
+ regmask_set(ctx->needs_ss, instr->regs[0],
+ inst->Dst[0].Register.WriteMask);
put_dst(ctx, inst, dst);
}
--
1.8.4.2