1
0
forked from rpms/mesa
mesa/0013-freedreno-a3xx-compiler-fix-SGT-SLT-etc.patch
Igor Gnatenko 8002493cec 9.2.3 upstream release
Signed-off-by: Igor Gnatenko <i.gnatenko.brain@gmail.com>
2013-11-14 17:11:00 +04:00

232 lines
7.7 KiB
Diff

From c20aa295ec0e1f7b70986a32ef2d74e5097cf640 Mon Sep 17 00:00:00 2001
From: Rob Clark <robclark@freedesktop.org>
Date: Sat, 24 Aug 2013 13:02:53 -0400
Subject: [PATCH 13/17] freedreno/a3xx/compiler: fix SGT/SLT/etc
The cmps.f.* instruction doesn't actually seem to give a float 1.0 or
0.0 output. It either needs a cov.u16f16 or add.s + sel.f16. This
makes SGT/SLT/etc more similar to CMP, so handle them in trans_cmp().
This fixes a bunch of piglit tests.
Signed-off-by: Rob Clark <robclark@freedesktop.org>
---
src/gallium/drivers/freedreno/a3xx/fd3_compiler.c | 154 ++++++++++++++++++----
1 file changed, 125 insertions(+), 29 deletions(-)
diff --git a/src/gallium/drivers/freedreno/a3xx/fd3_compiler.c b/src/gallium/drivers/freedreno/a3xx/fd3_compiler.c
index b5cdda8..477053b 100644
--- a/src/gallium/drivers/freedreno/a3xx/fd3_compiler.c
+++ b/src/gallium/drivers/freedreno/a3xx/fd3_compiler.c
@@ -851,7 +851,39 @@ trans_samp(const struct instr_translater *t,
regmask_set(ctx->needs_sy, r);
}
-/* CMP(a,b,c) = (a < 0) ? b : c */
+/*
+ * SEQ(a,b) = (a == b) ? 1.0 : 0.0
+ * cmps.f.eq tmp0, b, a
+ * cov.u16f16 dst, tmp0
+ *
+ * SNE(a,b) = (a != b) ? 1.0 : 0.0
+ * cmps.f.eq tmp0, b, a
+ * add.s tmp0, tmp0, -1
+ * sel.f16 dst, {0.0}, tmp0, {1.0}
+ *
+ * SGE(a,b) = (a >= b) ? 1.0 : 0.0
+ * cmps.f.ge tmp0, a, b
+ * cov.u16f16 dst, tmp0
+ *
+ * SLE(a,b) = (a <= b) ? 1.0 : 0.0
+ * cmps.f.ge tmp0, b, a
+ * cov.u16f16 dst, tmp0
+ *
+ * SGT(a,b) = (a > b) ? 1.0 : 0.0
+ * cmps.f.ge tmp0, b, a
+ * add.s tmp0, tmp0, -1
+ * sel.f16 dst, {0.0}, tmp0, {1.0}
+ *
+ * SLT(a,b) = (a < b) ? 1.0 : 0.0
+ * cmps.f.ge tmp0, a, b
+ * add.s tmp0, tmp0, -1
+ * sel.f16 dst, {0.0}, tmp0, {1.0}
+ *
+ * CMP(a,b,c) = (a < 0.0) ? b : c
+ * cmps.f.ge tmp0, a, {0.0}
+ * add.s tmp0, tmp0, -1
+ * sel.f16 dst, c, tmp0, b
+ */
static void
trans_cmp(const struct instr_translater *t,
struct fd3_compile_context *ctx,
@@ -860,34 +892,97 @@ trans_cmp(const struct instr_translater *t,
struct ir3_instruction *instr;
struct tgsi_dst_register tmp_dst;
struct tgsi_src_register *tmp_src;
- struct tgsi_src_register constval;
- /* final instruction uses original src1 and src2, so we need get_dst() */
+ struct tgsi_src_register constval0, constval1;
+ /* final instruction for CMP() uses orig src1 and src2: */
struct tgsi_dst_register *dst = get_dst(ctx, inst);
+ struct tgsi_src_register *a0, *a1;
+ unsigned condition;
tmp_src = get_internal_temp(ctx, &tmp_dst);
- /* cmps.f.ge tmp, src0, 0.0 */
+ switch (t->tgsi_opc) {
+ case TGSI_OPCODE_SEQ:
+ case TGSI_OPCODE_SNE:
+ a0 = &inst->Src[1].Register; /* b */
+ a1 = &inst->Src[0].Register; /* a */
+ condition = IR3_COND_EQ;
+ break;
+ case TGSI_OPCODE_SGE:
+ case TGSI_OPCODE_SLT:
+ a0 = &inst->Src[0].Register; /* a */
+ a1 = &inst->Src[1].Register; /* b */
+ condition = IR3_COND_GE;
+ break;
+ case TGSI_OPCODE_SLE:
+ case TGSI_OPCODE_SGT:
+ a0 = &inst->Src[1].Register; /* b */
+ a1 = &inst->Src[0].Register; /* a */
+ condition = IR3_COND_GE;
+ break;
+ case TGSI_OPCODE_CMP:
+ get_immediate(ctx, &constval0, fui(0.0));
+ a0 = &inst->Src[0].Register; /* a */
+ a1 = &constval0; /* {0.0} */
+ condition = IR3_COND_GE;
+ break;
+ default:
+ compile_assert(ctx, 0);
+ return;
+ }
+
+ /* NOTE: seems blob compiler will move a const to a gpr if both
+ * src args to cmps.f are const. Need to check if this applies
+ * to other instructions..
+ */
+ if (is_const(a0) && is_const(a1))
+ a0 = get_unconst(ctx, a0);
+
+ /* cmps.f.ge tmp, a0, a1 */
instr = ir3_instr_create(ctx->ir, 2, OPC_CMPS_F);
- instr->cat2.condition = IR3_COND_GE;
- get_immediate(ctx, &constval, fui(0.0));
- vectorize(ctx, instr, &tmp_dst, 2,
- &inst->Src[0].Register, 0,
- &constval, 0);
+ instr->cat2.condition = condition;
+ vectorize(ctx, instr, &tmp_dst, 2, a0, 0, a1, 0);
- /* add.s tmp, tmp, -1 */
- instr = ir3_instr_create(ctx->ir, 2, OPC_ADD_S);
- instr->repeat = 3;
- add_dst_reg(ctx, instr, &tmp_dst, 0);
- add_src_reg(ctx, instr, tmp_src, 0)->flags |= IR3_REG_R;
- ir3_reg_create(instr, 0, IR3_REG_IMMED)->iim_val = -1;
+ switch (t->tgsi_opc) {
+ case TGSI_OPCODE_SEQ:
+ case TGSI_OPCODE_SGE:
+ case TGSI_OPCODE_SLE:
+ /* cov.u16f16 dst, tmp0 */
+ instr = ir3_instr_create(ctx->ir, 1, 0);
+ instr->cat1.src_type = get_utype(ctx);
+ instr->cat1.dst_type = get_ftype(ctx);
+ vectorize(ctx, instr, dst, 1, tmp_src, 0);
+ break;
+ case TGSI_OPCODE_SNE:
+ case TGSI_OPCODE_SGT:
+ case TGSI_OPCODE_SLT:
+ case TGSI_OPCODE_CMP:
+ /* add.s tmp, tmp, -1 */
+ instr = ir3_instr_create(ctx->ir, 2, OPC_ADD_S);
+ instr->repeat = 3;
+ add_dst_reg(ctx, instr, &tmp_dst, 0);
+ add_src_reg(ctx, instr, tmp_src, 0)->flags |= IR3_REG_R;
+ ir3_reg_create(instr, 0, IR3_REG_IMMED)->iim_val = -1;
+
+ if (t->tgsi_opc == TGSI_OPCODE_CMP) {
+ /* sel.{f32,f16} dst, src2, tmp, src1 */
+ instr = ir3_instr_create(ctx->ir, 3,
+ ctx->so->half_precision ? OPC_SEL_F16 : OPC_SEL_F32);
+ vectorize(ctx, instr, dst, 3,
+ &inst->Src[2].Register, 0,
+ tmp_src, 0,
+ &inst->Src[1].Register, 0);
+ } else {
+ get_immediate(ctx, &constval0, fui(0.0));
+ get_immediate(ctx, &constval1, fui(1.0));
+ /* sel.{f32,f16} dst, {0.0}, tmp0, {1.0} */
+ instr = ir3_instr_create(ctx->ir, 3,
+ ctx->so->half_precision ? OPC_SEL_F16 : OPC_SEL_F32);
+ vectorize(ctx, instr, dst, 3,
+ &constval0, 0, tmp_src, 0, &constval1, 0);
+ }
- /* sel.{f32,f16} dst, src2, tmp, src1 */
- instr = ir3_instr_create(ctx->ir, 3, ctx->so->half_precision ?
- OPC_SEL_F16 : OPC_SEL_F32);
- vectorize(ctx, instr, dst, 3,
- &inst->Src[2].Register, 0,
- tmp_src, 0,
- &inst->Src[1].Register, 0);
+ break;
+ }
put_dst(ctx, inst, dst);
}
@@ -948,8 +1043,8 @@ trans_if(const struct instr_translater *t,
instr = ir3_instr_create(ctx->ir, 2, OPC_CMPS_F);
ir3_reg_create(instr, regid(REG_P0, 0), 0);
- add_src_reg(ctx, instr, &constval, constval.SwizzleX);
add_src_reg(ctx, instr, src, src->SwizzleX);
+ add_src_reg(ctx, instr, &constval, constval.SwizzleX);
instr->cat2.condition = IR3_COND_EQ;
instr = ir3_instr_create(ctx->ir, 0, OPC_BR);
@@ -1033,10 +1128,6 @@ instr_cat2(const struct instr_translater *t,
instr = ir3_instr_create(ctx->ir, 2, t->opc);
switch (t->tgsi_opc) {
- case TGSI_OPCODE_SLT:
- case TGSI_OPCODE_SGE:
- instr->cat2.condition = t->arg;
- break;
case TGSI_OPCODE_ABS:
src0_flags = IR3_REG_ABS;
break;
@@ -1135,12 +1226,11 @@ static const struct instr_translater translaters[TGSI_OPCODE_LAST] = {
INSTR(DPH, trans_dotp, .arg = 3), /* almost like DP3 */
INSTR(MIN, instr_cat2, .opc = OPC_MIN_F),
INSTR(MAX, instr_cat2, .opc = OPC_MAX_F),
- INSTR(SLT, instr_cat2, .opc = OPC_CMPS_F, .arg = IR3_COND_LT),
- INSTR(SGE, instr_cat2, .opc = OPC_CMPS_F, .arg = IR3_COND_GE),
INSTR(MAD, instr_cat3, .opc = OPC_MAD_F32, .hopc = OPC_MAD_F16),
INSTR(LRP, trans_lrp),
INSTR(FRC, trans_frac),
INSTR(FLR, instr_cat2, .opc = OPC_FLOOR_F),
+ INSTR(ARL, instr_cat2, .opc = OPC_FLOOR_F),
INSTR(EX2, instr_cat4, .opc = OPC_EXP2),
INSTR(LG2, instr_cat4, .opc = OPC_LOG2),
INSTR(POW, trans_pow),
@@ -1149,6 +1239,12 @@ static const struct instr_translater translaters[TGSI_OPCODE_LAST] = {
INSTR(SIN, instr_cat4, .opc = OPC_COS),
INSTR(TEX, trans_samp, .opc = OPC_SAM, .arg = TGSI_OPCODE_TEX),
INSTR(TXP, trans_samp, .opc = OPC_SAM, .arg = TGSI_OPCODE_TXP),
+ INSTR(SGT, trans_cmp),
+ INSTR(SLT, trans_cmp),
+ INSTR(SGE, trans_cmp),
+ INSTR(SLE, trans_cmp),
+ INSTR(SNE, trans_cmp),
+ INSTR(SEQ, trans_cmp),
INSTR(CMP, trans_cmp),
INSTR(IF, trans_if),
INSTR(ELSE, trans_else),
--
1.8.4.2