9.2.3 upstream release
Signed-off-by: Igor Gnatenko <i.gnatenko.brain@gmail.com>
This commit is contained in:
		
							parent
							
								
									ea1c6e950a
								
							
						
					
					
						commit
						8002493cec
					
				
							
								
								
									
										1
									
								
								.gitignore
									
									
									
									
										vendored
									
									
								
							
							
						
						
									
										1
									
								
								.gitignore
									
									
									
									
										vendored
									
									
								
							| @ -57,3 +57,4 @@ mesa-20100720.tar.bz2 | |||||||
| /mesa-20130902.tar.xz | /mesa-20130902.tar.xz | ||||||
| /mesa-20130919.tar.xz | /mesa-20130919.tar.xz | ||||||
| /mesa-20131113.tar.xz | /mesa-20131113.tar.xz | ||||||
|  | /mesa-20131114.tar.xz | ||||||
|  | |||||||
| @ -0,0 +1,31 @@ | |||||||
|  | From 2d844be97fd5b6b0f02a94d4bb194c0bd19de6f9 Mon Sep 17 00:00:00 2001 | ||||||
|  | From: Rob Clark <robclark@freedesktop.org> | ||||||
|  | Date: Sat, 13 Jul 2013 13:07:46 -0400 | ||||||
|  | Subject: [PATCH 01/17] freedreno/a3xx: fix color inversion on mem->gmem | ||||||
|  |  restore | ||||||
|  | 
 | ||||||
|  | Signed-off-by: Rob Clark <robclark@freedesktop.org> | ||||||
|  | ---
 | ||||||
|  |  src/gallium/drivers/freedreno/a3xx/fd3_emit.c | 6 +++--- | ||||||
|  |  1 file changed, 3 insertions(+), 3 deletions(-) | ||||||
|  | 
 | ||||||
|  | diff --git a/src/gallium/drivers/freedreno/a3xx/fd3_emit.c b/src/gallium/drivers/freedreno/a3xx/fd3_emit.c
 | ||||||
|  | index a7a4bf7..b8436c9 100644
 | ||||||
|  | --- a/src/gallium/drivers/freedreno/a3xx/fd3_emit.c
 | ||||||
|  | +++ b/src/gallium/drivers/freedreno/a3xx/fd3_emit.c
 | ||||||
|  | @@ -279,9 +279,9 @@ fd3_emit_gmem_restore_tex(struct fd_ringbuffer *ring, struct pipe_surface *psurf
 | ||||||
|  |  			CP_LOAD_STATE_1_EXT_SRC_ADDR(0)); | ||||||
|  |  	OUT_RING(ring, A3XX_TEX_CONST_0_FMT(fd3_pipe2tex(psurf->format)) | | ||||||
|  |  			0x40000000 | // XXX | ||||||
|  | -			fd3_tex_swiz(psurf->format,  PIPE_SWIZZLE_BLUE, PIPE_SWIZZLE_GREEN,
 | ||||||
|  | -					PIPE_SWIZZLE_RED, PIPE_SWIZZLE_ALPHA));
 | ||||||
|  | -	OUT_RING(ring, A3XX_TEX_CONST_1_FETCHSIZE(fd3_pipe2fetchsize(psurf->format)) |
 | ||||||
|  | +			fd3_tex_swiz(psurf->format,  PIPE_SWIZZLE_RED, PIPE_SWIZZLE_GREEN,
 | ||||||
|  | +					PIPE_SWIZZLE_BLUE, PIPE_SWIZZLE_ALPHA));
 | ||||||
|  | +	OUT_RING(ring, A3XX_TEX_CONST_1_FETCHSIZE(TFETCH_DISABLE) |
 | ||||||
|  |  			A3XX_TEX_CONST_1_WIDTH(psurf->width) | | ||||||
|  |  			A3XX_TEX_CONST_1_HEIGHT(psurf->height)); | ||||||
|  |  	OUT_RING(ring, A3XX_TEX_CONST_2_PITCH(rsc->pitch * rsc->cpp) | | ||||||
|  | -- 
 | ||||||
|  | 1.8.4.2 | ||||||
|  | 
 | ||||||
							
								
								
									
										32
									
								
								0002-freedreno-a3xx-fix-viewport-on-gmem-mem-resolve.patch
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										32
									
								
								0002-freedreno-a3xx-fix-viewport-on-gmem-mem-resolve.patch
									
									
									
									
									
										Normal file
									
								
							| @ -0,0 +1,32 @@ | |||||||
|  | From b2a32254d65c356604bbffda6e771dca0509e9ed Mon Sep 17 00:00:00 2001 | ||||||
|  | From: Rob Clark <robclark@freedesktop.org> | ||||||
|  | Date: Sat, 13 Jul 2013 13:08:22 -0400 | ||||||
|  | Subject: [PATCH 02/17] freedreno/a3xx: fix viewport on gmem->mem resolve | ||||||
|  | 
 | ||||||
|  | Signed-off-by: Rob Clark <robclark@freedesktop.org> | ||||||
|  | ---
 | ||||||
|  |  src/gallium/drivers/freedreno/a3xx/fd3_gmem.c | 8 ++++++++ | ||||||
|  |  1 file changed, 8 insertions(+) | ||||||
|  | 
 | ||||||
|  | diff --git a/src/gallium/drivers/freedreno/a3xx/fd3_gmem.c b/src/gallium/drivers/freedreno/a3xx/fd3_gmem.c
 | ||||||
|  | index 1cb170a..9050166 100644
 | ||||||
|  | --- a/src/gallium/drivers/freedreno/a3xx/fd3_gmem.c
 | ||||||
|  | +++ b/src/gallium/drivers/freedreno/a3xx/fd3_gmem.c
 | ||||||
|  | @@ -168,6 +168,14 @@ fd3_emit_tile_gmem2mem(struct fd_context *ctx, uint32_t xoff, uint32_t yoff,
 | ||||||
|  |  	OUT_PKT0(ring, REG_A3XX_GRAS_CL_CLIP_CNTL, 1); | ||||||
|  |  	OUT_RING(ring, 0x00000000);   /* GRAS_CL_CLIP_CNTL */ | ||||||
|  |   | ||||||
|  | +	OUT_PKT0(ring, REG_A3XX_GRAS_CL_VPORT_XOFFSET, 6);
 | ||||||
|  | +	OUT_RING(ring, A3XX_GRAS_CL_VPORT_XOFFSET((float)pfb->width/2.0 - 0.5));
 | ||||||
|  | +	OUT_RING(ring, A3XX_GRAS_CL_VPORT_XSCALE((float)pfb->width/2.0));
 | ||||||
|  | +	OUT_RING(ring, A3XX_GRAS_CL_VPORT_YOFFSET((float)pfb->height/2.0 - 0.5));
 | ||||||
|  | +	OUT_RING(ring, A3XX_GRAS_CL_VPORT_YSCALE(-(float)pfb->height/2.0));
 | ||||||
|  | +	OUT_RING(ring, A3XX_GRAS_CL_VPORT_ZOFFSET(0.0));
 | ||||||
|  | +	OUT_RING(ring, A3XX_GRAS_CL_VPORT_ZSCALE(1.0));
 | ||||||
|  | +
 | ||||||
|  |  	OUT_PKT0(ring, REG_A3XX_RB_MODE_CONTROL, 1); | ||||||
|  |  	OUT_RING(ring, A3XX_RB_MODE_CONTROL_RENDER_MODE(RB_RESOLVE_PASS) | | ||||||
|  |  			A3XX_RB_MODE_CONTROL_MARB_CACHE_SPLIT_MODE); | ||||||
|  | -- 
 | ||||||
|  | 1.8.4.2 | ||||||
|  | 
 | ||||||
							
								
								
									
										113
									
								
								0003-freedreno-add-debug-option-to-disable-scissor-optimi.patch
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										113
									
								
								0003-freedreno-add-debug-option-to-disable-scissor-optimi.patch
									
									
									
									
									
										Normal file
									
								
							| @ -0,0 +1,113 @@ | |||||||
|  | From 8b167d34bebcc9aaf67838be71cc3272728d4fe1 Mon Sep 17 00:00:00 2001 | ||||||
|  | From: Rob Clark <robclark@freedesktop.org> | ||||||
|  | Date: Wed, 29 May 2013 10:16:33 -0400 | ||||||
|  | Subject: [PATCH 03/17] freedreno: add debug option to disable scissor | ||||||
|  |  optimization | ||||||
|  | 
 | ||||||
|  | Useful for testing and debugging. | ||||||
|  | 
 | ||||||
|  | Signed-off-by: Rob Clark <robclark@freedesktop.org> | ||||||
|  | ---
 | ||||||
|  |  src/gallium/drivers/freedreno/freedreno_gmem.c   | 26 +++++++++++++++--------- | ||||||
|  |  src/gallium/drivers/freedreno/freedreno_screen.c |  1 + | ||||||
|  |  src/gallium/drivers/freedreno/freedreno_util.h   |  9 ++++---- | ||||||
|  |  3 files changed, 22 insertions(+), 14 deletions(-) | ||||||
|  | 
 | ||||||
|  | diff --git a/src/gallium/drivers/freedreno/freedreno_gmem.c b/src/gallium/drivers/freedreno/freedreno_gmem.c
 | ||||||
|  | index 12633bd..197d1d9 100644
 | ||||||
|  | --- a/src/gallium/drivers/freedreno/freedreno_gmem.c
 | ||||||
|  | +++ b/src/gallium/drivers/freedreno/freedreno_gmem.c
 | ||||||
|  | @@ -71,7 +71,8 @@ calculate_tiles(struct fd_context *ctx)
 | ||||||
|  |  { | ||||||
|  |  	struct fd_gmem_stateobj *gmem = &ctx->gmem; | ||||||
|  |  	struct pipe_scissor_state *scissor = &ctx->max_scissor; | ||||||
|  | -	uint32_t cpp = util_format_get_blocksize(ctx->framebuffer.cbufs[0]->format);
 | ||||||
|  | +	struct pipe_framebuffer_state *pfb = &ctx->framebuffer;
 | ||||||
|  | +	uint32_t cpp = util_format_get_blocksize(pfb->cbufs[0]->format);
 | ||||||
|  |  	uint32_t gmem_size = ctx->screen->gmemsize_bytes; | ||||||
|  |  	uint32_t minx, miny, width, height; | ||||||
|  |  	uint32_t nbins_x = 1, nbins_y = 1; | ||||||
|  | @@ -84,10 +85,17 @@ calculate_tiles(struct fd_context *ctx)
 | ||||||
|  |  		return; | ||||||
|  |  	} | ||||||
|  |   | ||||||
|  | -	minx = scissor->minx & ~31; /* round down to multiple of 32 */
 | ||||||
|  | -	miny = scissor->miny & ~31;
 | ||||||
|  | -	width = scissor->maxx - minx;
 | ||||||
|  | -	height = scissor->maxy - miny;
 | ||||||
|  | +	if (fd_mesa_debug & FD_DBG_DSCIS) {
 | ||||||
|  | +		minx = 0;
 | ||||||
|  | +		miny = 0;
 | ||||||
|  | +		width = pfb->width;
 | ||||||
|  | +		height = pfb->height;
 | ||||||
|  | +	} else {
 | ||||||
|  | +		minx = scissor->minx & ~31; /* round down to multiple of 32 */
 | ||||||
|  | +		miny = scissor->miny & ~31;
 | ||||||
|  | +		width = scissor->maxx - minx;
 | ||||||
|  | +		height = scissor->maxy - miny;
 | ||||||
|  | +	}
 | ||||||
|  |   | ||||||
|  |  // TODO we probably could optimize this a bit if we know that | ||||||
|  |  // Z or stencil is not enabled for any of the draw calls.. | ||||||
|  | @@ -132,9 +140,7 @@ static void
 | ||||||
|  |  render_tiles(struct fd_context *ctx) | ||||||
|  |  { | ||||||
|  |  	struct fd_gmem_stateobj *gmem = &ctx->gmem; | ||||||
|  | -	uint32_t i, yoff = 0;
 | ||||||
|  | -
 | ||||||
|  | -	yoff= gmem->miny;
 | ||||||
|  | +	uint32_t i, yoff = gmem->miny;
 | ||||||
|  |   | ||||||
|  |  	ctx->emit_tile_init(ctx); | ||||||
|  |   | ||||||
|  | @@ -143,13 +149,13 @@ render_tiles(struct fd_context *ctx)
 | ||||||
|  |  		uint32_t bh = gmem->bin_h; | ||||||
|  |   | ||||||
|  |  		/* clip bin height: */ | ||||||
|  | -		bh = MIN2(bh, gmem->height - yoff);
 | ||||||
|  | +		bh = MIN2(bh, gmem->miny + gmem->height - yoff);
 | ||||||
|  |   | ||||||
|  |  		for (j = 0; j < gmem->nbins_x; j++) { | ||||||
|  |  			uint32_t bw = gmem->bin_w; | ||||||
|  |   | ||||||
|  |  			/* clip bin width: */ | ||||||
|  | -			bw = MIN2(bw, gmem->width - xoff);
 | ||||||
|  | +			bw = MIN2(bw, gmem->minx + gmem->width - xoff);
 | ||||||
|  |   | ||||||
|  |  			DBG("bin_h=%d, yoff=%d, bin_w=%d, xoff=%d", | ||||||
|  |  					bh, yoff, bw, xoff); | ||||||
|  | diff --git a/src/gallium/drivers/freedreno/freedreno_screen.c b/src/gallium/drivers/freedreno/freedreno_screen.c
 | ||||||
|  | index 52d51c2..36ef8b0 100644
 | ||||||
|  | --- a/src/gallium/drivers/freedreno/freedreno_screen.c
 | ||||||
|  | +++ b/src/gallium/drivers/freedreno/freedreno_screen.c
 | ||||||
|  | @@ -60,6 +60,7 @@ static const struct debug_named_value debug_options[] = {
 | ||||||
|  |  		{"disasm",    FD_DBG_DISASM, "Dump TGSI and adreno shader disassembly"}, | ||||||
|  |  		{"dclear",    FD_DBG_DCLEAR, "Mark all state dirty after clear"}, | ||||||
|  |  		{"dgmem",     FD_DBG_DGMEM,  "Mark all state dirty after GMEM tile pass"}, | ||||||
|  | +		{"dscis",     FD_DBG_DSCIS,  "Disable scissor optimization"},
 | ||||||
|  |  		DEBUG_NAMED_VALUE_END | ||||||
|  |  }; | ||||||
|  |   | ||||||
|  | diff --git a/src/gallium/drivers/freedreno/freedreno_util.h b/src/gallium/drivers/freedreno/freedreno_util.h
 | ||||||
|  | index f18f0fe..b49cdfc 100644
 | ||||||
|  | --- a/src/gallium/drivers/freedreno/freedreno_util.h
 | ||||||
|  | +++ b/src/gallium/drivers/freedreno/freedreno_util.h
 | ||||||
|  | @@ -47,10 +47,11 @@ enum adreno_pa_su_sc_draw fd_polygon_mode(unsigned mode);
 | ||||||
|  |  enum adreno_stencil_op fd_stencil_op(unsigned op); | ||||||
|  |   | ||||||
|  |   | ||||||
|  | -#define FD_DBG_MSGS   0x1
 | ||||||
|  | -#define FD_DBG_DISASM 0x2
 | ||||||
|  | -#define FD_DBG_DCLEAR 0x4
 | ||||||
|  | -#define FD_DBG_DGMEM  0x8
 | ||||||
|  | +#define FD_DBG_MSGS     0x01
 | ||||||
|  | +#define FD_DBG_DISASM   0x02
 | ||||||
|  | +#define FD_DBG_DCLEAR   0x04
 | ||||||
|  | +#define FD_DBG_DGMEM    0x08
 | ||||||
|  | +#define FD_DBG_DSCIS    0x10
 | ||||||
|  |  extern int fd_mesa_debug; | ||||||
|  |   | ||||||
|  |  #define DBG(fmt, ...) \ | ||||||
|  | -- 
 | ||||||
|  | 1.8.4.2 | ||||||
|  | 
 | ||||||
							
								
								
									
										1238
									
								
								0004-freedreno-update-register-headers.patch
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										1238
									
								
								0004-freedreno-update-register-headers.patch
									
									
									
									
									
										Normal file
									
								
							
										
											
												File diff suppressed because it is too large
												Load Diff
											
										
									
								
							
							
								
								
									
										65
									
								
								0005-freedreno-a3xx-some-texture-fixes.patch
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										65
									
								
								0005-freedreno-a3xx-some-texture-fixes.patch
									
									
									
									
									
										Normal file
									
								
							| @ -0,0 +1,65 @@ | |||||||
|  | From 3da8868b5df98d8544091feeea7b6bb0f736324f Mon Sep 17 00:00:00 2001 | ||||||
|  | From: Rob Clark <robclark@freedesktop.org> | ||||||
|  | Date: Mon, 5 Aug 2013 18:03:33 -0400 | ||||||
|  | Subject: [PATCH 05/17] freedreno/a3xx: some texture fixes | ||||||
|  | 
 | ||||||
|  | Stop hard coding bits that indicate texture type (2d/3d/cube/etc). | ||||||
|  | 
 | ||||||
|  | Signed-off-by: Rob Clark <robclark@freedesktop.org> | ||||||
|  | ---
 | ||||||
|  |  src/gallium/drivers/freedreno/a3xx/fd3_texture.c | 25 +++++++++++++++++++++++- | ||||||
|  |  1 file changed, 24 insertions(+), 1 deletion(-) | ||||||
|  | 
 | ||||||
|  | diff --git a/src/gallium/drivers/freedreno/a3xx/fd3_texture.c b/src/gallium/drivers/freedreno/a3xx/fd3_texture.c
 | ||||||
|  | index ae08b8a..e56325b 100644
 | ||||||
|  | --- a/src/gallium/drivers/freedreno/a3xx/fd3_texture.c
 | ||||||
|  | +++ b/src/gallium/drivers/freedreno/a3xx/fd3_texture.c
 | ||||||
|  | @@ -87,6 +87,7 @@ fd3_sampler_state_create(struct pipe_context *pctx,
 | ||||||
|  |  	so->base = *cso; | ||||||
|  |   | ||||||
|  |  	so->texsamp0 = | ||||||
|  | +			COND(!cso->normalized_coords, A3XX_TEX_SAMP_0_UNNORM_COORDS) |
 | ||||||
|  |  			A3XX_TEX_SAMP_0_XY_MAG(tex_filter(cso->mag_img_filter)) | | ||||||
|  |  			A3XX_TEX_SAMP_0_XY_MIN(tex_filter(cso->min_img_filter)) | | ||||||
|  |  			A3XX_TEX_SAMP_0_WRAP_S(tex_clamp(cso->wrap_s)) | | ||||||
|  | @@ -97,6 +98,28 @@ fd3_sampler_state_create(struct pipe_context *pctx,
 | ||||||
|  |  	return so; | ||||||
|  |  } | ||||||
|  |   | ||||||
|  | +static enum a3xx_tex_type
 | ||||||
|  | +tex_type(unsigned target)
 | ||||||
|  | +{
 | ||||||
|  | +	switch (target) {
 | ||||||
|  | +	default:
 | ||||||
|  | +		assert(0);
 | ||||||
|  | +	case PIPE_BUFFER:
 | ||||||
|  | +	case PIPE_TEXTURE_1D:
 | ||||||
|  | +	case PIPE_TEXTURE_1D_ARRAY:
 | ||||||
|  | +		return A3XX_TEX_1D;
 | ||||||
|  | +	case PIPE_TEXTURE_RECT:
 | ||||||
|  | +	case PIPE_TEXTURE_2D:
 | ||||||
|  | +	case PIPE_TEXTURE_2D_ARRAY:
 | ||||||
|  | +		return A3XX_TEX_2D;
 | ||||||
|  | +	case PIPE_TEXTURE_3D:
 | ||||||
|  | +		return A3XX_TEX_3D;
 | ||||||
|  | +	case PIPE_TEXTURE_CUBE:
 | ||||||
|  | +	case PIPE_TEXTURE_CUBE_ARRAY:
 | ||||||
|  | +		return A3XX_TEX_CUBE;
 | ||||||
|  | +	}
 | ||||||
|  | +}
 | ||||||
|  | +
 | ||||||
|  |  static struct pipe_sampler_view * | ||||||
|  |  fd3_sampler_view_create(struct pipe_context *pctx, struct pipe_resource *prsc, | ||||||
|  |  		const struct pipe_sampler_view *cso) | ||||||
|  | @@ -116,7 +139,7 @@ fd3_sampler_view_create(struct pipe_context *pctx, struct pipe_resource *prsc,
 | ||||||
|  |  	so->tex_resource =  rsc; | ||||||
|  |   | ||||||
|  |  	so->texconst0 = | ||||||
|  | -			0x40000000 | /* ??? */
 | ||||||
|  | +			A3XX_TEX_CONST_0_TYPE(tex_type(prsc->target)) |
 | ||||||
|  |  			A3XX_TEX_CONST_0_FMT(fd3_pipe2tex(cso->format)) | | ||||||
|  |  			fd3_tex_swiz(cso->format, cso->swizzle_r, cso->swizzle_g, | ||||||
|  |  						cso->swizzle_b, cso->swizzle_a); | ||||||
|  | -- 
 | ||||||
|  | 1.8.4.2 | ||||||
|  | 
 | ||||||
							
								
								
									
										45
									
								
								0006-freedreno-a3xx-compiler-fix-CMP.patch
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										45
									
								
								0006-freedreno-a3xx-compiler-fix-CMP.patch
									
									
									
									
									
										Normal file
									
								
							| @ -0,0 +1,45 @@ | |||||||
|  | From 83e65320012f327d2e8f1573443b2e20f059e76f Mon Sep 17 00:00:00 2001 | ||||||
|  | From: Rob Clark <robclark@freedesktop.org> | ||||||
|  | Date: Tue, 20 Aug 2013 13:46:30 -0400 | ||||||
|  | Subject: [PATCH 06/17] freedreno/a3xx/compiler: fix CMP | ||||||
|  | 
 | ||||||
|  | The 1st src to add.s needs (r) flag (repeat), otherwise it will end up: | ||||||
|  | 
 | ||||||
|  |   add.s dst.xyzw, tmp.xxxx, -1 | ||||||
|  | 
 | ||||||
|  | instead of: | ||||||
|  | 
 | ||||||
|  |   add.s dst.xyzw, tmp.xyzw, -1 | ||||||
|  | 
 | ||||||
|  | Also, if we are using a temporary dst to avoid clobbering one of the src | ||||||
|  | registers, we actually need to use that as the dst for the sel | ||||||
|  | instruction. | ||||||
|  | 
 | ||||||
|  | Signed-off-by: Rob Clark <robclark@freedesktop.org> | ||||||
|  | ---
 | ||||||
|  |  src/gallium/drivers/freedreno/a3xx/fd3_compiler.c | 4 ++-- | ||||||
|  |  1 file changed, 2 insertions(+), 2 deletions(-) | ||||||
|  | 
 | ||||||
|  | diff --git a/src/gallium/drivers/freedreno/a3xx/fd3_compiler.c b/src/gallium/drivers/freedreno/a3xx/fd3_compiler.c
 | ||||||
|  | index eabe21c..07bede4 100644
 | ||||||
|  | --- a/src/gallium/drivers/freedreno/a3xx/fd3_compiler.c
 | ||||||
|  | +++ b/src/gallium/drivers/freedreno/a3xx/fd3_compiler.c
 | ||||||
|  | @@ -790,13 +790,13 @@ trans_cmp(const struct instr_translater *t,
 | ||||||
|  |  	instr = ir3_instr_create(ctx->ir, 2, OPC_ADD_S); | ||||||
|  |  	instr->repeat = 3; | ||||||
|  |  	add_dst_reg(ctx, instr, &tmp_dst, 0); | ||||||
|  | -	add_src_reg(ctx, instr, &tmp_src, 0);
 | ||||||
|  | +	add_src_reg(ctx, instr, &tmp_src, 0)->flags |= IR3_REG_R;
 | ||||||
|  |  	ir3_reg_create(instr, 0, IR3_REG_IMMED)->iim_val = -1; | ||||||
|  |   | ||||||
|  |  	/* sel.{f32,f16} dst, src2, tmp, src1 */ | ||||||
|  |  	instr = ir3_instr_create(ctx->ir, 3, ctx->so->half_precision ? | ||||||
|  |  			OPC_SEL_F16 : OPC_SEL_F32); | ||||||
|  | -	vectorize(ctx, instr, &inst->Dst[0].Register, 3,
 | ||||||
|  | +	vectorize(ctx, instr, dst, 3,
 | ||||||
|  |  			&inst->Src[2].Register, 0, | ||||||
|  |  			&tmp_src, 0, | ||||||
|  |  			&inst->Src[1].Register, 0); | ||||||
|  | -- 
 | ||||||
|  | 1.8.4.2 | ||||||
|  | 
 | ||||||
							
								
								
									
										98
									
								
								0007-freedreno-a3xx-compiler-handle-saturate-on-dst.patch
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										98
									
								
								0007-freedreno-a3xx-compiler-handle-saturate-on-dst.patch
									
									
									
									
									
										Normal file
									
								
							| @ -0,0 +1,98 @@ | |||||||
|  | From c83387438633233ae6bcc55e1f4eaa2793ce7449 Mon Sep 17 00:00:00 2001 | ||||||
|  | From: Rob Clark <robclark@freedesktop.org> | ||||||
|  | Date: Tue, 20 Aug 2013 13:51:35 -0400 | ||||||
|  | Subject: [PATCH 07/17] freedreno/a3xx/compiler: handle saturate on dst | ||||||
|  | 
 | ||||||
|  | Sometimes things other than color dst need saturating, like if there is | ||||||
|  | a 'clamp(foo, 0.0, 1.0)'.  So for saturated dst add the extra | ||||||
|  | instructions to fix up dst. | ||||||
|  | 
 | ||||||
|  | Signed-off-by: Rob Clark <robclark@freedesktop.org> | ||||||
|  | ---
 | ||||||
|  |  src/gallium/drivers/freedreno/a3xx/fd3_compiler.c | 49 +++++++++++++++++++++++ | ||||||
|  |  1 file changed, 49 insertions(+) | ||||||
|  | 
 | ||||||
|  | diff --git a/src/gallium/drivers/freedreno/a3xx/fd3_compiler.c b/src/gallium/drivers/freedreno/a3xx/fd3_compiler.c
 | ||||||
|  | index 07bede4..e2c7853 100644
 | ||||||
|  | --- a/src/gallium/drivers/freedreno/a3xx/fd3_compiler.c
 | ||||||
|  | +++ b/src/gallium/drivers/freedreno/a3xx/fd3_compiler.c
 | ||||||
|  | @@ -131,6 +131,11 @@ struct fd3_compile_context {
 | ||||||
|  |  	struct tgsi_src_register tmp_src; | ||||||
|  |  }; | ||||||
|  |   | ||||||
|  | +
 | ||||||
|  | +static void vectorize(struct fd3_compile_context *ctx,
 | ||||||
|  | +		struct ir3_instruction *instr, struct tgsi_dst_register *dst,
 | ||||||
|  | +		int nsrcs, ...);
 | ||||||
|  | +
 | ||||||
|  |  static unsigned | ||||||
|  |  compile_init(struct fd3_compile_context *ctx, struct fd3_shader_stateobj *so, | ||||||
|  |  		const struct tgsi_token *tokens) | ||||||
|  | @@ -234,6 +239,10 @@ add_src_reg(struct fd3_compile_context *ctx, struct ir3_instruction *instr,
 | ||||||
|  |  		flags |= IR3_REG_CONST; | ||||||
|  |  		num = src->Index + ctx->base_reg[src->File]; | ||||||
|  |  		break; | ||||||
|  | +	case TGSI_FILE_OUTPUT:
 | ||||||
|  | +		/* NOTE: we should only end up w/ OUTPUT file for things like
 | ||||||
|  | +		 * clamp()'ing saturated dst instructions
 | ||||||
|  | +		 */
 | ||||||
|  |  	case TGSI_FILE_INPUT: | ||||||
|  |  	case TGSI_FILE_TEMPORARY: | ||||||
|  |  		num = src->Index + ctx->base_reg[src->File]; | ||||||
|  | @@ -407,6 +416,35 @@ create_mov(struct fd3_compile_context *ctx, struct tgsi_dst_register *dst,
 | ||||||
|  |   | ||||||
|  |  } | ||||||
|  |   | ||||||
|  | +static void
 | ||||||
|  | +create_clamp(struct fd3_compile_context *ctx, struct tgsi_dst_register *dst,
 | ||||||
|  | +		struct tgsi_src_register *minval, struct tgsi_src_register *maxval)
 | ||||||
|  | +{
 | ||||||
|  | +	struct ir3_instruction *instr;
 | ||||||
|  | +	struct tgsi_src_register src;
 | ||||||
|  | +
 | ||||||
|  | +	src_from_dst(&src, dst);
 | ||||||
|  | +
 | ||||||
|  | +	instr = ir3_instr_create(ctx->ir, 2, OPC_MAX_F);
 | ||||||
|  | +	vectorize(ctx, instr, dst, 2, &src, 0, minval, 0);
 | ||||||
|  | +
 | ||||||
|  | +	instr = ir3_instr_create(ctx->ir, 2, OPC_MIN_F);
 | ||||||
|  | +	vectorize(ctx, instr, dst, 2, &src, 0, maxval, 0);
 | ||||||
|  | +}
 | ||||||
|  | +
 | ||||||
|  | +static void
 | ||||||
|  | +create_clamp_imm(struct fd3_compile_context *ctx,
 | ||||||
|  | +		struct tgsi_dst_register *dst,
 | ||||||
|  | +		uint32_t minval, uint32_t maxval)
 | ||||||
|  | +{
 | ||||||
|  | +	struct tgsi_src_register minconst, maxconst;
 | ||||||
|  | +
 | ||||||
|  | +	get_immediate(ctx, &minconst, minval);
 | ||||||
|  | +	get_immediate(ctx, &maxconst, maxval);
 | ||||||
|  | +
 | ||||||
|  | +	create_clamp(ctx, dst, &minconst, &maxconst);
 | ||||||
|  | +}
 | ||||||
|  | +
 | ||||||
|  |  static struct tgsi_dst_register * | ||||||
|  |  get_dst(struct fd3_compile_context *ctx, struct tgsi_full_instruction *inst) | ||||||
|  |  { | ||||||
|  | @@ -1229,6 +1267,17 @@ compile_instructions(struct fd3_compile_context *ctx)
 | ||||||
|  |  				assert(0); | ||||||
|  |  			} | ||||||
|  |   | ||||||
|  | +			switch (inst->Instruction.Saturate) {
 | ||||||
|  | +			case TGSI_SAT_ZERO_ONE:
 | ||||||
|  | +				create_clamp_imm(ctx, &inst->Dst[0].Register,
 | ||||||
|  | +						fui(0.0), fui(1.0));
 | ||||||
|  | +				break;
 | ||||||
|  | +			case TGSI_SAT_MINUS_PLUS_ONE:
 | ||||||
|  | +				create_clamp_imm(ctx, &inst->Dst[0].Register,
 | ||||||
|  | +						fui(-1.0), fui(1.0));
 | ||||||
|  | +				break;
 | ||||||
|  | +			}
 | ||||||
|  | +
 | ||||||
|  |  			break; | ||||||
|  |  		} | ||||||
|  |  		default: | ||||||
|  | -- 
 | ||||||
|  | 1.8.4.2 | ||||||
|  | 
 | ||||||
| @ -0,0 +1,59 @@ | |||||||
|  | From 5394a872f30022f64e6b2b58ef983b1fe5f6c08d Mon Sep 17 00:00:00 2001 | ||||||
|  | From: Rob Clark <robclark@freedesktop.org> | ||||||
|  | Date: Tue, 20 Aug 2013 13:54:01 -0400 | ||||||
|  | Subject: [PATCH 08/17] freedreno/a3xx/compiler: use max_reg rather than | ||||||
|  |  file_count | ||||||
|  | 
 | ||||||
|  | Our current (rather naive) register assignment is based on mapping | ||||||
|  | different register files (INPUT, OUTPUT, TEMP, CONST, etc) based on the
 | ||||||
|  | max register index of the preceding file.  But in some cases, the lowest | ||||||
|  | used register in a file might not be zero.  In which case | ||||||
|  | file_count[file] != file_max[file] + 1. | ||||||
|  | 
 | ||||||
|  | Signed-off-by: Rob Clark <robclark@freedesktop.org> | ||||||
|  | ---
 | ||||||
|  |  src/gallium/drivers/freedreno/a3xx/fd3_compiler.c | 14 +++++++------- | ||||||
|  |  1 file changed, 7 insertions(+), 7 deletions(-) | ||||||
|  | 
 | ||||||
|  | diff --git a/src/gallium/drivers/freedreno/a3xx/fd3_compiler.c b/src/gallium/drivers/freedreno/a3xx/fd3_compiler.c
 | ||||||
|  | index e2c7853..dc5c873 100644
 | ||||||
|  | --- a/src/gallium/drivers/freedreno/a3xx/fd3_compiler.c
 | ||||||
|  | +++ b/src/gallium/drivers/freedreno/a3xx/fd3_compiler.c
 | ||||||
|  | @@ -159,19 +159,19 @@ compile_init(struct fd3_compile_context *ctx, struct fd3_shader_stateobj *so,
 | ||||||
|  |  	/* Immediates go after constants: */ | ||||||
|  |  	ctx->base_reg[TGSI_FILE_CONSTANT]  = 0; | ||||||
|  |  	ctx->base_reg[TGSI_FILE_IMMEDIATE] = | ||||||
|  | -			ctx->info.file_count[TGSI_FILE_CONSTANT];
 | ||||||
|  | +			ctx->info.file_max[TGSI_FILE_CONSTANT] + 1;
 | ||||||
|  |   | ||||||
|  |  	/* Temporaries after outputs after inputs: */ | ||||||
|  |  	ctx->base_reg[TGSI_FILE_INPUT]     = 0; | ||||||
|  |  	ctx->base_reg[TGSI_FILE_OUTPUT]    = | ||||||
|  | -			ctx->info.file_count[TGSI_FILE_INPUT];
 | ||||||
|  | +			ctx->info.file_max[TGSI_FILE_INPUT] + 1;
 | ||||||
|  |  	ctx->base_reg[TGSI_FILE_TEMPORARY] = | ||||||
|  | -			ctx->info.file_count[TGSI_FILE_INPUT] +
 | ||||||
|  | -			ctx->info.file_count[TGSI_FILE_OUTPUT];
 | ||||||
|  | +			ctx->info.file_max[TGSI_FILE_INPUT] + 1 +
 | ||||||
|  | +			ctx->info.file_max[TGSI_FILE_OUTPUT] + 1;
 | ||||||
|  |   | ||||||
|  |  	so->first_immediate = ctx->base_reg[TGSI_FILE_IMMEDIATE]; | ||||||
|  | -	ctx->immediate_idx = 4 * (ctx->info.file_count[TGSI_FILE_CONSTANT] +
 | ||||||
|  | -			ctx->info.file_count[TGSI_FILE_IMMEDIATE]);
 | ||||||
|  | +	ctx->immediate_idx = 4 * (ctx->info.file_max[TGSI_FILE_CONSTANT] + 1 +
 | ||||||
|  | +			ctx->info.file_max[TGSI_FILE_IMMEDIATE] + 1);
 | ||||||
|  |   | ||||||
|  |  	ret = tgsi_parse_init(&ctx->parser, tokens); | ||||||
|  |  	if (ret != TGSI_PARSE_OK) | ||||||
|  | @@ -309,7 +309,7 @@ get_internal_temp(struct fd3_compile_context *ctx,
 | ||||||
|  |  	/* assign next temporary: */ | ||||||
|  |  	n = ctx->num_internal_temps++; | ||||||
|  |   | ||||||
|  | -	tmp_dst->Index = ctx->info.file_count[TGSI_FILE_TEMPORARY] + n;
 | ||||||
|  | +	tmp_dst->Index = ctx->info.file_max[TGSI_FILE_TEMPORARY] + n + 1;
 | ||||||
|  |   | ||||||
|  |  	src_from_dst(tmp_src, tmp_dst); | ||||||
|  |  } | ||||||
|  | -- 
 | ||||||
|  | 1.8.4.2 | ||||||
|  | 
 | ||||||
							
								
								
									
										104
									
								
								0009-freedreno-a3xx-compiler-cat4-cannot-use-const-reg-as.patch
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										104
									
								
								0009-freedreno-a3xx-compiler-cat4-cannot-use-const-reg-as.patch
									
									
									
									
									
										Normal file
									
								
							| @ -0,0 +1,104 @@ | |||||||
|  | From f3a7e28fe47ec547c1c9b561b04af208ae2f0f04 Mon Sep 17 00:00:00 2001 | ||||||
|  | From: Rob Clark <robclark@freedesktop.org> | ||||||
|  | Date: Tue, 20 Aug 2013 13:57:22 -0400 | ||||||
|  | Subject: [PATCH 09/17] freedreno/a3xx/compiler: cat4 cannot use const reg as | ||||||
|  |  src | ||||||
|  | 
 | ||||||
|  | Category 4 instructions (rsq, rcp, sqrt, etc) seem to be unable to take | ||||||
|  | a const register as src.  In these cases we need to move the src to a | ||||||
|  | temporary gpr first. | ||||||
|  | 
 | ||||||
|  | This is the second case of such a restriction, where the instruction | ||||||
|  | encoding appears to support a const src, but in fact the hw appears to | ||||||
|  | ignore that bit.  So split things out into a helper that can be re-used | ||||||
|  | for any instructions which have this limitation. | ||||||
|  | 
 | ||||||
|  | Signed-off-by: Rob Clark <robclark@freedesktop.org> | ||||||
|  | ---
 | ||||||
|  |  src/gallium/drivers/freedreno/a3xx/fd3_compiler.c | 37 +++++++++++++++++------ | ||||||
|  |  1 file changed, 27 insertions(+), 10 deletions(-) | ||||||
|  | 
 | ||||||
|  | diff --git a/src/gallium/drivers/freedreno/a3xx/fd3_compiler.c b/src/gallium/drivers/freedreno/a3xx/fd3_compiler.c
 | ||||||
|  | index dc5c873..772c7d2 100644
 | ||||||
|  | --- a/src/gallium/drivers/freedreno/a3xx/fd3_compiler.c
 | ||||||
|  | +++ b/src/gallium/drivers/freedreno/a3xx/fd3_compiler.c
 | ||||||
|  | @@ -135,6 +135,8 @@ struct fd3_compile_context {
 | ||||||
|  |  static void vectorize(struct fd3_compile_context *ctx, | ||||||
|  |  		struct ir3_instruction *instr, struct tgsi_dst_register *dst, | ||||||
|  |  		int nsrcs, ...); | ||||||
|  | +static void create_mov(struct fd3_compile_context *ctx,
 | ||||||
|  | +		struct tgsi_dst_register *dst, struct tgsi_src_register *src);
 | ||||||
|  |   | ||||||
|  |  static unsigned | ||||||
|  |  compile_init(struct fd3_compile_context *ctx, struct fd3_shader_stateobj *so, | ||||||
|  | @@ -374,6 +376,23 @@ get_immediate(struct fd3_compile_context *ctx,
 | ||||||
|  |  	reg->SwizzleW  = swiz2tgsi[swiz]; | ||||||
|  |  } | ||||||
|  |   | ||||||
|  | +/* for instructions that cannot take a const register as src, if needed
 | ||||||
|  | + * generate a move to temporary gpr:
 | ||||||
|  | + */
 | ||||||
|  | +static struct tgsi_src_register *
 | ||||||
|  | +get_unconst(struct fd3_compile_context *ctx, struct tgsi_src_register *src,
 | ||||||
|  | +		struct tgsi_src_register *tmp_src)
 | ||||||
|  | +{
 | ||||||
|  | +	static struct tgsi_dst_register tmp_dst;
 | ||||||
|  | +	if ((src->File == TGSI_FILE_CONSTANT) ||
 | ||||||
|  | +			(src->File == TGSI_FILE_IMMEDIATE)) {
 | ||||||
|  | +		get_internal_temp(ctx, &tmp_dst, tmp_src);
 | ||||||
|  | +		create_mov(ctx, &tmp_dst, src);
 | ||||||
|  | +		src = tmp_src;
 | ||||||
|  | +	}
 | ||||||
|  | +	return src;
 | ||||||
|  | +}
 | ||||||
|  | +
 | ||||||
|  |  static type_t | ||||||
|  |  get_type(struct fd3_compile_context *ctx) | ||||||
|  |  { | ||||||
|  | @@ -1027,8 +1046,7 @@ instr_cat3(const struct instr_translater *t,
 | ||||||
|  |  		struct tgsi_full_instruction *inst) | ||||||
|  |  { | ||||||
|  |  	struct tgsi_dst_register *dst = get_dst(ctx, inst); | ||||||
|  | -	struct tgsi_src_register *src1 = &inst->Src[1].Register;
 | ||||||
|  | -	struct tgsi_dst_register tmp_dst;
 | ||||||
|  | +	struct tgsi_src_register *src1;
 | ||||||
|  |  	struct tgsi_src_register tmp_src; | ||||||
|  |  	struct ir3_instruction *instr; | ||||||
|  |   | ||||||
|  | @@ -1038,12 +1056,7 @@ instr_cat3(const struct instr_translater *t,
 | ||||||
|  |  	 * const.  Not sure if this is a hw bug, or simply that the | ||||||
|  |  	 * disassembler lies. | ||||||
|  |  	 */ | ||||||
|  | -	if ((src1->File == TGSI_FILE_CONSTANT) ||
 | ||||||
|  | -			(src1->File == TGSI_FILE_IMMEDIATE)) {
 | ||||||
|  | -		get_internal_temp(ctx, &tmp_dst, &tmp_src);
 | ||||||
|  | -		create_mov(ctx, &tmp_dst, src1);
 | ||||||
|  | -		src1 = &tmp_src;
 | ||||||
|  | -	}
 | ||||||
|  | +	src1 = get_unconst(ctx, &inst->Src[1].Register, &tmp_src);
 | ||||||
|  |   | ||||||
|  |  	instr = ir3_instr_create(ctx->ir, 3, | ||||||
|  |  			ctx->so->half_precision ? t->hopc : t->opc); | ||||||
|  | @@ -1060,13 +1073,17 @@ instr_cat4(const struct instr_translater *t,
 | ||||||
|  |  		struct tgsi_full_instruction *inst) | ||||||
|  |  { | ||||||
|  |  	struct tgsi_dst_register *dst = get_dst(ctx, inst); | ||||||
|  | +	struct tgsi_src_register *src;
 | ||||||
|  | +	struct tgsi_src_register tmp_src;
 | ||||||
|  |  	struct ir3_instruction *instr; | ||||||
|  |   | ||||||
|  | +	/* seems like blob compiler avoids const as src.. */
 | ||||||
|  | +	src = get_unconst(ctx, &inst->Src[0].Register, &tmp_src);
 | ||||||
|  | +
 | ||||||
|  |  	ir3_instr_create(ctx->ir, 0, OPC_NOP)->repeat = 5; | ||||||
|  |  	instr = ir3_instr_create(ctx->ir, 4, t->opc); | ||||||
|  |   | ||||||
|  | -	vectorize(ctx, instr, dst, 1,
 | ||||||
|  | -			&inst->Src[0].Register, 0);
 | ||||||
|  | +	vectorize(ctx, instr, dst, 1, src, 0);
 | ||||||
|  |   | ||||||
|  |  	regmask_set(ctx->needs_ss, instr->regs[0]); | ||||||
|  |   | ||||||
|  | -- 
 | ||||||
|  | 1.8.4.2 | ||||||
|  | 
 | ||||||
							
								
								
									
										216
									
								
								0010-freedreno-fix-segfault-when-no-color-buffer-bound.patch
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										216
									
								
								0010-freedreno-fix-segfault-when-no-color-buffer-bound.patch
									
									
									
									
									
										Normal file
									
								
							| @ -0,0 +1,216 @@ | |||||||
|  | From 12da4c1a6aa4b2a9cc337f669986a63c59fc3095 Mon Sep 17 00:00:00 2001 | ||||||
|  | From: Rob Clark <robclark@freedesktop.org> | ||||||
|  | Date: Wed, 21 Aug 2013 13:20:05 -0400 | ||||||
|  | Subject: [PATCH 10/17] freedreno: fix segfault when no color buffer bound | ||||||
|  | 
 | ||||||
|  | Don't crash when no color buffer bound.  Something caught when starting | ||||||
|  | to run piglit, fixes a handful of piglit tests. | ||||||
|  | 
 | ||||||
|  | Signed-off-by: Rob Clark <robclark@freedesktop.org> | ||||||
|  | ---
 | ||||||
|  |  src/gallium/drivers/freedreno/a2xx/fd2_gmem.c     |  6 +++--- | ||||||
|  |  src/gallium/drivers/freedreno/a3xx/fd3_gmem.c     | 15 +++++++++++---- | ||||||
|  |  src/gallium/drivers/freedreno/freedreno_context.c |  3 ++- | ||||||
|  |  src/gallium/drivers/freedreno/freedreno_draw.c    |  4 ++-- | ||||||
|  |  src/gallium/drivers/freedreno/freedreno_gmem.c    | 18 +++++++++++------- | ||||||
|  |  src/gallium/drivers/freedreno/freedreno_state.c   |  2 +- | ||||||
|  |  src/gallium/drivers/freedreno/freedreno_util.h    | 10 ++++++++++ | ||||||
|  |  7 files changed, 40 insertions(+), 18 deletions(-) | ||||||
|  | 
 | ||||||
|  | diff --git a/src/gallium/drivers/freedreno/a2xx/fd2_gmem.c b/src/gallium/drivers/freedreno/a2xx/fd2_gmem.c
 | ||||||
|  | index e239eed..93695bc 100644
 | ||||||
|  | --- a/src/gallium/drivers/freedreno/a2xx/fd2_gmem.c
 | ||||||
|  | +++ b/src/gallium/drivers/freedreno/a2xx/fd2_gmem.c
 | ||||||
|  | @@ -337,7 +337,7 @@ fd2_emit_tile_init(struct fd_context *ctx)
 | ||||||
|  |  	struct fd_ringbuffer *ring = ctx->ring; | ||||||
|  |  	struct pipe_framebuffer_state *pfb = &ctx->framebuffer; | ||||||
|  |  	struct fd_gmem_stateobj *gmem = &ctx->gmem; | ||||||
|  | -	enum pipe_format format = pfb->cbufs[0]->format;
 | ||||||
|  | +	enum pipe_format format = pipe_surface_format(pfb->cbufs[0]);
 | ||||||
|  |  	uint32_t reg; | ||||||
|  |   | ||||||
|  |  	OUT_PKT3(ring, CP_SET_CONSTANT, 4); | ||||||
|  | @@ -358,7 +358,7 @@ fd2_emit_tile_prep(struct fd_context *ctx, uint32_t xoff, uint32_t yoff,
 | ||||||
|  |  { | ||||||
|  |  	struct fd_ringbuffer *ring = ctx->ring; | ||||||
|  |  	struct pipe_framebuffer_state *pfb = &ctx->framebuffer; | ||||||
|  | -	enum pipe_format format = pfb->cbufs[0]->format;
 | ||||||
|  | +	enum pipe_format format = pipe_surface_format(pfb->cbufs[0]);
 | ||||||
|  |   | ||||||
|  |  	OUT_PKT3(ring, CP_SET_CONSTANT, 2); | ||||||
|  |  	OUT_RING(ring, CP_REG(REG_A2XX_RB_COLOR_INFO)); | ||||||
|  | @@ -379,7 +379,7 @@ fd2_emit_tile_renderprep(struct fd_context *ctx, uint32_t xoff, uint32_t yoff,
 | ||||||
|  |  { | ||||||
|  |  	struct fd_ringbuffer *ring = ctx->ring; | ||||||
|  |  	struct pipe_framebuffer_state *pfb = &ctx->framebuffer; | ||||||
|  | -	enum pipe_format format = pfb->cbufs[0]->format;
 | ||||||
|  | +	enum pipe_format format = pipe_surface_format(pfb->cbufs[0]);
 | ||||||
|  |   | ||||||
|  |  	OUT_PKT3(ring, CP_SET_CONSTANT, 2); | ||||||
|  |  	OUT_RING(ring, CP_REG(REG_A2XX_RB_COLOR_INFO)); | ||||||
|  | diff --git a/src/gallium/drivers/freedreno/a3xx/fd3_gmem.c b/src/gallium/drivers/freedreno/a3xx/fd3_gmem.c
 | ||||||
|  | index 9050166..b9d0580 100644
 | ||||||
|  | --- a/src/gallium/drivers/freedreno/a3xx/fd3_gmem.c
 | ||||||
|  | +++ b/src/gallium/drivers/freedreno/a3xx/fd3_gmem.c
 | ||||||
|  | @@ -214,8 +214,12 @@ fd3_emit_tile_gmem2mem(struct fd_context *ctx, uint32_t xoff, uint32_t yoff,
 | ||||||
|  |  		}, 1); | ||||||
|  |   | ||||||
|  |  	if (ctx->resolve & (FD_BUFFER_DEPTH | FD_BUFFER_STENCIL)) { | ||||||
|  | -		uint32_t base = depth_base(&ctx->gmem) *
 | ||||||
|  | -				fd_resource(pfb->cbufs[0]->texture)->cpp;
 | ||||||
|  | +		uint32_t base = 0;
 | ||||||
|  | +		if (pfb->cbufs[0]) {
 | ||||||
|  | +			struct fd_resource *rsc =
 | ||||||
|  | +					fd_resource(pfb->cbufs[0]->texture);
 | ||||||
|  | +			base = depth_base(&ctx->gmem) * rsc->cpp;
 | ||||||
|  | +		}
 | ||||||
|  |  		emit_gmem2mem_surf(ring, RB_COPY_DEPTH_STENCIL, base, pfb->zsbuf); | ||||||
|  |  	} | ||||||
|  |   | ||||||
|  | @@ -410,8 +414,11 @@ static void
 | ||||||
|  |  fd3_emit_sysmem_prep(struct fd_context *ctx) | ||||||
|  |  { | ||||||
|  |  	struct pipe_framebuffer_state *pfb = &ctx->framebuffer; | ||||||
|  | -	struct fd_resource *rsc = fd_resource(pfb->cbufs[0]->texture);
 | ||||||
|  |  	struct fd_ringbuffer *ring = ctx->ring; | ||||||
|  | +	uint32_t pitch = 0;
 | ||||||
|  | +
 | ||||||
|  | +	if (pfb->cbufs[0])
 | ||||||
|  | +		pitch = fd_resource(pfb->cbufs[0]->texture)->pitch;
 | ||||||
|  |   | ||||||
|  |  	fd3_emit_restore(ctx); | ||||||
|  |   | ||||||
|  | @@ -422,7 +429,7 @@ fd3_emit_sysmem_prep(struct fd_context *ctx)
 | ||||||
|  |  	emit_mrt(ring, pfb->nr_cbufs, pfb->cbufs, NULL, 0); | ||||||
|  |   | ||||||
|  |  	fd3_emit_rbrc_tile_state(ring, | ||||||
|  | -			A3XX_RB_RENDER_CONTROL_BIN_WIDTH(rsc->pitch));
 | ||||||
|  | +			A3XX_RB_RENDER_CONTROL_BIN_WIDTH(pitch));
 | ||||||
|  |   | ||||||
|  |  	/* setup scissor/offset for current tile: */ | ||||||
|  |  	OUT_PKT0(ring, REG_A3XX_PA_SC_WINDOW_OFFSET, 1); | ||||||
|  | diff --git a/src/gallium/drivers/freedreno/freedreno_context.c b/src/gallium/drivers/freedreno/freedreno_context.c
 | ||||||
|  | index 44d525b..1d03351 100644
 | ||||||
|  | --- a/src/gallium/drivers/freedreno/freedreno_context.c
 | ||||||
|  | +++ b/src/gallium/drivers/freedreno/freedreno_context.c
 | ||||||
|  | @@ -86,7 +86,8 @@ fd_context_render(struct pipe_context *pctx)
 | ||||||
|  |  	ctx->gmem_reason = 0; | ||||||
|  |  	ctx->num_draws = 0; | ||||||
|  |   | ||||||
|  | -	fd_resource(pfb->cbufs[0]->texture)->dirty = false;
 | ||||||
|  | +	if (pfb->cbufs[0])
 | ||||||
|  | +		fd_resource(pfb->cbufs[0]->texture)->dirty = false;
 | ||||||
|  |  	if (pfb->zsbuf) | ||||||
|  |  		fd_resource(pfb->zsbuf->texture)->dirty = false; | ||||||
|  |  } | ||||||
|  | diff --git a/src/gallium/drivers/freedreno/freedreno_draw.c b/src/gallium/drivers/freedreno/freedreno_draw.c
 | ||||||
|  | index b02b8b9..d4f8d34 100644
 | ||||||
|  | --- a/src/gallium/drivers/freedreno/freedreno_draw.c
 | ||||||
|  | +++ b/src/gallium/drivers/freedreno/freedreno_draw.c
 | ||||||
|  | @@ -193,8 +193,8 @@ fd_clear(struct pipe_context *pctx, unsigned buffers,
 | ||||||
|  |  	} | ||||||
|  |   | ||||||
|  |  	DBG("%x depth=%f, stencil=%u (%s/%s)", buffers, depth, stencil, | ||||||
|  | -			util_format_name(pfb->cbufs[0]->format),
 | ||||||
|  | -			pfb->zsbuf ? util_format_name(pfb->zsbuf->format) : "none");
 | ||||||
|  | +		util_format_short_name(pipe_surface_format(pfb->cbufs[0])),
 | ||||||
|  | +		util_format_short_name(pipe_surface_format(pfb->zsbuf)));
 | ||||||
|  |   | ||||||
|  |  	ctx->clear(ctx, buffers, color, depth, stencil); | ||||||
|  |   | ||||||
|  | diff --git a/src/gallium/drivers/freedreno/freedreno_gmem.c b/src/gallium/drivers/freedreno/freedreno_gmem.c
 | ||||||
|  | index 197d1d9..3d959c6 100644
 | ||||||
|  | --- a/src/gallium/drivers/freedreno/freedreno_gmem.c
 | ||||||
|  | +++ b/src/gallium/drivers/freedreno/freedreno_gmem.c
 | ||||||
|  | @@ -72,12 +72,15 @@ calculate_tiles(struct fd_context *ctx)
 | ||||||
|  |  	struct fd_gmem_stateobj *gmem = &ctx->gmem; | ||||||
|  |  	struct pipe_scissor_state *scissor = &ctx->max_scissor; | ||||||
|  |  	struct pipe_framebuffer_state *pfb = &ctx->framebuffer; | ||||||
|  | -	uint32_t cpp = util_format_get_blocksize(pfb->cbufs[0]->format);
 | ||||||
|  |  	uint32_t gmem_size = ctx->screen->gmemsize_bytes; | ||||||
|  |  	uint32_t minx, miny, width, height; | ||||||
|  |  	uint32_t nbins_x = 1, nbins_y = 1; | ||||||
|  |  	uint32_t bin_w, bin_h; | ||||||
|  |  	uint32_t max_width = 992; | ||||||
|  | +	uint32_t cpp = 4;
 | ||||||
|  | +
 | ||||||
|  | +	if (pfb->cbufs[0])
 | ||||||
|  | +		cpp = util_format_get_blocksize(pfb->cbufs[0]->format);
 | ||||||
|  |   | ||||||
|  |  	if ((gmem->cpp == cpp) && | ||||||
|  |  			!memcmp(&gmem->scissor, scissor, sizeof(gmem->scissor))) { | ||||||
|  | @@ -211,15 +214,15 @@ fd_gmem_render_tiles(struct pipe_context *pctx)
 | ||||||
|  |   | ||||||
|  |  	if (sysmem) { | ||||||
|  |  		DBG("rendering sysmem (%s/%s)", | ||||||
|  | -			util_format_name(pfb->cbufs[0]->format),
 | ||||||
|  | -			pfb->zsbuf ? util_format_name(pfb->zsbuf->format) : "none");
 | ||||||
|  | +			util_format_short_name(pipe_surface_format(pfb->cbufs[0])),
 | ||||||
|  | +			util_format_short_name(pipe_surface_format(pfb->zsbuf)));
 | ||||||
|  |  		render_sysmem(ctx); | ||||||
|  |  	} else { | ||||||
|  |  		struct fd_gmem_stateobj *gmem = &ctx->gmem; | ||||||
|  | -		DBG("rendering %dx%d tiles (%s/%s)", gmem->nbins_x, gmem->nbins_y,
 | ||||||
|  | -			util_format_name(pfb->cbufs[0]->format),
 | ||||||
|  | -			pfb->zsbuf ? util_format_name(pfb->zsbuf->format) : "none");
 | ||||||
|  |  		calculate_tiles(ctx); | ||||||
|  | +		DBG("rendering %dx%d tiles (%s/%s)", gmem->nbins_x, gmem->nbins_y,
 | ||||||
|  | +			util_format_short_name(pipe_surface_format(pfb->cbufs[0])),
 | ||||||
|  | +			util_format_short_name(pipe_surface_format(pfb->zsbuf)));
 | ||||||
|  |  		render_tiles(ctx); | ||||||
|  |  	} | ||||||
|  |   | ||||||
|  | @@ -231,7 +234,8 @@ fd_gmem_render_tiles(struct pipe_context *pctx)
 | ||||||
|  |   | ||||||
|  |  	/* update timestamps on render targets: */ | ||||||
|  |  	timestamp = fd_ringbuffer_timestamp(ctx->ring); | ||||||
|  | -	fd_resource(pfb->cbufs[0]->texture)->timestamp = timestamp;
 | ||||||
|  | +	if (pfb->cbufs[0])
 | ||||||
|  | +		fd_resource(pfb->cbufs[0]->texture)->timestamp = timestamp;
 | ||||||
|  |  	if (pfb->zsbuf) | ||||||
|  |  		fd_resource(pfb->zsbuf->texture)->timestamp = timestamp; | ||||||
|  |   | ||||||
|  | diff --git a/src/gallium/drivers/freedreno/freedreno_state.c b/src/gallium/drivers/freedreno/freedreno_state.c
 | ||||||
|  | index 2f5d52c..f5290a9 100644
 | ||||||
|  | --- a/src/gallium/drivers/freedreno/freedreno_state.c
 | ||||||
|  | +++ b/src/gallium/drivers/freedreno/freedreno_state.c
 | ||||||
|  | @@ -120,7 +120,7 @@ fd_set_framebuffer_state(struct pipe_context *pctx,
 | ||||||
|  |  	unsigned i; | ||||||
|  |   | ||||||
|  |  	DBG("%d: cbufs[0]=%p, zsbuf=%p", ctx->needs_flush, | ||||||
|  | -			cso->cbufs[0], cso->zsbuf);
 | ||||||
|  | +			framebuffer->cbufs[0], framebuffer->zsbuf);
 | ||||||
|  |   | ||||||
|  |  	fd_context_render(pctx); | ||||||
|  |   | ||||||
|  | diff --git a/src/gallium/drivers/freedreno/freedreno_util.h b/src/gallium/drivers/freedreno/freedreno_util.h
 | ||||||
|  | index 22857d2..9f10686 100644
 | ||||||
|  | --- a/src/gallium/drivers/freedreno/freedreno_util.h
 | ||||||
|  | +++ b/src/gallium/drivers/freedreno/freedreno_util.h
 | ||||||
|  | @@ -33,6 +33,7 @@
 | ||||||
|  |  #include <freedreno_ringbuffer.h> | ||||||
|  |   | ||||||
|  |  #include "pipe/p_format.h" | ||||||
|  | +#include "pipe/p_state.h"
 | ||||||
|  |  #include "util/u_debug.h" | ||||||
|  |  #include "util/u_math.h" | ||||||
|  |  #include "util/u_half.h" | ||||||
|  | @@ -79,6 +80,15 @@ static inline uint32_t DRAW(enum pc_di_primtype prim_type,
 | ||||||
|  |  			(1                 << 14); | ||||||
|  |  } | ||||||
|  |   | ||||||
|  | +
 | ||||||
|  | +static inline enum pipe_format
 | ||||||
|  | +pipe_surface_format(struct pipe_surface *psurf)
 | ||||||
|  | +{
 | ||||||
|  | +	if (!psurf)
 | ||||||
|  | +		return PIPE_FORMAT_NONE;
 | ||||||
|  | +	return psurf->format;
 | ||||||
|  | +}
 | ||||||
|  | +
 | ||||||
|  |  #define LOG_DWORDS 0 | ||||||
|  |   | ||||||
|  |   | ||||||
|  | -- 
 | ||||||
|  | 1.8.4.2 | ||||||
|  | 
 | ||||||
							
								
								
									
										172
									
								
								0011-freedreno-a3xx-compiler-make-compiler-errors-more-us.patch
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										172
									
								
								0011-freedreno-a3xx-compiler-make-compiler-errors-more-us.patch
									
									
									
									
									
										Normal file
									
								
							| @ -0,0 +1,172 @@ | |||||||
|  | From c726a6a907f119dfc4fb1c26fef7babf51dc1dea Mon Sep 17 00:00:00 2001 | ||||||
|  | From: Rob Clark <robclark@freedesktop.org> | ||||||
|  | Date: Sat, 24 Aug 2013 12:56:22 -0400 | ||||||
|  | Subject: [PATCH 11/17] freedreno/a3xx/compiler: make compiler errors more | ||||||
|  |  useful | ||||||
|  | 
 | ||||||
|  | We probably should get rid of assert() entirely, but at this stage it is | ||||||
|  | more useful for things to crash where we can catch it in a debugger. | ||||||
|  | With compile_error() we have a single place to set an error flag (to | ||||||
|  | bail out and return an error on the next instruction) so that will be a | ||||||
|  | small change later when enough of the compiler bugs are sorted. | ||||||
|  | 
 | ||||||
|  | But re-arrange/cleanup the error/assert stuff so we at least get a dump | ||||||
|  | of the TGSI that triggered it.  So we see some useful output in piglit | ||||||
|  | logs. | ||||||
|  | 
 | ||||||
|  | Signed-off-by: Rob Clark <robclark@freedesktop.org> | ||||||
|  | ---
 | ||||||
|  |  src/gallium/drivers/freedreno/a3xx/fd3_compiler.c | 47 +++++++++++++++-------- | ||||||
|  |  src/gallium/drivers/freedreno/a3xx/ir-a3xx.h      |  3 +- | ||||||
|  |  2 files changed, 33 insertions(+), 17 deletions(-) | ||||||
|  | 
 | ||||||
|  | diff --git a/src/gallium/drivers/freedreno/a3xx/fd3_compiler.c b/src/gallium/drivers/freedreno/a3xx/fd3_compiler.c
 | ||||||
|  | index 772c7d2..e6c5bb7 100644
 | ||||||
|  | --- a/src/gallium/drivers/freedreno/a3xx/fd3_compiler.c
 | ||||||
|  | +++ b/src/gallium/drivers/freedreno/a3xx/fd3_compiler.c
 | ||||||
|  | @@ -185,6 +185,21 @@ compile_init(struct fd3_compile_context *ctx, struct fd3_shader_stateobj *so,
 | ||||||
|  |  } | ||||||
|  |   | ||||||
|  |  static void | ||||||
|  | +compile_error(struct fd3_compile_context *ctx, const char *format, ...)
 | ||||||
|  | +{
 | ||||||
|  | +	va_list ap;
 | ||||||
|  | +	va_start(ap, format);
 | ||||||
|  | +	_debug_vprintf(format, ap);
 | ||||||
|  | +	va_end(ap);
 | ||||||
|  | +	tgsi_dump(ctx->tokens, 0);
 | ||||||
|  | +	assert(0);
 | ||||||
|  | +}
 | ||||||
|  | +
 | ||||||
|  | +#define compile_assert(ctx, cond) do { \
 | ||||||
|  | +		if (!(cond)) compile_error((ctx), "failed assert: "#cond"\n"); \
 | ||||||
|  | +	} while (0)
 | ||||||
|  | +
 | ||||||
|  | +static void
 | ||||||
|  |  compile_free(struct fd3_compile_context *ctx) | ||||||
|  |  { | ||||||
|  |  	tgsi_parse_free(&ctx->parser); | ||||||
|  | @@ -212,9 +227,8 @@ add_dst_reg(struct fd3_compile_context *ctx, struct ir3_instruction *instr,
 | ||||||
|  |  		num = dst->Index + ctx->base_reg[dst->File]; | ||||||
|  |  		break; | ||||||
|  |  	default: | ||||||
|  | -		DBG("unsupported dst register file: %s",
 | ||||||
|  | +		compile_error(ctx, "unsupported dst register file: %s\n",
 | ||||||
|  |  			tgsi_file_name(dst->File)); | ||||||
|  | -		assert(0);
 | ||||||
|  |  		break; | ||||||
|  |  	} | ||||||
|  |   | ||||||
|  | @@ -250,9 +264,8 @@ add_src_reg(struct fd3_compile_context *ctx, struct ir3_instruction *instr,
 | ||||||
|  |  		num = src->Index + ctx->base_reg[src->File]; | ||||||
|  |  		break; | ||||||
|  |  	default: | ||||||
|  | -		DBG("unsupported src register file: %s",
 | ||||||
|  | +		compile_error(ctx, "unsupported src register file: %s\n",
 | ||||||
|  |  			tgsi_file_name(src->File)); | ||||||
|  | -		assert(0);
 | ||||||
|  |  		break; | ||||||
|  |  	} | ||||||
|  |   | ||||||
|  | @@ -329,6 +342,13 @@ get_internal_temp_repl(struct fd3_compile_context *ctx,
 | ||||||
|  |  		tmp_src->SwizzleZ = tmp_src->SwizzleW = TGSI_SWIZZLE_X; | ||||||
|  |  } | ||||||
|  |   | ||||||
|  | +static inline bool
 | ||||||
|  | +is_const(struct tgsi_src_register *src)
 | ||||||
|  | +{
 | ||||||
|  | +	return (src->File == TGSI_FILE_CONSTANT) ||
 | ||||||
|  | +			(src->File == TGSI_FILE_IMMEDIATE);
 | ||||||
|  | +}
 | ||||||
|  | +
 | ||||||
|  |  static void | ||||||
|  |  get_immediate(struct fd3_compile_context *ctx, | ||||||
|  |  		struct tgsi_src_register *reg, uint32_t val) | ||||||
|  | @@ -578,8 +598,7 @@ trans_dotp(const struct instr_translater *t,
 | ||||||
|  |  	 * is a const.  Not sure if this is a hw bug, or simply that the | ||||||
|  |  	 * disassembler lies. | ||||||
|  |  	 */ | ||||||
|  | -	if ((src1->File == TGSI_FILE_IMMEDIATE) ||
 | ||||||
|  | -			(src1->File == TGSI_FILE_CONSTANT)) {
 | ||||||
|  | +	if (is_const(src1)) {
 | ||||||
|  |   | ||||||
|  |  		/* the mov to tmp unswizzles src1, so now we have tmp.xyzw: | ||||||
|  |  		 */ | ||||||
|  | @@ -768,7 +787,7 @@ trans_samp(const struct instr_translater *t,
 | ||||||
|  |  		flags |= IR3_INSTR_P; | ||||||
|  |  		break; | ||||||
|  |  	default: | ||||||
|  | -		assert(0);
 | ||||||
|  | +		compile_assert(ctx, 0);
 | ||||||
|  |  		break; | ||||||
|  |  	} | ||||||
|  |   | ||||||
|  | @@ -1187,7 +1206,7 @@ decl_out(struct fd3_compile_context *ctx, struct tgsi_full_declaration *decl)
 | ||||||
|  |  	unsigned name = decl->Semantic.Name; | ||||||
|  |  	unsigned i; | ||||||
|  |   | ||||||
|  | -	assert(decl->Declaration.Semantic);  // TODO is this ever not true?
 | ||||||
|  | +	compile_assert(ctx, decl->Declaration.Semantic);  // TODO is this ever not true?
 | ||||||
|  |   | ||||||
|  |  	DBG("decl out[%d] -> r%d", name, decl->Range.First + base);   // XXX | ||||||
|  |   | ||||||
|  | @@ -1207,9 +1226,8 @@ decl_out(struct fd3_compile_context *ctx, struct tgsi_full_declaration *decl)
 | ||||||
|  |  				so->outputs[so->outputs_count++].regid = regid(i + base, 0); | ||||||
|  |  			break; | ||||||
|  |  		default: | ||||||
|  | -			DBG("unknown VS semantic name: %s",
 | ||||||
|  | +			compile_error(ctx, "unknown VS semantic name: %s\n",
 | ||||||
|  |  					tgsi_semantic_names[name]); | ||||||
|  | -			assert(0);
 | ||||||
|  |  		} | ||||||
|  |  	} else { | ||||||
|  |  		switch (name) { | ||||||
|  | @@ -1217,9 +1235,8 @@ decl_out(struct fd3_compile_context *ctx, struct tgsi_full_declaration *decl)
 | ||||||
|  |  			so->color_regid = regid(decl->Range.First + base, 0); | ||||||
|  |  			break; | ||||||
|  |  		default: | ||||||
|  | -			DBG("unknown VS semantic name: %s",
 | ||||||
|  | +			compile_error(ctx, "unknown VS semantic name: %s\n",
 | ||||||
|  |  					tgsi_semantic_names[name]); | ||||||
|  | -			assert(0);
 | ||||||
|  |  		} | ||||||
|  |  	} | ||||||
|  |  } | ||||||
|  | @@ -1278,10 +1295,8 @@ compile_instructions(struct fd3_compile_context *ctx)
 | ||||||
|  |  				t->fxn(t, ctx, inst); | ||||||
|  |  				ctx->num_internal_temps = 0; | ||||||
|  |  			} else { | ||||||
|  | -				debug_printf("unknown TGSI opc: %s\n",
 | ||||||
|  | +				compile_error(ctx, "unknown TGSI opc: %s\n",
 | ||||||
|  |  						tgsi_get_opcode_name(opc)); | ||||||
|  | -				tgsi_dump(ctx->tokens, 0);
 | ||||||
|  | -				assert(0);
 | ||||||
|  |  			} | ||||||
|  |   | ||||||
|  |  			switch (inst->Instruction.Saturate) { | ||||||
|  | @@ -1319,6 +1334,8 @@ fd3_compile_shader(struct fd3_shader_stateobj *so,
 | ||||||
|  |   | ||||||
|  |  	so->ir = ir3_shader_create(); | ||||||
|  |   | ||||||
|  | +	assert(so->ir);
 | ||||||
|  | +
 | ||||||
|  |  	so->color_regid = regid(63,0); | ||||||
|  |  	so->pos_regid   = regid(63,0); | ||||||
|  |  	so->psize_regid = regid(63,0); | ||||||
|  | diff --git a/src/gallium/drivers/freedreno/a3xx/ir-a3xx.h b/src/gallium/drivers/freedreno/a3xx/ir-a3xx.h
 | ||||||
|  | index 2fedc7b..61c01a7 100644
 | ||||||
|  | --- a/src/gallium/drivers/freedreno/a3xx/ir-a3xx.h
 | ||||||
|  | +++ b/src/gallium/drivers/freedreno/a3xx/ir-a3xx.h
 | ||||||
|  | @@ -166,8 +166,7 @@ struct ir3_instruction {
 | ||||||
|  |  	}; | ||||||
|  |  }; | ||||||
|  |   | ||||||
|  | -/* this is just large to cope w/ the large test *.asm: */
 | ||||||
|  | -#define MAX_INSTRS 10240
 | ||||||
|  | +#define MAX_INSTRS 1024
 | ||||||
|  |   | ||||||
|  |  struct ir3_shader { | ||||||
|  |  	unsigned instrs_count; | ||||||
|  | -- 
 | ||||||
|  | 1.8.4.2 | ||||||
|  | 
 | ||||||
							
								
								
									
										420
									
								
								0012-freedreno-a3xx-compiler-bit-of-re-arrange-cleanup.patch
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										420
									
								
								0012-freedreno-a3xx-compiler-bit-of-re-arrange-cleanup.patch
									
									
									
									
									
										Normal file
									
								
							| @ -0,0 +1,420 @@ | |||||||
|  | From ca5514b85161d480fb711ac26d74fc447e1e9bda Mon Sep 17 00:00:00 2001 | ||||||
|  | From: Rob Clark <robclark@freedesktop.org> | ||||||
|  | Date: Sat, 24 Aug 2013 13:00:07 -0400 | ||||||
|  | Subject: [PATCH 12/17] freedreno/a3xx/compiler: bit of re-arrange/cleanup | ||||||
|  | 
 | ||||||
|  | It seems there are a number of cases where instructions have limitations | ||||||
|  | about reading srcs from the const register file, so make | ||||||
|  | get_unconst() a bit easier to use. | ||||||
|  | 
 | ||||||
|  | Signed-off-by: Rob Clark <robclark@freedesktop.org> | ||||||
|  | ---
 | ||||||
|  |  src/gallium/drivers/freedreno/a3xx/fd3_compiler.c | 132 ++++++++++++---------- | ||||||
|  |  1 file changed, 71 insertions(+), 61 deletions(-) | ||||||
|  | 
 | ||||||
|  | diff --git a/src/gallium/drivers/freedreno/a3xx/fd3_compiler.c b/src/gallium/drivers/freedreno/a3xx/fd3_compiler.c
 | ||||||
|  | index e6c5bb7..b5cdda8 100644
 | ||||||
|  | --- a/src/gallium/drivers/freedreno/a3xx/fd3_compiler.c
 | ||||||
|  | +++ b/src/gallium/drivers/freedreno/a3xx/fd3_compiler.c
 | ||||||
|  | @@ -91,6 +91,7 @@ struct fd3_compile_context {
 | ||||||
|  |   | ||||||
|  |  	unsigned next_inloc; | ||||||
|  |  	unsigned num_internal_temps; | ||||||
|  | +	struct tgsi_src_register internal_temps[6];
 | ||||||
|  |   | ||||||
|  |  	/* track registers which need to synchronize w/ "complex alu" cat3 | ||||||
|  |  	 * instruction pipeline: | ||||||
|  | @@ -128,7 +129,7 @@ struct fd3_compile_context {
 | ||||||
|  |  	 * up the vector operation | ||||||
|  |  	 */ | ||||||
|  |  	struct tgsi_dst_register tmp_dst; | ||||||
|  | -	struct tgsi_src_register tmp_src;
 | ||||||
|  | +	struct tgsi_src_register *tmp_src;
 | ||||||
|  |  }; | ||||||
|  |   | ||||||
|  |   | ||||||
|  | @@ -309,11 +310,11 @@ src_from_dst(struct tgsi_src_register *src, struct tgsi_dst_register *dst)
 | ||||||
|  |  /* Get internal-temp src/dst to use for a sequence of instructions | ||||||
|  |   * generated by a single TGSI op. | ||||||
|  |   */ | ||||||
|  | -static void
 | ||||||
|  | +static struct tgsi_src_register *
 | ||||||
|  |  get_internal_temp(struct fd3_compile_context *ctx, | ||||||
|  | -		struct tgsi_dst_register *tmp_dst,
 | ||||||
|  | -		struct tgsi_src_register *tmp_src)
 | ||||||
|  | +		struct tgsi_dst_register *tmp_dst)
 | ||||||
|  |  { | ||||||
|  | +	struct tgsi_src_register *tmp_src;
 | ||||||
|  |  	int n; | ||||||
|  |   | ||||||
|  |  	tmp_dst->File      = TGSI_FILE_TEMPORARY; | ||||||
|  | @@ -323,23 +324,28 @@ get_internal_temp(struct fd3_compile_context *ctx,
 | ||||||
|  |   | ||||||
|  |  	/* assign next temporary: */ | ||||||
|  |  	n = ctx->num_internal_temps++; | ||||||
|  | +	compile_assert(ctx, n < ARRAY_SIZE(ctx->internal_temps));
 | ||||||
|  | +	tmp_src = &ctx->internal_temps[n];
 | ||||||
|  |   | ||||||
|  |  	tmp_dst->Index = ctx->info.file_max[TGSI_FILE_TEMPORARY] + n + 1; | ||||||
|  |   | ||||||
|  |  	src_from_dst(tmp_src, tmp_dst); | ||||||
|  | +
 | ||||||
|  | +	return tmp_src;
 | ||||||
|  |  } | ||||||
|  |   | ||||||
|  |  /* same as get_internal_temp, but w/ src.xxxx (for instructions that | ||||||
|  |   * replicate their results) | ||||||
|  |   */ | ||||||
|  | -static void
 | ||||||
|  | +static struct tgsi_src_register *
 | ||||||
|  |  get_internal_temp_repl(struct fd3_compile_context *ctx, | ||||||
|  | -		struct tgsi_dst_register *tmp_dst,
 | ||||||
|  | -		struct tgsi_src_register *tmp_src)
 | ||||||
|  | +		struct tgsi_dst_register *tmp_dst)
 | ||||||
|  |  { | ||||||
|  | -	get_internal_temp(ctx, tmp_dst, tmp_src);
 | ||||||
|  | +	struct tgsi_src_register *tmp_src =
 | ||||||
|  | +			get_internal_temp(ctx, tmp_dst);
 | ||||||
|  |  	tmp_src->SwizzleX = tmp_src->SwizzleY = | ||||||
|  |  		tmp_src->SwizzleZ = tmp_src->SwizzleW = TGSI_SWIZZLE_X; | ||||||
|  | +	return tmp_src;
 | ||||||
|  |  } | ||||||
|  |   | ||||||
|  |  static inline bool | ||||||
|  | @@ -349,6 +355,22 @@ is_const(struct tgsi_src_register *src)
 | ||||||
|  |  			(src->File == TGSI_FILE_IMMEDIATE); | ||||||
|  |  } | ||||||
|  |   | ||||||
|  | +/* for instructions that cannot take a const register as src, if needed
 | ||||||
|  | + * generate a move to temporary gpr:
 | ||||||
|  | + */
 | ||||||
|  | +static struct tgsi_src_register *
 | ||||||
|  | +get_unconst(struct fd3_compile_context *ctx, struct tgsi_src_register *src)
 | ||||||
|  | +{
 | ||||||
|  | +	if (is_const(src)) {
 | ||||||
|  | +		static struct tgsi_dst_register tmp_dst;
 | ||||||
|  | +		struct tgsi_src_register *tmp_src =
 | ||||||
|  | +				get_internal_temp(ctx, &tmp_dst);
 | ||||||
|  | +		create_mov(ctx, &tmp_dst, src);
 | ||||||
|  | +		src = tmp_src;
 | ||||||
|  | +	}
 | ||||||
|  | +	return src;
 | ||||||
|  | +}
 | ||||||
|  | +
 | ||||||
|  |  static void | ||||||
|  |  get_immediate(struct fd3_compile_context *ctx, | ||||||
|  |  		struct tgsi_src_register *reg, uint32_t val) | ||||||
|  | @@ -396,27 +418,16 @@ get_immediate(struct fd3_compile_context *ctx,
 | ||||||
|  |  	reg->SwizzleW  = swiz2tgsi[swiz]; | ||||||
|  |  } | ||||||
|  |   | ||||||
|  | -/* for instructions that cannot take a const register as src, if needed
 | ||||||
|  | - * generate a move to temporary gpr:
 | ||||||
|  | - */
 | ||||||
|  | -static struct tgsi_src_register *
 | ||||||
|  | -get_unconst(struct fd3_compile_context *ctx, struct tgsi_src_register *src,
 | ||||||
|  | -		struct tgsi_src_register *tmp_src)
 | ||||||
|  | +static type_t
 | ||||||
|  | +get_ftype(struct fd3_compile_context *ctx)
 | ||||||
|  |  { | ||||||
|  | -	static struct tgsi_dst_register tmp_dst;
 | ||||||
|  | -	if ((src->File == TGSI_FILE_CONSTANT) ||
 | ||||||
|  | -			(src->File == TGSI_FILE_IMMEDIATE)) {
 | ||||||
|  | -		get_internal_temp(ctx, &tmp_dst, tmp_src);
 | ||||||
|  | -		create_mov(ctx, &tmp_dst, src);
 | ||||||
|  | -		src = tmp_src;
 | ||||||
|  | -	}
 | ||||||
|  | -	return src;
 | ||||||
|  | +	return ctx->so->half_precision ? TYPE_F16 : TYPE_F32;
 | ||||||
|  |  } | ||||||
|  |   | ||||||
|  |  static type_t | ||||||
|  | -get_type(struct fd3_compile_context *ctx)
 | ||||||
|  | +get_utype(struct fd3_compile_context *ctx)
 | ||||||
|  |  { | ||||||
|  | -	return ctx->so->half_precision ? TYPE_F16 : TYPE_F32;
 | ||||||
|  | +	return ctx->so->half_precision ? TYPE_U16 : TYPE_U32;
 | ||||||
|  |  } | ||||||
|  |   | ||||||
|  |  static unsigned | ||||||
|  | @@ -436,7 +447,7 @@ static void
 | ||||||
|  |  create_mov(struct fd3_compile_context *ctx, struct tgsi_dst_register *dst, | ||||||
|  |  		struct tgsi_src_register *src) | ||||||
|  |  { | ||||||
|  | -	type_t type_mov = get_type(ctx);
 | ||||||
|  | +	type_t type_mov = get_ftype(ctx);
 | ||||||
|  |  	unsigned i; | ||||||
|  |   | ||||||
|  |  	for (i = 0; i < 4; i++) { | ||||||
|  | @@ -492,7 +503,7 @@ get_dst(struct fd3_compile_context *ctx, struct tgsi_full_instruction *inst)
 | ||||||
|  |  	for (i = 0; i < inst->Instruction.NumSrcRegs; i++) { | ||||||
|  |  		struct tgsi_src_register *src = &inst->Src[i].Register; | ||||||
|  |  		if ((src->File == dst->File) && (src->Index == dst->Index)) { | ||||||
|  | -			get_internal_temp(ctx, &ctx->tmp_dst, &ctx->tmp_src);
 | ||||||
|  | +			ctx->tmp_src = get_internal_temp(ctx, &ctx->tmp_dst);
 | ||||||
|  |  			ctx->tmp_dst.WriteMask = dst->WriteMask; | ||||||
|  |  			dst = &ctx->tmp_dst; | ||||||
|  |  			break; | ||||||
|  | @@ -507,7 +518,7 @@ put_dst(struct fd3_compile_context *ctx, struct tgsi_full_instruction *inst,
 | ||||||
|  |  { | ||||||
|  |  	/* if necessary, add mov back into original dst: */ | ||||||
|  |  	if (dst != &inst->Dst[0].Register) { | ||||||
|  | -		create_mov(ctx, &inst->Dst[0].Register, &ctx->tmp_src);
 | ||||||
|  | +		create_mov(ctx, &inst->Dst[0].Register, ctx->tmp_src);
 | ||||||
|  |  	} | ||||||
|  |  } | ||||||
|  |   | ||||||
|  | @@ -580,7 +591,7 @@ trans_dotp(const struct instr_translater *t,
 | ||||||
|  |  { | ||||||
|  |  	struct ir3_instruction *instr; | ||||||
|  |  	struct tgsi_dst_register tmp_dst; | ||||||
|  | -	struct tgsi_src_register tmp_src;
 | ||||||
|  | +	struct tgsi_src_register *tmp_src;
 | ||||||
|  |  	struct tgsi_dst_register *dst  = &inst->Dst[0].Register; | ||||||
|  |  	struct tgsi_src_register *src0 = &inst->Src[0].Register; | ||||||
|  |  	struct tgsi_src_register *src1 = &inst->Src[1].Register; | ||||||
|  | @@ -590,7 +601,7 @@ trans_dotp(const struct instr_translater *t,
 | ||||||
|  |  	unsigned n = t->arg;     /* number of components */ | ||||||
|  |  	unsigned i; | ||||||
|  |   | ||||||
|  | -	get_internal_temp_repl(ctx, &tmp_dst, &tmp_src);
 | ||||||
|  | +	tmp_src = get_internal_temp_repl(ctx, &tmp_dst);
 | ||||||
|  |   | ||||||
|  |  	/* Blob compiler never seems to use a const in src1 position for | ||||||
|  |  	 * mad.*, although there does seem (according to disassembler | ||||||
|  | @@ -609,7 +620,7 @@ trans_dotp(const struct instr_translater *t,
 | ||||||
|  |  		 * because after that point we no longer need tmp.x: | ||||||
|  |  		 */ | ||||||
|  |  		create_mov(ctx, &tmp_dst, src1); | ||||||
|  | -		src1 = &tmp_src;
 | ||||||
|  | +		src1 = tmp_src;
 | ||||||
|  |  	} | ||||||
|  |   | ||||||
|  |  	instr = ir3_instr_create(ctx->ir, 2, OPC_MUL_F); | ||||||
|  | @@ -624,7 +635,7 @@ trans_dotp(const struct instr_translater *t,
 | ||||||
|  |  		add_dst_reg(ctx, instr, &tmp_dst, 0); | ||||||
|  |  		add_src_reg(ctx, instr, src0, swiz0[i]); | ||||||
|  |  		add_src_reg(ctx, instr, src1, swiz1[i]); | ||||||
|  | -		add_src_reg(ctx, instr, &tmp_src, 0);
 | ||||||
|  | +		add_src_reg(ctx, instr, tmp_src, 0);
 | ||||||
|  |  	} | ||||||
|  |   | ||||||
|  |  	/* DPH(a,b) = (a.x * b.x) + (a.y * b.y) + (a.z * b.z) + b.w */ | ||||||
|  | @@ -634,7 +645,7 @@ trans_dotp(const struct instr_translater *t,
 | ||||||
|  |  		instr = ir3_instr_create(ctx->ir, 2, OPC_ADD_F); | ||||||
|  |  		add_dst_reg(ctx, instr, &tmp_dst, 0); | ||||||
|  |  		add_src_reg(ctx, instr, src1, swiz1[i]); | ||||||
|  | -		add_src_reg(ctx, instr, &tmp_src, 0);
 | ||||||
|  | +		add_src_reg(ctx, instr, tmp_src, 0);
 | ||||||
|  |   | ||||||
|  |  		n++; | ||||||
|  |  	} | ||||||
|  | @@ -646,7 +657,7 @@ trans_dotp(const struct instr_translater *t,
 | ||||||
|  |  		ir3_instr_create(ctx->ir, 0, OPC_NOP); | ||||||
|  |  	} | ||||||
|  |   | ||||||
|  | -	create_mov(ctx, dst, &tmp_src);
 | ||||||
|  | +	create_mov(ctx, dst, tmp_src);
 | ||||||
|  |  } | ||||||
|  |   | ||||||
|  |  /* LRP(a,b,c) = (a * b) + ((1 - a) * c) */ | ||||||
|  | @@ -657,11 +668,11 @@ trans_lrp(const struct instr_translater *t,
 | ||||||
|  |  { | ||||||
|  |  	struct ir3_instruction *instr; | ||||||
|  |  	struct tgsi_dst_register tmp_dst1, tmp_dst2; | ||||||
|  | -	struct tgsi_src_register tmp_src1, tmp_src2;
 | ||||||
|  | +	struct tgsi_src_register *tmp_src1, *tmp_src2;
 | ||||||
|  |  	struct tgsi_src_register tmp_const; | ||||||
|  |   | ||||||
|  | -	get_internal_temp(ctx, &tmp_dst1, &tmp_src1);
 | ||||||
|  | -	get_internal_temp(ctx, &tmp_dst2, &tmp_src2);
 | ||||||
|  | +	tmp_src1 = get_internal_temp(ctx, &tmp_dst1);
 | ||||||
|  | +	tmp_src2 = get_internal_temp(ctx, &tmp_dst2);
 | ||||||
|  |   | ||||||
|  |  	get_immediate(ctx, &tmp_const, fui(1.0)); | ||||||
|  |   | ||||||
|  | @@ -680,14 +691,14 @@ trans_lrp(const struct instr_translater *t,
 | ||||||
|  |  	/* tmp2 = tmp2 * c */ | ||||||
|  |  	instr = ir3_instr_create(ctx->ir, 2, OPC_MUL_F); | ||||||
|  |  	vectorize(ctx, instr, &tmp_dst2, 2, | ||||||
|  | -			&tmp_src2, 0,
 | ||||||
|  | +			tmp_src2, 0,
 | ||||||
|  |  			&inst->Src[2].Register, 0); | ||||||
|  |   | ||||||
|  |  	/* dst = tmp1 + tmp2 */ | ||||||
|  |  	instr = ir3_instr_create(ctx->ir, 2, OPC_ADD_F); | ||||||
|  |  	vectorize(ctx, instr, &inst->Dst[0].Register, 2, | ||||||
|  | -			&tmp_src1, 0,
 | ||||||
|  | -			&tmp_src2, 0);
 | ||||||
|  | +			tmp_src1, 0,
 | ||||||
|  | +			tmp_src2, 0);
 | ||||||
|  |  } | ||||||
|  |   | ||||||
|  |  /* FRC(x) = x - FLOOR(x) */ | ||||||
|  | @@ -698,9 +709,9 @@ trans_frac(const struct instr_translater *t,
 | ||||||
|  |  { | ||||||
|  |  	struct ir3_instruction *instr; | ||||||
|  |  	struct tgsi_dst_register tmp_dst; | ||||||
|  | -	struct tgsi_src_register tmp_src;
 | ||||||
|  | +	struct tgsi_src_register *tmp_src;
 | ||||||
|  |   | ||||||
|  | -	get_internal_temp(ctx, &tmp_dst, &tmp_src);
 | ||||||
|  | +	tmp_src = get_internal_temp(ctx, &tmp_dst);
 | ||||||
|  |   | ||||||
|  |  	/* tmp = FLOOR(x) */ | ||||||
|  |  	instr = ir3_instr_create(ctx->ir, 2, OPC_FLOOR_F); | ||||||
|  | @@ -711,7 +722,7 @@ trans_frac(const struct instr_translater *t,
 | ||||||
|  |  	instr = ir3_instr_create(ctx->ir, 2, OPC_ADD_F); | ||||||
|  |  	vectorize(ctx, instr, &inst->Dst[0].Register, 2, | ||||||
|  |  			&inst->Src[0].Register, 0, | ||||||
|  | -			&tmp_src, IR3_REG_NEGATE);
 | ||||||
|  | +			tmp_src, IR3_REG_NEGATE);
 | ||||||
|  |  } | ||||||
|  |   | ||||||
|  |  /* POW(a,b) = EXP2(b * LOG2(a)) */ | ||||||
|  | @@ -723,12 +734,12 @@ trans_pow(const struct instr_translater *t,
 | ||||||
|  |  	struct ir3_instruction *instr; | ||||||
|  |  	struct ir3_register *r; | ||||||
|  |  	struct tgsi_dst_register tmp_dst; | ||||||
|  | -	struct tgsi_src_register tmp_src;
 | ||||||
|  | +	struct tgsi_src_register *tmp_src;
 | ||||||
|  |  	struct tgsi_dst_register *dst  = &inst->Dst[0].Register; | ||||||
|  |  	struct tgsi_src_register *src0 = &inst->Src[0].Register; | ||||||
|  |  	struct tgsi_src_register *src1 = &inst->Src[1].Register; | ||||||
|  |   | ||||||
|  | -	get_internal_temp_repl(ctx, &tmp_dst, &tmp_src);
 | ||||||
|  | +	tmp_src = get_internal_temp_repl(ctx, &tmp_dst);
 | ||||||
|  |   | ||||||
|  |  	/* log2 Rtmp, Rsrc0 */ | ||||||
|  |  	ir3_instr_create(ctx->ir, 0, OPC_NOP)->repeat = 5; | ||||||
|  | @@ -740,7 +751,7 @@ trans_pow(const struct instr_translater *t,
 | ||||||
|  |  	/* mul.f Rtmp, Rtmp, Rsrc1 */ | ||||||
|  |  	instr = ir3_instr_create(ctx->ir, 2, OPC_MUL_F); | ||||||
|  |  	add_dst_reg(ctx, instr, &tmp_dst, 0); | ||||||
|  | -	add_src_reg(ctx, instr, &tmp_src, 0);
 | ||||||
|  | +	add_src_reg(ctx, instr, tmp_src, 0);
 | ||||||
|  |  	add_src_reg(ctx, instr, src1, src1->SwizzleX); | ||||||
|  |   | ||||||
|  |  	/* blob compiler seems to ensure there are at least 6 instructions | ||||||
|  | @@ -752,10 +763,10 @@ trans_pow(const struct instr_translater *t,
 | ||||||
|  |  	/* exp2 Rdst, Rtmp */ | ||||||
|  |  	instr = ir3_instr_create(ctx->ir, 4, OPC_EXP2); | ||||||
|  |  	r = add_dst_reg(ctx, instr, &tmp_dst, 0); | ||||||
|  | -	add_src_reg(ctx, instr, &tmp_src, 0);
 | ||||||
|  | +	add_src_reg(ctx, instr, tmp_src, 0);
 | ||||||
|  |  	regmask_set(ctx->needs_ss, r); | ||||||
|  |   | ||||||
|  | -	create_mov(ctx, dst, &tmp_src);
 | ||||||
|  | +	create_mov(ctx, dst, tmp_src);
 | ||||||
|  |  } | ||||||
|  |   | ||||||
|  |  /* texture fetch/sample instructions: */ | ||||||
|  | @@ -766,8 +777,6 @@ trans_samp(const struct instr_translater *t,
 | ||||||
|  |  { | ||||||
|  |  	struct ir3_register *r; | ||||||
|  |  	struct ir3_instruction *instr; | ||||||
|  | -	struct tgsi_dst_register tmp_dst;
 | ||||||
|  | -	struct tgsi_src_register tmp_src;
 | ||||||
|  |  	struct tgsi_src_register *coord = &inst->Src[0].Register; | ||||||
|  |  	struct tgsi_src_register *samp  = &inst->Src[1].Register; | ||||||
|  |  	unsigned tex = inst->Texture.Texture; | ||||||
|  | @@ -802,10 +811,13 @@ trans_samp(const struct instr_translater *t,
 | ||||||
|  |  	 */ | ||||||
|  |  	for (i = 1; (i < 4) && (order[i] >= 0); i++) { | ||||||
|  |  		if (src_swiz(coord, i) != (src_swiz(coord, 0) + order[i])) { | ||||||
|  | -			type_t type_mov = get_type(ctx);
 | ||||||
|  | +			struct tgsi_dst_register tmp_dst;
 | ||||||
|  | +			struct tgsi_src_register *tmp_src;
 | ||||||
|  | +
 | ||||||
|  | +			type_t type_mov = get_ftype(ctx);
 | ||||||
|  |   | ||||||
|  |  			/* need to move things around: */ | ||||||
|  | -			get_internal_temp(ctx, &tmp_dst, &tmp_src);
 | ||||||
|  | +			tmp_src = get_internal_temp(ctx, &tmp_dst);
 | ||||||
|  |   | ||||||
|  |  			for (j = 0; (j < 4) && (order[j] >= 0); j++) { | ||||||
|  |  				instr = ir3_instr_create(ctx->ir, 1, 0); | ||||||
|  | @@ -816,7 +828,7 @@ trans_samp(const struct instr_translater *t,
 | ||||||
|  |  						src_swiz(coord, order[j])); | ||||||
|  |  			} | ||||||
|  |   | ||||||
|  | -			coord = &tmp_src;
 | ||||||
|  | +			coord = tmp_src;
 | ||||||
|  |   | ||||||
|  |  			if (j < 4) | ||||||
|  |  				ir3_instr_create(ctx->ir, 0, OPC_NOP)->repeat = 4 - j - 1; | ||||||
|  | @@ -826,7 +838,7 @@ trans_samp(const struct instr_translater *t,
 | ||||||
|  |  	} | ||||||
|  |   | ||||||
|  |  	instr = ir3_instr_create(ctx->ir, 5, t->opc); | ||||||
|  | -	instr->cat5.type = get_type(ctx);
 | ||||||
|  | +	instr->cat5.type = get_ftype(ctx);
 | ||||||
|  |  	instr->cat5.samp = samp->Index; | ||||||
|  |  	instr->cat5.tex  = samp->Index; | ||||||
|  |  	instr->flags |= flags; | ||||||
|  | @@ -847,12 +859,12 @@ trans_cmp(const struct instr_translater *t,
 | ||||||
|  |  { | ||||||
|  |  	struct ir3_instruction *instr; | ||||||
|  |  	struct tgsi_dst_register tmp_dst; | ||||||
|  | -	struct tgsi_src_register tmp_src;
 | ||||||
|  | +	struct tgsi_src_register *tmp_src;
 | ||||||
|  |  	struct tgsi_src_register constval; | ||||||
|  |  	/* final instruction uses original src1 and src2, so we need get_dst() */ | ||||||
|  |  	struct tgsi_dst_register *dst = get_dst(ctx, inst); | ||||||
|  |   | ||||||
|  | -	get_internal_temp(ctx, &tmp_dst, &tmp_src);
 | ||||||
|  | +	tmp_src = get_internal_temp(ctx, &tmp_dst);
 | ||||||
|  |   | ||||||
|  |  	/* cmps.f.ge tmp, src0, 0.0 */ | ||||||
|  |  	instr = ir3_instr_create(ctx->ir, 2, OPC_CMPS_F); | ||||||
|  | @@ -866,7 +878,7 @@ trans_cmp(const struct instr_translater *t,
 | ||||||
|  |  	instr = ir3_instr_create(ctx->ir, 2, OPC_ADD_S); | ||||||
|  |  	instr->repeat = 3; | ||||||
|  |  	add_dst_reg(ctx, instr, &tmp_dst, 0); | ||||||
|  | -	add_src_reg(ctx, instr, &tmp_src, 0)->flags |= IR3_REG_R;
 | ||||||
|  | +	add_src_reg(ctx, instr, tmp_src, 0)->flags |= IR3_REG_R;
 | ||||||
|  |  	ir3_reg_create(instr, 0, IR3_REG_IMMED)->iim_val = -1; | ||||||
|  |   | ||||||
|  |  	/* sel.{f32,f16} dst, src2, tmp, src1 */ | ||||||
|  | @@ -874,7 +886,7 @@ trans_cmp(const struct instr_translater *t,
 | ||||||
|  |  			OPC_SEL_F16 : OPC_SEL_F32); | ||||||
|  |  	vectorize(ctx, instr, dst, 3, | ||||||
|  |  			&inst->Src[2].Register, 0, | ||||||
|  | -			&tmp_src, 0,
 | ||||||
|  | +			tmp_src, 0,
 | ||||||
|  |  			&inst->Src[1].Register, 0); | ||||||
|  |   | ||||||
|  |  	put_dst(ctx, inst, dst); | ||||||
|  | @@ -1066,7 +1078,6 @@ instr_cat3(const struct instr_translater *t,
 | ||||||
|  |  { | ||||||
|  |  	struct tgsi_dst_register *dst = get_dst(ctx, inst); | ||||||
|  |  	struct tgsi_src_register *src1; | ||||||
|  | -	struct tgsi_src_register tmp_src;
 | ||||||
|  |  	struct ir3_instruction *instr; | ||||||
|  |   | ||||||
|  |  	/* Blob compiler never seems to use a const in src1 position.. | ||||||
|  | @@ -1075,7 +1086,7 @@ instr_cat3(const struct instr_translater *t,
 | ||||||
|  |  	 * const.  Not sure if this is a hw bug, or simply that the | ||||||
|  |  	 * disassembler lies. | ||||||
|  |  	 */ | ||||||
|  | -	src1 = get_unconst(ctx, &inst->Src[1].Register, &tmp_src);
 | ||||||
|  | +	src1 = get_unconst(ctx, &inst->Src[1].Register);
 | ||||||
|  |   | ||||||
|  |  	instr = ir3_instr_create(ctx->ir, 3, | ||||||
|  |  			ctx->so->half_precision ? t->hopc : t->opc); | ||||||
|  | @@ -1093,11 +1104,10 @@ instr_cat4(const struct instr_translater *t,
 | ||||||
|  |  { | ||||||
|  |  	struct tgsi_dst_register *dst = get_dst(ctx, inst); | ||||||
|  |  	struct tgsi_src_register *src; | ||||||
|  | -	struct tgsi_src_register tmp_src;
 | ||||||
|  |  	struct ir3_instruction *instr; | ||||||
|  |   | ||||||
|  |  	/* seems like blob compiler avoids const as src.. */ | ||||||
|  | -	src = get_unconst(ctx, &inst->Src[0].Register, &tmp_src);
 | ||||||
|  | +	src = get_unconst(ctx, &inst->Src[0].Register);
 | ||||||
|  |   | ||||||
|  |  	ir3_instr_create(ctx->ir, 0, OPC_NOP)->repeat = 5; | ||||||
|  |  	instr = ir3_instr_create(ctx->ir, 4, t->opc); | ||||||
|  | -- 
 | ||||||
|  | 1.8.4.2 | ||||||
|  | 
 | ||||||
							
								
								
									
										231
									
								
								0013-freedreno-a3xx-compiler-fix-SGT-SLT-etc.patch
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										231
									
								
								0013-freedreno-a3xx-compiler-fix-SGT-SLT-etc.patch
									
									
									
									
									
										Normal file
									
								
							| @ -0,0 +1,231 @@ | |||||||
|  | From c20aa295ec0e1f7b70986a32ef2d74e5097cf640 Mon Sep 17 00:00:00 2001 | ||||||
|  | From: Rob Clark <robclark@freedesktop.org> | ||||||
|  | Date: Sat, 24 Aug 2013 13:02:53 -0400 | ||||||
|  | Subject: [PATCH 13/17] freedreno/a3xx/compiler: fix SGT/SLT/etc | ||||||
|  | 
 | ||||||
|  | The cmps.f.* instruction doesn't actually seem to give a float 1.0 or | ||||||
|  | 0.0 output.  It either needs a cov.u16f16 or add.s + sel.f16.  This | ||||||
|  | makes SGT/SLT/etc more similar to CMP, so handle them in trans_cmp(). | ||||||
|  | 
 | ||||||
|  | This fixes a bunch of piglit tests. | ||||||
|  | 
 | ||||||
|  | Signed-off-by: Rob Clark <robclark@freedesktop.org> | ||||||
|  | ---
 | ||||||
|  |  src/gallium/drivers/freedreno/a3xx/fd3_compiler.c | 154 ++++++++++++++++++---- | ||||||
|  |  1 file changed, 125 insertions(+), 29 deletions(-) | ||||||
|  | 
 | ||||||
|  | diff --git a/src/gallium/drivers/freedreno/a3xx/fd3_compiler.c b/src/gallium/drivers/freedreno/a3xx/fd3_compiler.c
 | ||||||
|  | index b5cdda8..477053b 100644
 | ||||||
|  | --- a/src/gallium/drivers/freedreno/a3xx/fd3_compiler.c
 | ||||||
|  | +++ b/src/gallium/drivers/freedreno/a3xx/fd3_compiler.c
 | ||||||
|  | @@ -851,7 +851,39 @@ trans_samp(const struct instr_translater *t,
 | ||||||
|  |  	regmask_set(ctx->needs_sy, r); | ||||||
|  |  } | ||||||
|  |   | ||||||
|  | -/* CMP(a,b,c) = (a < 0) ? b : c */
 | ||||||
|  | +/*
 | ||||||
|  | + * SEQ(a,b) = (a == b) ? 1.0 : 0.0
 | ||||||
|  | + *   cmps.f.eq tmp0, b, a
 | ||||||
|  | + *   cov.u16f16 dst, tmp0
 | ||||||
|  | + *
 | ||||||
|  | + * SNE(a,b) = (a != b) ? 1.0 : 0.0
 | ||||||
|  | + *   cmps.f.eq tmp0, b, a
 | ||||||
|  | + *   add.s tmp0, tmp0, -1
 | ||||||
|  | + *   sel.f16 dst, {0.0}, tmp0, {1.0}
 | ||||||
|  | + *
 | ||||||
|  | + * SGE(a,b) = (a >= b) ? 1.0 : 0.0
 | ||||||
|  | + *   cmps.f.ge tmp0, a, b
 | ||||||
|  | + *   cov.u16f16 dst, tmp0
 | ||||||
|  | + *
 | ||||||
|  | + * SLE(a,b) = (a <= b) ? 1.0 : 0.0
 | ||||||
|  | + *   cmps.f.ge tmp0, b, a
 | ||||||
|  | + *   cov.u16f16 dst, tmp0
 | ||||||
|  | + *
 | ||||||
|  | + * SGT(a,b) = (a > b)  ? 1.0 : 0.0
 | ||||||
|  | + *   cmps.f.ge tmp0, b, a
 | ||||||
|  | + *   add.s tmp0, tmp0, -1
 | ||||||
|  | + *   sel.f16 dst, {0.0}, tmp0, {1.0}
 | ||||||
|  | + *
 | ||||||
|  | + * SLT(a,b) = (a < b)  ? 1.0 : 0.0
 | ||||||
|  | + *   cmps.f.ge tmp0, a, b
 | ||||||
|  | + *   add.s tmp0, tmp0, -1
 | ||||||
|  | + *   sel.f16 dst, {0.0}, tmp0, {1.0}
 | ||||||
|  | + *
 | ||||||
|  | + * CMP(a,b,c) = (a < 0.0) ? b : c
 | ||||||
|  | + *   cmps.f.ge tmp0, a, {0.0}
 | ||||||
|  | + *   add.s tmp0, tmp0, -1
 | ||||||
|  | + *   sel.f16 dst, c, tmp0, b
 | ||||||
|  | + */
 | ||||||
|  |  static void | ||||||
|  |  trans_cmp(const struct instr_translater *t, | ||||||
|  |  		struct fd3_compile_context *ctx, | ||||||
|  | @@ -860,34 +892,97 @@ trans_cmp(const struct instr_translater *t,
 | ||||||
|  |  	struct ir3_instruction *instr; | ||||||
|  |  	struct tgsi_dst_register tmp_dst; | ||||||
|  |  	struct tgsi_src_register *tmp_src; | ||||||
|  | -	struct tgsi_src_register constval;
 | ||||||
|  | -	/* final instruction uses original src1 and src2, so we need get_dst() */
 | ||||||
|  | +	struct tgsi_src_register constval0, constval1;
 | ||||||
|  | +	/* final instruction for CMP() uses orig src1 and src2: */
 | ||||||
|  |  	struct tgsi_dst_register *dst = get_dst(ctx, inst); | ||||||
|  | +	struct tgsi_src_register *a0, *a1;
 | ||||||
|  | +	unsigned condition;
 | ||||||
|  |   | ||||||
|  |  	tmp_src = get_internal_temp(ctx, &tmp_dst); | ||||||
|  |   | ||||||
|  | -	/* cmps.f.ge tmp, src0, 0.0 */
 | ||||||
|  | +	switch (t->tgsi_opc) {
 | ||||||
|  | +	case TGSI_OPCODE_SEQ:
 | ||||||
|  | +	case TGSI_OPCODE_SNE:
 | ||||||
|  | +		a0 = &inst->Src[1].Register;  /* b */
 | ||||||
|  | +		a1 = &inst->Src[0].Register;  /* a */
 | ||||||
|  | +		condition = IR3_COND_EQ;
 | ||||||
|  | +		break;
 | ||||||
|  | +	case TGSI_OPCODE_SGE:
 | ||||||
|  | +	case TGSI_OPCODE_SLT:
 | ||||||
|  | +		a0 = &inst->Src[0].Register;  /* a */
 | ||||||
|  | +		a1 = &inst->Src[1].Register;  /* b */
 | ||||||
|  | +		condition = IR3_COND_GE;
 | ||||||
|  | +		break;
 | ||||||
|  | +	case TGSI_OPCODE_SLE:
 | ||||||
|  | +	case TGSI_OPCODE_SGT:
 | ||||||
|  | +		a0 = &inst->Src[1].Register;  /* b */
 | ||||||
|  | +		a1 = &inst->Src[0].Register;  /* a */
 | ||||||
|  | +		condition = IR3_COND_GE;
 | ||||||
|  | +		break;
 | ||||||
|  | +	case TGSI_OPCODE_CMP:
 | ||||||
|  | +		get_immediate(ctx, &constval0, fui(0.0));
 | ||||||
|  | +		a0 = &inst->Src[0].Register;  /* a */
 | ||||||
|  | +		a1 = &constval0;              /* {0.0} */
 | ||||||
|  | +		condition = IR3_COND_GE;
 | ||||||
|  | +		break;
 | ||||||
|  | +	default:
 | ||||||
|  | +		compile_assert(ctx, 0);
 | ||||||
|  | +		return;
 | ||||||
|  | +	}
 | ||||||
|  | +
 | ||||||
|  | +	/* NOTE: seems blob compiler will move a const to a gpr if both
 | ||||||
|  | +	 * src args to cmps.f are const.  Need to check if this applies
 | ||||||
|  | +	 * to other instructions..
 | ||||||
|  | +	 */
 | ||||||
|  | +	if (is_const(a0) && is_const(a1))
 | ||||||
|  | +		a0 = get_unconst(ctx, a0);
 | ||||||
|  | +
 | ||||||
|  | +	/* cmps.f.ge tmp, a0, a1 */
 | ||||||
|  |  	instr = ir3_instr_create(ctx->ir, 2, OPC_CMPS_F); | ||||||
|  | -	instr->cat2.condition = IR3_COND_GE;
 | ||||||
|  | -	get_immediate(ctx, &constval, fui(0.0));
 | ||||||
|  | -	vectorize(ctx, instr, &tmp_dst, 2,
 | ||||||
|  | -			&inst->Src[0].Register, 0,
 | ||||||
|  | -			&constval, 0);
 | ||||||
|  | +	instr->cat2.condition = condition;
 | ||||||
|  | +	vectorize(ctx, instr, &tmp_dst, 2, a0, 0, a1, 0);
 | ||||||
|  |   | ||||||
|  | -	/* add.s tmp, tmp, -1 */
 | ||||||
|  | -	instr = ir3_instr_create(ctx->ir, 2, OPC_ADD_S);
 | ||||||
|  | -	instr->repeat = 3;
 | ||||||
|  | -	add_dst_reg(ctx, instr, &tmp_dst, 0);
 | ||||||
|  | -	add_src_reg(ctx, instr, tmp_src, 0)->flags |= IR3_REG_R;
 | ||||||
|  | -	ir3_reg_create(instr, 0, IR3_REG_IMMED)->iim_val = -1;
 | ||||||
|  | +	switch (t->tgsi_opc) {
 | ||||||
|  | +	case TGSI_OPCODE_SEQ:
 | ||||||
|  | +	case TGSI_OPCODE_SGE:
 | ||||||
|  | +	case TGSI_OPCODE_SLE:
 | ||||||
|  | +		/* cov.u16f16 dst, tmp0 */
 | ||||||
|  | +		instr = ir3_instr_create(ctx->ir, 1, 0);
 | ||||||
|  | +		instr->cat1.src_type = get_utype(ctx);
 | ||||||
|  | +		instr->cat1.dst_type = get_ftype(ctx);
 | ||||||
|  | +		vectorize(ctx, instr, dst, 1, tmp_src, 0);
 | ||||||
|  | +		break;
 | ||||||
|  | +	case TGSI_OPCODE_SNE:
 | ||||||
|  | +	case TGSI_OPCODE_SGT:
 | ||||||
|  | +	case TGSI_OPCODE_SLT:
 | ||||||
|  | +	case TGSI_OPCODE_CMP:
 | ||||||
|  | +		/* add.s tmp, tmp, -1 */
 | ||||||
|  | +		instr = ir3_instr_create(ctx->ir, 2, OPC_ADD_S);
 | ||||||
|  | +		instr->repeat = 3;
 | ||||||
|  | +		add_dst_reg(ctx, instr, &tmp_dst, 0);
 | ||||||
|  | +		add_src_reg(ctx, instr, tmp_src, 0)->flags |= IR3_REG_R;
 | ||||||
|  | +		ir3_reg_create(instr, 0, IR3_REG_IMMED)->iim_val = -1;
 | ||||||
|  | +
 | ||||||
|  | +		if (t->tgsi_opc == TGSI_OPCODE_CMP) {
 | ||||||
|  | +			/* sel.{f32,f16} dst, src2, tmp, src1 */
 | ||||||
|  | +			instr = ir3_instr_create(ctx->ir, 3,
 | ||||||
|  | +					ctx->so->half_precision ? OPC_SEL_F16 : OPC_SEL_F32);
 | ||||||
|  | +			vectorize(ctx, instr, dst, 3,
 | ||||||
|  | +					&inst->Src[2].Register, 0,
 | ||||||
|  | +					tmp_src, 0,
 | ||||||
|  | +					&inst->Src[1].Register, 0);
 | ||||||
|  | +		} else {
 | ||||||
|  | +			get_immediate(ctx, &constval0, fui(0.0));
 | ||||||
|  | +			get_immediate(ctx, &constval1, fui(1.0));
 | ||||||
|  | +			/* sel.{f32,f16} dst, {0.0}, tmp0, {1.0} */
 | ||||||
|  | +			instr = ir3_instr_create(ctx->ir, 3,
 | ||||||
|  | +					ctx->so->half_precision ? OPC_SEL_F16 : OPC_SEL_F32);
 | ||||||
|  | +			vectorize(ctx, instr, dst, 3,
 | ||||||
|  | +					&constval0, 0, tmp_src, 0, &constval1, 0);
 | ||||||
|  | +		}
 | ||||||
|  |   | ||||||
|  | -	/* sel.{f32,f16} dst, src2, tmp, src1 */
 | ||||||
|  | -	instr = ir3_instr_create(ctx->ir, 3, ctx->so->half_precision ?
 | ||||||
|  | -			OPC_SEL_F16 : OPC_SEL_F32);
 | ||||||
|  | -	vectorize(ctx, instr, dst, 3,
 | ||||||
|  | -			&inst->Src[2].Register, 0,
 | ||||||
|  | -			tmp_src, 0,
 | ||||||
|  | -			&inst->Src[1].Register, 0);
 | ||||||
|  | +		break;
 | ||||||
|  | +	}
 | ||||||
|  |   | ||||||
|  |  	put_dst(ctx, inst, dst); | ||||||
|  |  } | ||||||
|  | @@ -948,8 +1043,8 @@ trans_if(const struct instr_translater *t,
 | ||||||
|  |   | ||||||
|  |  	instr = ir3_instr_create(ctx->ir, 2, OPC_CMPS_F); | ||||||
|  |  	ir3_reg_create(instr, regid(REG_P0, 0), 0); | ||||||
|  | -	add_src_reg(ctx, instr, &constval, constval.SwizzleX);
 | ||||||
|  |  	add_src_reg(ctx, instr, src, src->SwizzleX); | ||||||
|  | +	add_src_reg(ctx, instr, &constval, constval.SwizzleX);
 | ||||||
|  |  	instr->cat2.condition = IR3_COND_EQ; | ||||||
|  |   | ||||||
|  |  	instr = ir3_instr_create(ctx->ir, 0, OPC_BR); | ||||||
|  | @@ -1033,10 +1128,6 @@ instr_cat2(const struct instr_translater *t,
 | ||||||
|  |  	instr = ir3_instr_create(ctx->ir, 2, t->opc); | ||||||
|  |   | ||||||
|  |  	switch (t->tgsi_opc) { | ||||||
|  | -	case TGSI_OPCODE_SLT:
 | ||||||
|  | -	case TGSI_OPCODE_SGE:
 | ||||||
|  | -		instr->cat2.condition = t->arg;
 | ||||||
|  | -		break;
 | ||||||
|  |  	case TGSI_OPCODE_ABS: | ||||||
|  |  		src0_flags = IR3_REG_ABS; | ||||||
|  |  		break; | ||||||
|  | @@ -1135,12 +1226,11 @@ static const struct instr_translater translaters[TGSI_OPCODE_LAST] = {
 | ||||||
|  |  	INSTR(DPH,          trans_dotp, .arg = 3),   /* almost like DP3 */ | ||||||
|  |  	INSTR(MIN,          instr_cat2, .opc = OPC_MIN_F), | ||||||
|  |  	INSTR(MAX,          instr_cat2, .opc = OPC_MAX_F), | ||||||
|  | -	INSTR(SLT,          instr_cat2, .opc = OPC_CMPS_F, .arg = IR3_COND_LT),
 | ||||||
|  | -	INSTR(SGE,          instr_cat2, .opc = OPC_CMPS_F, .arg = IR3_COND_GE),
 | ||||||
|  |  	INSTR(MAD,          instr_cat3, .opc = OPC_MAD_F32, .hopc = OPC_MAD_F16), | ||||||
|  |  	INSTR(LRP,          trans_lrp), | ||||||
|  |  	INSTR(FRC,          trans_frac), | ||||||
|  |  	INSTR(FLR,          instr_cat2, .opc = OPC_FLOOR_F), | ||||||
|  | +	INSTR(ARL,          instr_cat2, .opc = OPC_FLOOR_F),
 | ||||||
|  |  	INSTR(EX2,          instr_cat4, .opc = OPC_EXP2), | ||||||
|  |  	INSTR(LG2,          instr_cat4, .opc = OPC_LOG2), | ||||||
|  |  	INSTR(POW,          trans_pow), | ||||||
|  | @@ -1149,6 +1239,12 @@ static const struct instr_translater translaters[TGSI_OPCODE_LAST] = {
 | ||||||
|  |  	INSTR(SIN,          instr_cat4, .opc = OPC_COS), | ||||||
|  |  	INSTR(TEX,          trans_samp, .opc = OPC_SAM, .arg = TGSI_OPCODE_TEX), | ||||||
|  |  	INSTR(TXP,          trans_samp, .opc = OPC_SAM, .arg = TGSI_OPCODE_TXP), | ||||||
|  | +	INSTR(SGT,          trans_cmp),
 | ||||||
|  | +	INSTR(SLT,          trans_cmp),
 | ||||||
|  | +	INSTR(SGE,          trans_cmp),
 | ||||||
|  | +	INSTR(SLE,          trans_cmp),
 | ||||||
|  | +	INSTR(SNE,          trans_cmp),
 | ||||||
|  | +	INSTR(SEQ,          trans_cmp),
 | ||||||
|  |  	INSTR(CMP,          trans_cmp), | ||||||
|  |  	INSTR(IF,           trans_if), | ||||||
|  |  	INSTR(ELSE,         trans_else), | ||||||
|  | -- 
 | ||||||
|  | 1.8.4.2 | ||||||
|  | 
 | ||||||
							
								
								
									
										36
									
								
								0014-freedreno-a3xx-don-t-leak-so-much.patch
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										36
									
								
								0014-freedreno-a3xx-don-t-leak-so-much.patch
									
									
									
									
									
										Normal file
									
								
							| @ -0,0 +1,36 @@ | |||||||
|  | From 0b2c5119cb772751edb3c42c9c0545443e26fd7f Mon Sep 17 00:00:00 2001 | ||||||
|  | From: Rob Clark <robclark@freedesktop.org> | ||||||
|  | Date: Mon, 17 Jun 2013 20:11:54 -0400 | ||||||
|  | Subject: [PATCH 14/17] freedreno/a3xx: don't leak so much | ||||||
|  | 
 | ||||||
|  | Signed-off-by: Rob Clark <robclark@freedesktop.org> | ||||||
|  | ---
 | ||||||
|  |  src/gallium/drivers/freedreno/a3xx/fd3_context.c | 11 +++++++++++ | ||||||
|  |  1 file changed, 11 insertions(+) | ||||||
|  | 
 | ||||||
|  | diff --git a/src/gallium/drivers/freedreno/a3xx/fd3_context.c b/src/gallium/drivers/freedreno/a3xx/fd3_context.c
 | ||||||
|  | index 3ae9b29..589aeed 100644
 | ||||||
|  | --- a/src/gallium/drivers/freedreno/a3xx/fd3_context.c
 | ||||||
|  | +++ b/src/gallium/drivers/freedreno/a3xx/fd3_context.c
 | ||||||
|  | @@ -40,7 +40,18 @@
 | ||||||
|  |  static void | ||||||
|  |  fd3_context_destroy(struct pipe_context *pctx) | ||||||
|  |  { | ||||||
|  | +	struct fd3_context *fd3_ctx = fd3_context(fd_context(pctx));
 | ||||||
|  | +
 | ||||||
|  |  	fd3_prog_fini(pctx); | ||||||
|  | +
 | ||||||
|  | +	fd_bo_del(fd3_ctx->vs_pvt_mem);
 | ||||||
|  | +	fd_bo_del(fd3_ctx->fs_pvt_mem);
 | ||||||
|  | +	fd_bo_del(fd3_ctx->vsc_size_mem);
 | ||||||
|  | +	fd_bo_del(fd3_ctx->vsc_pipe_mem);
 | ||||||
|  | +
 | ||||||
|  | +	pipe_resource_reference(&fd3_ctx->solid_vbuf, NULL);
 | ||||||
|  | +	pipe_resource_reference(&fd3_ctx->blit_texcoord_vbuf, NULL);
 | ||||||
|  | +
 | ||||||
|  |  	fd_context_destroy(pctx); | ||||||
|  |  } | ||||||
|  |   | ||||||
|  | -- 
 | ||||||
|  | 1.8.4.2 | ||||||
|  | 
 | ||||||
							
								
								
									
										376
									
								
								0015-freedreno-a3xx-compiler-better-const-handling.patch
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										376
									
								
								0015-freedreno-a3xx-compiler-better-const-handling.patch
									
									
									
									
									
										Normal file
									
								
							| @ -0,0 +1,376 @@ | |||||||
|  | From f1998c8aa7d82006f9ef7e6710a0f68f30bfc109 Mon Sep 17 00:00:00 2001 | ||||||
|  | From: Rob Clark <robclark@freedesktop.org> | ||||||
|  | Date: Sat, 24 Aug 2013 17:30:50 -0400 | ||||||
|  | Subject: [PATCH 15/17] freedreno/a3xx/compiler: better const handling | ||||||
|  | 
 | ||||||
|  | Seems like most/all instructions have some restrictions about const src | ||||||
|  | registers.  It seems like the 2 src (cat2) instructions can take at most | ||||||
|  | one const, and the 3 src (cat3) instructions can take at most one const | ||||||
|  | in the first 2 arguments.  And so on.  Handle this properly now. | ||||||
|  | 
 | ||||||
|  | Signed-off-by: Rob Clark <robclark@freedesktop.org> | ||||||
|  | ---
 | ||||||
|  |  src/gallium/drivers/freedreno/a3xx/fd3_compiler.c | 211 +++++++++++++--------- | ||||||
|  |  1 file changed, 121 insertions(+), 90 deletions(-) | ||||||
|  | 
 | ||||||
|  | diff --git a/src/gallium/drivers/freedreno/a3xx/fd3_compiler.c b/src/gallium/drivers/freedreno/a3xx/fd3_compiler.c
 | ||||||
|  | index 477053b..dcdd2d9 100644
 | ||||||
|  | --- a/src/gallium/drivers/freedreno/a3xx/fd3_compiler.c
 | ||||||
|  | +++ b/src/gallium/drivers/freedreno/a3xx/fd3_compiler.c
 | ||||||
|  | @@ -355,20 +355,47 @@ is_const(struct tgsi_src_register *src)
 | ||||||
|  |  			(src->File == TGSI_FILE_IMMEDIATE); | ||||||
|  |  } | ||||||
|  |   | ||||||
|  | +static type_t
 | ||||||
|  | +get_ftype(struct fd3_compile_context *ctx)
 | ||||||
|  | +{
 | ||||||
|  | +	return ctx->so->half_precision ? TYPE_F16 : TYPE_F32;
 | ||||||
|  | +}
 | ||||||
|  | +
 | ||||||
|  | +static type_t
 | ||||||
|  | +get_utype(struct fd3_compile_context *ctx)
 | ||||||
|  | +{
 | ||||||
|  | +	return ctx->so->half_precision ? TYPE_U16 : TYPE_U32;
 | ||||||
|  | +}
 | ||||||
|  | +
 | ||||||
|  | +static unsigned
 | ||||||
|  | +src_swiz(struct tgsi_src_register *src, int chan)
 | ||||||
|  | +{
 | ||||||
|  | +	switch (chan) {
 | ||||||
|  | +	case 0: return src->SwizzleX;
 | ||||||
|  | +	case 1: return src->SwizzleY;
 | ||||||
|  | +	case 2: return src->SwizzleZ;
 | ||||||
|  | +	case 3: return src->SwizzleW;
 | ||||||
|  | +	}
 | ||||||
|  | +	assert(0);
 | ||||||
|  | +	return 0;
 | ||||||
|  | +}
 | ||||||
|  | +
 | ||||||
|  |  /* for instructions that cannot take a const register as src, if needed | ||||||
|  |   * generate a move to temporary gpr: | ||||||
|  |   */ | ||||||
|  |  static struct tgsi_src_register * | ||||||
|  |  get_unconst(struct fd3_compile_context *ctx, struct tgsi_src_register *src) | ||||||
|  |  { | ||||||
|  | -	if (is_const(src)) {
 | ||||||
|  | -		static struct tgsi_dst_register tmp_dst;
 | ||||||
|  | -		struct tgsi_src_register *tmp_src =
 | ||||||
|  | -				get_internal_temp(ctx, &tmp_dst);
 | ||||||
|  | -		create_mov(ctx, &tmp_dst, src);
 | ||||||
|  | -		src = tmp_src;
 | ||||||
|  | -	}
 | ||||||
|  | -	return src;
 | ||||||
|  | +	struct tgsi_dst_register tmp_dst;
 | ||||||
|  | +	struct tgsi_src_register *tmp_src;
 | ||||||
|  | +
 | ||||||
|  | +	compile_assert(ctx, is_const(src));
 | ||||||
|  | +
 | ||||||
|  | +	tmp_src = get_internal_temp(ctx, &tmp_dst);
 | ||||||
|  | +
 | ||||||
|  | +	create_mov(ctx, &tmp_dst, src);
 | ||||||
|  | +
 | ||||||
|  | +	return tmp_src;
 | ||||||
|  |  } | ||||||
|  |   | ||||||
|  |  static void | ||||||
|  | @@ -418,31 +445,6 @@ get_immediate(struct fd3_compile_context *ctx,
 | ||||||
|  |  	reg->SwizzleW  = swiz2tgsi[swiz]; | ||||||
|  |  } | ||||||
|  |   | ||||||
|  | -static type_t
 | ||||||
|  | -get_ftype(struct fd3_compile_context *ctx)
 | ||||||
|  | -{
 | ||||||
|  | -	return ctx->so->half_precision ? TYPE_F16 : TYPE_F32;
 | ||||||
|  | -}
 | ||||||
|  | -
 | ||||||
|  | -static type_t
 | ||||||
|  | -get_utype(struct fd3_compile_context *ctx)
 | ||||||
|  | -{
 | ||||||
|  | -	return ctx->so->half_precision ? TYPE_U16 : TYPE_U32;
 | ||||||
|  | -}
 | ||||||
|  | -
 | ||||||
|  | -static unsigned
 | ||||||
|  | -src_swiz(struct tgsi_src_register *src, int chan)
 | ||||||
|  | -{
 | ||||||
|  | -	switch (chan) {
 | ||||||
|  | -	case 0: return src->SwizzleX;
 | ||||||
|  | -	case 1: return src->SwizzleY;
 | ||||||
|  | -	case 2: return src->SwizzleZ;
 | ||||||
|  | -	case 3: return src->SwizzleW;
 | ||||||
|  | -	}
 | ||||||
|  | -	assert(0);
 | ||||||
|  | -	return 0;
 | ||||||
|  | -}
 | ||||||
|  | -
 | ||||||
|  |  static void | ||||||
|  |  create_mov(struct fd3_compile_context *ctx, struct tgsi_dst_register *dst, | ||||||
|  |  		struct tgsi_src_register *src) | ||||||
|  | @@ -463,7 +465,6 @@ create_mov(struct fd3_compile_context *ctx, struct tgsi_dst_register *dst,
 | ||||||
|  |  			ir3_instr_create(ctx->ir, 0, OPC_NOP); | ||||||
|  |  		} | ||||||
|  |  	} | ||||||
|  | -
 | ||||||
|  |  } | ||||||
|  |   | ||||||
|  |  static void | ||||||
|  | @@ -584,6 +585,15 @@ vectorize(struct fd3_compile_context *ctx, struct ir3_instruction *instr,
 | ||||||
|  |   * native instructions: | ||||||
|  |   */ | ||||||
|  |   | ||||||
|  | +static inline void
 | ||||||
|  | +get_swiz(unsigned *swiz, struct tgsi_src_register *src)
 | ||||||
|  | +{
 | ||||||
|  | +	swiz[0] = src->SwizzleX;
 | ||||||
|  | +	swiz[1] = src->SwizzleY;
 | ||||||
|  | +	swiz[2] = src->SwizzleZ;
 | ||||||
|  | +	swiz[3] = src->SwizzleW;
 | ||||||
|  | +}
 | ||||||
|  | +
 | ||||||
|  |  static void | ||||||
|  |  trans_dotp(const struct instr_translater *t, | ||||||
|  |  		struct fd3_compile_context *ctx, | ||||||
|  | @@ -595,34 +605,31 @@ trans_dotp(const struct instr_translater *t,
 | ||||||
|  |  	struct tgsi_dst_register *dst  = &inst->Dst[0].Register; | ||||||
|  |  	struct tgsi_src_register *src0 = &inst->Src[0].Register; | ||||||
|  |  	struct tgsi_src_register *src1 = &inst->Src[1].Register; | ||||||
|  | -	unsigned swiz0[] = { src0->SwizzleX, src0->SwizzleY, src0->SwizzleZ, src0->SwizzleW };
 | ||||||
|  | -	unsigned swiz1[] = { src1->SwizzleX, src1->SwizzleY, src1->SwizzleZ, src1->SwizzleW };
 | ||||||
|  | +	unsigned swiz0[4];
 | ||||||
|  | +	unsigned swiz1[4];
 | ||||||
|  |  	opc_t opc_mad    = ctx->so->half_precision ? OPC_MAD_F16 : OPC_MAD_F32; | ||||||
|  |  	unsigned n = t->arg;     /* number of components */ | ||||||
|  | -	unsigned i;
 | ||||||
|  | +	unsigned i, swapped = 0;
 | ||||||
|  |   | ||||||
|  |  	tmp_src = get_internal_temp_repl(ctx, &tmp_dst); | ||||||
|  |   | ||||||
|  | -	/* Blob compiler never seems to use a const in src1 position for
 | ||||||
|  | -	 * mad.*, although there does seem (according to disassembler
 | ||||||
|  | -	 * hidden in libllvm-a3xx.so) to be a bit to indicate that src1
 | ||||||
|  | -	 * is a const.  Not sure if this is a hw bug, or simply that the
 | ||||||
|  | -	 * disassembler lies.
 | ||||||
|  | +	/* in particular, can't handle const for src1 for cat3/mad:
 | ||||||
|  |  	 */ | ||||||
|  |  	if (is_const(src1)) { | ||||||
|  | -
 | ||||||
|  | -		/* the mov to tmp unswizzles src1, so now we have tmp.xyzw:
 | ||||||
|  | -		 */
 | ||||||
|  | -		for (i = 0; i < 4; i++)
 | ||||||
|  | -			swiz1[i] = i;
 | ||||||
|  | -
 | ||||||
|  | -		/* the first mul.f will clobber tmp.x, but that is ok
 | ||||||
|  | -		 * because after that point we no longer need tmp.x:
 | ||||||
|  | -		 */
 | ||||||
|  | -		create_mov(ctx, &tmp_dst, src1);
 | ||||||
|  | -		src1 = tmp_src;
 | ||||||
|  | +		if (!is_const(src0)) {
 | ||||||
|  | +			struct tgsi_src_register *tmp;
 | ||||||
|  | +			tmp = src0;
 | ||||||
|  | +			src0 = src1;
 | ||||||
|  | +			src1 = tmp;
 | ||||||
|  | +			swapped = 1;
 | ||||||
|  | +		} else {
 | ||||||
|  | +			src0 = get_unconst(ctx, src0);
 | ||||||
|  | +		}
 | ||||||
|  |  	} | ||||||
|  |   | ||||||
|  | +	get_swiz(swiz0, src0);
 | ||||||
|  | +	get_swiz(swiz1, src1);
 | ||||||
|  | +
 | ||||||
|  |  	instr = ir3_instr_create(ctx->ir, 2, OPC_MUL_F); | ||||||
|  |  	add_dst_reg(ctx, instr, &tmp_dst, 0); | ||||||
|  |  	add_src_reg(ctx, instr, src0, swiz0[0]); | ||||||
|  | @@ -640,22 +647,20 @@ trans_dotp(const struct instr_translater *t,
 | ||||||
|  |   | ||||||
|  |  	/* DPH(a,b) = (a.x * b.x) + (a.y * b.y) + (a.z * b.z) + b.w */ | ||||||
|  |  	if (t->tgsi_opc == TGSI_OPCODE_DPH) { | ||||||
|  | -		ir3_instr_create(ctx->ir, 0, OPC_NOP);
 | ||||||
|  | +		ir3_instr_create(ctx->ir, 0, OPC_NOP)->repeat = 1;
 | ||||||
|  |   | ||||||
|  |  		instr = ir3_instr_create(ctx->ir, 2, OPC_ADD_F); | ||||||
|  |  		add_dst_reg(ctx, instr, &tmp_dst, 0); | ||||||
|  | -		add_src_reg(ctx, instr, src1, swiz1[i]);
 | ||||||
|  | +		if (swapped)
 | ||||||
|  | +			add_src_reg(ctx, instr, src0, swiz0[i]);
 | ||||||
|  | +		else
 | ||||||
|  | +			add_src_reg(ctx, instr, src1, swiz1[i]);
 | ||||||
|  |  		add_src_reg(ctx, instr, tmp_src, 0); | ||||||
|  |   | ||||||
|  |  		n++; | ||||||
|  |  	} | ||||||
|  |   | ||||||
|  | -	ir3_instr_create(ctx->ir, 0, OPC_NOP);
 | ||||||
|  | -
 | ||||||
|  | -	/* pad out to multiple of 4 scalar instructions: */
 | ||||||
|  | -	for (i = 2 * n; i % 4; i++) {
 | ||||||
|  | -		ir3_instr_create(ctx->ir, 0, OPC_NOP);
 | ||||||
|  | -	}
 | ||||||
|  | +	ir3_instr_create(ctx->ir, 0, OPC_NOP)->repeat = 2;
 | ||||||
|  |   | ||||||
|  |  	create_mov(ctx, dst, tmp_src); | ||||||
|  |  } | ||||||
|  | @@ -670,6 +675,11 @@ trans_lrp(const struct instr_translater *t,
 | ||||||
|  |  	struct tgsi_dst_register tmp_dst1, tmp_dst2; | ||||||
|  |  	struct tgsi_src_register *tmp_src1, *tmp_src2; | ||||||
|  |  	struct tgsi_src_register tmp_const; | ||||||
|  | +	struct tgsi_src_register *src0 = &inst->Src[0].Register;
 | ||||||
|  | +	struct tgsi_src_register *src1 = &inst->Src[1].Register;
 | ||||||
|  | +
 | ||||||
|  | +	if (is_const(src0) && is_const(src1))
 | ||||||
|  | +		src0 = get_unconst(ctx, src0);
 | ||||||
|  |   | ||||||
|  |  	tmp_src1 = get_internal_temp(ctx, &tmp_dst1); | ||||||
|  |  	tmp_src2 = get_internal_temp(ctx, &tmp_dst2); | ||||||
|  | @@ -678,15 +688,12 @@ trans_lrp(const struct instr_translater *t,
 | ||||||
|  |   | ||||||
|  |  	/* tmp1 = (a * b) */ | ||||||
|  |  	instr = ir3_instr_create(ctx->ir, 2, OPC_MUL_F); | ||||||
|  | -	vectorize(ctx, instr, &tmp_dst1, 2,
 | ||||||
|  | -			&inst->Src[0].Register, 0,
 | ||||||
|  | -			&inst->Src[1].Register, 0);
 | ||||||
|  | +	vectorize(ctx, instr, &tmp_dst1, 2, src0, 0, src1, 0);
 | ||||||
|  |   | ||||||
|  |  	/* tmp2 = (1 - a) */ | ||||||
|  |  	instr = ir3_instr_create(ctx->ir, 2, OPC_ADD_F); | ||||||
|  | -	vectorize(ctx, instr, &tmp_dst2, 2,
 | ||||||
|  | -			&tmp_const, 0,
 | ||||||
|  | -			&inst->Src[0].Register, IR3_REG_NEGATE);
 | ||||||
|  | +	vectorize(ctx, instr, &tmp_dst2, 2, &tmp_const, 0,
 | ||||||
|  | +			src0, IR3_REG_NEGATE);
 | ||||||
|  |   | ||||||
|  |  	/* tmp2 = tmp2 * c */ | ||||||
|  |  	instr = ir3_instr_create(ctx->ir, 2, OPC_MUL_F); | ||||||
|  | @@ -930,10 +937,6 @@ trans_cmp(const struct instr_translater *t,
 | ||||||
|  |  		return; | ||||||
|  |  	} | ||||||
|  |   | ||||||
|  | -	/* NOTE: seems blob compiler will move a const to a gpr if both
 | ||||||
|  | -	 * src args to cmps.f are const.  Need to check if this applies
 | ||||||
|  | -	 * to other instructions..
 | ||||||
|  | -	 */
 | ||||||
|  |  	if (is_const(a0) && is_const(a1)) | ||||||
|  |  		a0 = get_unconst(ctx, a0); | ||||||
|  |   | ||||||
|  | @@ -1041,6 +1044,9 @@ trans_if(const struct instr_translater *t,
 | ||||||
|  |   | ||||||
|  |  	get_immediate(ctx, &constval, fui(0.0)); | ||||||
|  |   | ||||||
|  | +	if (is_const(src))
 | ||||||
|  | +		src = get_unconst(ctx, src);
 | ||||||
|  | +
 | ||||||
|  |  	instr = ir3_instr_create(ctx->ir, 2, OPC_CMPS_F); | ||||||
|  |  	ir3_reg_create(instr, regid(REG_P0, 0), 0); | ||||||
|  |  	add_src_reg(ctx, instr, src, src->SwizzleX); | ||||||
|  | @@ -1122,11 +1128,11 @@ instr_cat2(const struct instr_translater *t,
 | ||||||
|  |  		struct tgsi_full_instruction *inst) | ||||||
|  |  { | ||||||
|  |  	struct tgsi_dst_register *dst = get_dst(ctx, inst); | ||||||
|  | +	struct tgsi_src_register *src0 = &inst->Src[0].Register;
 | ||||||
|  | +	struct tgsi_src_register *src1 = &inst->Src[1].Register;
 | ||||||
|  |  	struct ir3_instruction *instr; | ||||||
|  |  	unsigned src0_flags = 0; | ||||||
|  |   | ||||||
|  | -	instr = ir3_instr_create(ctx->ir, 2, t->opc);
 | ||||||
|  | -
 | ||||||
|  |  	switch (t->tgsi_opc) { | ||||||
|  |  	case TGSI_OPCODE_ABS: | ||||||
|  |  		src0_flags = IR3_REG_ABS; | ||||||
|  | @@ -1149,41 +1155,65 @@ instr_cat2(const struct instr_translater *t,
 | ||||||
|  |  	case OPC_SETRM: | ||||||
|  |  	case OPC_CBITS_B: | ||||||
|  |  		/* these only have one src reg */ | ||||||
|  | -		vectorize(ctx, instr, dst, 1,
 | ||||||
|  | -				&inst->Src[0].Register, src0_flags);
 | ||||||
|  | +		instr = ir3_instr_create(ctx->ir, 2, t->opc);
 | ||||||
|  | +		vectorize(ctx, instr, dst, 1, src0, src0_flags);
 | ||||||
|  |  		break; | ||||||
|  |  	default: | ||||||
|  | -		vectorize(ctx, instr, dst, 2,
 | ||||||
|  | -				&inst->Src[0].Register, src0_flags,
 | ||||||
|  | -				&inst->Src[1].Register, 0);
 | ||||||
|  | +		if (is_const(src0) && is_const(src1))
 | ||||||
|  | +			src0 = get_unconst(ctx, src0);
 | ||||||
|  | +
 | ||||||
|  | +		instr = ir3_instr_create(ctx->ir, 2, t->opc);
 | ||||||
|  | +		vectorize(ctx, instr, dst, 2, src0, src0_flags, src1, 0);
 | ||||||
|  |  		break; | ||||||
|  |  	} | ||||||
|  |   | ||||||
|  |  	put_dst(ctx, inst, dst); | ||||||
|  |  } | ||||||
|  |   | ||||||
|  | +static bool is_mad(opc_t opc)
 | ||||||
|  | +{
 | ||||||
|  | +	switch (opc) {
 | ||||||
|  | +	case OPC_MAD_U16:
 | ||||||
|  | +	case OPC_MADSH_U16:
 | ||||||
|  | +	case OPC_MAD_S16:
 | ||||||
|  | +	case OPC_MADSH_M16:
 | ||||||
|  | +	case OPC_MAD_U24:
 | ||||||
|  | +	case OPC_MAD_S24:
 | ||||||
|  | +	case OPC_MAD_F16:
 | ||||||
|  | +	case OPC_MAD_F32:
 | ||||||
|  | +		return true;
 | ||||||
|  | +	default:
 | ||||||
|  | +		return false;
 | ||||||
|  | +	}
 | ||||||
|  | +}
 | ||||||
|  | +
 | ||||||
|  |  static void | ||||||
|  |  instr_cat3(const struct instr_translater *t, | ||||||
|  |  		struct fd3_compile_context *ctx, | ||||||
|  |  		struct tgsi_full_instruction *inst) | ||||||
|  |  { | ||||||
|  |  	struct tgsi_dst_register *dst = get_dst(ctx, inst); | ||||||
|  | -	struct tgsi_src_register *src1;
 | ||||||
|  | +	struct tgsi_src_register *src0 = &inst->Src[0].Register;
 | ||||||
|  | +	struct tgsi_src_register *src1 = &inst->Src[1].Register;
 | ||||||
|  |  	struct ir3_instruction *instr; | ||||||
|  |   | ||||||
|  | -	/* Blob compiler never seems to use a const in src1 position..
 | ||||||
|  | -	 * although there does seem (according to disassembler hidden
 | ||||||
|  | -	 * in libllvm-a3xx.so) to be a bit to indicate that src1 is a
 | ||||||
|  | -	 * const.  Not sure if this is a hw bug, or simply that the
 | ||||||
|  | -	 * disassembler lies.
 | ||||||
|  | +	/* in particular, can't handle const for src1 for cat3..
 | ||||||
|  | +	 * for mad, we can swap first two src's if needed:
 | ||||||
|  |  	 */ | ||||||
|  | -	src1 = get_unconst(ctx, &inst->Src[1].Register);
 | ||||||
|  | +	if (is_const(src1)) {
 | ||||||
|  | +		if (is_mad(t->opc) && !is_const(src0)) {
 | ||||||
|  | +			struct tgsi_src_register *tmp;
 | ||||||
|  | +			tmp = src0;
 | ||||||
|  | +			src0 = src1;
 | ||||||
|  | +			src1 = tmp;
 | ||||||
|  | +		} else {
 | ||||||
|  | +			src0 = get_unconst(ctx, src0);
 | ||||||
|  | +		}
 | ||||||
|  | +	}
 | ||||||
|  |   | ||||||
|  |  	instr = ir3_instr_create(ctx->ir, 3, | ||||||
|  |  			ctx->so->half_precision ? t->hopc : t->opc); | ||||||
|  | -	vectorize(ctx, instr, dst, 3,
 | ||||||
|  | -			&inst->Src[0].Register, 0,
 | ||||||
|  | -			src1, 0,
 | ||||||
|  | +	vectorize(ctx, instr, dst, 3, src0, 0, src1, 0,
 | ||||||
|  |  			&inst->Src[2].Register, 0); | ||||||
|  |  	put_dst(ctx, inst, dst); | ||||||
|  |  } | ||||||
|  | @@ -1194,11 +1224,12 @@ instr_cat4(const struct instr_translater *t,
 | ||||||
|  |  		struct tgsi_full_instruction *inst) | ||||||
|  |  { | ||||||
|  |  	struct tgsi_dst_register *dst = get_dst(ctx, inst); | ||||||
|  | -	struct tgsi_src_register *src;
 | ||||||
|  | +	struct tgsi_src_register *src = &inst->Src[0].Register;
 | ||||||
|  |  	struct ir3_instruction *instr; | ||||||
|  |   | ||||||
|  |  	/* seems like blob compiler avoids const as src.. */ | ||||||
|  | -	src = get_unconst(ctx, &inst->Src[0].Register);
 | ||||||
|  | +	if (is_const(src))
 | ||||||
|  | +		src = get_unconst(ctx, src);
 | ||||||
|  |   | ||||||
|  |  	ir3_instr_create(ctx->ir, 0, OPC_NOP)->repeat = 5; | ||||||
|  |  	instr = ir3_instr_create(ctx->ir, 4, t->opc); | ||||||
|  | -- 
 | ||||||
|  | 1.8.4.2 | ||||||
|  | 
 | ||||||
							
								
								
									
										128
									
								
								0016-freedreno-a3xx-compiler-handle-sync-flags-better.patch
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										128
									
								
								0016-freedreno-a3xx-compiler-handle-sync-flags-better.patch
									
									
									
									
									
										Normal file
									
								
							| @ -0,0 +1,128 @@ | |||||||
|  | From 4f0be333e7ee93fbb006c5570a594e49b4441731 Mon Sep 17 00:00:00 2001 | ||||||
|  | From: Rob Clark <robclark@freedesktop.org> | ||||||
|  | Date: Tue, 27 Aug 2013 19:24:53 -0400 | ||||||
|  | Subject: [PATCH 16/17] freedreno/a3xx/compiler: handle sync flags better | ||||||
|  | 
 | ||||||
|  | We need to set the flag on all the .xyzw components that are written by | ||||||
|  | the instruction, not just on .x.  Otherwise a later use of rN.y (for | ||||||
|  | example) will not trigger the appropriate sync bit to be set. | ||||||
|  | 
 | ||||||
|  | Signed-off-by: Rob Clark <robclark@freedesktop.org> | ||||||
|  | ---
 | ||||||
|  |  src/gallium/drivers/freedreno/a3xx/fd3_compiler.c | 50 +++++++++++++++-------- | ||||||
|  |  1 file changed, 34 insertions(+), 16 deletions(-) | ||||||
|  | 
 | ||||||
|  | diff --git a/src/gallium/drivers/freedreno/a3xx/fd3_compiler.c b/src/gallium/drivers/freedreno/a3xx/fd3_compiler.c
 | ||||||
|  | index dcdd2d9..5115411 100644
 | ||||||
|  | --- a/src/gallium/drivers/freedreno/a3xx/fd3_compiler.c
 | ||||||
|  | +++ b/src/gallium/drivers/freedreno/a3xx/fd3_compiler.c
 | ||||||
|  | @@ -62,10 +62,16 @@ static unsigned regmask_idx(struct ir3_register *reg)
 | ||||||
|  |  	return num; | ||||||
|  |  } | ||||||
|  |   | ||||||
|  | -static void regmask_set(regmask_t regmask, struct ir3_register *reg)
 | ||||||
|  | +static void regmask_set(regmask_t regmask, struct ir3_register *reg,
 | ||||||
|  | +		unsigned wrmask)
 | ||||||
|  |  { | ||||||
|  | -	unsigned idx = regmask_idx(reg);
 | ||||||
|  | -	regmask[idx / 8] |= 1 << (idx % 8);
 | ||||||
|  | +	unsigned i;
 | ||||||
|  | +	for (i = 0; i < 4; i++) {
 | ||||||
|  | +		if (wrmask & (1 << i)) {
 | ||||||
|  | +			unsigned idx = regmask_idx(reg) + i;
 | ||||||
|  | +			regmask[idx / 8] |= 1 << (idx % 8);
 | ||||||
|  | +		}
 | ||||||
|  | +	}
 | ||||||
|  |  } | ||||||
|  |   | ||||||
|  |  static unsigned regmask_get(regmask_t regmask, struct ir3_register *reg) | ||||||
|  | @@ -216,6 +222,24 @@ struct instr_translater {
 | ||||||
|  |  	unsigned arg; | ||||||
|  |  }; | ||||||
|  |   | ||||||
|  | +static unsigned
 | ||||||
|  | +src_flags(struct fd3_compile_context *ctx, struct ir3_register *reg)
 | ||||||
|  | +{
 | ||||||
|  | +	unsigned flags = 0;
 | ||||||
|  | +
 | ||||||
|  | +	if (regmask_get(ctx->needs_ss, reg)) {
 | ||||||
|  | +		flags |= IR3_INSTR_SS;
 | ||||||
|  | +		memset(ctx->needs_ss, 0, sizeof(ctx->needs_ss));
 | ||||||
|  | +	}
 | ||||||
|  | +
 | ||||||
|  | +	if (regmask_get(ctx->needs_sy, reg)) {
 | ||||||
|  | +		flags |= IR3_INSTR_SY;
 | ||||||
|  | +		memset(ctx->needs_sy, 0, sizeof(ctx->needs_sy));
 | ||||||
|  | +	}
 | ||||||
|  | +
 | ||||||
|  | +	return flags;
 | ||||||
|  | +}
 | ||||||
|  | +
 | ||||||
|  |  static struct ir3_register * | ||||||
|  |  add_dst_reg(struct fd3_compile_context *ctx, struct ir3_instruction *instr, | ||||||
|  |  		const struct tgsi_dst_register *dst, unsigned chan) | ||||||
|  | @@ -279,15 +303,7 @@ add_src_reg(struct fd3_compile_context *ctx, struct ir3_instruction *instr,
 | ||||||
|  |   | ||||||
|  |  	reg = ir3_reg_create(instr, regid(num, chan), flags); | ||||||
|  |   | ||||||
|  | -	if (regmask_get(ctx->needs_ss, reg)) {
 | ||||||
|  | -		instr->flags |= IR3_INSTR_SS;
 | ||||||
|  | -		memset(ctx->needs_ss, 0, sizeof(ctx->needs_ss));
 | ||||||
|  | -	}
 | ||||||
|  | -
 | ||||||
|  | -	if (regmask_get(ctx->needs_sy, reg)) {
 | ||||||
|  | -		instr->flags |= IR3_INSTR_SY;
 | ||||||
|  | -		memset(ctx->needs_sy, 0, sizeof(ctx->needs_sy));
 | ||||||
|  | -	}
 | ||||||
|  | +	instr->flags |= src_flags(ctx, reg);
 | ||||||
|  |   | ||||||
|  |  	return reg; | ||||||
|  |  } | ||||||
|  | @@ -567,6 +583,7 @@ vectorize(struct fd3_compile_context *ctx, struct ir3_instruction *instr,
 | ||||||
|  |  				cur->regs[j+1]->num = | ||||||
|  |  					regid(cur->regs[j+1]->num >> 2, | ||||||
|  |  						src_swiz(src, i)); | ||||||
|  | +				cur->flags |= src_flags(ctx, cur->regs[j+1]);
 | ||||||
|  |  			} | ||||||
|  |  			va_end(ap); | ||||||
|  |  		} | ||||||
|  | @@ -753,7 +770,7 @@ trans_pow(const struct instr_translater *t,
 | ||||||
|  |  	instr = ir3_instr_create(ctx->ir, 4, OPC_LOG2); | ||||||
|  |  	r = add_dst_reg(ctx, instr, &tmp_dst, 0); | ||||||
|  |  	add_src_reg(ctx, instr, src0, src0->SwizzleX); | ||||||
|  | -	regmask_set(ctx->needs_ss, r);
 | ||||||
|  | +	regmask_set(ctx->needs_ss, r, TGSI_WRITEMASK_X);
 | ||||||
|  |   | ||||||
|  |  	/* mul.f Rtmp, Rtmp, Rsrc1 */ | ||||||
|  |  	instr = ir3_instr_create(ctx->ir, 2, OPC_MUL_F); | ||||||
|  | @@ -771,7 +788,7 @@ trans_pow(const struct instr_translater *t,
 | ||||||
|  |  	instr = ir3_instr_create(ctx->ir, 4, OPC_EXP2); | ||||||
|  |  	r = add_dst_reg(ctx, instr, &tmp_dst, 0); | ||||||
|  |  	add_src_reg(ctx, instr, tmp_src, 0); | ||||||
|  | -	regmask_set(ctx->needs_ss, r);
 | ||||||
|  | +	regmask_set(ctx->needs_ss, r, TGSI_WRITEMASK_X);
 | ||||||
|  |   | ||||||
|  |  	create_mov(ctx, dst, tmp_src); | ||||||
|  |  } | ||||||
|  | @@ -855,7 +872,7 @@ trans_samp(const struct instr_translater *t,
 | ||||||
|  |   | ||||||
|  |  	add_src_reg(ctx, instr, coord, coord->SwizzleX); | ||||||
|  |   | ||||||
|  | -	regmask_set(ctx->needs_sy, r);
 | ||||||
|  | +	regmask_set(ctx->needs_sy, r, r->wrmask);
 | ||||||
|  |  } | ||||||
|  |   | ||||||
|  |  /* | ||||||
|  | @@ -1236,7 +1253,8 @@ instr_cat4(const struct instr_translater *t,
 | ||||||
|  |   | ||||||
|  |  	vectorize(ctx, instr, dst, 1, src, 0); | ||||||
|  |   | ||||||
|  | -	regmask_set(ctx->needs_ss, instr->regs[0]);
 | ||||||
|  | +	regmask_set(ctx->needs_ss, instr->regs[0],
 | ||||||
|  | +			inst->Dst[0].Register.WriteMask);
 | ||||||
|  |   | ||||||
|  |  	put_dst(ctx, inst, dst); | ||||||
|  |  } | ||||||
|  | -- 
 | ||||||
|  | 1.8.4.2 | ||||||
|  | 
 | ||||||
							
								
								
									
										328
									
								
								0017-freedreno-updates-for-msm-drm-kms-driver.patch
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										328
									
								
								0017-freedreno-updates-for-msm-drm-kms-driver.patch
									
									
									
									
									
										Normal file
									
								
							| @ -0,0 +1,328 @@ | |||||||
|  | From 4fd03f26aa1c2ddef24b2c4f8d1a10c96fbf7f40 Mon Sep 17 00:00:00 2001 | ||||||
|  | From: Rob Clark <robclark@freedesktop.org> | ||||||
|  | Date: Thu, 29 Aug 2013 17:24:33 -0400 | ||||||
|  | Subject: [PATCH 17/17] freedreno: updates for msm drm/kms driver | ||||||
|  | 
 | ||||||
|  | There were some small API tweaks in libdrm_freedreno to enable support | ||||||
|  | for msm drm/kms driver. | ||||||
|  | 
 | ||||||
|  | Signed-off-by: Rob Clark <robclark@freedesktop.org> | ||||||
|  | ---
 | ||||||
|  |  src/gallium/drivers/freedreno/a2xx/fd2_emit.c      |  4 +-- | ||||||
|  |  src/gallium/drivers/freedreno/a2xx/fd2_gmem.c      |  6 ++--- | ||||||
|  |  src/gallium/drivers/freedreno/a3xx/fd3_emit.c      | 14 +++++------ | ||||||
|  |  src/gallium/drivers/freedreno/a3xx/fd3_gmem.c      |  8 +++--- | ||||||
|  |  src/gallium/drivers/freedreno/a3xx/fd3_program.c   |  4 +-- | ||||||
|  |  src/gallium/drivers/freedreno/freedreno_draw.c     |  2 +- | ||||||
|  |  src/gallium/drivers/freedreno/freedreno_resource.c | 18 ++++++++++++-- | ||||||
|  |  src/gallium/drivers/freedreno/freedreno_util.h     | 29 +++++++++++++++------- | ||||||
|  |  8 files changed, 55 insertions(+), 30 deletions(-) | ||||||
|  | 
 | ||||||
|  | diff --git a/src/gallium/drivers/freedreno/a2xx/fd2_emit.c b/src/gallium/drivers/freedreno/a2xx/fd2_emit.c
 | ||||||
|  | index b03390e..35511ba 100644
 | ||||||
|  | --- a/src/gallium/drivers/freedreno/a2xx/fd2_emit.c
 | ||||||
|  | +++ b/src/gallium/drivers/freedreno/a2xx/fd2_emit.c
 | ||||||
|  | @@ -137,7 +137,7 @@ emit_texture(struct fd_ringbuffer *ring, struct fd_context *ctx,
 | ||||||
|  |  	OUT_RING(ring, 0x00010000 + (0x6 * const_idx)); | ||||||
|  |   | ||||||
|  |  	OUT_RING(ring, sampler->tex0 | view->tex0); | ||||||
|  | -	OUT_RELOC(ring, view->tex_resource->bo, 0, view->fmt);
 | ||||||
|  | +	OUT_RELOC(ring, view->tex_resource->bo, 0, view->fmt, 0);
 | ||||||
|  |  	OUT_RING(ring, view->tex2); | ||||||
|  |  	OUT_RING(ring, sampler->tex3 | view->tex3); | ||||||
|  |  	OUT_RING(ring, sampler->tex4); | ||||||
|  | @@ -171,7 +171,7 @@ fd2_emit_vertex_bufs(struct fd_ringbuffer *ring, uint32_t val,
 | ||||||
|  |  	OUT_RING(ring, (0x1 << 16) | (val & 0xffff)); | ||||||
|  |  	for (i = 0; i < n; i++) { | ||||||
|  |  		struct fd_resource *rsc = fd_resource(vbufs[i].prsc); | ||||||
|  | -		OUT_RELOC(ring, rsc->bo, vbufs[i].offset, 3);
 | ||||||
|  | +		OUT_RELOC(ring, rsc->bo, vbufs[i].offset, 3, 0);
 | ||||||
|  |  		OUT_RING (ring, vbufs[i].size); | ||||||
|  |  	} | ||||||
|  |  } | ||||||
|  | diff --git a/src/gallium/drivers/freedreno/a2xx/fd2_gmem.c b/src/gallium/drivers/freedreno/a2xx/fd2_gmem.c
 | ||||||
|  | index 93695bc..89f5a4d 100644
 | ||||||
|  | --- a/src/gallium/drivers/freedreno/a2xx/fd2_gmem.c
 | ||||||
|  | +++ b/src/gallium/drivers/freedreno/a2xx/fd2_gmem.c
 | ||||||
|  | @@ -70,7 +70,7 @@ emit_gmem2mem_surf(struct fd_ringbuffer *ring, uint32_t base,
 | ||||||
|  |  	OUT_PKT3(ring, CP_SET_CONSTANT, 5); | ||||||
|  |  	OUT_RING(ring, CP_REG(REG_A2XX_RB_COPY_CONTROL)); | ||||||
|  |  	OUT_RING(ring, 0x00000000);             /* RB_COPY_CONTROL */ | ||||||
|  | -	OUT_RELOC(ring, rsc->bo, 0, 0);         /* RB_COPY_DEST_BASE */
 | ||||||
|  | +	OUT_RELOCW(ring, rsc->bo, 0, 0, 0);     /* RB_COPY_DEST_BASE */
 | ||||||
|  |  	OUT_RING(ring, rsc->pitch >> 5);        /* RB_COPY_DEST_PITCH */ | ||||||
|  |  	OUT_RING(ring,                          /* RB_COPY_DEST_INFO */ | ||||||
|  |  			A2XX_RB_COPY_DEST_INFO_FORMAT(fd2_pipe2color(psurf->format)) | | ||||||
|  | @@ -199,7 +199,7 @@ emit_mem2gmem_surf(struct fd_ringbuffer *ring, uint32_t base,
 | ||||||
|  |  			A2XX_SQ_TEX_0_CLAMP_Z(SQ_TEX_WRAP) | | ||||||
|  |  			A2XX_SQ_TEX_0_PITCH(rsc->pitch)); | ||||||
|  |  	OUT_RELOC(ring, rsc->bo, 0, | ||||||
|  | -			fd2_pipe2surface(psurf->format) | 0x800);
 | ||||||
|  | +			fd2_pipe2surface(psurf->format) | 0x800, 0);
 | ||||||
|  |  	OUT_RING(ring, A2XX_SQ_TEX_2_WIDTH(psurf->width - 1) | | ||||||
|  |  			A2XX_SQ_TEX_2_HEIGHT(psurf->height - 1)); | ||||||
|  |  	OUT_RING(ring, 0x01000000 | // XXX | ||||||
|  | @@ -241,7 +241,7 @@ fd2_emit_tile_mem2gmem(struct fd_context *ctx, uint32_t xoff, uint32_t yoff,
 | ||||||
|  |  	y0 = ((float)yoff) / ((float)pfb->height); | ||||||
|  |  	y1 = ((float)yoff + bin_h) / ((float)pfb->height); | ||||||
|  |  	OUT_PKT3(ring, CP_MEM_WRITE, 9); | ||||||
|  | -	OUT_RELOC(ring, fd_resource(fd2_ctx->solid_vertexbuf)->bo, 0x60, 0);
 | ||||||
|  | +	OUT_RELOC(ring, fd_resource(fd2_ctx->solid_vertexbuf)->bo, 0x60, 0, 0);
 | ||||||
|  |  	OUT_RING(ring, fui(x0)); | ||||||
|  |  	OUT_RING(ring, fui(y0)); | ||||||
|  |  	OUT_RING(ring, fui(x1)); | ||||||
|  | diff --git a/src/gallium/drivers/freedreno/a3xx/fd3_emit.c b/src/gallium/drivers/freedreno/a3xx/fd3_emit.c
 | ||||||
|  | index 5ffd561..5e58618 100644
 | ||||||
|  | --- a/src/gallium/drivers/freedreno/a3xx/fd3_emit.c
 | ||||||
|  | +++ b/src/gallium/drivers/freedreno/a3xx/fd3_emit.c
 | ||||||
|  | @@ -81,7 +81,7 @@ fd3_emit_constant(struct fd_ringbuffer *ring,
 | ||||||
|  |  	if (prsc) { | ||||||
|  |  		struct fd_bo *bo = fd_resource(prsc)->bo; | ||||||
|  |  		OUT_RELOC(ring, bo, offset, | ||||||
|  | -				CP_LOAD_STATE_1_STATE_TYPE(ST_CONSTANTS));
 | ||||||
|  | +				CP_LOAD_STATE_1_STATE_TYPE(ST_CONSTANTS), 0);
 | ||||||
|  |  	} else { | ||||||
|  |  		OUT_RING(ring, CP_LOAD_STATE_1_EXT_SRC_ADDR(0) | | ||||||
|  |  				CP_LOAD_STATE_1_STATE_TYPE(ST_CONSTANTS)); | ||||||
|  | @@ -212,7 +212,7 @@ emit_textures(struct fd_ringbuffer *ring,
 | ||||||
|  |  	for (i = 0; i < tex->num_textures; i++) { | ||||||
|  |  		struct fd3_pipe_sampler_view *view = | ||||||
|  |  				fd3_pipe_sampler_view(tex->textures[i]); | ||||||
|  | -		OUT_RELOC(ring, view->tex_resource->bo, 0, 0);
 | ||||||
|  | +		OUT_RELOC(ring, view->tex_resource->bo, 0, 0, 0);
 | ||||||
|  |  		/* I think each entry is a ptr to mipmap level.. for now, just | ||||||
|  |  		 * pad w/ null's until I get around to actually implementing | ||||||
|  |  		 * mipmap support.. | ||||||
|  | @@ -296,7 +296,7 @@ fd3_emit_gmem_restore_tex(struct fd_ringbuffer *ring, struct pipe_surface *psurf
 | ||||||
|  |  			CP_LOAD_STATE_0_NUM_UNIT(1)); | ||||||
|  |  	OUT_RING(ring, CP_LOAD_STATE_1_STATE_TYPE(ST_CONSTANTS) | | ||||||
|  |  			CP_LOAD_STATE_1_EXT_SRC_ADDR(0)); | ||||||
|  | -	OUT_RELOC(ring, rsc->bo, 0, 0);
 | ||||||
|  | +	OUT_RELOC(ring, rsc->bo, 0, 0, 0);
 | ||||||
|  |  } | ||||||
|  |   | ||||||
|  |  void | ||||||
|  | @@ -322,7 +322,7 @@ fd3_emit_vertex_bufs(struct fd_ringbuffer *ring,
 | ||||||
|  |  				COND(switchnext, A3XX_VFD_FETCH_INSTR_0_SWITCHNEXT) | | ||||||
|  |  				A3XX_VFD_FETCH_INSTR_0_INDEXCODE(i) | | ||||||
|  |  				A3XX_VFD_FETCH_INSTR_0_STEPRATE(1)); | ||||||
|  | -		OUT_RELOC(ring, rsc->bo, vbufs[i].offset, 0);
 | ||||||
|  | +		OUT_RELOC(ring, rsc->bo, vbufs[i].offset, 0, 0);
 | ||||||
|  |   | ||||||
|  |  		OUT_PKT0(ring, REG_A3XX_VFD_DECODE_INSTR(i), 1); | ||||||
|  |  		OUT_RING(ring, A3XX_VFD_DECODE_INSTR_CONSTFILL | | ||||||
|  | @@ -481,12 +481,12 @@ fd3_emit_restore(struct fd_context *ctx)
 | ||||||
|  |   | ||||||
|  |  	OUT_PKT0(ring, REG_A3XX_SP_VS_PVT_MEM_CTRL_REG, 3); | ||||||
|  |  	OUT_RING(ring, 0x08000001);                  /* SP_VS_PVT_MEM_CTRL_REG */ | ||||||
|  | -	OUT_RELOC(ring, fd3_ctx->vs_pvt_mem, 0, 0);  /* SP_VS_PVT_MEM_ADDR_REG */
 | ||||||
|  | +	OUT_RELOC(ring, fd3_ctx->vs_pvt_mem, 0,0,0); /* SP_VS_PVT_MEM_ADDR_REG */
 | ||||||
|  |  	OUT_RING(ring, 0x00000000);                  /* SP_VS_PVT_MEM_SIZE_REG */ | ||||||
|  |   | ||||||
|  |  	OUT_PKT0(ring, REG_A3XX_SP_FS_PVT_MEM_CTRL_REG, 3); | ||||||
|  |  	OUT_RING(ring, 0x08000001);                  /* SP_FS_PVT_MEM_CTRL_REG */ | ||||||
|  | -	OUT_RELOC(ring, fd3_ctx->fs_pvt_mem, 0, 0);  /* SP_FS_PVT_MEM_ADDR_REG */
 | ||||||
|  | +	OUT_RELOC(ring, fd3_ctx->fs_pvt_mem, 0,0,0); /* SP_FS_PVT_MEM_ADDR_REG */
 | ||||||
|  |  	OUT_RING(ring, 0x00000000);                  /* SP_FS_PVT_MEM_SIZE_REG */ | ||||||
|  |   | ||||||
|  |  	OUT_PKT0(ring, REG_A3XX_PC_VERTEX_REUSE_BLOCK_CNTL, 1); | ||||||
|  | @@ -549,7 +549,7 @@ fd3_emit_restore(struct fd_context *ctx)
 | ||||||
|  |  	OUT_RING(ring, 0x00000001);        /* UCHE_CACHE_MODE_CONTROL_REG */ | ||||||
|  |   | ||||||
|  |  	OUT_PKT0(ring, REG_A3XX_VSC_SIZE_ADDRESS, 1); | ||||||
|  | -	OUT_RELOC(ring, fd3_ctx->vsc_size_mem, 0, 0); /* VSC_SIZE_ADDRESS */
 | ||||||
|  | +	OUT_RELOC(ring, fd3_ctx->vsc_size_mem, 0, 0, 0); /* VSC_SIZE_ADDRESS */
 | ||||||
|  |   | ||||||
|  |  	OUT_PKT0(ring, REG_A3XX_GRAS_CL_CLIP_CNTL, 1); | ||||||
|  |  	OUT_RING(ring, 0x00000000);                  /* GRAS_CL_CLIP_CNTL */ | ||||||
|  | diff --git a/src/gallium/drivers/freedreno/a3xx/fd3_gmem.c b/src/gallium/drivers/freedreno/a3xx/fd3_gmem.c
 | ||||||
|  | index b9d0580..8d2df47 100644
 | ||||||
|  | --- a/src/gallium/drivers/freedreno/a3xx/fd3_gmem.c
 | ||||||
|  | +++ b/src/gallium/drivers/freedreno/a3xx/fd3_gmem.c
 | ||||||
|  | @@ -89,7 +89,7 @@ emit_mrt(struct fd_ringbuffer *ring, unsigned nr_bufs,
 | ||||||
|  |  		if (bin_w || (i >= nr_bufs)) { | ||||||
|  |  			OUT_RING(ring, A3XX_RB_MRT_BUF_BASE_COLOR_BUF_BASE(base)); | ||||||
|  |  		} else { | ||||||
|  | -			OUT_RELOCS(ring, res->bo, 0, 0, -1);
 | ||||||
|  | +			OUT_RELOCW(ring, res->bo, 0, 0, -1);
 | ||||||
|  |  		} | ||||||
|  |   | ||||||
|  |  		OUT_PKT0(ring, REG_A3XX_SP_FS_IMAGE_OUTPUT_REG(i), 1); | ||||||
|  | @@ -116,7 +116,7 @@ emit_gmem2mem_surf(struct fd_ringbuffer *ring,
 | ||||||
|  |  	OUT_RING(ring, A3XX_RB_COPY_CONTROL_MSAA_RESOLVE(MSAA_ONE) | | ||||||
|  |  			A3XX_RB_COPY_CONTROL_MODE(mode) | | ||||||
|  |  			A3XX_RB_COPY_CONTROL_GMEM_BASE(base)); | ||||||
|  | -	OUT_RELOCS(ring, rsc->bo, 0, 0, -1);    /* RB_COPY_DEST_BASE */
 | ||||||
|  | +	OUT_RELOCW(ring, rsc->bo, 0, 0, -1);    /* RB_COPY_DEST_BASE */
 | ||||||
|  |  	OUT_RING(ring, A3XX_RB_COPY_DEST_PITCH_PITCH(rsc->pitch * rsc->cpp)); | ||||||
|  |  	OUT_RING(ring, A3XX_RB_COPY_DEST_INFO_TILE(LINEAR) | | ||||||
|  |  			A3XX_RB_COPY_DEST_INFO_FORMAT(fd3_pipe2color(psurf->format)) | | ||||||
|  | @@ -272,7 +272,7 @@ fd3_emit_tile_mem2gmem(struct fd_context *ctx, uint32_t xoff, uint32_t yoff,
 | ||||||
|  |  	y1 = ((float)yoff + bin_h) / ((float)pfb->height); | ||||||
|  |   | ||||||
|  |  	OUT_PKT3(ring, CP_MEM_WRITE, 5); | ||||||
|  | -	OUT_RELOC(ring, fd_resource(fd3_ctx->blit_texcoord_vbuf)->bo, 0, 0);
 | ||||||
|  | +	OUT_RELOC(ring, fd_resource(fd3_ctx->blit_texcoord_vbuf)->bo, 0, 0, 0);
 | ||||||
|  |  	OUT_RING(ring, fui(x0)); | ||||||
|  |  	OUT_RING(ring, fui(y0)); | ||||||
|  |  	OUT_RING(ring, fui(x1)); | ||||||
|  | @@ -395,7 +395,7 @@ update_vsc_pipe(struct fd_context *ctx)
 | ||||||
|  |  			A3XX_VSC_PIPE_CONFIG_Y(0) | | ||||||
|  |  			A3XX_VSC_PIPE_CONFIG_W(gmem->nbins_x) | | ||||||
|  |  			A3XX_VSC_PIPE_CONFIG_H(gmem->nbins_y)); | ||||||
|  | -	OUT_RELOC(ring, bo, 0, 0);              /* VSC_PIPE[0].DATA_ADDRESS */
 | ||||||
|  | +	OUT_RELOC(ring, bo, 0, 0, 0);           /* VSC_PIPE[0].DATA_ADDRESS */
 | ||||||
|  |  	OUT_RING(ring, fd_bo_size(bo) - 32);    /* VSC_PIPE[0].DATA_LENGTH */ | ||||||
|  |   | ||||||
|  |  	for (i = 1; i < 8; i++) { | ||||||
|  | diff --git a/src/gallium/drivers/freedreno/a3xx/fd3_program.c b/src/gallium/drivers/freedreno/a3xx/fd3_program.c
 | ||||||
|  | index 259c2dd..c6c51b1 100644
 | ||||||
|  | --- a/src/gallium/drivers/freedreno/a3xx/fd3_program.c
 | ||||||
|  | +++ b/src/gallium/drivers/freedreno/a3xx/fd3_program.c
 | ||||||
|  | @@ -320,7 +320,7 @@ fd3_program_emit(struct fd_ringbuffer *ring,
 | ||||||
|  |  	OUT_PKT0(ring, REG_A3XX_SP_VS_OBJ_OFFSET_REG, 2); | ||||||
|  |  	OUT_RING(ring, A3XX_SP_VS_OBJ_OFFSET_REG_CONSTOBJECTOFFSET(0) | | ||||||
|  |  			A3XX_SP_VS_OBJ_OFFSET_REG_SHADEROBJOFFSET(0)); | ||||||
|  | -	OUT_RELOC(ring, vp->bo, 0, 0);    /* SP_VS_OBJ_START_REG */
 | ||||||
|  | +	OUT_RELOC(ring, vp->bo, 0, 0, 0);  /* SP_VS_OBJ_START_REG */
 | ||||||
|  |  #endif | ||||||
|  |   | ||||||
|  |  	OUT_PKT0(ring, REG_A3XX_SP_FS_LENGTH_REG, 1); | ||||||
|  | @@ -345,7 +345,7 @@ fd3_program_emit(struct fd_ringbuffer *ring,
 | ||||||
|  |  	OUT_PKT0(ring, REG_A3XX_SP_FS_OBJ_OFFSET_REG, 2); | ||||||
|  |  	OUT_RING(ring, A3XX_SP_FS_OBJ_OFFSET_REG_CONSTOBJECTOFFSET(128) | | ||||||
|  |  			A3XX_SP_FS_OBJ_OFFSET_REG_SHADEROBJOFFSET(128 - fp->instrlen)); | ||||||
|  | -	OUT_RELOC(ring, fp->bo, 0, 0);    /* SP_FS_OBJ_START_REG */
 | ||||||
|  | +	OUT_RELOC(ring, fp->bo, 0, 0, 0);  /* SP_FS_OBJ_START_REG */
 | ||||||
|  |  #endif | ||||||
|  |   | ||||||
|  |  	OUT_PKT0(ring, REG_A3XX_SP_FS_FLAT_SHAD_MODE_REG_0, 2); | ||||||
|  | diff --git a/src/gallium/drivers/freedreno/freedreno_draw.c b/src/gallium/drivers/freedreno/freedreno_draw.c
 | ||||||
|  | index d4f8d34..4a98ab4 100644
 | ||||||
|  | --- a/src/gallium/drivers/freedreno/freedreno_draw.c
 | ||||||
|  | +++ b/src/gallium/drivers/freedreno/freedreno_draw.c
 | ||||||
|  | @@ -104,7 +104,7 @@ fd_draw_emit(struct fd_context *ctx, const struct pipe_draw_info *info)
 | ||||||
|  |  			src_sel, idx_type, IGNORE_VISIBILITY)); | ||||||
|  |  	OUT_RING(ring, info->count);       /* NumIndices */ | ||||||
|  |  	if (info->indexed) { | ||||||
|  | -		OUT_RELOC(ring, idx_bo, idx_offset, 0);
 | ||||||
|  | +		OUT_RELOC(ring, idx_bo, idx_offset, 0, 0);
 | ||||||
|  |  		OUT_RING (ring, idx_size); | ||||||
|  |  	} | ||||||
|  |  } | ||||||
|  | diff --git a/src/gallium/drivers/freedreno/freedreno_resource.c b/src/gallium/drivers/freedreno/freedreno_resource.c
 | ||||||
|  | index 1b1eaa5..3e051ea 100644
 | ||||||
|  | --- a/src/gallium/drivers/freedreno/freedreno_resource.c
 | ||||||
|  | +++ b/src/gallium/drivers/freedreno/freedreno_resource.c
 | ||||||
|  | @@ -59,6 +59,9 @@ fd_resource_transfer_unmap(struct pipe_context *pctx,
 | ||||||
|  |  		struct pipe_transfer *ptrans) | ||||||
|  |  { | ||||||
|  |  	struct fd_context *ctx = fd_context(pctx); | ||||||
|  | +	struct fd_resource *rsc = fd_resource(ptrans->resource);
 | ||||||
|  | +	if (!(ptrans->usage & PIPE_TRANSFER_UNSYNCHRONIZED))
 | ||||||
|  | +		fd_bo_cpu_fini(rsc->bo);
 | ||||||
|  |  	pipe_resource_reference(&ptrans->resource, NULL); | ||||||
|  |  	util_slab_free(&ctx->transfer_pool, ptrans); | ||||||
|  |  } | ||||||
|  | @@ -74,12 +77,13 @@ fd_resource_transfer_map(struct pipe_context *pctx,
 | ||||||
|  |  	struct fd_resource *rsc = fd_resource(prsc); | ||||||
|  |  	struct pipe_transfer *ptrans = util_slab_alloc(&ctx->transfer_pool); | ||||||
|  |  	enum pipe_format format = prsc->format; | ||||||
|  | +	uint32_t op = 0;
 | ||||||
|  |  	char *buf; | ||||||
|  |   | ||||||
|  |  	if (!ptrans) | ||||||
|  |  		return NULL; | ||||||
|  |   | ||||||
|  | -	/* util_slap_alloc() doesn't zero: */
 | ||||||
|  | +	/* util_slab_alloc() doesn't zero: */
 | ||||||
|  |  	memset(ptrans, 0, sizeof(*ptrans)); | ||||||
|  |   | ||||||
|  |  	pipe_resource_reference(&ptrans->resource, prsc); | ||||||
|  | @@ -90,7 +94,8 @@ fd_resource_transfer_map(struct pipe_context *pctx,
 | ||||||
|  |  	ptrans->layer_stride = ptrans->stride; | ||||||
|  |   | ||||||
|  |  	/* some state trackers (at least XA) don't do this.. */ | ||||||
|  | -	fd_resource_transfer_flush_region(pctx, ptrans, box);
 | ||||||
|  | +	if (!(usage & PIPE_TRANSFER_FLUSH_EXPLICIT))
 | ||||||
|  | +		fd_resource_transfer_flush_region(pctx, ptrans, box);
 | ||||||
|  |   | ||||||
|  |  	buf = fd_bo_map(rsc->bo); | ||||||
|  |  	if (!buf) { | ||||||
|  | @@ -98,6 +103,15 @@ fd_resource_transfer_map(struct pipe_context *pctx,
 | ||||||
|  |  		return NULL; | ||||||
|  |  	} | ||||||
|  |   | ||||||
|  | +	if (usage & PIPE_TRANSFER_READ)
 | ||||||
|  | +		op |= DRM_FREEDRENO_PREP_READ;
 | ||||||
|  | +
 | ||||||
|  | +	if (usage & PIPE_TRANSFER_WRITE)
 | ||||||
|  | +		op |= DRM_FREEDRENO_PREP_WRITE;
 | ||||||
|  | +
 | ||||||
|  | +	if (!(usage & PIPE_TRANSFER_UNSYNCHRONIZED))
 | ||||||
|  | +		fd_bo_cpu_prep(rsc->bo, ctx->screen->pipe, op);
 | ||||||
|  | +
 | ||||||
|  |  	*pptrans = ptrans; | ||||||
|  |   | ||||||
|  |  	return buf + | ||||||
|  | diff --git a/src/gallium/drivers/freedreno/freedreno_util.h b/src/gallium/drivers/freedreno/freedreno_util.h
 | ||||||
|  | index 9f10686..7bbbe80 100644
 | ||||||
|  | --- a/src/gallium/drivers/freedreno/freedreno_util.h
 | ||||||
|  | +++ b/src/gallium/drivers/freedreno/freedreno_util.h
 | ||||||
|  | @@ -104,25 +104,36 @@ OUT_RING(struct fd_ringbuffer *ring, uint32_t data)
 | ||||||
|  |   | ||||||
|  |  static inline void | ||||||
|  |  OUT_RELOC(struct fd_ringbuffer *ring, struct fd_bo *bo, | ||||||
|  | -		uint32_t offset, uint32_t or)
 | ||||||
|  | +		uint32_t offset, uint32_t or, int32_t shift)
 | ||||||
|  |  { | ||||||
|  |  	if (LOG_DWORDS) { | ||||||
|  | -		DBG("ring[%p]: OUT_RELOC  %04x:  %p+%u", ring,
 | ||||||
|  | -				(uint32_t)(ring->cur - ring->last_start), bo, offset);
 | ||||||
|  | +		DBG("ring[%p]: OUT_RELOC   %04x:  %p+%u << %d", ring,
 | ||||||
|  | +				(uint32_t)(ring->cur - ring->last_start), bo, offset, shift);
 | ||||||
|  |  	} | ||||||
|  | -	fd_ringbuffer_emit_reloc(ring, bo, offset, or);
 | ||||||
|  | +	fd_ringbuffer_reloc(ring, &(struct fd_reloc){
 | ||||||
|  | +		.bo = bo,
 | ||||||
|  | +		.flags = FD_RELOC_READ,
 | ||||||
|  | +		.offset = offset,
 | ||||||
|  | +		.or = or,
 | ||||||
|  | +		.shift = shift,
 | ||||||
|  | +	});
 | ||||||
|  |  } | ||||||
|  |   | ||||||
|  | -/* shifted reloc: */
 | ||||||
|  |  static inline void | ||||||
|  | -OUT_RELOCS(struct fd_ringbuffer *ring, struct fd_bo *bo,
 | ||||||
|  | +OUT_RELOCW(struct fd_ringbuffer *ring, struct fd_bo *bo,
 | ||||||
|  |  		uint32_t offset, uint32_t or, int32_t shift) | ||||||
|  |  { | ||||||
|  |  	if (LOG_DWORDS) { | ||||||
|  | -		DBG("ring[%p]: OUT_RELOCS  %04x:  %p+%u << %d", ring,
 | ||||||
|  | +		DBG("ring[%p]: OUT_RELOC   %04x:  %p+%u << %d", ring,
 | ||||||
|  |  				(uint32_t)(ring->cur - ring->last_start), bo, offset, shift); | ||||||
|  |  	} | ||||||
|  | -	fd_ringbuffer_emit_reloc_shift(ring, bo, offset, or, shift);
 | ||||||
|  | +	fd_ringbuffer_reloc(ring, &(struct fd_reloc){
 | ||||||
|  | +		.bo = bo,
 | ||||||
|  | +		.flags = FD_RELOC_READ | FD_RELOC_WRITE,
 | ||||||
|  | +		.offset = offset,
 | ||||||
|  | +		.or = or,
 | ||||||
|  | +		.shift = shift,
 | ||||||
|  | +	});
 | ||||||
|  |  } | ||||||
|  |   | ||||||
|  |  static inline void BEGIN_RING(struct fd_ringbuffer *ring, uint32_t ndwords) | ||||||
|  | @@ -155,7 +166,7 @@ OUT_IB(struct fd_ringbuffer *ring, struct fd_ringmarker *start,
 | ||||||
|  |  		struct fd_ringmarker *end) | ||||||
|  |  { | ||||||
|  |  	OUT_PKT3(ring, CP_INDIRECT_BUFFER_PFD, 2); | ||||||
|  | -	fd_ringbuffer_emit_reloc_ring(ring, start);
 | ||||||
|  | +	fd_ringbuffer_emit_reloc_ring(ring, start, end);
 | ||||||
|  |  	OUT_RING(ring, fd_ringmarker_dwords(start, end)); | ||||||
|  |  } | ||||||
|  |   | ||||||
|  | -- 
 | ||||||
|  | 1.8.4.2 | ||||||
|  | 
 | ||||||
							
								
								
									
										44
									
								
								mesa.spec
									
									
									
									
									
								
							
							
						
						
									
										44
									
								
								mesa.spec
									
									
									
									
									
								
							| @ -48,12 +48,12 @@ | |||||||
| 
 | 
 | ||||||
| %define _default_patch_fuzz 2 | %define _default_patch_fuzz 2 | ||||||
| 
 | 
 | ||||||
| %define gitdate 20131113 | %define gitdate 20131114 | ||||||
| #% define snapshot  | #% define snapshot  | ||||||
| 
 | 
 | ||||||
| Summary: Mesa graphics libraries | Summary: Mesa graphics libraries | ||||||
| Name: mesa | Name: mesa | ||||||
| Version: 9.2.2 | Version: 9.2.3 | ||||||
| Release: 1.%{gitdate}%{?dist} | Release: 1.%{gitdate}%{?dist} | ||||||
| License: MIT | License: MIT | ||||||
| Group: System Environment/Libraries | Group: System Environment/Libraries | ||||||
| @ -77,6 +77,25 @@ Patch15: mesa-9.2-hardware-float.patch | |||||||
| Patch16: mesa-9.2-no-useless-vdpau.patch | Patch16: mesa-9.2-no-useless-vdpau.patch | ||||||
| Patch20: mesa-9.2-evergreen-big-endian.patch | Patch20: mesa-9.2-evergreen-big-endian.patch | ||||||
| 
 | 
 | ||||||
|  | # https://bugs.freedesktop.org/show_bug.cgi?id=71573 | ||||||
|  | Patch21: 0001-freedreno-a3xx-fix-color-inversion-on-mem-gmem-resto.patch | ||||||
|  | Patch22: 0002-freedreno-a3xx-fix-viewport-on-gmem-mem-resolve.patch | ||||||
|  | Patch23: 0003-freedreno-add-debug-option-to-disable-scissor-optimi.patch | ||||||
|  | Patch24: 0004-freedreno-update-register-headers.patch | ||||||
|  | Patch25: 0005-freedreno-a3xx-some-texture-fixes.patch | ||||||
|  | Patch26: 0006-freedreno-a3xx-compiler-fix-CMP.patch | ||||||
|  | Patch27: 0007-freedreno-a3xx-compiler-handle-saturate-on-dst.patch | ||||||
|  | Patch28: 0008-freedreno-a3xx-compiler-use-max_reg-rather-than-file.patch | ||||||
|  | Patch29: 0009-freedreno-a3xx-compiler-cat4-cannot-use-const-reg-as.patch | ||||||
|  | Patch30: 0010-freedreno-fix-segfault-when-no-color-buffer-bound.patch | ||||||
|  | Patch31: 0011-freedreno-a3xx-compiler-make-compiler-errors-more-us.patch | ||||||
|  | Patch32: 0012-freedreno-a3xx-compiler-bit-of-re-arrange-cleanup.patch | ||||||
|  | Patch33: 0013-freedreno-a3xx-compiler-fix-SGT-SLT-etc.patch | ||||||
|  | Patch34: 0014-freedreno-a3xx-don-t-leak-so-much.patch | ||||||
|  | Patch35: 0015-freedreno-a3xx-compiler-better-const-handling.patch | ||||||
|  | Patch36: 0016-freedreno-a3xx-compiler-handle-sync-flags-better.patch | ||||||
|  | Patch37: 0017-freedreno-updates-for-msm-drm-kms-driver.patch | ||||||
|  | 
 | ||||||
| BuildRequires: pkgconfig autoconf automake libtool | BuildRequires: pkgconfig autoconf automake libtool | ||||||
| %if %{with_hardware} | %if %{with_hardware} | ||||||
| BuildRequires: kernel-headers | BuildRequires: kernel-headers | ||||||
| @ -302,6 +321,24 @@ grep -q ^/ src/gallium/auxiliary/vl/vl_decoder.c && exit 1 | |||||||
| %patch16 -p1 -b .vdpau | %patch16 -p1 -b .vdpau | ||||||
| %patch20 -p1 -b .egbe | %patch20 -p1 -b .egbe | ||||||
| 
 | 
 | ||||||
|  | %patch21 -p1 | ||||||
|  | %patch22 -p1 | ||||||
|  | %patch23 -p1 | ||||||
|  | %patch24 -p1 | ||||||
|  | %patch25 -p1 | ||||||
|  | %patch26 -p1 | ||||||
|  | %patch27 -p1 | ||||||
|  | %patch28 -p1 | ||||||
|  | %patch29 -p1 | ||||||
|  | %patch30 -p1 | ||||||
|  | %patch31 -p1 | ||||||
|  | %patch32 -p1 | ||||||
|  | %patch33 -p1 | ||||||
|  | %patch34 -p1 | ||||||
|  | %patch35 -p1 | ||||||
|  | %patch36 -p1 | ||||||
|  | %patch37 -p1 | ||||||
|  | 
 | ||||||
| %if 0%{with_private_llvm} | %if 0%{with_private_llvm} | ||||||
| sed -i 's/llvm-config/mesa-private-llvm-config-%{__isa_bits}/g' configure.ac | sed -i 's/llvm-config/mesa-private-llvm-config-%{__isa_bits}/g' configure.ac | ||||||
| sed -i 's/`$LLVM_CONFIG --version`/&-mesa/' configure.ac | sed -i 's/`$LLVM_CONFIG --version`/&-mesa/' configure.ac | ||||||
| @ -600,6 +637,9 @@ rm -rf $RPM_BUILD_ROOT | |||||||
| %endif | %endif | ||||||
| 
 | 
 | ||||||
| %changelog | %changelog | ||||||
|  | * Thu Nov 14 2013 Igor Gnatenko <i.gnatenko.brain@gmail.com> - 9.2.3-1.20131114 | ||||||
|  | - 9.2.3 upstream release | ||||||
|  | 
 | ||||||
| * Wed Nov 13 2013 Igor Gnatenko <i.gnatenko.brain@gmail.com> - 9.2.2-1.20131113 | * Wed Nov 13 2013 Igor Gnatenko <i.gnatenko.brain@gmail.com> - 9.2.2-1.20131113 | ||||||
| - 9.2.2 upstream release + fixes from git 9.2 branch | - 9.2.2 upstream release + fixes from git 9.2 branch | ||||||
| 
 | 
 | ||||||
|  | |||||||
		Loading…
	
		Reference in New Issue
	
	Block a user