Mesa 9.1 Release Notes / date February 22, 2013
-+Mesa 9.1 Release Notes / February 22, 2013
- -- Mesa 9.1 is a new development release. -@@ -33,7 +33,9 @@ because GL_ARB_compatibility is not supported. - -
MD5 checksums
---tbd -+86d40f3056f89949368764bf84aff55e MesaLib-9.1.tar.gz -+d3891e02215422e120271d976ff1947e MesaLib-9.1.tar.bz2 -+01645f28f53351c23b0beb6c688911d8 MesaLib-9.1.zip -- - -diff --git a/docs/relnotes.html b/docs/relnotes.html -index e373091..2e11bc4 100644 ---- a/docs/relnotes.html -+++ b/docs/relnotes.html -@@ -22,6 +22,7 @@ The release notes summarize what's new or changed in each Mesa release. - -
-
-
- 9.1 release notes -+
- 9.0.3 release notes -
- 9.0.2 release notes -
- 9.0.1 release notes -
- 9.0 release notes
-diff --git a/include/pci_ids/i965_pci_ids.h b/include/pci_ids/i965_pci_ids.h
-index 09dca5b..1e388f8 100644
---- a/include/pci_ids/i965_pci_ids.h
-+++ b/include/pci_ids/i965_pci_ids.h
-@@ -53,12 +53,12 @@ CHIPSET(0x0A26, HASWELL_ULT_M_GT2_PLUS, hsw_gt2)
- CHIPSET(0x0A0A, HASWELL_ULT_S_GT1, hsw_gt1)
- CHIPSET(0x0A1A, HASWELL_ULT_S_GT2, hsw_gt2)
- CHIPSET(0x0A2A, HASWELL_ULT_S_GT2_PLUS, hsw_gt2)
--CHIPSET(0x0D12, HASWELL_CRW_GT1, hsw_gt1)
--CHIPSET(0x0D22, HASWELL_CRW_GT2, hsw_gt2)
--CHIPSET(0x0D32, HASWELL_CRW_GT2_PLUS, hsw_gt2)
--CHIPSET(0x0D16, HASWELL_CRW_M_GT1, hsw_gt1)
--CHIPSET(0x0D26, HASWELL_CRW_M_GT2, hsw_gt2)
--CHIPSET(0x0D36, HASWELL_CRW_M_GT2_PLUS, hsw_gt2)
--CHIPSET(0x0D1A, HASWELL_CRW_S_GT1, hsw_gt1)
--CHIPSET(0x0D2A, HASWELL_CRW_S_GT2, hsw_gt2)
--CHIPSET(0x0D3A, HASWELL_CRW_S_GT2_PLUS, hsw_gt2)
-+CHIPSET(0x0D02, HASWELL_CRW_GT1, hsw_gt1)
-+CHIPSET(0x0D12, HASWELL_CRW_GT2, hsw_gt2)
-+CHIPSET(0x0D22, HASWELL_CRW_GT2_PLUS, hsw_gt2)
-+CHIPSET(0x0D06, HASWELL_CRW_M_GT1, hsw_gt1)
-+CHIPSET(0x0D16, HASWELL_CRW_M_GT2, hsw_gt2)
-+CHIPSET(0x0D26, HASWELL_CRW_M_GT2_PLUS, hsw_gt2)
-+CHIPSET(0x0D0A, HASWELL_CRW_S_GT1, hsw_gt1)
-+CHIPSET(0x0D1A, HASWELL_CRW_S_GT2, hsw_gt2)
-+CHIPSET(0x0D2A, HASWELL_CRW_S_GT2_PLUS, hsw_gt2)
-diff --git a/include/pci_ids/r600_pci_ids.h b/include/pci_ids/r600_pci_ids.h
-index 7ceb820..9c9bab2 100644
---- a/include/pci_ids/r600_pci_ids.h
-+++ b/include/pci_ids/r600_pci_ids.h
-@@ -298,6 +298,10 @@ CHIPSET(0x9907, ARUBA_9907, ARUBA)
- CHIPSET(0x9908, ARUBA_9908, ARUBA)
- CHIPSET(0x9909, ARUBA_9909, ARUBA)
- CHIPSET(0x990A, ARUBA_990A, ARUBA)
-+CHIPSET(0x990B, ARUBA_990B, ARUBA)
-+CHIPSET(0x990C, ARUBA_990C, ARUBA)
-+CHIPSET(0x990D, ARUBA_990D, ARUBA)
-+CHIPSET(0x990E, ARUBA_990E, ARUBA)
- CHIPSET(0x990F, ARUBA_990F, ARUBA)
- CHIPSET(0x9910, ARUBA_9910, ARUBA)
- CHIPSET(0x9913, ARUBA_9913, ARUBA)
-@@ -309,6 +313,13 @@ CHIPSET(0x9991, ARUBA_9991, ARUBA)
- CHIPSET(0x9992, ARUBA_9992, ARUBA)
- CHIPSET(0x9993, ARUBA_9993, ARUBA)
- CHIPSET(0x9994, ARUBA_9994, ARUBA)
-+CHIPSET(0x9995, ARUBA_9995, ARUBA)
-+CHIPSET(0x9996, ARUBA_9996, ARUBA)
-+CHIPSET(0x9997, ARUBA_9997, ARUBA)
-+CHIPSET(0x9998, ARUBA_9998, ARUBA)
-+CHIPSET(0x9999, ARUBA_9999, ARUBA)
-+CHIPSET(0x999A, ARUBA_999A, ARUBA)
-+CHIPSET(0x999B, ARUBA_999B, ARUBA)
- CHIPSET(0x99A0, ARUBA_99A0, ARUBA)
- CHIPSET(0x99A2, ARUBA_99A2, ARUBA)
- CHIPSET(0x99A4, ARUBA_99A4, ARUBA)
-diff --git a/scons/gallium.py b/scons/gallium.py
-index 4b51b6e..b28be5d 100755
---- a/scons/gallium.py
-+++ b/scons/gallium.py
-@@ -289,6 +289,7 @@ def generate(env):
- '_CRT_SECURE_NO_DEPRECATE',
- '_SCL_SECURE_NO_WARNINGS',
- '_SCL_SECURE_NO_DEPRECATE',
-+ '_ALLOW_KEYWORD_MACROS',
- ]
- if env['build'] in ('debug', 'checked'):
- cppdefines += ['_DEBUG']
-@@ -401,6 +402,8 @@ def generate(env):
- '/Oi', # enable intrinsic functions
- ]
- else:
-+ if distutils.version.LooseVersion(env['MSVC_VERSION']) < distutils.version.LooseVersion('11.0'):
-+ print 'scons: warning: Visual Studio versions prior to 2012 are known to produce incorrect code when optimizations are enabled ( https://bugs.freedesktop.org/show_bug.cgi?id=58718 )'
- ccflags += [
- '/O2', # optimize for speed
- ]
-diff --git a/scons/llvm.py b/scons/llvm.py
-index e1ed760..7f00c6c 100644
---- a/scons/llvm.py
-+++ b/scons/llvm.py
-@@ -92,7 +92,19 @@ def generate(env):
- 'HAVE_STDINT_H',
- ])
- env.Prepend(LIBPATH = [os.path.join(llvm_dir, 'lib')])
-- if llvm_version >= distutils.version.LooseVersion('3.0'):
-+ if llvm_version >= distutils.version.LooseVersion('3.2'):
-+ # 3.2
-+ env.Prepend(LIBS = [
-+ 'LLVMBitWriter', 'LLVMX86Disassembler', 'LLVMX86AsmParser',
-+ 'LLVMX86CodeGen', 'LLVMX86Desc', 'LLVMSelectionDAG',
-+ 'LLVMAsmPrinter', 'LLVMMCParser', 'LLVMX86AsmPrinter',
-+ 'LLVMX86Utils', 'LLVMX86Info', 'LLVMJIT',
-+ 'LLVMExecutionEngine', 'LLVMCodeGen', 'LLVMScalarOpts',
-+ 'LLVMInstCombine', 'LLVMTransformUtils', 'LLVMipa',
-+ 'LLVMAnalysis', 'LLVMTarget', 'LLVMMC', 'LLVMCore',
-+ 'LLVMSupport', 'LLVMRuntimeDyld', 'LLVMObject'
-+ ])
-+ elif llvm_version >= distutils.version.LooseVersion('3.0'):
- # 3.0
- env.Prepend(LIBS = [
- 'LLVMBitWriter', 'LLVMX86Disassembler', 'LLVMX86AsmParser',
-diff --git a/src/egl/drivers/dri2/egl_dri2.c b/src/egl/drivers/dri2/egl_dri2.c
-index 351fbf4..e17d5be 100644
---- a/src/egl/drivers/dri2/egl_dri2.c
-+++ b/src/egl/drivers/dri2/egl_dri2.c
-@@ -195,7 +195,14 @@ dri2_add_config(_EGLDisplay *disp, const __DRIconfig *dri_config, int id,
- for (i = 0; attr_list[i] != EGL_NONE; i += 2)
- _eglSetConfigKey(&base, attr_list[i], attr_list[i+1]);
-
-- if (depth > 0 && depth != base.BufferSize)
-+ /* Allow a 24-bit RGB visual to match a 32-bit RGBA EGLConfig. Otherwise
-+ * it will only match a 32-bit RGBA visual. On a composited window manager
-+ * on X11, this will make all of the EGLConfigs with destination alpha get
-+ * blended by the compositor. This is probably not what the application
-+ * wants... especially on drivers that only have 32-bit RGBA EGLConfigs!
-+ */
-+ if (depth > 0 && depth != base.BufferSize
-+ && !(depth == 24 && base.BufferSize == 32))
- return NULL;
-
- if (rgba_masks && memcmp(rgba_masks, dri_masks, sizeof(dri_masks)))
-diff --git a/src/gallium/auxiliary/draw/draw_pipe_clip.c b/src/gallium/auxiliary/draw/draw_pipe_clip.c
-index 7b879c4..3110809 100644
---- a/src/gallium/auxiliary/draw/draw_pipe_clip.c
-+++ b/src/gallium/auxiliary/draw/draw_pipe_clip.c
-@@ -167,12 +167,17 @@ static void interp( const struct clip_stage *clip,
- {
- int k;
- t_nopersp = t;
-- for (k = 0; k < 2; k++)
-+ /* find either in.x != out.x or in.y != out.y */
-+ for (k = 0; k < 2; k++) {
- if (in->clip[k] != out->clip[k]) {
-- t_nopersp = (dst->clip[k] - out->clip[k]) /
-- (in->clip[k] - out->clip[k]);
-+ /* do divide by W, then compute linear interpolation factor */
-+ float in_coord = in->clip[k] / in->clip[3];
-+ float out_coord = out->clip[k] / out->clip[3];
-+ float dst_coord = dst->clip[k] / dst->clip[3];
-+ t_nopersp = (dst_coord - out_coord) / (in_coord - out_coord);
- break;
- }
-+ }
- }
-
- /* Other attributes
-diff --git a/src/gallium/auxiliary/draw/draw_pipe_offset.c b/src/gallium/auxiliary/draw/draw_pipe_offset.c
-index 3da52b1..3578525 100644
---- a/src/gallium/auxiliary/draw/draw_pipe_offset.c
-+++ b/src/gallium/auxiliary/draw/draw_pipe_offset.c
-@@ -127,10 +127,44 @@ static void offset_first_tri( struct draw_stage *stage,
- struct prim_header *header )
- {
- struct offset_stage *offset = offset_stage(stage);
-+ const struct pipe_rasterizer_state *rast = stage->draw->rasterizer;
-+ unsigned fill_mode = rast->fill_front;
-+ boolean do_offset;
-+
-+ if (rast->fill_back != rast->fill_front) {
-+ /* Need to check for back-facing triangle */
-+ boolean ccw = header->det < 0.0f;
-+ if (ccw != rast->front_ccw)
-+ fill_mode = rast->fill_back;
-+ }
-+
-+ /* Now determine if we need to do offsetting for the point/line/fill mode */
-+ switch (fill_mode) {
-+ case PIPE_POLYGON_MODE_FILL:
-+ do_offset = rast->offset_tri;
-+ break;
-+ case PIPE_POLYGON_MODE_LINE:
-+ do_offset = rast->offset_line;
-+ break;
-+ case PIPE_POLYGON_MODE_POINT:
-+ do_offset = rast->offset_point;
-+ break;
-+ default:
-+ assert(!"invalid fill_mode in offset_first_tri()");
-+ do_offset = rast->offset_tri;
-+ }
-+
-+ if (do_offset) {
-+ offset->scale = rast->offset_scale;
-+ offset->clamp = rast->offset_clamp;
-+ offset->units = (float) (rast->offset_units * stage->draw->mrd);
-+ }
-+ else {
-+ offset->scale = 0.0f;
-+ offset->clamp = 0.0f;
-+ offset->units = 0.0f;
-+ }
-
-- offset->units = (float) (stage->draw->rasterizer->offset_units * stage->draw->mrd);
-- offset->scale = stage->draw->rasterizer->offset_scale;
-- offset->clamp = stage->draw->rasterizer->offset_clamp;
-
- stage->tri = offset_tri;
- stage->tri( stage, header );
-diff --git a/src/gallium/auxiliary/util/u_range.h b/src/gallium/auxiliary/util/u_range.h
-new file mode 100644
-index 0000000..4b1d0d1
---- /dev/null
-+++ b/src/gallium/auxiliary/util/u_range.h
-@@ -0,0 +1,89 @@
-+/*
-+ * Copyright 2013 Marek Olšák
-+ * -+ * Permission is hereby granted, free of charge, to any person obtaining a -+ * copy of this software and associated documentation files (the "Software"), -+ * to deal in the Software without restriction, including without limitation -+ * on the rights to use, copy, modify, merge, publish, distribute, sub -+ * license, and/or sell copies of the Software, and to permit persons to whom -+ * the Software is furnished to do so, subject to the following conditions: -+ * -+ * The above copyright notice and this permission notice (including the next -+ * paragraph) shall be included in all copies or substantial portions of the -+ * Software. -+ * -+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR -+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, -+ * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL -+ * THE AUTHOR(S) AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM, -+ * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR -+ * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE -+ * USE OR OTHER DEALINGS IN THE SOFTWARE. */ -+ -+/** -+ * @file -+ * 1D integer range, capable of the union and intersection operations. -+ * -+ * It only maintains a single interval which is extended when the union is -+ * done. This implementation is partially thread-safe (readers are not -+ * protected by a lock). -+ * -+ * @author Marek Olšák -+ */ -+ -+#ifndef U_RANGE_H -+#define U_RANGE_H -+ -+#include "os/os_thread.h" -+ -+struct util_range { -+ unsigned start; /* inclusive */ -+ unsigned end; /* exclusive */ -+ -+ /* for the range to be consistent with multiple contexts: */ -+ pipe_mutex write_mutex; -+}; -+ -+ -+static INLINE void -+util_range_set_empty(struct util_range *range) -+{ -+ range->start = ~0; -+ range->end = 0; -+} -+ -+/* This is like a union of two sets. */ -+static INLINE void -+util_range_add(struct util_range *range, unsigned start, unsigned end) -+{ -+ if (start < range->start || end > range->end) { -+ pipe_mutex_lock(range->write_mutex); -+ range->start = MIN2(start, range->start); -+ range->end = MAX2(end, range->end); -+ pipe_mutex_unlock(range->write_mutex); -+ } -+} -+ -+static INLINE boolean -+util_ranges_intersect(struct util_range *range, unsigned start, unsigned end) -+{ -+ return MAX2(start, range->start) < MIN2(end, range->end); -+} -+ -+ -+/* Init/deinit */ -+ -+static INLINE void -+util_range_init(struct util_range *range) -+{ -+ pipe_mutex_init(range->write_mutex); -+ util_range_set_empty(range); -+} -+ -+static INLINE void -+util_range_destroy(struct util_range *range) -+{ -+ pipe_mutex_destroy(range->write_mutex); -+} -+ -+#endif -diff --git a/src/gallium/drivers/llvmpipe/lp_state_rasterizer.c b/src/gallium/drivers/llvmpipe/lp_state_rasterizer.c -index 40ccaf6..ca8df71 100644 ---- a/src/gallium/drivers/llvmpipe/lp_state_rasterizer.c -+++ b/src/gallium/drivers/llvmpipe/lp_state_rasterizer.c -@@ -46,6 +46,10 @@ clear_flags(struct pipe_rasterizer_state *rast) - { - rast->light_twoside = 0; - rast->offset_tri = 0; -+ rast->offset_line = 0; -+ rast->offset_point = 0; -+ rast->offset_units = 0.0f; -+ rast->offset_scale = 0.0f; - } - - -@@ -74,6 +78,8 @@ llvmpipe_create_rasterizer_state(struct pipe_context *pipe, - */ - need_pipeline = (rast->fill_front != PIPE_POLYGON_MODE_FILL || - rast->fill_back != PIPE_POLYGON_MODE_FILL || -+ rast->offset_point || -+ rast->offset_line || - rast->point_smooth || - rast->line_smooth || - rast->line_stipple_enable || -diff --git a/src/gallium/drivers/llvmpipe/lp_texture.c b/src/gallium/drivers/llvmpipe/lp_texture.c -index 2e9c6bf..f17a04a 100644 ---- a/src/gallium/drivers/llvmpipe/lp_texture.c -+++ b/src/gallium/drivers/llvmpipe/lp_texture.c -@@ -295,7 +295,9 @@ llvmpipe_resource_create(struct pipe_screen *_screen, - /* assert(lpr->base.bind); */ - - if (resource_is_texture(&lpr->base)) { -- if (lpr->base.bind & PIPE_BIND_DISPLAY_TARGET) { -+ if (lpr->base.bind & (PIPE_BIND_DISPLAY_TARGET | -+ PIPE_BIND_SCANOUT | -+ PIPE_BIND_SHARED)) { - /* displayable surface */ - if (!llvmpipe_displaytarget_layout(screen, lpr)) - goto fail; -diff --git a/src/gallium/drivers/r600/evergreen_hw_context.c b/src/gallium/drivers/r600/evergreen_hw_context.c -index bb47530..bb43353 100644 ---- a/src/gallium/drivers/r600/evergreen_hw_context.c -+++ b/src/gallium/drivers/r600/evergreen_hw_context.c -@@ -283,4 +283,7 @@ void evergreen_dma_copy(struct r600_context *rctx, - src_offset += csize << shift; - size -= csize; - } -+ -+ util_range_add(&rdst->valid_buffer_range, dst_offset, -+ dst_offset + size); - } -diff --git a/src/gallium/drivers/r600/evergreen_state.c b/src/gallium/drivers/r600/evergreen_state.c -index 389ad3c..804c037 100644 ---- a/src/gallium/drivers/r600/evergreen_state.c -+++ b/src/gallium/drivers/r600/evergreen_state.c -@@ -808,6 +808,7 @@ static void *evergreen_create_dsa_state(struct pipe_context *ctx, - dsa->valuemask[1] = state->stencil[1].valuemask; - dsa->writemask[0] = state->stencil[0].writemask; - dsa->writemask[1] = state->stencil[1].writemask; -+ dsa->zwritemask = state->depth.writemask; - - db_depth_control = S_028800_Z_ENABLE(state->depth.enabled) | - S_028800_Z_WRITE_ENABLE(state->depth.writemask) | -@@ -1321,6 +1322,10 @@ void evergreen_init_color_surface_rat(struct r600_context *rctx, - * elements. */ - surf->cb_color_dim = pipe_buffer->width0; - -+ /* Set the buffer range the GPU will have access to: */ -+ util_range_add(&r600_resource(pipe_buffer)->valid_buffer_range, -+ 0, pipe_buffer->width0); -+ - surf->cb_color_cmask = surf->cb_color_base; - surf->cb_color_cmask_slice = 0; - surf->cb_color_fmask = surf->cb_color_base; -@@ -1405,10 +1410,15 @@ void evergreen_init_color_surface(struct r600_context *rctx, - S_028C74_NON_DISP_TILING_ORDER(non_disp_tiling) | - S_028C74_FMASK_BANK_HEIGHT(fmask_bankh); - -- if (rctx->chip_class == CAYMAN && rtex->resource.b.b.nr_samples > 1) { -- unsigned log_samples = util_logbase2(rtex->resource.b.b.nr_samples); -- color_attrib |= S_028C74_NUM_SAMPLES(log_samples) | -- S_028C74_NUM_FRAGMENTS(log_samples); -+ if (rctx->chip_class == CAYMAN) { -+ color_attrib |= S_028C74_FORCE_DST_ALPHA_1(desc->swizzle[3] == -+ UTIL_FORMAT_SWIZZLE_1); -+ -+ if (rtex->resource.b.b.nr_samples > 1) { -+ unsigned log_samples = util_logbase2(rtex->resource.b.b.nr_samples); -+ color_attrib |= S_028C74_NUM_SAMPLES(log_samples) | -+ S_028C74_NUM_FRAGMENTS(log_samples); -+ } - } - - ntype = V_028C70_NUMBER_UNORM; -@@ -1647,6 +1657,11 @@ static void evergreen_set_framebuffer_state(struct pipe_context *ctx, - } - if (rctx->framebuffer.state.zsbuf) { - rctx->flags |= R600_CONTEXT_WAIT_3D_IDLE | R600_CONTEXT_FLUSH_AND_INV; -+ -+ rtex = (struct r600_texture*)rctx->framebuffer.state.zsbuf->texture; -+ if (rtex->htile) { -+ rctx->flags |= R600_CONTEXT_FLUSH_AND_INV_DB_META; -+ } - } - - util_copy_framebuffer_state(&rctx->framebuffer.state, state); -@@ -2222,7 +2237,14 @@ static void evergreen_emit_db_misc_state(struct r600_context *rctx, struct r600_ - } - db_render_override |= S_02800C_NOOP_CULL_DISABLE(1); - } -- if (rctx->db_state.rsurf && rctx->db_state.rsurf->htile_enabled) { -+ /* FIXME we should be able to use hyperz even if we are not writing to -+ * zbuffer but somehow this trigger GPU lockup. See : -+ * -+ * https://bugs.freedesktop.org/show_bug.cgi?id=60848 -+ * -+ * Disable hyperz for now if not writing to zbuffer. -+ */ -+ if (rctx->db_state.rsurf && rctx->db_state.rsurf->htile_enabled && rctx->zwritemask) { - /* FORCE_OFF means HiZ/HiS are determined by DB_SHADER_CONTROL */ - db_render_override |= S_02800C_FORCE_HIZ_ENABLE(V_02800C_FORCE_OFF); - /* This is to fix a lockup when hyperz and alpha test are enabled at -@@ -3654,6 +3676,17 @@ boolean evergreen_dma_blit(struct pipe_context *ctx, - return FALSE; - } - -+ /* 128 bpp surfaces require non_disp_tiling for both -+ * tiled and linear buffers on cayman. However, async -+ * DMA only supports it on the tiled side. As such -+ * the tile order is backwards after a L2T/T2L packet. -+ */ -+ if ((rctx->chip_class == CAYMAN) && -+ (src_mode != dst_mode) && -+ (util_format_get_blocksize(src->format) >= 16)) { -+ return FALSE; -+ } -+ - if (src_mode == dst_mode) { - uint64_t dst_offset, src_offset; - /* simple dma blit would do NOTE code here assume : -diff --git a/src/gallium/drivers/r600/r600.h b/src/gallium/drivers/r600/r600.h -index 11dbb3b..0115293 100644 ---- a/src/gallium/drivers/r600/r600.h -+++ b/src/gallium/drivers/r600/r600.h -@@ -28,6 +28,7 @@ - - #include "../../winsys/radeon/drm/radeon_winsys.h" - #include "util/u_double_list.h" -+#include "util/u_range.h" - #include "util/u_transfer.h" - - #define R600_ERR(fmt, args...) \ -@@ -50,6 +51,16 @@ struct r600_resource { - - /* Resource state. */ - unsigned domains; -+ -+ /* The buffer range which is initialized (with a write transfer, -+ * streamout, DMA, or as a random access target). The rest of -+ * the buffer is considered invalid and can be mapped unsynchronized. -+ * -+ * This allows unsychronized mapping of a buffer range which hasn't -+ * been used yet. It's for applications which forget to use -+ * the unsynchronized map flag and expect the driver to figure it out. -+ */ -+ struct util_range valid_buffer_range; - }; - - #define R600_BLOCK_MAX_BO 32 -@@ -152,6 +163,7 @@ struct r600_so_target { - #define R600_CONTEXT_FLUSH_AND_INV (1 << 4) - #define R600_CONTEXT_FLUSH_AND_INV_CB_META (1 << 5) - #define R600_CONTEXT_PS_PARTIAL_FLUSH (1 << 6) -+#define R600_CONTEXT_FLUSH_AND_INV_DB_META (1 << 7) - - struct r600_context; - struct r600_screen; -diff --git a/src/gallium/drivers/r600/r600_asm.c b/src/gallium/drivers/r600/r600_asm.c -index f25c6aa..bda425c 100644 ---- a/src/gallium/drivers/r600/r600_asm.c -+++ b/src/gallium/drivers/r600/r600_asm.c -@@ -322,6 +322,7 @@ int r600_bytecode_add_output(struct r600_bytecode *bc, const struct r600_bytecod - output->swizzle_y == bc->cf_last->output.swizzle_y && - output->swizzle_z == bc->cf_last->output.swizzle_z && - output->swizzle_w == bc->cf_last->output.swizzle_w && -+ output->comp_mask == bc->cf_last->output.comp_mask && - (output->burst_count + bc->cf_last->output.burst_count) <= 16) { - - if ((output->gpr + output->burst_count) == bc->cf_last->output.gpr && -@@ -873,12 +874,6 @@ static int check_and_set_bank_swizzle(struct r600_bytecode *bc, - bank_swizzle[4] = SQ_ALU_SCL_210; - while(bank_swizzle[4] <= SQ_ALU_SCL_221) { - -- if (max_slots == 4) { -- for (i = 0; i < max_slots; i++) { -- if (bank_swizzle[i] == SQ_ALU_VEC_210) -- return -1; -- } -- } - init_bank_swizzle(&bs); - if (scalar_only == false) { - for (i = 0; i < 4; i++) { -@@ -910,8 +905,10 @@ static int check_and_set_bank_swizzle(struct r600_bytecode *bc, - bank_swizzle[i]++; - if (bank_swizzle[i] <= SQ_ALU_VEC_210) - break; -- else -+ else if (i < max_slots - 1) - bank_swizzle[i] = SQ_ALU_VEC_012; -+ else -+ return -1; - } - } - } -diff --git a/src/gallium/drivers/r600/r600_buffer.c b/src/gallium/drivers/r600/r600_buffer.c -index 6df0d91..bb85fc1 100644 ---- a/src/gallium/drivers/r600/r600_buffer.c -+++ b/src/gallium/drivers/r600/r600_buffer.c -@@ -34,6 +34,7 @@ static void r600_buffer_destroy(struct pipe_screen *screen, - { - struct r600_resource *rbuffer = r600_resource(buf); - -+ util_range_destroy(&rbuffer->valid_buffer_range); - pb_reference(&rbuffer->buf, NULL); - FREE(rbuffer); - } -@@ -98,6 +99,14 @@ static void *r600_buffer_transfer_map(struct pipe_context *ctx, - - assert(box->x + box->width <= resource->width0); - -+ /* See if the buffer range being mapped has never been initialized, -+ * in which case it can be mapped unsynchronized. */ -+ if (!(usage & PIPE_TRANSFER_UNSYNCHRONIZED) && -+ usage & PIPE_TRANSFER_WRITE && -+ !util_ranges_intersect(&rbuffer->valid_buffer_range, box->x, box->x + box->width)) { -+ usage |= PIPE_TRANSFER_UNSYNCHRONIZED; -+ } -+ - if (usage & PIPE_TRANSFER_DISCARD_WHOLE_RESOURCE && - !(usage & PIPE_TRANSFER_UNSYNCHRONIZED)) { - assert(usage & PIPE_TRANSFER_WRITE); -@@ -178,6 +187,7 @@ static void r600_buffer_transfer_unmap(struct pipe_context *pipe, - { - struct r600_context *rctx = (struct r600_context*)pipe; - struct r600_transfer *rtransfer = (struct r600_transfer*)transfer; -+ struct r600_resource *rbuffer = r600_resource(transfer->resource); - - if (rtransfer->staging) { - struct pipe_resource *dst, *src; -@@ -189,7 +199,7 @@ static void r600_buffer_transfer_unmap(struct pipe_context *pipe, - doffset = transfer->box.x; - soffset = rtransfer->offset + transfer->box.x % R600_MAP_BUFFER_ALIGNMENT; - /* Copy the staging buffer into the original one. */ -- if (rctx->rings.dma.cs && !(size % 4) && !(doffset % 4) && !(soffset)) { -+ if (rctx->rings.dma.cs && !(size % 4) && !(doffset % 4) && !(soffset % 4)) { - if (rctx->screen->chip_class >= EVERGREEN) { - evergreen_dma_copy(rctx, dst, src, doffset, soffset, size); - } else { -@@ -203,6 +213,11 @@ static void r600_buffer_transfer_unmap(struct pipe_context *pipe, - } - pipe_resource_reference((struct pipe_resource**)&rtransfer->staging, NULL); - } -+ -+ if (transfer->usage & PIPE_TRANSFER_WRITE) { -+ util_range_add(&rbuffer->valid_buffer_range, transfer->box.x, -+ transfer->box.x + transfer->box.width); -+ } - util_slab_free(&rctx->pool_transfers, transfer); - } - -@@ -259,6 +274,7 @@ bool r600_init_resource(struct r600_screen *rscreen, - - res->cs_buf = rscreen->ws->buffer_get_cs_handle(res->buf); - res->domains = domains; -+ util_range_set_empty(&res->valid_buffer_range); - return true; - } - -@@ -275,6 +291,7 @@ struct pipe_resource *r600_buffer_create(struct pipe_screen *screen, - pipe_reference_init(&rbuffer->b.b.reference, 1); - rbuffer->b.b.screen = screen; - rbuffer->b.vtbl = &r600_buffer_vtbl; -+ util_range_init(&rbuffer->valid_buffer_range); - - if (!r600_init_resource(rscreen, rbuffer, templ->width0, alignment, TRUE, templ->usage)) { - FREE(rbuffer); -diff --git a/src/gallium/drivers/r600/r600_hw_context.c b/src/gallium/drivers/r600/r600_hw_context.c -index 9091ec0..322381a 100644 ---- a/src/gallium/drivers/r600/r600_hw_context.c -+++ b/src/gallium/drivers/r600/r600_hw_context.c -@@ -648,6 +648,12 @@ void r600_flush_emit(struct r600_context *rctx) - cs->buf[cs->cdw++] = EVENT_TYPE(EVENT_TYPE_FLUSH_AND_INV_CB_META) | EVENT_INDEX(0); - } - -+ if (rctx->chip_class >= R700 && -+ (rctx->flags & R600_CONTEXT_FLUSH_AND_INV_DB_META)) { -+ cs->buf[cs->cdw++] = PKT3(PKT3_EVENT_WRITE, 0, 0); -+ cs->buf[cs->cdw++] = EVENT_TYPE(EVENT_TYPE_FLUSH_AND_INV_DB_META) | EVENT_INDEX(0); -+ } -+ - if (rctx->flags & R600_CONTEXT_FLUSH_AND_INV) { - cs->buf[cs->cdw++] = PKT3(PKT3_EVENT_WRITE, 0, 0); - cs->buf[cs->cdw++] = EVENT_TYPE(EVENT_TYPE_CACHE_FLUSH_AND_INV_EVENT) | EVENT_INDEX(0); -@@ -742,6 +748,7 @@ void r600_context_flush(struct r600_context *ctx, unsigned flags) - */ - ctx->flags |= R600_CONTEXT_FLUSH_AND_INV | - R600_CONTEXT_FLUSH_AND_INV_CB_META | -+ R600_CONTEXT_FLUSH_AND_INV_DB_META | - R600_CONTEXT_WAIT_3D_IDLE | - R600_CONTEXT_WAIT_CP_DMA_IDLE; - -@@ -1119,6 +1126,7 @@ void r600_cp_dma_copy_buffer(struct r600_context *rctx, - rctx->flags |= R600_CONTEXT_INVAL_READ_CACHES | - R600_CONTEXT_FLUSH_AND_INV | - R600_CONTEXT_FLUSH_AND_INV_CB_META | -+ R600_CONTEXT_FLUSH_AND_INV_DB_META | - R600_CONTEXT_STREAMOUT_FLUSH | - R600_CONTEXT_WAIT_3D_IDLE; - -@@ -1164,6 +1172,9 @@ void r600_cp_dma_copy_buffer(struct r600_context *rctx, - - /* Invalidate the read caches. */ - rctx->flags |= R600_CONTEXT_INVAL_READ_CACHES; -+ -+ util_range_add(&r600_resource(dst)->valid_buffer_range, dst_offset, -+ dst_offset + size); - } - - void r600_need_dma_space(struct r600_context *ctx, unsigned num_dw) -@@ -1210,4 +1221,7 @@ void r600_dma_copy(struct r600_context *rctx, - src_offset += csize << shift; - size -= csize; - } -+ -+ util_range_add(&rdst->valid_buffer_range, dst_offset, -+ dst_offset + size); - } -diff --git a/src/gallium/drivers/r600/r600_hw_context_priv.h b/src/gallium/drivers/r600/r600_hw_context_priv.h -index 692e6ec..3b50f68 100644 ---- a/src/gallium/drivers/r600/r600_hw_context_priv.h -+++ b/src/gallium/drivers/r600/r600_hw_context_priv.h -@@ -29,7 +29,7 @@ - #include "r600_pipe.h" - - /* the number of CS dwords for flushing and drawing */ --#define R600_MAX_FLUSH_CS_DWORDS 12 -+#define R600_MAX_FLUSH_CS_DWORDS 16 - #define R600_MAX_DRAW_CS_DWORDS 34 - #define R600_TRACE_CS_DWORDS 7 - -diff --git a/src/gallium/drivers/r600/r600_llvm.c b/src/gallium/drivers/r600/r600_llvm.c -index fa66fcc..7a41688 100644 ---- a/src/gallium/drivers/r600/r600_llvm.c -+++ b/src/gallium/drivers/r600/r600_llvm.c -@@ -38,8 +38,12 @@ static LLVMValueRef llvm_fetch_const( - LLVMValueRef index = LLVMBuildLoad(bld_base->base.gallivm->builder, bld->addr[reg->Indirect.Index][reg->Indirect.SwizzleX], ""); - offset[1] = LLVMBuildAdd(bld_base->base.gallivm->builder, offset[1], index, ""); - } -+ unsigned ConstantAddressSpace = CONSTANT_BUFFER_0_ADDR_SPACE ; -+ if (reg->Register.Dimension) { -+ ConstantAddressSpace += reg->Dimension.Index; -+ } - LLVMTypeRef const_ptr_type = LLVMPointerType(LLVMArrayType(LLVMVectorType(bld_base->base.elem_type, 4), 1024), -- CONSTANT_BUFFER_0_ADDR_SPACE); -+ ConstantAddressSpace); - LLVMValueRef const_ptr = LLVMBuildIntToPtr(bld_base->base.gallivm->builder, lp_build_const_int32(bld_base->base.gallivm, 0), const_ptr_type, ""); - LLVMValueRef ptr = LLVMBuildGEP(bld_base->base.gallivm->builder, const_ptr, offset, 2, ""); - LLVMValueRef cvecval = LLVMBuildLoad(bld_base->base.gallivm->builder, ptr, ""); -diff --git a/src/gallium/drivers/r600/r600_pipe.c b/src/gallium/drivers/r600/r600_pipe.c -index a59578d..a7973a5 100644 ---- a/src/gallium/drivers/r600/r600_pipe.c -+++ b/src/gallium/drivers/r600/r600_pipe.c -@@ -22,6 +22,7 @@ - */ - #include "r600_pipe.h" - #include "r600_public.h" -+#include "r600d.h" - - #include - #include "pipe/p_shader_tokens.h" -@@ -165,12 +166,23 @@ static void r600_flush_gfx_ring(void *ctx, unsigned flags) - static void r600_flush_dma_ring(void *ctx, unsigned flags) - { - struct r600_context *rctx = (struct r600_context *)ctx; -+ struct radeon_winsys_cs *cs = rctx->rings.dma.cs; -+ unsigned padding_dw, i; - -- if (!rctx->rings.dma.cs->cdw) { -+ if (!cs->cdw) { - return; - } -+ -+ /* Pad the DMA CS to a multiple of 8 dwords. */ -+ padding_dw = 8 - cs->cdw % 8; -+ if (padding_dw < 8) { -+ for (i = 0; i < padding_dw; i++) { -+ cs->buf[cs->cdw++] = DMA_PACKET(DMA_PACKET_NOP, 0, 0, 0); -+ } -+ } -+ - rctx->rings.dma.flushing = true; -- rctx->ws->cs_flush(rctx->rings.dma.cs, flags); -+ rctx->ws->cs_flush(cs, flags); - rctx->rings.dma.flushing = false; - } - -diff --git a/src/gallium/drivers/r600/r600_pipe.h b/src/gallium/drivers/r600/r600_pipe.h -index ec59c92..1be4321 100644 ---- a/src/gallium/drivers/r600/r600_pipe.h -+++ b/src/gallium/drivers/r600/r600_pipe.h -@@ -298,7 +298,8 @@ struct r600_dsa_state { - unsigned alpha_ref; - ubyte valuemask[2]; - ubyte writemask[2]; -- unsigned sx_alpha_test_control; -+ unsigned zwritemask; -+ unsigned sx_alpha_test_control; - }; - - struct r600_pipe_shader; -@@ -513,6 +514,7 @@ struct r600_context { - bool alpha_to_one; - bool force_blend_disable; - boolean dual_src_blend; -+ unsigned zwritemask; - - /* Index buffer. */ - struct pipe_index_buffer index_buffer; -diff --git a/src/gallium/drivers/r600/r600_state.c b/src/gallium/drivers/r600/r600_state.c -index 3f165f7..70232fd 100644 ---- a/src/gallium/drivers/r600/r600_state.c -+++ b/src/gallium/drivers/r600/r600_state.c -@@ -802,6 +802,7 @@ static void *r600_create_dsa_state(struct pipe_context *ctx, - dsa->valuemask[1] = state->stencil[1].valuemask; - dsa->writemask[0] = state->stencil[0].writemask; - dsa->writemask[1] = state->stencil[1].writemask; -+ dsa->zwritemask = state->depth.writemask; - - db_depth_control = S_028800_Z_ENABLE(state->depth.enabled) | - S_028800_Z_WRITE_ENABLE(state->depth.writemask) | -@@ -1515,6 +1516,11 @@ static void r600_set_framebuffer_state(struct pipe_context *ctx, - } - if (rctx->framebuffer.state.zsbuf) { - rctx->flags |= R600_CONTEXT_WAIT_3D_IDLE | R600_CONTEXT_FLUSH_AND_INV; -+ -+ rtex = (struct r600_texture*)rctx->framebuffer.state.zsbuf->texture; -+ if (rctx->chip_class >= R700 && rtex->htile) { -+ rctx->flags |= R600_CONTEXT_FLUSH_AND_INV_DB_META; -+ } - } - - /* Set the new state. */ -diff --git a/src/gallium/drivers/r600/r600_state_common.c b/src/gallium/drivers/r600/r600_state_common.c -index 88bb62b..f0e9de3 100644 ---- a/src/gallium/drivers/r600/r600_state_common.c -+++ b/src/gallium/drivers/r600/r600_state_common.c -@@ -284,6 +284,16 @@ static void r600_bind_dsa_state(struct pipe_context *ctx, void *state) - ref.valuemask[1] = dsa->valuemask[1]; - ref.writemask[0] = dsa->writemask[0]; - ref.writemask[1] = dsa->writemask[1]; -+ if (rctx->zwritemask != dsa->zwritemask) { -+ rctx->zwritemask = dsa->zwritemask; -+ if (rctx->chip_class >= EVERGREEN) { -+ /* work around some issue when not writting to zbuffer -+ * we are having lockup on evergreen so do not enable -+ * hyperz when not writting zbuffer -+ */ -+ rctx->db_misc_state.atom.dirty = true; -+ } -+ } - - r600_set_stencil_ref(ctx, &ref); - -@@ -972,6 +982,7 @@ r600_create_so_target(struct pipe_context *ctx, - { - struct r600_context *rctx = (struct r600_context *)ctx; - struct r600_so_target *t; -+ struct r600_resource *rbuffer = (struct r600_resource*)buffer; - - t = CALLOC_STRUCT(r600_so_target); - if (!t) { -@@ -991,6 +1002,9 @@ r600_create_so_target(struct pipe_context *ctx, - pipe_resource_reference(&t->b.buffer, buffer); - t->b.buffer_offset = buffer_offset; - t->b.buffer_size = buffer_size; -+ -+ util_range_add(&rbuffer->valid_buffer_range, buffer_offset, -+ buffer_offset + buffer_size); - return &t->b; - } - -diff --git a/src/gallium/drivers/r600/r600d.h b/src/gallium/drivers/r600/r600d.h -index 621e7a1..81e5a6c 100644 ---- a/src/gallium/drivers/r600/r600d.h -+++ b/src/gallium/drivers/r600/r600d.h -@@ -119,6 +119,7 @@ - #define EVENT_TYPE_CACHE_FLUSH_AND_INV_EVENT 0x16 - #define EVENT_TYPE_SO_VGTSTREAMOUT_FLUSH 0x1f - #define EVENT_TYPE_SAMPLE_STREAMOUTSTATS 0x20 -+#define EVENT_TYPE_FLUSH_AND_INV_DB_META 0x2c /* supported on r700+ */ - #define EVENT_TYPE_FLUSH_AND_INV_CB_META 46 /* supported on r700+ */ - #define EVENT_TYPE(x) ((x) << 0) - #define EVENT_INDEX(x) ((x) << 8) -diff --git a/src/gallium/drivers/radeon/radeon_setup_tgsi_llvm.c b/src/gallium/drivers/radeon/radeon_setup_tgsi_llvm.c -index 0f90991..8902ae4 100644 ---- a/src/gallium/drivers/radeon/radeon_setup_tgsi_llvm.c -+++ b/src/gallium/drivers/radeon/radeon_setup_tgsi_llvm.c -@@ -766,6 +766,22 @@ static void emit_icmp( - emit_data->output[emit_data->chan] = v; - } - -+static void emit_ucmp( -+ const struct lp_build_tgsi_action * action, -+ struct lp_build_tgsi_context * bld_base, -+ struct lp_build_emit_data * emit_data) -+{ -+ unsigned pred; -+ LLVMBuilderRef builder = bld_base->base.gallivm->builder; -+ LLVMContextRef context = bld_base->base.gallivm->context; -+ -+ -+ LLVMValueRef v = LLVMBuildFCmp(builder, LLVMRealUGE, -+ emit_data->args[0], lp_build_const_float(bld_base->base.gallivm, 0.), ""); -+ -+ emit_data->output[emit_data->chan] = LLVMBuildSelect(builder, v, emit_data->args[2], emit_data->args[1], ""); -+} -+ - static void emit_cmp( - const struct lp_build_tgsi_action *action, - struct lp_build_tgsi_context * bld_base, -@@ -1241,6 +1257,7 @@ void radeon_llvm_context_init(struct radeon_llvm_context * ctx) - bld_base->op_actions[TGSI_OPCODE_USNE].emit = emit_icmp; - bld_base->op_actions[TGSI_OPCODE_U2F].emit = emit_u2f; - bld_base->op_actions[TGSI_OPCODE_XOR].emit = emit_xor; -+ bld_base->op_actions[TGSI_OPCODE_UCMP].emit = emit_ucmp; - - bld_base->rsq_action.emit = build_tgsi_intrinsic_nomem; - bld_base->rsq_action.intr_name = "llvm.AMDGPU.rsq"; -diff --git a/src/gallium/drivers/radeonsi/radeonsi_shader.c b/src/gallium/drivers/radeonsi/radeonsi_shader.c -index 2545634..7922928 100644 ---- a/src/gallium/drivers/radeonsi/radeonsi_shader.c -+++ b/src/gallium/drivers/radeonsi/radeonsi_shader.c -@@ -309,14 +309,8 @@ static void declare_input_fs( - /* XXX: Handle all possible interpolation modes */ - switch (decl->Interp.Interpolate) { - case TGSI_INTERPOLATE_COLOR: -- /* XXX: Flat shading hangs the GPU */ -- if (si_shader_ctx->rctx->queued.named.rasterizer && -- si_shader_ctx->rctx->queued.named.rasterizer->flatshade) { --#if 0 -+ if (si_shader_ctx->key.flatshade) { - intr_name = "llvm.SI.fs.interp.constant"; --#else -- intr_name = "llvm.SI.fs.interp.linear.center"; --#endif - } else { - if (decl->Interp.Centroid) - intr_name = "llvm.SI.fs.interp.persp.centroid"; -@@ -325,11 +319,8 @@ static void declare_input_fs( - } - break; - case TGSI_INTERPOLATE_CONSTANT: -- /* XXX: Flat shading hangs the GPU */ --#if 0 - intr_name = "llvm.SI.fs.interp.constant"; - break; --#endif - case TGSI_INTERPOLATE_LINEAR: - if (decl->Interp.Centroid) - intr_name = "llvm.SI.fs.interp.linear.centroid"; -diff --git a/src/gallium/drivers/radeonsi/radeonsi_shader.h b/src/gallium/drivers/radeonsi/radeonsi_shader.h -index 07b2f9f..f54f67c 100644 ---- a/src/gallium/drivers/radeonsi/radeonsi_shader.h -+++ b/src/gallium/drivers/radeonsi/radeonsi_shader.h -@@ -82,6 +82,7 @@ struct si_shader_key { - unsigned nr_cbufs:4; - unsigned color_two_side:1; - unsigned alpha_func:3; -+ unsigned flatshade:1; - float alpha_ref; - }; - -diff --git a/src/gallium/drivers/radeonsi/si_state.c b/src/gallium/drivers/radeonsi/si_state.c -index a6b1983..39817fb 100644 ---- a/src/gallium/drivers/radeonsi/si_state.c -+++ b/src/gallium/drivers/radeonsi/si_state.c -@@ -421,8 +421,7 @@ static void *si_create_rs_state(struct pipe_context *ctx, - rs->offset_units = state->offset_units; - rs->offset_scale = state->offset_scale * 12.0f; - -- /* XXX: Flat shading hangs the GPU */ -- tmp = S_0286D4_FLAT_SHADE_ENA(0); -+ tmp = S_0286D4_FLAT_SHADE_ENA(1); - if (state->sprite_coord_enable) { - tmp |= S_0286D4_PNT_SPRITE_ENA(1) | - S_0286D4_PNT_SPRITE_OVRD_X(V_0286D4_SPI_PNT_SPRITE_SEL_S) | -@@ -1859,7 +1858,7 @@ static INLINE struct si_shader_key si_shader_selector_key(struct pipe_context *c - key.export_16bpc = rctx->export_16bpc; - if (rctx->queued.named.rasterizer) { - key.color_two_side = rctx->queued.named.rasterizer->two_side; -- /*key.flatshade = rctx->queued.named.rasterizer->flatshade;*/ -+ key.flatshade = rctx->queued.named.rasterizer->flatshade; - } - if (rctx->queued.named.dsa) { - key.alpha_func = rctx->queued.named.dsa->alpha_func; -diff --git a/src/gallium/drivers/radeonsi/si_state_draw.c b/src/gallium/drivers/radeonsi/si_state_draw.c -index 3704410..8c35625 100644 ---- a/src/gallium/drivers/radeonsi/si_state_draw.c -+++ b/src/gallium/drivers/radeonsi/si_state_draw.c -@@ -128,11 +128,6 @@ static void si_pipe_shader_ps(struct pipe_context *ctx, struct si_pipe_shader *s - continue; - } - -- /* XXX: Flat shading hangs the GPU */ -- if (shader->shader.input[i].interpolate == TGSI_INTERPOLATE_CONSTANT || -- (shader->shader.input[i].interpolate == TGSI_INTERPOLATE_COLOR && -- rctx->queued.named.rasterizer->flatshade)) -- have_linear = TRUE; - if (shader->shader.input[i].interpolate == TGSI_INTERPOLATE_LINEAR) - have_linear = TRUE; - if (shader->shader.input[i].interpolate == TGSI_INTERPOLATE_PERSPECTIVE) -@@ -327,15 +322,12 @@ static void si_update_spi_map(struct r600_context *rctx) - bcolor: - tmp = 0; - --#if 0 -- /* XXX: Flat shading hangs the GPU */ - if (name == TGSI_SEMANTIC_POSITION || - ps->input[i].interpolate == TGSI_INTERPOLATE_CONSTANT || - (ps->input[i].interpolate == TGSI_INTERPOLATE_COLOR && -- rctx->rasterizer && rctx->rasterizer->flatshade)) { -+ rctx->ps_shader->current->key.flatshade)) { - tmp |= S_028644_FLAT_SHADE(1); - } --#endif - - if (name == TGSI_SEMANTIC_GENERIC && - rctx->sprite_coord_enable & (1 << ps->input[i].sid)) { -@@ -453,8 +445,14 @@ static void si_vertex_buffer_update(struct r600_context *rctx) - si_pm4_sh_data_add(pm4, va & 0xFFFFFFFF); - si_pm4_sh_data_add(pm4, (S_008F04_BASE_ADDRESS_HI(va >> 32) | - S_008F04_STRIDE(vb->stride))); -- si_pm4_sh_data_add(pm4, (vb->buffer->width0 - vb->buffer_offset) / -- MAX2(vb->stride, 1)); -+ if (vb->stride) -+ /* Round up by rounding down and adding 1 */ -+ si_pm4_sh_data_add(pm4, -+ (vb->buffer->width0 - offset - -+ util_format_get_blocksize(ve->src_format)) / -+ vb->stride + 1); -+ else -+ si_pm4_sh_data_add(pm4, vb->buffer->width0 - offset); - si_pm4_sh_data_add(pm4, rctx->vertex_elements->rsrc_word3[i]); - - if (!bound[ve->vertex_buffer_index]) { -diff --git a/src/gallium/state_trackers/glx/xlib/xm_api.c b/src/gallium/state_trackers/glx/xlib/xm_api.c -index 607584f..021175c 100644 ---- a/src/gallium/state_trackers/glx/xlib/xm_api.c -+++ b/src/gallium/state_trackers/glx/xlib/xm_api.c -@@ -438,7 +438,6 @@ create_xmesa_buffer(Drawable d, BufferType type, - { - XMesaDisplay xmdpy = xmesa_init_display(vis->display); - XMesaBuffer b; -- uint width, height; - - ASSERT(type == WINDOW || type == PIXMAP || type == PBUFFER); - -@@ -457,7 +456,7 @@ create_xmesa_buffer(Drawable d, BufferType type, - b->type = type; - b->cmap = cmap; - -- get_drawable_size(vis->display, d, &width, &height); -+ get_drawable_size(vis->display, d, &b->width, &b->height); - - /* - * Create framebuffer, but we'll plug in our own renderbuffers below. -diff --git a/src/gallium/targets/dri-vmwgfx/Makefile.am b/src/gallium/targets/dri-vmwgfx/Makefile.am -index 06ebf88..ca7df65 100644 ---- a/src/gallium/targets/dri-vmwgfx/Makefile.am -+++ b/src/gallium/targets/dri-vmwgfx/Makefile.am -@@ -58,17 +58,13 @@ vmwgfx_dri_la_LIBADD = \ - $(top_builddir)/src/gallium/drivers/svga/libsvga.la \ - $(GALLIUM_DRI_LIB_DEPS) - --if HAVE_MESA_LLVM - vmwgfx_dri_la_LINK = $(CXXLINK) $(vmwgfx_dri_la_LDFLAGS) - # Mention a dummy pure C++ file to trigger generation of the $(LINK) variable - nodist_EXTRA_vmwgfx_dri_la_SOURCES = dummy-cpp.cpp - -+if HAVE_MESA_LLVM - vmwgfx_dri_la_LDFLAGS += $(LLVM_LDFLAGS) - vmwgfx_dri_la_LIBADD += $(LLVM_LIBS) --else --vmwgfx_dri_la_LINK = $(LINK) $(vmwgfx_dri_la_LDFLAGS) --# Mention a dummy pure C file to trigger generation of the $(LINK) variable --nodist_EXTRA_vmwgfx_dri_la_SOURCES = dummy-c.c - endif - - # Provide compatibility with scripts for the old Mesa build system for -diff --git a/src/gallium/targets/vdpau-softpipe/Makefile.am b/src/gallium/targets/vdpau-softpipe/Makefile.am -index 3372b5c..7bde2f8 100644 ---- a/src/gallium/targets/vdpau-softpipe/Makefile.am -+++ b/src/gallium/targets/vdpau-softpipe/Makefile.am -@@ -35,7 +35,7 @@ vdpaudir = $(VDPAU_LIB_INSTALL_DIR) - vdpau_LTLIBRARIES = libvdpau_softpipe.la - - libvdpau_softpipe_la_SOURCES = \ -- $(top_srcdir)/src/gallium/auxiliary/vl/vl_winsys_dri.c -+ $(top_srcdir)/src/gallium/auxiliary/vl/vl_winsys_xsp.c - - libvdpau_softpipe_la_LDFLAGS = \ - -module \ -diff --git a/src/gallium/winsys/radeon/drm/radeon_drm_bo.c b/src/gallium/winsys/radeon/drm/radeon_drm_bo.c -index 2d41c26..f4ac526 100644 ---- a/src/gallium/winsys/radeon/drm/radeon_drm_bo.c -+++ b/src/gallium/winsys/radeon/drm/radeon_drm_bo.c -@@ -957,16 +957,16 @@ static boolean radeon_winsys_bo_get_handle(struct pb_buffer *buffer, - - bo->flinked = TRUE; - bo->flink = flink.name; -+ -+ pipe_mutex_lock(bo->mgr->bo_handles_mutex); -+ util_hash_table_set(bo->mgr->bo_handles, (void*)(uintptr_t)bo->flink, bo); -+ pipe_mutex_unlock(bo->mgr->bo_handles_mutex); - } - whandle->handle = bo->flink; - } else if (whandle->type == DRM_API_HANDLE_TYPE_KMS) { - whandle->handle = bo->handle; - } - -- pipe_mutex_lock(bo->mgr->bo_handles_mutex); -- util_hash_table_set(bo->mgr->bo_handles, (void*)(uintptr_t)whandle->handle, bo); -- pipe_mutex_unlock(bo->mgr->bo_handles_mutex); -- - whandle->stride = stride; - return TRUE; - } -diff --git a/src/gbm/backends/dri/gbm_dri.c b/src/gbm/backends/dri/gbm_dri.c -index 519929e..a3a0530 100644 ---- a/src/gbm/backends/dri/gbm_dri.c -+++ b/src/gbm/backends/dri/gbm_dri.c -@@ -481,6 +481,7 @@ create_dumb(struct gbm_device *gbm, - bo->base.base.width = width; - bo->base.base.height = height; - bo->base.base.stride = create_arg.pitch; -+ bo->base.base.format = format; - bo->base.base.handle.u32 = create_arg.handle; - bo->handle = create_arg.handle; - bo->size = create_arg.size; -@@ -529,6 +530,7 @@ gbm_dri_bo_create(struct gbm_device *gbm, - bo->base.base.gbm = gbm; - bo->base.base.width = width; - bo->base.base.height = height; -+ bo->base.base.format = format; - - switch (format) { - case GBM_FORMAT_RGB565: -diff --git a/src/mesa/drivers/common/meta.c b/src/mesa/drivers/common/meta.c -index 4e32b50..29a209e 100644 ---- a/src/mesa/drivers/common/meta.c -+++ b/src/mesa/drivers/common/meta.c -@@ -1910,6 +1910,14 @@ _mesa_meta_BlitFramebuffer(struct gl_context *ctx, - GLuint *tmp = malloc(srcW * srcH * sizeof(GLuint)); - - if (tmp) { -+ -+ newTex = alloc_texture(depthTex, srcW, srcH, GL_DEPTH_COMPONENT); -+ _mesa_ReadPixels(srcX, srcY, srcW, srcH, GL_DEPTH_COMPONENT, -+ GL_UNSIGNED_INT, tmp); -+ setup_drawpix_texture(ctx, depthTex, newTex, GL_DEPTH_COMPONENT, -+ srcW, srcH, GL_DEPTH_COMPONENT, -+ GL_UNSIGNED_INT, tmp); -+ - /* texcoords (after texture allocation!) */ - { - verts[0].s = 0.0F; -@@ -1928,15 +1936,6 @@ _mesa_meta_BlitFramebuffer(struct gl_context *ctx, - if (!blit->DepthFP) - init_blit_depth_pixels(ctx); - -- /* maybe change tex format here */ -- newTex = alloc_texture(depthTex, srcW, srcH, GL_DEPTH_COMPONENT); -- -- _mesa_ReadPixels(srcX, srcY, srcW, srcH, -- GL_DEPTH_COMPONENT, GL_UNSIGNED_INT, tmp); -- -- setup_drawpix_texture(ctx, depthTex, newTex, GL_DEPTH_COMPONENT, srcW, srcH, -- GL_DEPTH_COMPONENT, GL_UNSIGNED_INT, tmp); -- - _mesa_BindProgramARB(GL_FRAGMENT_PROGRAM_ARB, blit->DepthFP); - _mesa_set_enable(ctx, GL_FRAGMENT_PROGRAM_ARB, GL_TRUE); - _mesa_ColorMask(GL_FALSE, GL_FALSE, GL_FALSE, GL_FALSE); -diff --git a/src/mesa/drivers/dri/i965/Makefile.am b/src/mesa/drivers/dri/i965/Makefile.am -index dc140df..77670ef 100644 ---- a/src/mesa/drivers/dri/i965/Makefile.am -+++ b/src/mesa/drivers/dri/i965/Makefile.am -@@ -62,6 +62,7 @@ TEST_LIBS = \ - ../common/libdri_test_stubs.la - - i965_dri_la_SOURCES = -+nodist_EXTRA_i965_dri_la_SOURCES = dummy2.cpp - i965_dri_la_LIBADD = $(COMMON_LIBS) - i965_dri_la_LDFLAGS = -module -avoid-version -shared - -diff --git a/src/mesa/drivers/dri/i965/brw_fs.cpp b/src/mesa/drivers/dri/i965/brw_fs.cpp -index 8dab431..f80219e 100644 ---- a/src/mesa/drivers/dri/i965/brw_fs.cpp -+++ b/src/mesa/drivers/dri/i965/brw_fs.cpp -@@ -258,6 +258,26 @@ fs_visitor::VARYING_PULL_CONSTANT_LOAD(fs_reg dst, fs_reg surf_index, - return instructions; - } - -+/** -+ * A helper for MOV generation for fixing up broken hardware SEND dependency -+ * handling. -+ */ -+fs_inst * -+fs_visitor::DEP_RESOLVE_MOV(int grf) -+{ -+ fs_inst *inst = MOV(brw_null_reg(), fs_reg(GRF, grf, BRW_REGISTER_TYPE_F)); -+ -+ inst->ir = NULL; -+ inst->annotation = "send dependency resolve"; -+ -+ /* The caller always wants uncompressed to emit the minimal extra -+ * dependencies, and to avoid having to deal with aligning its regs to 2. -+ */ -+ inst->force_uncompressed = true; -+ -+ return inst; -+} -+ - bool - fs_inst::equals(fs_inst *inst) - { -@@ -1690,8 +1710,6 @@ fs_visitor::setup_pull_constants() - dst, index, offset); - pull->ir = inst->ir; - pull->annotation = inst->annotation; -- pull->base_mrf = 14; -- pull->mlen = 1; - - inst->insert_before(pull); - -@@ -1911,6 +1929,7 @@ fs_visitor::register_coalesce() - - bool has_source_modifiers = (inst->src[0].abs || - inst->src[0].negate || -+ inst->src[0].smear != -1 || - inst->src[0].file == UNIFORM); - - /* Found a move of a GRF to a GRF. Let's see if we can coalesce -@@ -2228,6 +2247,265 @@ fs_visitor::remove_duplicate_mrf_writes() - return progress; - } - -+static void -+clear_deps_for_inst_src(fs_inst *inst, int dispatch_width, bool *deps, -+ int first_grf, int grf_len) -+{ -+ bool inst_16wide = (dispatch_width > 8 && -+ !inst->force_uncompressed && -+ !inst->force_sechalf); -+ -+ /* Clear the flag for registers that actually got read (as expected). */ -+ for (int i = 0; i < 3; i++) { -+ int grf; -+ if (inst->src[i].file == GRF) { -+ grf = inst->src[i].reg; -+ } else if (inst->src[i].file == FIXED_HW_REG && -+ inst->src[i].fixed_hw_reg.file == BRW_GENERAL_REGISTER_FILE) { -+ grf = inst->src[i].fixed_hw_reg.nr; -+ } else { -+ continue; -+ } -+ -+ if (grf >= first_grf && -+ grf < first_grf + grf_len) { -+ deps[grf - first_grf] = false; -+ if (inst_16wide) -+ deps[grf - first_grf + 1] = false; -+ } -+ } -+} -+ -+/** -+ * Implements this workaround for the original 965: -+ * -+ * "[DevBW, DevCL] Implementation Restrictions: As the hardware does not -+ * check for post destination dependencies on this instruction, software -+ * must ensure that there is no destination hazard for the case of ‘write -+ * followed by a posted write’ shown in the following example. -+ * -+ * 1. mov r3 0 -+ * 2. send r3.xy -+ * 3. mov r2 r3 -+ * -+ * Due to no post-destination dependency check on the ‘send’, the above -+ * code sequence could have two instructions (1 and 2) in flight at the -+ * same time that both consider ‘r3’ as the target of their final writes. -+ */ -+void -+fs_visitor::insert_gen4_pre_send_dependency_workarounds(fs_inst *inst) -+{ -+ int write_len = inst->regs_written() * dispatch_width / 8; -+ int first_write_grf = inst->dst.reg; -+ bool needs_dep[BRW_MAX_MRF]; -+ assert(write_len < (int)sizeof(needs_dep) - 1); -+ -+ memset(needs_dep, false, sizeof(needs_dep)); -+ memset(needs_dep, true, write_len); -+ -+ clear_deps_for_inst_src(inst, dispatch_width, -+ needs_dep, first_write_grf, write_len); -+ -+ /* Walk backwards looking for writes to registers we're writing which -+ * aren't read since being written. If we hit the start of the program, -+ * we assume that there are no outstanding dependencies on entry to the -+ * program. -+ */ -+ for (fs_inst *scan_inst = (fs_inst *)inst->prev; -+ scan_inst != NULL; -+ scan_inst = (fs_inst *)scan_inst->prev) { -+ -+ /* If we hit control flow, assume that there *are* outstanding -+ * dependencies, and force their cleanup before our instruction. -+ */ -+ if (scan_inst->is_control_flow()) { -+ for (int i = 0; i < write_len; i++) { -+ if (needs_dep[i]) { -+ inst->insert_before(DEP_RESOLVE_MOV(first_write_grf + i)); -+ } -+ } -+ } -+ -+ bool scan_inst_16wide = (dispatch_width > 8 && -+ !scan_inst->force_uncompressed && -+ !scan_inst->force_sechalf); -+ -+ /* We insert our reads as late as possible on the assumption that any -+ * instruction but a MOV that might have left us an outstanding -+ * dependency has more latency than a MOV. -+ */ -+ if (scan_inst->dst.file == GRF && -+ scan_inst->dst.reg >= first_write_grf && -+ scan_inst->dst.reg < first_write_grf + write_len && -+ needs_dep[scan_inst->dst.reg - first_write_grf]) { -+ inst->insert_before(DEP_RESOLVE_MOV(scan_inst->dst.reg)); -+ needs_dep[scan_inst->dst.reg - first_write_grf] = false; -+ if (scan_inst_16wide) -+ needs_dep[scan_inst->dst.reg - first_write_grf + 1] = false; -+ } -+ -+ /* Clear the flag for registers that actually got read (as expected). */ -+ clear_deps_for_inst_src(scan_inst, dispatch_width, -+ needs_dep, first_write_grf, write_len); -+ -+ /* Continue the loop only if we haven't resolved all the dependencies */ -+ int i; -+ for (i = 0; i < write_len; i++) { -+ if (needs_dep[i]) -+ break; -+ } -+ if (i == write_len) -+ return; -+ } -+} -+ -+/** -+ * Implements this workaround for the original 965: -+ * -+ * "[DevBW, DevCL] Errata: A destination register from a send can not be -+ * used as a destination register until after it has been sourced by an -+ * instruction with a different destination register. -+ */ -+void -+fs_visitor::insert_gen4_post_send_dependency_workarounds(fs_inst *inst) -+{ -+ int write_len = inst->regs_written() * dispatch_width / 8; -+ int first_write_grf = inst->dst.reg; -+ bool needs_dep[BRW_MAX_MRF]; -+ assert(write_len < (int)sizeof(needs_dep) - 1); -+ -+ memset(needs_dep, false, sizeof(needs_dep)); -+ memset(needs_dep, true, write_len); -+ /* Walk forwards looking for writes to registers we're writing which aren't -+ * read before being written. -+ */ -+ for (fs_inst *scan_inst = (fs_inst *)inst->next; -+ !scan_inst->is_tail_sentinel(); -+ scan_inst = (fs_inst *)scan_inst->next) { -+ /* If we hit control flow, force resolve all remaining dependencies. */ -+ if (scan_inst->is_control_flow()) { -+ for (int i = 0; i < write_len; i++) { -+ if (needs_dep[i]) -+ scan_inst->insert_before(DEP_RESOLVE_MOV(first_write_grf + i)); -+ } -+ } -+ -+ /* Clear the flag for registers that actually got read (as expected). */ -+ clear_deps_for_inst_src(scan_inst, dispatch_width, -+ needs_dep, first_write_grf, write_len); -+ -+ /* We insert our reads as late as possible since they're reading the -+ * result of a SEND, which has massive latency. -+ */ -+ if (scan_inst->dst.file == GRF && -+ scan_inst->dst.reg >= first_write_grf && -+ scan_inst->dst.reg < first_write_grf + write_len && -+ needs_dep[scan_inst->dst.reg - first_write_grf]) { -+ scan_inst->insert_before(DEP_RESOLVE_MOV(scan_inst->dst.reg)); -+ needs_dep[scan_inst->dst.reg - first_write_grf] = false; -+ } -+ -+ /* Continue the loop only if we haven't resolved all the dependencies */ -+ int i; -+ for (i = 0; i < write_len; i++) { -+ if (needs_dep[i]) -+ break; -+ } -+ if (i == write_len) -+ return; -+ } -+ -+ /* If we hit the end of the program, resolve all remaining dependencies out -+ * of paranoia. -+ */ -+ fs_inst *last_inst = (fs_inst *)this->instructions.get_tail(); -+ assert(last_inst->eot); -+ for (int i = 0; i < write_len; i++) { -+ if (needs_dep[i]) -+ last_inst->insert_before(DEP_RESOLVE_MOV(first_write_grf + i)); -+ } -+} -+ -+void -+fs_visitor::insert_gen4_send_dependency_workarounds() -+{ -+ if (intel->gen != 4 || intel->is_g4x) -+ return; -+ -+ /* Note that we're done with register allocation, so GRF fs_regs always -+ * have a .reg_offset of 0. -+ */ -+ -+ foreach_list_safe(node, &this->instructions) { -+ fs_inst *inst = (fs_inst *)node; -+ -+ if (inst->mlen != 0 && inst->dst.file == GRF) { -+ insert_gen4_pre_send_dependency_workarounds(inst); -+ insert_gen4_post_send_dependency_workarounds(inst); -+ } -+ } -+} -+ -+/** -+ * Turns the generic expression-style uniform pull constant load instruction -+ * into a hardware-specific series of instructions for loading a pull -+ * constant. -+ * -+ * The expression style allows the CSE pass before this to optimize out -+ * repeated loads from the same offset, and gives the pre-register-allocation -+ * scheduling full flexibility, while the conversion to native instructions -+ * allows the post-register-allocation scheduler the best information -+ * possible. -+ */ -+void -+fs_visitor::lower_uniform_pull_constant_loads() -+{ -+ foreach_list(node, &this->instructions) { -+ fs_inst *inst = (fs_inst *)node; -+ -+ if (inst->opcode != FS_OPCODE_UNIFORM_PULL_CONSTANT_LOAD) -+ continue; -+ -+ if (intel->gen >= 7) { -+ fs_reg const_offset_reg = inst->src[1]; -+ assert(const_offset_reg.file == IMM && -+ const_offset_reg.type == BRW_REGISTER_TYPE_UD); -+ const_offset_reg.imm.u /= 16; -+ fs_reg payload = fs_reg(this, glsl_type::uint_type); -+ struct brw_reg g0 = retype(brw_vec8_grf(0, 0), -+ BRW_REGISTER_TYPE_UD); -+ -+ fs_inst *setup1 = MOV(payload, fs_reg(g0)); -+ setup1->force_writemask_all = true; -+ /* We don't need the second half of this vgrf to be filled with g1 -+ * in the 16-wide case, but if we use force_uncompressed then live -+ * variable analysis won't consider this a def! -+ */ -+ -+ fs_inst *setup2 = new(mem_ctx) fs_inst(FS_OPCODE_SET_GLOBAL_OFFSET, -+ payload, payload, -+ const_offset_reg); -+ -+ setup1->ir = inst->ir; -+ setup1->annotation = inst->annotation; -+ inst->insert_before(setup1); -+ setup2->ir = inst->ir; -+ setup2->annotation = inst->annotation; -+ inst->insert_before(setup2); -+ inst->opcode = FS_OPCODE_UNIFORM_PULL_CONSTANT_LOAD_GEN7; -+ inst->src[1] = payload; -+ } else { -+ /* Before register allocation, we didn't tell the scheduler about the -+ * MRF we use. We know it's safe to use this MRF because nothing -+ * else does except for register spill/unspill, which generates and -+ * uses its MRF within a single IR instruction. -+ */ -+ inst->base_mrf = 14; -+ inst->mlen = 1; -+ } -+ } -+} -+ - void - fs_visitor::dump_instruction(fs_inst *inst) - { -@@ -2500,6 +2778,8 @@ fs_visitor::run() - - schedule_instructions(false); - -+ lower_uniform_pull_constant_loads(); -+ - assign_curb_setup(); - assign_urb_setup(); - -@@ -2522,6 +2802,12 @@ fs_visitor::run() - assert(force_uncompressed_stack == 0); - assert(force_sechalf_stack == 0); - -+ /* This must come after all optimization and register allocation, since -+ * it inserts dead code that happens to have side effects, and it does -+ * so based on the actual physical registers in use. -+ */ -+ insert_gen4_send_dependency_workarounds(); -+ - if (failed) - return false; - -diff --git a/src/mesa/drivers/dri/i965/brw_fs.h b/src/mesa/drivers/dri/i965/brw_fs.h -index 88fecb9..d1bb111 100644 ---- a/src/mesa/drivers/dri/i965/brw_fs.h -+++ b/src/mesa/drivers/dri/i965/brw_fs.h -@@ -285,6 +285,7 @@ public: - fs_inst *IF(fs_reg src0, fs_reg src1, uint32_t condition); - fs_inst *CMP(fs_reg dst, fs_reg src0, fs_reg src1, - uint32_t condition); -+ fs_inst *DEP_RESOLVE_MOV(int grf); - - int type_size(const struct glsl_type *type); - fs_inst *get_instruction_generating_reg(fs_inst *start, -@@ -329,7 +330,11 @@ public: - bool remove_duplicate_mrf_writes(); - bool virtual_grf_interferes(int a, int b); - void schedule_instructions(bool post_reg_alloc); -+ void insert_gen4_send_dependency_workarounds(); -+ void insert_gen4_pre_send_dependency_workarounds(fs_inst *inst); -+ void insert_gen4_post_send_dependency_workarounds(fs_inst *inst); - void fail(const char *msg, ...); -+ void lower_uniform_pull_constant_loads(); - - void push_force_uncompressed(); - void pop_force_uncompressed(); -diff --git a/src/mesa/drivers/dri/i965/brw_fs_copy_propagation.cpp b/src/mesa/drivers/dri/i965/brw_fs_copy_propagation.cpp -index c4ec1d9..194ed07 100644 ---- a/src/mesa/drivers/dri/i965/brw_fs_copy_propagation.cpp -+++ b/src/mesa/drivers/dri/i965/brw_fs_copy_propagation.cpp -@@ -223,7 +223,8 @@ fs_visitor::try_copy_propagate(fs_inst *inst, int arg, acp_entry *entry) - inst->src[arg].file = entry->src.file; - inst->src[arg].reg = entry->src.reg; - inst->src[arg].reg_offset = entry->src.reg_offset; -- inst->src[arg].smear = entry->src.smear; -+ if (entry->src.smear != -1) -+ inst->src[arg].smear = entry->src.smear; - - if (!inst->src[arg].abs) { - inst->src[arg].abs = entry->src.abs; -diff --git a/src/mesa/drivers/dri/i965/brw_fs_cse.cpp b/src/mesa/drivers/dri/i965/brw_fs_cse.cpp -index 70c143a..a13ca36 100644 ---- a/src/mesa/drivers/dri/i965/brw_fs_cse.cpp -+++ b/src/mesa/drivers/dri/i965/brw_fs_cse.cpp -@@ -105,7 +105,8 @@ fs_visitor::opt_cse_local(bblock_t *block, exec_list *aeb) - /* Match current instruction's expression against those in AEB. */ - if (inst->opcode == entry->generator->opcode && - inst->saturate == entry->generator->saturate && -- operands_match(entry->generator->src, inst->src)) { -+ inst->dst.type == entry->generator->dst.type && -+ operands_match(entry->generator->src, inst->src)) { - - found = true; - progress = true; -diff --git a/src/mesa/drivers/dri/i965/brw_fs_emit.cpp b/src/mesa/drivers/dri/i965/brw_fs_emit.cpp -index 45072da..365a2ec 100644 ---- a/src/mesa/drivers/dri/i965/brw_fs_emit.cpp -+++ b/src/mesa/drivers/dri/i965/brw_fs_emit.cpp -@@ -604,29 +604,8 @@ fs_generator::generate_unspill(fs_inst *inst, struct brw_reg dst) - { - assert(inst->mlen != 0); - -- /* Clear any post destination dependencies that would be ignored by -- * the block read. See the B-Spec for pre-gen5 send instruction. -- * -- * This could use a better solution, since texture sampling and -- * math reads could potentially run into it as well -- anywhere -- * that we have a SEND with a destination that is a register that -- * was written but not read within the last N instructions (what's -- * N? unsure). This is rare because of dead code elimination, but -- * not impossible. -- */ -- if (intel->gen == 4 && !intel->is_g4x) -- brw_MOV(p, brw_null_reg(), dst); -- - brw_oword_block_read_scratch(p, dst, brw_message_reg(inst->base_mrf), 1, - inst->offset); -- -- if (intel->gen == 4 && !intel->is_g4x) { -- /* gen4 errata: destination from a send can't be used as a -- * destination until it's been read. Just read it so we don't -- * have to worry. -- */ -- brw_MOV(p, brw_null_reg(), dst); -- } - } - - void -@@ -637,19 +616,6 @@ fs_generator::generate_uniform_pull_constant_load(fs_inst *inst, - { - assert(inst->mlen != 0); - -- /* Clear any post destination dependencies that would be ignored by -- * the block read. See the B-Spec for pre-gen5 send instruction. -- * -- * This could use a better solution, since texture sampling and -- * math reads could potentially run into it as well -- anywhere -- * that we have a SEND with a destination that is a register that -- * was written but not read within the last N instructions (what's -- * N? unsure). This is rare because of dead code elimination, but -- * not impossible. -- */ -- if (intel->gen == 4 && !intel->is_g4x) -- brw_MOV(p, brw_null_reg(), dst); -- - assert(index.file == BRW_IMMEDIATE_VALUE && - index.type == BRW_REGISTER_TYPE_UD); - uint32_t surf_index = index.dw1.ud; -@@ -660,14 +626,6 @@ fs_generator::generate_uniform_pull_constant_load(fs_inst *inst, - - brw_oword_block_read(p, dst, brw_message_reg(inst->base_mrf), - read_offset, surf_index); -- -- if (intel->gen == 4 && !intel->is_g4x) { -- /* gen4 errata: destination from a send can't be used as a -- * destination until it's been read. Just read it so we don't -- * have to worry. -- */ -- brw_MOV(p, brw_null_reg(), dst); -- } - } - - void -diff --git a/src/mesa/drivers/dri/i965/brw_fs_visitor.cpp b/src/mesa/drivers/dri/i965/brw_fs_visitor.cpp -index d4f6fc9..573921c 100644 ---- a/src/mesa/drivers/dri/i965/brw_fs_visitor.cpp -+++ b/src/mesa/drivers/dri/i965/brw_fs_visitor.cpp -@@ -597,31 +597,9 @@ fs_visitor::visit(ir_expression *ir) - fs_reg packed_consts = fs_reg(this, glsl_type::float_type); - packed_consts.type = result.type; - -- if (intel->gen >= 7) { -- fs_reg const_offset_reg = fs_reg(const_offset->value.u[0] / 16); -- fs_reg payload = fs_reg(this, glsl_type::uint_type); -- struct brw_reg g0 = retype(brw_vec8_grf(0, 0), -- BRW_REGISTER_TYPE_UD); -- fs_inst *setup = emit(MOV(payload, fs_reg(g0))); -- setup->force_writemask_all = true; -- /* We don't need the second half of this vgrf to be filled with g1 -- * in the 16-wide case, but if we use force_uncompressed then live -- * variable analysis won't consider this a def! -- */ -- -- emit(FS_OPCODE_SET_GLOBAL_OFFSET, payload, -- payload, const_offset_reg); -- emit(FS_OPCODE_UNIFORM_PULL_CONSTANT_LOAD_GEN7, packed_consts, -- surf_index, payload); -- } else { -- fs_reg const_offset_reg = fs_reg(const_offset->value.u[0]); -- fs_inst *pull = emit(fs_inst(FS_OPCODE_UNIFORM_PULL_CONSTANT_LOAD, -- packed_consts, -- surf_index, -- const_offset_reg)); -- pull->base_mrf = 14; -- pull->mlen = 1; -- } -+ fs_reg const_offset_reg = fs_reg(const_offset->value.u[0] & ~15); -+ emit(fs_inst(FS_OPCODE_UNIFORM_PULL_CONSTANT_LOAD, -+ packed_consts, surf_index, const_offset_reg)); - - packed_consts.smear = const_offset->value.u[0] % 16 / 4; - for (int i = 0; i < ir->type->vector_elements; i++) { -diff --git a/src/mesa/drivers/dri/i965/brw_vs_constval.c b/src/mesa/drivers/dri/i965/brw_vs_constval.c -index 3d53843..48635c5 100644 ---- a/src/mesa/drivers/dri/i965/brw_vs_constval.c -+++ b/src/mesa/drivers/dri/i965/brw_vs_constval.c -@@ -238,6 +238,23 @@ static void calc_wm_input_sizes( struct brw_context *brw ) - - calc_sizes(&t); - -+ /* _NEW_POINT -+ * -+ * If the SF will be replacing the vertex output with a reference to -+ * gl_PointCoord, then tell the fragment shader that the value actually -+ * does vary. -+ */ -+ if (ctx->Point.PointSprite) { -+ for (int i = 0; i < 8; i++) { -+ if (ctx->Point.CoordReplace[i]) { -+ t.size_masks[4-1] |= FRAG_BIT_TEX(i); -+ t.size_masks[3-1] |= FRAG_BIT_TEX(i); -+ t.size_masks[2-1] |= FRAG_BIT_TEX(i); -+ t.size_masks[1-1] |= FRAG_BIT_TEX(i); -+ } -+ } -+ } -+ - if (memcmp(brw->wm.input_size_masks, t.size_masks, sizeof(t.size_masks)) != 0) { - memcpy(brw->wm.input_size_masks, t.size_masks, sizeof(t.size_masks)); - brw->state.dirty.brw |= BRW_NEW_WM_INPUT_DIMENSIONS; -@@ -246,7 +263,7 @@ static void calc_wm_input_sizes( struct brw_context *brw ) - - const struct brw_tracked_state brw_wm_input_sizes = { - .dirty = { -- .mesa = _NEW_LIGHT | _NEW_PROGRAM, -+ .mesa = _NEW_LIGHT | _NEW_PROGRAM | _NEW_POINT, - .brw = BRW_NEW_VERTEX_PROGRAM | BRW_NEW_INPUT_DIMENSIONS, - .cache = 0 - }, -diff --git a/src/mesa/drivers/dri/intel/intel_chipset.h b/src/mesa/drivers/dri/intel/intel_chipset.h -index 9c00ba8..885f6c2 100644 ---- a/src/mesa/drivers/dri/intel/intel_chipset.h -+++ b/src/mesa/drivers/dri/intel/intel_chipset.h -@@ -114,15 +114,15 @@ - #define PCI_CHIP_HASWELL_ULT_S_GT1 0x0A0A /* Server */ - #define PCI_CHIP_HASWELL_ULT_S_GT2 0x0A1A - #define PCI_CHIP_HASWELL_ULT_S_GT2_PLUS 0x0A2A --#define PCI_CHIP_HASWELL_CRW_GT1 0x0D12 /* Desktop */ --#define PCI_CHIP_HASWELL_CRW_GT2 0x0D22 --#define PCI_CHIP_HASWELL_CRW_GT2_PLUS 0x0D32 --#define PCI_CHIP_HASWELL_CRW_M_GT1 0x0D16 /* Mobile */ --#define PCI_CHIP_HASWELL_CRW_M_GT2 0x0D26 --#define PCI_CHIP_HASWELL_CRW_M_GT2_PLUS 0x0D36 --#define PCI_CHIP_HASWELL_CRW_S_GT1 0x0D1A /* Server */ --#define PCI_CHIP_HASWELL_CRW_S_GT2 0x0D2A --#define PCI_CHIP_HASWELL_CRW_S_GT2_PLUS 0x0D3A -+#define PCI_CHIP_HASWELL_CRW_GT1 0x0D02 /* Desktop */ -+#define PCI_CHIP_HASWELL_CRW_GT2 0x0D12 -+#define PCI_CHIP_HASWELL_CRW_GT2_PLUS 0x0D22 -+#define PCI_CHIP_HASWELL_CRW_M_GT1 0x0D06 /* Mobile */ -+#define PCI_CHIP_HASWELL_CRW_M_GT2 0x0D16 -+#define PCI_CHIP_HASWELL_CRW_M_GT2_PLUS 0x0D26 -+#define PCI_CHIP_HASWELL_CRW_S_GT1 0x0D0A /* Server */ -+#define PCI_CHIP_HASWELL_CRW_S_GT2 0x0D1A -+#define PCI_CHIP_HASWELL_CRW_S_GT2_PLUS 0x0D2A - - #define IS_MOBILE(devid) (devid == PCI_CHIP_I855_GM || \ - devid == PCI_CHIP_I915_GM || \ -diff --git a/src/mesa/main/attrib.c b/src/mesa/main/attrib.c -index a951283..6d91534 100644 ---- a/src/mesa/main/attrib.c -+++ b/src/mesa/main/attrib.c -@@ -130,6 +130,9 @@ struct gl_enable_attrib - GLboolean VertexProgramPointSize; - GLboolean VertexProgramTwoSide; - -+ /* GL_ARB_fragment_program */ -+ GLboolean FragmentProgram; -+ - /* GL_ARB_point_sprite / GL_NV_point_sprite */ - GLboolean PointSprite; - GLboolean FragmentShaderATI; -@@ -316,6 +319,10 @@ _mesa_PushAttrib(GLbitfield mask) - attr->VertexProgram = ctx->VertexProgram.Enabled; - attr->VertexProgramPointSize = ctx->VertexProgram.PointSizeEnabled; - attr->VertexProgramTwoSide = ctx->VertexProgram.TwoSideEnabled; -+ -+ /* GL_ARB_fragment_program */ -+ attr->FragmentProgram = ctx->FragmentProgram.Enabled; -+ - save_attrib_data(&head, GL_ENABLE_BIT, attr); - - /* GL_ARB_framebuffer_sRGB / GL_EXT_framebuffer_sRGB */ -@@ -607,6 +614,11 @@ pop_enable_group(struct gl_context *ctx, const struct gl_enable_attrib *enable) - enable->VertexProgramTwoSide, - GL_VERTEX_PROGRAM_TWO_SIDE_ARB); - -+ /* GL_ARB_fragment_program */ -+ TEST_AND_UPDATE(ctx->FragmentProgram.Enabled, -+ enable->FragmentProgram, -+ GL_FRAGMENT_PROGRAM_ARB); -+ - /* GL_ARB_framebuffer_sRGB / GL_EXT_framebuffer_sRGB */ - TEST_AND_UPDATE(ctx->Color.sRGBEnabled, enable->sRGBEnabled, - GL_FRAMEBUFFER_SRGB); -diff --git a/src/mesa/main/context.c b/src/mesa/main/context.c -index 5e9e539..df57b76 100644 ---- a/src/mesa/main/context.c -+++ b/src/mesa/main/context.c -@@ -1072,7 +1072,6 @@ _mesa_initialize_context(struct gl_context *ctx, - case API_OPENGLES2: - ctx->FragmentProgram._MaintainTexEnvProgram = GL_TRUE; - ctx->VertexProgram._MaintainTnlProgram = GL_TRUE; -- ctx->Point.PointSprite = GL_TRUE; /* always on for ES 2.x */ - break; - } - -diff --git a/src/mesa/main/glformats.c b/src/mesa/main/glformats.c -index 8728540..c1e1658 100644 ---- a/src/mesa/main/glformats.c -+++ b/src/mesa/main/glformats.c -@@ -917,7 +917,7 @@ _mesa_is_compressed_format(struct gl_context *ctx, GLenum format) - case GL_COMPRESSED_SIGNED_RG11_EAC: - case GL_COMPRESSED_RGB8_PUNCHTHROUGH_ALPHA1_ETC2: - case GL_COMPRESSED_SRGB8_PUNCHTHROUGH_ALPHA1_ETC2: -- return _mesa_is_gles3(ctx); -+ return _mesa_is_gles3(ctx) || ctx->Extensions.ARB_ES3_compatibility; - case GL_PALETTE4_RGB8_OES: - case GL_PALETTE4_RGBA8_OES: - case GL_PALETTE4_R5_G6_B5_OES: -diff --git a/src/mesa/main/points.c b/src/mesa/main/points.c -index 1778640..c925d4c 100644 ---- a/src/mesa/main/points.c -+++ b/src/mesa/main/points.c -@@ -253,7 +253,8 @@ _mesa_init_point(struct gl_context *ctx) - * In a core context, the state will default to true, and the setters and - * getters are disabled. - */ -- ctx->Point.PointSprite = (ctx->API == API_OPENGL_CORE); -+ ctx->Point.PointSprite = (ctx->API == API_OPENGL_CORE || -+ ctx->API == API_OPENGLES2); - - ctx->Point.SpriteRMode = GL_ZERO; /* GL_NV_point_sprite (only!) */ - ctx->Point.SpriteOrigin = GL_UPPER_LEFT; /* GL_ARB_point_sprite */ -diff --git a/src/mesa/main/teximage.c b/src/mesa/main/teximage.c -index d1723b8..1b9525b 100644 ---- a/src/mesa/main/teximage.c -+++ b/src/mesa/main/teximage.c -@@ -520,7 +520,7 @@ _mesa_base_tex_format( struct gl_context *ctx, GLint internalFormat ) - } - } - -- if (_mesa_is_gles3(ctx)) { -+ if (_mesa_is_gles3(ctx) || ctx->Extensions.ARB_ES3_compatibility) { - switch (internalFormat) { - case GL_COMPRESSED_RGB8_ETC2: - case GL_COMPRESSED_SRGB8_ETC2: -@@ -3187,6 +3187,12 @@ _mesa_EGLImageTargetTexture2DOES (GLenum target, GLeglImageOES image) - return; - } - -+ if (!image) { -+ _mesa_error(ctx, GL_INVALID_OPERATION, -+ "glEGLImageTargetTexture2D(image=%p)", image); -+ return; -+ } -+ - if (ctx->NewState & _NEW_PIXEL) - _mesa_update_state(ctx); - -diff --git a/src/mesa/main/texparam.c b/src/mesa/main/texparam.c -index 52ede13..6f18ec6 100644 ---- a/src/mesa/main/texparam.c -+++ b/src/mesa/main/texparam.c -@@ -1432,6 +1432,12 @@ _mesa_GetTexParameterfv( GLenum target, GLenum pname, GLfloat *params ) - *params = (GLfloat) obj->Immutable; - break; - -+ case GL_REQUIRED_TEXTURE_IMAGE_UNITS_OES: -+ if (!_mesa_is_gles(ctx) || !ctx->Extensions.OES_EGL_image_external) -+ goto invalid_pname; -+ *params = obj->RequiredTextureImageUnits; -+ break; -+ - case GL_TEXTURE_SRGB_DECODE_EXT: - if (!ctx->Extensions.EXT_texture_sRGB_decode) - goto invalid_pname; -diff --git a/src/mesa/state_tracker/st_atom_rasterizer.c b/src/mesa/state_tracker/st_atom_rasterizer.c -index f20df9e..7fdfa72 100644 ---- a/src/mesa/state_tracker/st_atom_rasterizer.c -+++ b/src/mesa/state_tracker/st_atom_rasterizer.c -@@ -135,16 +135,12 @@ static void update_raster_state( struct st_context *st ) - - /* _NEW_POLYGON - */ -- if (ctx->Polygon.OffsetUnits != 0.0 || -- ctx->Polygon.OffsetFactor != 0.0) { -- raster->offset_point = ctx->Polygon.OffsetPoint; -- raster->offset_line = ctx->Polygon.OffsetLine; -- raster->offset_tri = ctx->Polygon.OffsetFill; -- } -- - if (ctx->Polygon.OffsetPoint || - ctx->Polygon.OffsetLine || - ctx->Polygon.OffsetFill) { -+ raster->offset_point = ctx->Polygon.OffsetPoint; -+ raster->offset_line = ctx->Polygon.OffsetLine; -+ raster->offset_tri = ctx->Polygon.OffsetFill; - raster->offset_units = ctx->Polygon.OffsetUnits; - raster->offset_scale = ctx->Polygon.OffsetFactor; - } -diff --git a/src/mesa/state_tracker/st_cb_bitmap.c b/src/mesa/state_tracker/st_cb_bitmap.c -index 63dbdb2..36fffe9 100644 ---- a/src/mesa/state_tracker/st_cb_bitmap.c -+++ b/src/mesa/state_tracker/st_cb_bitmap.c -@@ -675,11 +675,12 @@ st_flush_bitmap_cache(struct st_context *st) - * \return GL_TRUE for success, GL_FALSE if bitmap is too large, etc. - */ - static GLboolean --accum_bitmap(struct st_context *st, -+accum_bitmap(struct gl_context *ctx, - GLint x, GLint y, GLsizei width, GLsizei height, - const struct gl_pixelstore_attrib *unpack, - const GLubyte *bitmap ) - { -+ struct st_context *st = ctx->st; - struct bitmap_cache *cache = st->bitmap.cache; - int px = -999, py = -999; - const GLfloat z = st->ctx->Current.RasterPos[2]; -@@ -729,9 +730,17 @@ accum_bitmap(struct st_context *st, - /* create the transfer if needed */ - create_cache_trans(st); - -+ /* PBO source... */ -+ bitmap = _mesa_map_pbo_source(ctx, unpack, bitmap); -+ if (!bitmap) { -+ return FALSE; -+ } -+ - unpack_bitmap(st, px, py, width, height, unpack, bitmap, - cache->buffer, BITMAP_CACHE_WIDTH); - -+ _mesa_unmap_pbo_source(ctx, unpack); -+ - return GL_TRUE; /* accumulated */ - } - -@@ -764,7 +773,7 @@ st_Bitmap(struct gl_context *ctx, GLint x, GLint y, - semantic_indexes); - } - -- if (UseBitmapCache && accum_bitmap(st, x, y, width, height, unpack, bitmap)) -+ if (UseBitmapCache && accum_bitmap(ctx, x, y, width, height, unpack, bitmap)) - return; - - pt = make_bitmap_texture(ctx, width, height, unpack, bitmap); -diff --git a/src/mesa/state_tracker/st_draw.c b/src/mesa/state_tracker/st_draw.c -index de62264..bff8d9b 100644 ---- a/src/mesa/state_tracker/st_draw.c -+++ b/src/mesa/state_tracker/st_draw.c -@@ -283,7 +283,7 @@ st_draw_vbo(struct gl_context *ctx, - /* don't trim, restarts might be inside index list */ - cso_draw_vbo(st->cso_context, &info); - } -- else if (u_trim_pipe_prim(info.mode, &info.count)) -+ else if (u_trim_pipe_prim(prims[i].mode, &info.count)) - cso_draw_vbo(st->cso_context, &info); - } - -diff --git a/src/mesa/state_tracker/st_program.c b/src/mesa/state_tracker/st_program.c -index a9111b5..f56f7cb 100644 ---- a/src/mesa/state_tracker/st_program.c -+++ b/src/mesa/state_tracker/st_program.c -@@ -1142,7 +1142,7 @@ st_print_shaders(struct gl_context *ctx) - static void - destroy_program_variants(struct st_context *st, struct gl_program *program) - { -- if (!program) -+ if (!program || program == &_mesa_DummyProgram) - return; - - switch (program->Target) { diff --git a/mesa-9.1.1-53-g3cff41c.patch b/mesa-9.1.1-53-g3cff41c.patch new file mode 100644 index 0000000..d6b302a --- /dev/null +++ b/mesa-9.1.1-53-g3cff41c.patch @@ -0,0 +1,2242 @@ +diff --git a/configure.ac b/configure.ac +index 4a98996..1c9d606 100644 +--- a/configure.ac ++++ b/configure.ac +@@ -452,6 +452,9 @@ if test "x$enable_asm" = xyes; then + linux* | *freebsd* | dragonfly* | *netbsd*) + test "x$enable_64bit" = xyes && asm_arch=x86_64 || asm_arch=x86 + ;; ++ gnu*) ++ asm_arch=x86 ++ ;; + esac + ;; + x86_64) +@@ -826,20 +829,6 @@ if test "x$enable_dri" = xyes; then + fi + fi + +-dnl Find out if X is available. +-PKG_CHECK_MODULES([X11], [x11], [no_x=no], [no_x=yes]) +- +-dnl Try to tell the user that the --x-* options are only used when +-dnl pkg-config is not available. This must be right after AC_PATH_XTRA. +-m4_divert_once([HELP_BEGIN], +-[These options are only used when the X libraries cannot be found by the +-pkg-config utility.]) +- +-dnl We need X for xlib and dri, so bomb now if it's not found +-if test "x$enable_glx" = xyes -a "x$no_x" = xyes; then +- AC_MSG_ERROR([X11 development libraries needed for GLX]) +-fi +- + dnl Direct rendering or just indirect rendering + case "$host_os" in + gnu*) +diff --git a/docs/relnotes-9.1.1.html b/docs/relnotes-9.1.1.html +index 8921c8f..a73c974 100644 +--- a/docs/relnotes-9.1.1.html ++++ b/docs/relnotes-9.1.1.html +@@ -30,6 +30,9 @@ because GL_ARB_compatibility is not supported. + + MD5 checksums
+++6508d9882d8dce7106717f365632700c MesaLib-9.1.1.tar.gz ++6ea2bdc3b7ecfb4257b39814b4182580 MesaLib-9.1.1.tar.bz2 ++3434c0eb47849a08c53cd32833d10d13 MesaLib-9.1.1.zip +
+ +New features
+diff --git a/include/c99_compat.h b/include/c99_compat.h +new file mode 100644 +index 0000000..3a9f502 +--- /dev/null ++++ b/include/c99_compat.h +@@ -0,0 +1,147 @@ ++/************************************************************************** ++ * ++ * Copyright 2007-2013 VMware, Inc. ++ * All Rights Reserved. ++ * ++ * Permission is hereby granted, free of charge, to any person obtaining a ++ * copy of this software and associated documentation files (the ++ * "Software"), to deal in the Software without restriction, including ++ * without limitation the rights to use, copy, modify, merge, publish, ++ * distribute, sub license, and/or sell copies of the Software, and to ++ * permit persons to whom the Software is furnished to do so, subject to ++ * the following conditions: ++ * ++ * The above copyright notice and this permission notice (including the ++ * next paragraph) shall be included in all copies or substantial portions ++ * of the Software. ++ * ++ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS ++ * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF ++ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. ++ * IN NO EVENT SHALL VMWARE AND/OR ITS SUPPLIERS BE LIABLE FOR ++ * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, ++ * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE ++ * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. ++ * ++ **************************************************************************/ ++ ++#ifndef _C99_COMPAT_H_ ++#define _C99_COMPAT_H_ ++ ++ ++/* ++ * MSVC hacks. ++ */ ++#if defined(_MSC_VER) ++ /* ++ * Visual Studio 2012 will complain if we define the `inline` keyword, but ++ * actually it only supports the keyword on C++. ++ * ++ * We could skip this check by defining _ALLOW_KEYWORD_MACROS, but there is ++ * probably value in checking this for other keywords. So simply include ++ * the checking before we define it below. ++ */ ++# if _MSC_VER >= 1700 ++# include++# endif ++ ++ /* ++ * XXX: MSVC has a `__restrict` keyword, but it also has a ++ * `__declspec(restrict)` modifier, so it is impossible to define a ++ * `restrict` macro without interfering with the latter. Furthermore the ++ * MSVC standard library uses __declspec(restrict) under the _CRTRESTRICT ++ * macro. For now resolve this issue by redefining _CRTRESTRICT, but going ++ * forward we should probably should stop using restrict, especially ++ * considering that our code does not obbey strict aliasing rules any way. ++ */ ++# include ++# undef _CRTRESTRICT ++# define _CRTRESTRICT ++#endif ++ ++ ++/* ++ * C99 inline keyword ++ */ ++#ifndef inline ++# ifdef __cplusplus ++ /* C++ supports inline keyword */ ++# elif defined(__GNUC__) ++# define inline __inline__ ++# elif defined(_MSC_VER) ++# define inline __inline ++# elif defined(__ICL) ++# define inline __inline ++# elif defined(__INTEL_COMPILER) ++ /* Intel compiler supports inline keyword */ ++# elif defined(__WATCOMC__) && (__WATCOMC__ >= 1100) ++# define inline __inline ++# elif defined(__SUNPRO_C) && defined(__C99FEATURES__) ++ /* C99 supports inline keyword */ ++# elif (__STDC_VERSION__ >= 199901L) ++ /* C99 supports inline keyword */ ++# else ++# define inline ++# endif ++#endif ++ ++ ++/* ++ * C99 restrict keyword ++ * ++ * See also: ++ * - http://cellperformance.beyond3d.com/articles/2006/05/demystifying-the-restrict-keyword.html ++ */ ++#ifndef restrict ++# if (__STDC_VERSION__ >= 199901L) ++ /* C99 */ ++# elif defined(__SUNPRO_C) && defined(__C99FEATURES__) ++ /* C99 */ ++# elif defined(__GNUC__) ++# define restrict __restrict__ ++# elif defined(_MSC_VER) ++# define restrict __restrict ++# else ++# define restrict /* */ ++# endif ++#endif ++ ++ ++/* ++ * C99 __func__ macro ++ */ ++#ifndef __func__ ++# if (__STDC_VERSION__ >= 199901L) ++ /* C99 */ ++# elif defined(__SUNPRO_C) && defined(__C99FEATURES__) ++ /* C99 */ ++# elif defined(__GNUC__) ++# if __GNUC__ >= 2 ++# define __func__ __FUNCTION__ ++# else ++# define __func__ " " ++# endif ++# elif defined(_MSC_VER) ++# if _MSC_VER >= 1300 ++# define __func__ __FUNCTION__ ++# else ++# define __func__ " " ++# endif ++# else ++# define __func__ " " ++# endif ++#endif ++ ++ ++/* Simple test case for debugging */ ++#if 0 ++static inline const char * ++test_c99_compat_h(const void * restrict a, ++ const void * restrict b) ++{ ++ return __func__; ++} ++#endif ++ ++ ++#endif /* _C99_COMPAT_H_ */ +diff --git a/src/egl/main/eglcompiler.h b/src/egl/main/eglcompiler.h +index 9823693..2499172 100644 +--- a/src/egl/main/eglcompiler.h ++++ b/src/egl/main/eglcompiler.h +@@ -31,6 +31,9 @@ + #define EGLCOMPILER_INCLUDED + + ++#include "c99_compat.h" /* inline, __func__, etc. */ ++ ++ + /** + * Get standard integer types + */ +@@ -62,30 +65,7 @@ + #endif + + +-/** +- * Function inlining +- */ +-#ifndef inline +-# ifdef __cplusplus +- /* C++ supports inline keyword */ +-# elif defined(__GNUC__) +-# define inline __inline__ +-# elif defined(_MSC_VER) +-# define inline __inline +-# elif defined(__ICL) +-# define inline __inline +-# elif defined(__INTEL_COMPILER) +- /* Intel compiler supports inline keyword */ +-# elif defined(__WATCOMC__) && (__WATCOMC__ >= 1100) +-# define inline __inline +-# elif defined(__SUNPRO_C) && defined(__C99FEATURES__) +- /* C99 supports inline keyword */ +-# elif (__STDC_VERSION__ >= 199901L) +- /* C99 supports inline keyword */ +-# else +-# define inline +-# endif +-#endif ++/* XXX: Use standard `inline` keyword instead */ + #ifndef INLINE + # define INLINE inline + #endif +@@ -104,21 +84,9 @@ + # endif + #endif + +-/** +- * The __FUNCTION__ gcc variable is generally only used for debugging. +- * If we're not using gcc, define __FUNCTION__ as a cpp symbol here. +- * Don't define it if using a newer Windows compiler. +- */ ++/* XXX: Use standard `__func__` instead */ + #ifndef __FUNCTION__ +-# if (!defined __GNUC__) && (!defined __xlC__) && \ +- (!defined(_MSC_VER) || _MSC_VER < 1300) +-# if (__STDC_VERSION__ >= 199901L) /* C99 */ || \ +- (defined(__SUNPRO_C) && defined(__C99FEATURES__)) +-# define __FUNCTION__ __func__ +-# else +-# define __FUNCTION__ " " +-# endif +-# endif ++# define __FUNCTION__ __func__ + #endif + + #endif /* EGLCOMPILER_INCLUDED */ +diff --git a/src/gallium/auxiliary/Makefile.am b/src/gallium/auxiliary/Makefile.am +index a4eee47..f14279b 100644 +--- a/src/gallium/auxiliary/Makefile.am ++++ b/src/gallium/auxiliary/Makefile.am +@@ -7,7 +7,10 @@ noinst_LTLIBRARIES = libgallium.la + + AM_CFLAGS = \ + -I$(top_srcdir)/src/gallium/auxiliary/util \ +- $(GALLIUM_CFLAGS) ++ $(GALLIUM_CFLAGS) \ ++ $(VISIBILITY_CFLAGS) ++ ++AM_CXXFLAGS = $(VISIBILITY_CXXFLAGS) + + libgallium_la_SOURCES = \ + $(C_SOURCES) \ +@@ -18,7 +21,7 @@ if HAVE_MESA_LLVM + AM_CFLAGS += \ + $(LLVM_CFLAGS) + +-AM_CXXFLAGS = \ ++AM_CXXFLAGS += \ + $(GALLIUM_CFLAGS) \ + $(LLVM_CXXFLAGS) + +diff --git a/src/gallium/auxiliary/gallivm/lp_bld_tgsi.h b/src/gallium/auxiliary/gallivm/lp_bld_tgsi.h +index 4898849..5fb4a11 100644 +--- a/src/gallium/auxiliary/gallivm/lp_bld_tgsi.h ++++ b/src/gallium/auxiliary/gallivm/lp_bld_tgsi.h +@@ -240,6 +240,7 @@ struct lp_exec_mask { + struct lp_build_context *bld; + + boolean has_mask; ++ boolean ret_in_main; + + LLVMTypeRef int_vec_type; + +diff --git a/src/gallium/auxiliary/gallivm/lp_bld_tgsi_soa.c b/src/gallium/auxiliary/gallivm/lp_bld_tgsi_soa.c +index 0621fb4..413a918 100644 +--- a/src/gallium/auxiliary/gallivm/lp_bld_tgsi_soa.c ++++ b/src/gallium/auxiliary/gallivm/lp_bld_tgsi_soa.c +@@ -73,6 +73,7 @@ static void lp_exec_mask_init(struct lp_exec_mask *mask, struct lp_build_context + + mask->bld = bld; + mask->has_mask = FALSE; ++ mask->ret_in_main = FALSE; + mask->cond_stack_size = 0; + mask->loop_stack_size = 0; + mask->call_stack_size = 0; +@@ -108,7 +109,7 @@ static void lp_exec_mask_update(struct lp_exec_mask *mask) + } else + mask->exec_mask = mask->cond_mask; + +- if (mask->call_stack_size) { ++ if (mask->call_stack_size || mask->ret_in_main) { + mask->exec_mask = LLVMBuildAnd(builder, + mask->exec_mask, + mask->ret_mask, +@@ -117,7 +118,8 @@ static void lp_exec_mask_update(struct lp_exec_mask *mask) + + mask->has_mask = (mask->cond_stack_size > 0 || + mask->loop_stack_size > 0 || +- mask->call_stack_size > 0); ++ mask->call_stack_size > 0 || ++ mask->ret_in_main); + } + + static void lp_exec_mask_cond_push(struct lp_exec_mask *mask, +@@ -348,11 +350,23 @@ static void lp_exec_mask_ret(struct lp_exec_mask *mask, int *pc) + LLVMBuilderRef builder = mask->bld->gallivm->builder; + LLVMValueRef exec_mask; + +- if (mask->call_stack_size == 0) { ++ if (mask->cond_stack_size == 0 && ++ mask->loop_stack_size == 0 && ++ mask->call_stack_size == 0) { + /* returning from main() */ + *pc = -1; + return; + } ++ ++ if (mask->call_stack_size == 0) { ++ /* ++ * This requires special handling since we need to ensure ++ * we don't drop the mask even if we have no call stack ++ * (e.g. after a ret in a if clause after the endif) ++ */ ++ mask->ret_in_main = TRUE; ++ } ++ + exec_mask = LLVMBuildNot(builder, + mask->exec_mask, + "ret"); +diff --git a/src/gallium/auxiliary/tgsi/tgsi_text.c b/src/gallium/auxiliary/tgsi/tgsi_text.c +index 1267e79..dc3a5fb 100644 +--- a/src/gallium/auxiliary/tgsi/tgsi_text.c ++++ b/src/gallium/auxiliary/tgsi/tgsi_text.c +@@ -1569,7 +1569,7 @@ tgsi_text_translate( + struct tgsi_token *tokens, + uint num_tokens ) + { +- struct translate_ctx ctx; ++ struct translate_ctx ctx = {0}; + + ctx.text = text; + ctx.cur = text; +diff --git a/src/gallium/drivers/Makefile.am b/src/gallium/drivers/Makefile.am +index 25d9533..3477fee 100644 +--- a/src/gallium/drivers/Makefile.am ++++ b/src/gallium/drivers/Makefile.am +@@ -1,6 +1,7 @@ + AUTOMAKE_OPTIONS = subdir-objects + + AM_CPPFLAGS = \ ++ -I$(top_srcdir)/include \ + -I$(top_srcdir)/src/gallium/include \ + -I$(top_srcdir)/src/gallium/auxiliary \ + -I$(top_srcdir)/src/gallium/drivers \ +diff --git a/src/gallium/drivers/llvmpipe/lp_scene.c b/src/gallium/drivers/llvmpipe/lp_scene.c +index 328c0f7..e145391 100644 +--- a/src/gallium/drivers/llvmpipe/lp_scene.c ++++ b/src/gallium/drivers/llvmpipe/lp_scene.c +@@ -64,6 +64,28 @@ lp_scene_create( struct pipe_context *pipe ) + + pipe_mutex_init(scene->mutex); + ++#ifdef DEBUG ++ /* Do some scene limit sanity checks here */ ++ { ++ size_t maxBins = TILES_X * TILES_Y; ++ size_t maxCommandBytes = sizeof(struct cmd_block) * maxBins; ++ size_t maxCommandPlusData = maxCommandBytes + DATA_BLOCK_SIZE; ++ /* We'll need at least one command block per bin. Make sure that's ++ * less than the max allowed scene size. ++ */ ++ assert(maxCommandBytes < LP_SCENE_MAX_SIZE); ++ /* We'll also need space for at least one other data block */ ++ assert(maxCommandPlusData <= LP_SCENE_MAX_SIZE); ++ ++ /* Ideally, the size of a cmd_block object will be a power of two ++ * in order to avoid wasting space when we allocation them from ++ * data blocks (which are power of two also). ++ */ ++ assert(sizeof(struct cmd_block) == ++ util_next_power_of_two(sizeof(struct cmd_block))); ++ } ++#endif ++ + return scene; + } + +diff --git a/src/gallium/drivers/llvmpipe/lp_scene.h b/src/gallium/drivers/llvmpipe/lp_scene.h +index b1db61b..801829d 100644 +--- a/src/gallium/drivers/llvmpipe/lp_scene.h ++++ b/src/gallium/drivers/llvmpipe/lp_scene.h +@@ -49,12 +49,18 @@ struct lp_rast_state; + #define TILES_Y (LP_MAX_HEIGHT / TILE_SIZE) + + +-#define CMD_BLOCK_MAX 128 ++/* Commands per command block (ideally so sizeof(cmd_block) is a power of ++ * two in size.) ++ */ ++#define CMD_BLOCK_MAX 29 ++ ++/* Bytes per data block. ++ */ + #define DATA_BLOCK_SIZE (64 * 1024) + + /* Scene temporary storage is clamped to this size: + */ +-#define LP_SCENE_MAX_SIZE (4*1024*1024) ++#define LP_SCENE_MAX_SIZE (9*1024*1024) + + /* The maximum amount of texture storage referenced by a scene is + * clamped ot this size: +diff --git a/src/gallium/drivers/nv50/nv50_blit.h b/src/gallium/drivers/nv50/nv50_blit.h +index d409f21..bdd6a63 100644 +--- a/src/gallium/drivers/nv50/nv50_blit.h ++++ b/src/gallium/drivers/nv50/nv50_blit.h +@@ -180,4 +180,44 @@ nv50_blit_eng2d_get_mask(const struct pipe_blit_info *info) + return mask; + } + ++#if NOUVEAU_DRIVER == 0xc0 ++# define nv50_format_table nvc0_format_table ++#endif ++ ++/* return TRUE for formats that can be converted among each other by NVC0_2D */ ++static INLINE boolean ++nv50_2d_dst_format_faithful(enum pipe_format format) ++{ ++ const uint64_t mask = ++ NV50_ENG2D_SUPPORTED_FORMATS & ++ ~NV50_ENG2D_NOCONVERT_FORMATS; ++ uint8_t id = nv50_format_table[format].rt; ++ return (id >= 0xc0) && (mask & (1ULL << (id - 0xc0))); ++} ++static INLINE boolean ++nv50_2d_src_format_faithful(enum pipe_format format) ++{ ++ const uint64_t mask = ++ NV50_ENG2D_SUPPORTED_FORMATS & ++ ~(NV50_ENG2D_LUMINANCE_FORMATS | NV50_ENG2D_INTENSITY_FORMATS); ++ uint8_t id = nv50_format_table[format].rt; ++ return (id >= 0xc0) && (mask & (1ULL << (id - 0xc0))); ++} ++ ++static INLINE boolean ++nv50_2d_format_supported(enum pipe_format format) ++{ ++ uint8_t id = nv50_format_table[format].rt; ++ return (id >= 0xc0) && ++ (NV50_ENG2D_SUPPORTED_FORMATS & (1ULL << (id - 0xc0))); ++} ++ ++static INLINE boolean ++nv50_2d_dst_format_ops_supported(enum pipe_format format) ++{ ++ uint8_t id = nv50_format_table[format].rt; ++ return (id >= 0xc0) && ++ (NV50_ENG2D_OPERATION_FORMATS & (1ULL << (id - 0xc0))); ++} ++ + #endif /* __NV50_BLIT_H__ */ +diff --git a/src/gallium/drivers/nv50/nv50_state_validate.c b/src/gallium/drivers/nv50/nv50_state_validate.c +index a95e96d..f5e7b36 100644 +--- a/src/gallium/drivers/nv50/nv50_state_validate.c ++++ b/src/gallium/drivers/nv50/nv50_state_validate.c +@@ -9,6 +9,7 @@ nv50_validate_fb(struct nv50_context *nv50) + struct pipe_framebuffer_state *fb = &nv50->framebuffer; + unsigned i; + unsigned ms_mode = NV50_3D_MULTISAMPLE_MODE_MS1; ++ uint32_t array_size = 0xffff, array_mode = 0; + + nouveau_bufctx_reset(nv50->bufctx_3d, NV50_BIND_FB); + +@@ -23,6 +24,13 @@ nv50_validate_fb(struct nv50_context *nv50) + struct nv50_surface *sf = nv50_surface(fb->cbufs[i]); + struct nouveau_bo *bo = mt->base.bo; + ++ array_size = MIN2(array_size, sf->depth); ++ if (mt->layout_3d) ++ array_mode = NV50_3D_RT_ARRAY_MODE_MODE_3D; /* 1 << 16 */ ++ ++ /* can't mix 3D with ARRAY or have RTs of different depth/array_size */ ++ assert(mt->layout_3d || !array_mode || array_size == 1); ++ + BEGIN_NV04(push, NV50_3D(RT_ADDRESS_HIGH(i)), 5); + PUSH_DATAh(push, bo->offset + sf->offset); + PUSH_DATA (push, bo->offset + sf->offset); +@@ -34,7 +42,7 @@ nv50_validate_fb(struct nv50_context *nv50) + PUSH_DATA (push, sf->width); + PUSH_DATA (push, sf->height); + BEGIN_NV04(push, NV50_3D(RT_ARRAY_MODE), 1); +- PUSH_DATA (push, sf->depth); ++ PUSH_DATA (push, array_mode | array_size); + } else { + PUSH_DATA (push, 0); + PUSH_DATA (push, 0); +@@ -63,7 +71,7 @@ nv50_validate_fb(struct nv50_context *nv50) + struct nv50_miptree *mt = nv50_miptree(fb->zsbuf->texture); + struct nv50_surface *sf = nv50_surface(fb->zsbuf); + struct nouveau_bo *bo = mt->base.bo; +- int unk = mt->base.base.target == PIPE_TEXTURE_2D; ++ int unk = mt->base.base.target == PIPE_TEXTURE_3D || sf->depth == 1; + + BEGIN_NV04(push, NV50_3D(ZETA_ADDRESS_HIGH), 5); + PUSH_DATAh(push, bo->offset + sf->offset); +diff --git a/src/gallium/drivers/nv50/nv50_surface.c b/src/gallium/drivers/nv50/nv50_surface.c +index 7a0470c..3a780f6 100644 +--- a/src/gallium/drivers/nv50/nv50_surface.c ++++ b/src/gallium/drivers/nv50/nv50_surface.c +@@ -35,25 +35,22 @@ + + #include "nv50_context.h" + #include "nv50_resource.h" +-#include "nv50_blit.h" + + #include "nv50_defs.xml.h" + #include "nv50_texture.xml.h" + ++/* these are used in nv50_blit.h */ + #define NV50_ENG2D_SUPPORTED_FORMATS 0xff0843e080608409ULL ++#define NV50_ENG2D_NOCONVERT_FORMATS 0x0008402000000000ULL ++#define NV50_ENG2D_LUMINANCE_FORMATS 0x0008402000000000ULL ++#define NV50_ENG2D_INTENSITY_FORMATS 0x0000000000000000ULL ++#define NV50_ENG2D_OPERATION_FORMATS 0x060001c000608000ULL + +-/* return TRUE for formats that can be converted among each other by NV50_2D */ +-static INLINE boolean +-nv50_2d_format_faithful(enum pipe_format format) +-{ +- uint8_t id = nv50_format_table[format].rt; +- +- return (id >= 0xc0) && +- (NV50_ENG2D_SUPPORTED_FORMATS & (1ULL << (id - 0xc0))); +-} ++#define NOUVEAU_DRIVER 0x50 ++#include "nv50_blit.h" + + static INLINE uint8_t +-nv50_2d_format(enum pipe_format format) ++nv50_2d_format(enum pipe_format format, boolean dst, boolean dst_src_equal) + { + uint8_t id = nv50_format_table[format].rt; + +@@ -62,6 +59,7 @@ nv50_2d_format(enum pipe_format format) + */ + if ((id >= 0xc0) && (NV50_ENG2D_SUPPORTED_FORMATS & (1ULL << (id - 0xc0)))) + return id; ++ assert(dst_src_equal); + + switch (util_format_get_blocksize(format)) { + case 1: +@@ -78,7 +76,7 @@ nv50_2d_format(enum pipe_format format) + static int + nv50_2d_texture_set(struct nouveau_pushbuf *push, int dst, + struct nv50_miptree *mt, unsigned level, unsigned layer, +- enum pipe_format pformat) ++ enum pipe_format pformat, boolean dst_src_pformat_equal) + { + struct nouveau_bo *bo = mt->base.bo; + uint32_t width, height, depth; +@@ -86,7 +84,7 @@ nv50_2d_texture_set(struct nouveau_pushbuf *push, int dst, + uint32_t mthd = dst ? NV50_2D_DST_FORMAT : NV50_2D_SRC_FORMAT; + uint32_t offset = mt->level[level].offset; + +- format = nv50_2d_format(pformat); ++ format = nv50_2d_format(pformat, dst, dst_src_pformat_equal); + if (!format) { + NOUVEAU_ERR("invalid/unsupported surface format: %s\n", + util_format_name(pformat)); +@@ -155,15 +153,16 @@ nv50_2d_texture_do_copy(struct nouveau_pushbuf *push, + const enum pipe_format dfmt = dst->base.base.format; + const enum pipe_format sfmt = src->base.base.format; + int ret; ++ boolean eqfmt = dfmt == sfmt; + + if (!PUSH_SPACE(push, 2 * 16 + 32)) + return PIPE_ERROR; + +- ret = nv50_2d_texture_set(push, 1, dst, dst_level, dz, dfmt); ++ ret = nv50_2d_texture_set(push, 1, dst, dst_level, dz, dfmt, eqfmt); + if (ret) + return ret; + +- ret = nv50_2d_texture_set(push, 0, src, src_level, sz, sfmt); ++ ret = nv50_2d_texture_set(push, 0, src, src_level, sz, sfmt, eqfmt); + if (ret) + return ret; + +@@ -243,8 +242,8 @@ nv50_resource_copy_region(struct pipe_context *pipe, + } + + assert((src->format == dst->format) || +- (nv50_2d_format_faithful(src->format) && +- nv50_2d_format_faithful(dst->format))); ++ (nv50_2d_src_format_faithful(src->format) && ++ nv50_2d_dst_format_faithful(dst->format))); + + BCTX_REFN(nv50->bufctx, 2D, nv04_resource(src), RD); + BCTX_REFN(nv50->bufctx, 2D, nv04_resource(dst), WR); +@@ -936,7 +935,7 @@ nv50_blit_3d(struct nv50_context *nv50, const struct pipe_blit_info *info) + nv50_blit_select_fp(blit, info); + nv50_blitctx_pre_blit(blit); + +- nv50_blit_set_dst(blit, dst, info->dst.level, 0, info->dst.format); ++ nv50_blit_set_dst(blit, dst, info->dst.level, -1, info->dst.format); + nv50_blit_set_src(blit, src, info->src.level, -1, info->src.format, + blit->filter); + +@@ -977,6 +976,8 @@ nv50_blit_3d(struct nv50_context *nv50, const struct pipe_blit_info *info) + + BEGIN_NV04(push, NV50_3D(VIEWPORT_TRANSFORM_EN), 1); + PUSH_DATA (push, 0); ++ BEGIN_NV04(push, NV50_3D(VIEW_VOLUME_CLIP_CTRL), 1); ++ PUSH_DATA (push, 0x1); + + /* Draw a large triangle in screen coordinates covering the whole + * render target, with scissors defining the destination region. +@@ -1059,7 +1060,8 @@ nv50_blit_eng2d(struct nv50_context *nv50, const struct pipe_blit_info *info) + int64_t du_dx, dv_dy; + int i; + uint32_t mode; +- const uint32_t mask = nv50_blit_eng2d_get_mask(info); ++ uint32_t mask = nv50_blit_eng2d_get_mask(info); ++ boolean b; + + mode = nv50_blit_get_filter(info) ? + NV50_2D_BLIT_CONTROL_FILTER_BILINEAR : +@@ -1070,8 +1072,9 @@ nv50_blit_eng2d(struct nv50_context *nv50, const struct pipe_blit_info *info) + du_dx = ((int64_t)info->src.box.width << 32) / info->dst.box.width; + dv_dy = ((int64_t)info->src.box.height << 32) / info->dst.box.height; + +- nv50_2d_texture_set(push, 1, dst, info->dst.level, dz, info->dst.format); +- nv50_2d_texture_set(push, 0, src, info->src.level, sz, info->src.format); ++ b = info->dst.format == info->src.format; ++ nv50_2d_texture_set(push, 1, dst, info->dst.level, dz, info->dst.format, b); ++ nv50_2d_texture_set(push, 0, src, info->src.level, sz, info->src.format, b); + + if (info->scissor_enable) { + BEGIN_NV04(push, NV50_2D(CLIP_X), 5); +@@ -1094,6 +1097,17 @@ nv50_blit_eng2d(struct nv50_context *nv50, const struct pipe_blit_info *info) + PUSH_DATA (push, 0xffffffff); + BEGIN_NV04(push, NV50_2D(OPERATION), 1); + PUSH_DATA (push, NV50_2D_OPERATION_ROP); ++ } else ++ if (info->src.format != info->dst.format) { ++ if (info->src.format == PIPE_FORMAT_R8_UNORM || ++ info->src.format == PIPE_FORMAT_R16_UNORM || ++ info->src.format == PIPE_FORMAT_R16_FLOAT || ++ info->src.format == PIPE_FORMAT_R32_FLOAT) { ++ mask = 0xffff0000; /* also makes condition for OPERATION reset true */ ++ BEGIN_NV04(push, NV50_2D(BETA4), 2); ++ PUSH_DATA (push, mask); ++ PUSH_DATA (push, NV50_2D_OPERATION_SRCCOPY_PREMULT); ++ } + } + + if (src->ms_x > dst->ms_x || src->ms_y > dst->ms_y) { +@@ -1224,10 +1238,25 @@ nv50_blit(struct pipe_context *pipe, const struct pipe_blit_info *info) + debug_printf("blit: cannot filter array or cube textures in z direction"); + } + +- if (!eng3d && info->dst.format != info->src.format) +- if (!nv50_2d_format_faithful(info->dst.format) || +- !nv50_2d_format_faithful(info->src.format)) ++ if (!eng3d && info->dst.format != info->src.format) { ++ if (!nv50_2d_dst_format_faithful(info->dst.format) || ++ !nv50_2d_src_format_faithful(info->src.format)) { + eng3d = TRUE; ++ } else ++ if (!nv50_2d_src_format_faithful(info->src.format)) { ++ if (!util_format_is_luminance(info->src.format)) { ++ if (util_format_is_intensity(info->src.format)) ++ eng3d = TRUE; ++ else ++ if (!nv50_2d_dst_format_ops_supported(info->dst.format)) ++ eng3d = TRUE; ++ else ++ eng3d = !nv50_2d_format_supported(info->src.format); ++ } ++ } else ++ if (util_format_is_luminance_alpha(info->src.format)) ++ eng3d = TRUE; ++ } + + if (info->src.resource->nr_samples == 8 && + info->dst.resource->nr_samples <= 1) +diff --git a/src/gallium/drivers/nvc0/nvc0_3d.xml.h b/src/gallium/drivers/nvc0/nvc0_3d.xml.h +index 1cf1f96..bd3de58 100644 +--- a/src/gallium/drivers/nvc0/nvc0_3d.xml.h ++++ b/src/gallium/drivers/nvc0/nvc0_3d.xml.h +@@ -1041,7 +1041,7 @@ WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + #define NVC0_3D_VIEWPORT_TRANSFORM_EN 0x0000192c + + #define NVC0_3D_VIEW_VOLUME_CLIP_CTRL 0x0000193c +-#define NVC0_3D_VIEW_VOLUME_CLIP_CTRL_UNK0 0x00000001 ++#define NVC0_3D_VIEW_VOLUME_CLIP_CTRL_DEPTH_RANGE_0_1 0x00000001 + #define NVC0_3D_VIEW_VOLUME_CLIP_CTRL_UNK1__MASK 0x00000006 + #define NVC0_3D_VIEW_VOLUME_CLIP_CTRL_UNK1__SHIFT 1 + #define NVC0_3D_VIEW_VOLUME_CLIP_CTRL_UNK1_UNK0 0x00000000 +diff --git a/src/gallium/drivers/nvc0/nvc0_surface.c b/src/gallium/drivers/nvc0/nvc0_surface.c +index 281d740..66154a4 100644 +--- a/src/gallium/drivers/nvc0/nvc0_surface.c ++++ b/src/gallium/drivers/nvc0/nvc0_surface.c +@@ -36,29 +36,32 @@ + + #include "nv50/nv50_defs.xml.h" + #include "nv50/nv50_texture.xml.h" +-#include "nv50/nv50_blit.h" + +-#define NVC0_ENG2D_SUPPORTED_FORMATS 0xff9ccfe1cce3ccc9ULL ++/* these are used in nv50_blit.h */ ++#define NV50_ENG2D_SUPPORTED_FORMATS 0xff9ccfe1cce3ccc9ULL ++#define NV50_ENG2D_NOCONVERT_FORMATS 0x009cc02000000000ULL ++#define NV50_ENG2D_LUMINANCE_FORMATS 0x001cc02000000000ULL ++#define NV50_ENG2D_INTENSITY_FORMATS 0x0080000000000000ULL ++#define NV50_ENG2D_OPERATION_FORMATS 0x060001c000638000ULL + +-/* return TRUE for formats that can be converted among each other by NVC0_2D */ +-static INLINE boolean +-nvc0_2d_format_faithful(enum pipe_format format) +-{ +- uint8_t id = nvc0_format_table[format].rt; +- +- return (id >= 0xc0) && (NVC0_ENG2D_SUPPORTED_FORMATS & (1ULL << (id - 0xc0))); +-} ++#define NOUVEAU_DRIVER 0xc0 ++#include "nv50/nv50_blit.h" + + static INLINE uint8_t +-nvc0_2d_format(enum pipe_format format) ++nvc0_2d_format(enum pipe_format format, boolean dst, boolean dst_src_equal) + { + uint8_t id = nvc0_format_table[format].rt; + ++ /* A8_UNORM is treated as I8_UNORM as far as the 2D engine is concerned. */ ++ if (!dst && unlikely(format == PIPE_FORMAT_I8_UNORM) && !dst_src_equal) ++ return NV50_SURFACE_FORMAT_A8_UNORM; ++ + /* Hardware values for color formats range from 0xc0 to 0xff, + * but the 2D engine doesn't support all of them. + */ +- if (nvc0_2d_format_faithful(format)) ++ if (nv50_2d_format_supported(format)) + return id; ++ assert(dst_src_equal); + + switch (util_format_get_blocksize(format)) { + case 1: +@@ -72,6 +75,7 @@ nvc0_2d_format(enum pipe_format format) + case 16: + return NV50_SURFACE_FORMAT_RGBA32_FLOAT; + default: ++ assert(0); + return 0; + } + } +@@ -79,7 +83,7 @@ nvc0_2d_format(enum pipe_format format) + static int + nvc0_2d_texture_set(struct nouveau_pushbuf *push, boolean dst, + struct nv50_miptree *mt, unsigned level, unsigned layer, +- enum pipe_format pformat) ++ enum pipe_format pformat, boolean dst_src_pformat_equal) + { + struct nouveau_bo *bo = mt->base.bo; + uint32_t width, height, depth; +@@ -87,7 +91,7 @@ nvc0_2d_texture_set(struct nouveau_pushbuf *push, boolean dst, + uint32_t mthd = dst ? NVC0_2D_DST_FORMAT : NVC0_2D_SRC_FORMAT; + uint32_t offset = mt->level[level].offset; + +- format = nvc0_2d_format(pformat); ++ format = nvc0_2d_format(pformat, dst, dst_src_pformat_equal); + if (!format) { + NOUVEAU_ERR("invalid/unsupported surface format: %s\n", + util_format_name(pformat)); +@@ -157,15 +161,16 @@ nvc0_2d_texture_do_copy(struct nouveau_pushbuf *push, + const enum pipe_format dfmt = dst->base.base.format; + const enum pipe_format sfmt = src->base.base.format; + int ret; ++ boolean eqfmt = dfmt == sfmt; + + if (!PUSH_SPACE(push, 2 * 16 + 32)) + return PIPE_ERROR; + +- ret = nvc0_2d_texture_set(push, TRUE, dst, dst_level, dz, dfmt); ++ ret = nvc0_2d_texture_set(push, TRUE, dst, dst_level, dz, dfmt, eqfmt); + if (ret) + return ret; + +- ret = nvc0_2d_texture_set(push, FALSE, src, src_level, sz, sfmt); ++ ret = nvc0_2d_texture_set(push, FALSE, src, src_level, sz, sfmt, eqfmt); + if (ret) + return ret; + +@@ -243,8 +248,8 @@ nvc0_resource_copy_region(struct pipe_context *pipe, + return; + } + +- assert(nvc0_2d_format_faithful(src->format)); +- assert(nvc0_2d_format_faithful(dst->format)); ++ assert(nv50_2d_dst_format_faithful(dst->format)); ++ assert(nv50_2d_src_format_faithful(src->format)); + + BCTX_REFN(nvc0->bufctx, 2D, nv04_resource(src), RD); + BCTX_REFN(nvc0->bufctx, 2D, nv04_resource(dst), WR); +@@ -490,19 +495,19 @@ nvc0_blitter_make_vp(struct nvc0_blitter *blit) + { + static const uint32_t code_nvc0[] = + { +- 0xfff01c66, 0x06000080, /* vfetch b128 { $r0 $r1 $r2 $r3 } a[0x80] */ +- 0xfff11c26, 0x06000090, /* vfetch b96 { $r4 $r5 $r6 } a[0x90]*/ +- 0x03f01c66, 0x0a7e0070, /* export b128 o[0x70] { $r0 $r1 $r2 $r3 } */ +- 0x13f01c26, 0x0a7e0080, /* export b96 o[0x80] { $r4 $r5 $r6 } */ ++ 0xfff11c26, 0x06000080, /* vfetch b64 $r4:$r5 a[0x80] */ ++ 0xfff01c46, 0x06000090, /* vfetch b96 $r0:$r1:$r2 a[0x90] */ ++ 0x13f01c26, 0x0a7e0070, /* export b64 o[0x70] $r4:$r5 */ ++ 0x03f01c46, 0x0a7e0080, /* export b96 o[0x80] $r0:$r1:$r2 */ + 0x00001de7, 0x80000000, /* exit */ + }; + static const uint32_t code_nve4[] = + { + 0x00000007, 0x20000000, /* sched */ +- 0xfff01c66, 0x06000080, /* vfetch b128 { $r0 $r1 $r2 $r3 } a[0x80] */ +- 0xfff11c46, 0x06000090, /* vfetch b96 { $r4 $r5 $r6 } a[0x90]*/ +- 0x03f01c66, 0x0a7e0070, /* export b128 o[0x70] { $r0 $r1 $r2 $r3 } */ +- 0x13f01c46, 0x0a7e0080, /* export b96 o[0x80] { $r4 $r5 $r6 } */ ++ 0xfff11c26, 0x06000080, /* vfetch b64 $r4:$r5 a[0x80] */ ++ 0xfff01c46, 0x06000090, /* vfetch b96 $r0:$r1:$r2 a[0x90] */ ++ 0x13f01c26, 0x0a7e0070, /* export b64 o[0x70] $r4:$r5 */ ++ 0x03f01c46, 0x0a7e0080, /* export b96 o[0x80] $r0:$r1:$r2 */ + 0x00001de7, 0x80000000, /* exit */ + }; + +@@ -515,13 +520,13 @@ nvc0_blitter_make_vp(struct nvc0_blitter *blit) + blit->vp.code = (uint32_t *)code_nvc0; /* const_cast */ + blit->vp.code_size = sizeof(code_nvc0); + } +- blit->vp.max_gpr = 7; ++ blit->vp.max_gpr = 6; + blit->vp.vp.edgeflag = PIPE_MAX_ATTRIBS; + + blit->vp.hdr[0] = 0x00020461; /* vertprog magic */ + blit->vp.hdr[4] = 0x000ff000; /* no outputs read */ +- blit->vp.hdr[6] = 0x0000003f; /* a[0x80], a[0x90] */ +- blit->vp.hdr[13] = 0x0003f000; /* o[0x70], o[0x80] */ ++ blit->vp.hdr[6] = 0x00000073; /* a[0x80].xy, a[0x90].xyz */ ++ blit->vp.hdr[13] = 0x00073000; /* o[0x70].xy, o[0x80].xyz */ + } + + static void +@@ -820,7 +825,7 @@ nvc0_blit_3d(struct nvc0_context *nvc0, const struct pipe_blit_info *info) + nvc0_blit_select_fp(blit, info); + nvc0_blitctx_pre_blit(blit); + +- nvc0_blit_set_dst(blit, dst, info->dst.level, 0, info->dst.format); ++ nvc0_blit_set_dst(blit, dst, info->dst.level, -1, info->dst.format); + nvc0_blit_set_src(blit, src, info->src.level, -1, info->src.format, + blit->filter); + +@@ -859,6 +864,8 @@ nvc0_blit_3d(struct nvc0_context *nvc0, const struct pipe_blit_info *info) + z += 0.5f * dz; + + IMMED_NVC0(push, NVC0_3D(VIEWPORT_TRANSFORM_EN), 0); ++ IMMED_NVC0(push, NVC0_3D(VIEW_VOLUME_CLIP_CTRL), 0x2 | ++ NVC0_3D_VIEW_VOLUME_CLIP_CTRL_DEPTH_RANGE_0_1); + BEGIN_NVC0(push, NVC0_3D(VIEWPORT_HORIZ(0)), 2); + PUSH_DATA (push, nvc0->framebuffer.width << 16); + PUSH_DATA (push, nvc0->framebuffer.height << 16); +@@ -925,11 +932,14 @@ nvc0_blit_3d(struct nvc0_context *nvc0, const struct pipe_blit_info *info) + if (info->dst.box.z + info->dst.box.depth - 1) + IMMED_NVC0(push, NVC0_3D(LAYER), 0); + +- /* re-enable normally constant state */ ++ nvc0_blitctx_post_blit(blit); + +- IMMED_NVC0(push, NVC0_3D(VIEWPORT_TRANSFORM_EN), 1); ++ /* restore viewport */ + +- nvc0_blitctx_post_blit(blit); ++ BEGIN_NVC0(push, NVC0_3D(VIEWPORT_HORIZ(0)), 2); ++ PUSH_DATA (push, nvc0->framebuffer.width << 16); ++ PUSH_DATA (push, nvc0->framebuffer.height << 16); ++ IMMED_NVC0(push, NVC0_3D(VIEWPORT_TRANSFORM_EN), 1); + } + + static void +@@ -948,7 +958,8 @@ nvc0_blit_eng2d(struct nvc0_context *nvc0, const struct pipe_blit_info *info) + int64_t du_dx, dv_dy; + int i; + uint32_t mode; +- const uint32_t mask = nv50_blit_eng2d_get_mask(info); ++ uint32_t mask = nv50_blit_eng2d_get_mask(info); ++ boolean b; + + mode = nv50_blit_get_filter(info) ? + NVC0_2D_BLIT_CONTROL_FILTER_BILINEAR : +@@ -959,8 +970,9 @@ nvc0_blit_eng2d(struct nvc0_context *nvc0, const struct pipe_blit_info *info) + du_dx = ((int64_t)info->src.box.width << 32) / info->dst.box.width; + dv_dy = ((int64_t)info->src.box.height << 32) / info->dst.box.height; + +- nvc0_2d_texture_set(push, 1, dst, info->dst.level, dz, info->dst.format); +- nvc0_2d_texture_set(push, 0, src, info->src.level, sz, info->src.format); ++ b = info->dst.format == info->src.format; ++ nvc0_2d_texture_set(push, 1, dst, info->dst.level, dz, info->dst.format, b); ++ nvc0_2d_texture_set(push, 0, src, info->src.level, sz, info->src.format, b); + + if (info->scissor_enable) { + BEGIN_NVC0(push, NVC0_2D(CLIP_X), 5); +@@ -981,6 +993,25 @@ nvc0_blit_eng2d(struct nvc0_context *nvc0, const struct pipe_blit_info *info) + PUSH_DATA (push, 0xffffffff); + PUSH_DATA (push, 0xffffffff); + IMMED_NVC0(push, NVC0_2D(OPERATION), NVC0_2D_OPERATION_ROP); ++ } else ++ if (info->src.format != info->dst.format) { ++ if (info->src.format == PIPE_FORMAT_R8_UNORM || ++ info->src.format == PIPE_FORMAT_R8_SNORM || ++ info->src.format == PIPE_FORMAT_R16_UNORM || ++ info->src.format == PIPE_FORMAT_R16_SNORM || ++ info->src.format == PIPE_FORMAT_R16_FLOAT || ++ info->src.format == PIPE_FORMAT_R32_FLOAT) { ++ mask = 0xffff0000; /* also makes condition for OPERATION reset true */ ++ BEGIN_NVC0(push, NVC0_2D(BETA4), 2); ++ PUSH_DATA (push, mask); ++ PUSH_DATA (push, NVC0_2D_OPERATION_SRCCOPY_PREMULT); ++ } else ++ if (info->src.format == PIPE_FORMAT_A8_UNORM) { ++ mask = 0xff000000; ++ BEGIN_NVC0(push, NVC0_2D(BETA4), 2); ++ PUSH_DATA (push, mask); ++ PUSH_DATA (push, NVC0_2D_OPERATION_SRCCOPY_PREMULT); ++ } + } + + if (src->ms_x > dst->ms_x || src->ms_y > dst->ms_y) { +@@ -1106,10 +1137,24 @@ nvc0_blit(struct pipe_context *pipe, const struct pipe_blit_info *info) + debug_printf("blit: cannot filter array or cube textures in z direction"); + } + +- if (!eng3d && info->dst.format != info->src.format) +- if (!nvc0_2d_format_faithful(info->dst.format) || +- !nvc0_2d_format_faithful(info->src.format)) ++ if (!eng3d && info->dst.format != info->src.format) { ++ if (!nv50_2d_dst_format_faithful(info->dst.format)) { ++ eng3d = TRUE; ++ } else ++ if (!nv50_2d_src_format_faithful(info->src.format)) { ++ if (!util_format_is_luminance(info->src.format)) { ++ if (util_format_is_intensity(info->src.format)) ++ eng3d = info->src.format != PIPE_FORMAT_I8_UNORM; ++ else ++ if (!nv50_2d_dst_format_ops_supported(info->dst.format)) ++ eng3d = TRUE; ++ else ++ eng3d = !nv50_2d_format_supported(info->src.format); ++ } ++ } else ++ if (util_format_is_luminance_alpha(info->src.format)) + eng3d = TRUE; ++ } + + if (info->src.resource->nr_samples == 8 && + info->dst.resource->nr_samples <= 1) +diff --git a/src/gallium/drivers/r300/compiler/radeon_optimize.c b/src/gallium/drivers/r300/compiler/radeon_optimize.c +index 734c7f2..74afd6f 100644 +--- a/src/gallium/drivers/r300/compiler/radeon_optimize.c ++++ b/src/gallium/drivers/r300/compiler/radeon_optimize.c +@@ -708,6 +708,7 @@ static int peephole_mul_omod( + struct rc_list * writer_list; + struct rc_variable * var; + struct peephole_mul_cb_data cb_data; ++ unsigned writemask_sum; + + for (i = 0; i < 2; i++) { + unsigned int j; +@@ -815,10 +816,11 @@ static int peephole_mul_omod( + } + + /* Rewrite the instructions */ ++ writemask_sum = rc_variable_writemask_sum(writer_list->Item); + for (var = writer_list->Item; var; var = var->Friend) { + struct rc_variable * writer = var; + unsigned conversion_swizzle = rc_make_conversion_swizzle( +- writer->Inst->U.I.DstReg.WriteMask, ++ writemask_sum, + inst_mul->U.I.DstReg.WriteMask); + writer->Inst->U.I.Omod = omod_op; + writer->Inst->U.I.DstReg.File = inst_mul->U.I.DstReg.File; +diff --git a/src/gallium/drivers/r600/r600_pipe.c b/src/gallium/drivers/r600/r600_pipe.c +index a7973a5..80b859f 100644 +--- a/src/gallium/drivers/r600/r600_pipe.c ++++ b/src/gallium/drivers/r600/r600_pipe.c +@@ -1157,7 +1157,7 @@ struct pipe_screen *r600_screen_create(struct radeon_winsys *ws) + * case were triggering lockup quickly such as : + * piglit/bin/depthstencil-render-miplevels 1024 d=s=z24_s8 + */ +- rscreen->use_hyperz = debug_get_bool_option("R600_HYPERZ", TRUE); ++ rscreen->use_hyperz = debug_get_bool_option("R600_HYPERZ", FALSE); + rscreen->use_hyperz = rscreen->info.drm_minor >= 26 ? rscreen->use_hyperz : FALSE; + + rscreen->global_pool = compute_memory_pool_new(rscreen); +diff --git a/src/gallium/drivers/r600/r600_query.c b/src/gallium/drivers/r600/r600_query.c +index 0335189..782ad26 100644 +--- a/src/gallium/drivers/r600/r600_query.c ++++ b/src/gallium/drivers/r600/r600_query.c +@@ -186,10 +186,11 @@ static void r600_emit_query_end(struct r600_context *ctx, struct r600_query *que + case PIPE_QUERY_PRIMITIVES_GENERATED: + case PIPE_QUERY_SO_STATISTICS: + case PIPE_QUERY_SO_OVERFLOW_PREDICATE: ++ va += query->buffer.results_end + query->result_size/2; + cs->buf[cs->cdw++] = PKT3(PKT3_EVENT_WRITE, 2, 0); + cs->buf[cs->cdw++] = EVENT_TYPE(EVENT_TYPE_SAMPLE_STREAMOUTSTATS) | EVENT_INDEX(3); +- cs->buf[cs->cdw++] = query->buffer.results_end + query->result_size/2; +- cs->buf[cs->cdw++] = 0; ++ cs->buf[cs->cdw++] = va; ++ cs->buf[cs->cdw++] = (va >> 32UL) & 0xFF; + break; + case PIPE_QUERY_TIME_ELAPSED: + va += query->buffer.results_end + query->result_size/2; +diff --git a/src/gallium/drivers/radeon/Makefile.am b/src/gallium/drivers/radeon/Makefile.am +index e6eb241..a3a7b74 100644 +--- a/src/gallium/drivers/radeon/Makefile.am ++++ b/src/gallium/drivers/radeon/Makefile.am +@@ -1,11 +1,14 @@ + include Makefile.sources + include $(top_srcdir)/src/gallium/Automake.inc + ++LIBGALLIUM_LIBS= ++ + if HAVE_GALLIUM_R600 + if HAVE_GALLIUM_RADEONSI + lib_LTLIBRARIES = libllvmradeon@VERSION@.la + libllvmradeon@VERSION@_la_LDFLAGS = -Wl, -shared -avoid-version \ + $(LLVM_LDFLAGS) ++LIBGALLIUM_LIBS += $(top_builddir)/src/gallium/auxiliary/libgallium.la + else + noinst_LTLIBRARIES = libllvmradeon@VERSION@.la + endif +@@ -26,5 +29,6 @@ libllvmradeon@VERSION@_la_SOURCES = \ + $(C_FILES) + + libllvmradeon@VERSION@_la_LIBADD = \ ++ $(LIBGALLIUM_LIBS) \ + $(CLOCK_LIB) \ + $(LLVM_LIBS) +diff --git a/src/gallium/drivers/radeonsi/si_state_draw.c b/src/gallium/drivers/radeonsi/si_state_draw.c +index 8c35625..93766a3 100644 +--- a/src/gallium/drivers/radeonsi/si_state_draw.c ++++ b/src/gallium/drivers/radeonsi/si_state_draw.c +@@ -401,6 +401,11 @@ static void si_update_derived_state(struct r600_context *rctx) + } + + if (si_pm4_state_changed(rctx, ps) || si_pm4_state_changed(rctx, vs)) { ++ /* XXX: Emitting the PS state even when only the VS changed ++ * fixes random failures with piglit glsl-max-varyings. ++ * Not sure why... ++ */ ++ rctx->emitted.named.ps = NULL; + si_update_spi_map(rctx); + } + } +diff --git a/src/gallium/drivers/rbug/Makefile.am b/src/gallium/drivers/rbug/Makefile.am +index 655bfe1..3c1a8b5 100644 +--- a/src/gallium/drivers/rbug/Makefile.am ++++ b/src/gallium/drivers/rbug/Makefile.am +@@ -30,6 +30,7 @@ noinst_LTLIBRARIES = librbug.la + # preprocessor is determined by the ordering of the -I flags. + AM_CFLAGS = \ + $(GALLIUM_CFLAGS) \ ++ $(VISIBILITY_CFLAGS) \ + -I$(top_srcdir)/src/gallium/drivers \ + -I$(top_srcdir)/include + +diff --git a/src/gallium/drivers/svga/Makefile.am b/src/gallium/drivers/svga/Makefile.am +index fdaa3c8..7eacd90 100644 +--- a/src/gallium/drivers/svga/Makefile.am ++++ b/src/gallium/drivers/svga/Makefile.am +@@ -29,6 +29,8 @@ AM_CPPFLAGS = \ + -I$(top_srcdir)/include \ + $(GALLIUM_CFLAGS) + ++AM_CFLAGS = $(VISIBILITY_CFLAGS) ++ + #On some systems -std= must be added to CFLAGS to be the last -std= + CFLAGS += -std=gnu99 + +diff --git a/src/gallium/drivers/trace/Makefile.am b/src/gallium/drivers/trace/Makefile.am +index a9e1457..984ead4 100644 +--- a/src/gallium/drivers/trace/Makefile.am ++++ b/src/gallium/drivers/trace/Makefile.am +@@ -1,7 +1,8 @@ + include $(top_srcdir)/src/gallium/Automake.inc + + AM_CFLAGS = \ +- $(GALLIUM_CFLAGS) ++ $(GALLIUM_CFLAGS) \ ++ $(VISIBILITY_CFLAGS) + + noinst_LTLIBRARIES = libtrace.la + +diff --git a/src/gallium/include/pipe/p_compiler.h b/src/gallium/include/pipe/p_compiler.h +index 5958333..a131969 100644 +--- a/src/gallium/include/pipe/p_compiler.h ++++ b/src/gallium/include/pipe/p_compiler.h +@@ -29,6 +29,8 @@ + #define P_COMPILER_H + + ++#include "c99_compat.h" /* inline, __func__, etc. */ ++ + #include "p_config.h" + + #include +@@ -90,28 +92,7 @@ typedef unsigned char boolean; + #endif + #endif + +-/* Function inlining */ +-#ifndef inline +-# ifdef __cplusplus +- /* C++ supports inline keyword */ +-# elif defined(__GNUC__) +-# define inline __inline__ +-# elif defined(_MSC_VER) +-# define inline __inline +-# elif defined(__ICL) +-# define inline __inline +-# elif defined(__INTEL_COMPILER) +- /* Intel compiler supports inline keyword */ +-# elif defined(__WATCOMC__) && (__WATCOMC__ >= 1100) +-# define inline __inline +-# elif defined(__SUNPRO_C) && defined(__C99FEATURES__) +- /* C99 supports inline keyword */ +-# elif (__STDC_VERSION__ >= 199901L) +- /* C99 supports inline keyword */ +-# else +-# define inline +-# endif +-#endif ++/* XXX: Use standard `inline` keyword instead */ + #ifndef INLINE + # define INLINE inline + #endif +@@ -127,26 +108,6 @@ typedef unsigned char boolean; + # endif + #endif + +-/* +- * Define the C99 restrict keyword. +- * +- * See also: +- * - http://cellperformance.beyond3d.com/articles/2006/05/demystifying-the-restrict-keyword.html +- */ +-#ifndef restrict +-# if (__STDC_VERSION__ >= 199901L) +- /* C99 */ +-# elif defined(__SUNPRO_C) && defined(__C99FEATURES__) +- /* C99 */ +-# elif defined(__GNUC__) +-# define restrict __restrict__ +-# elif defined(_MSC_VER) +-# define restrict __restrict +-# else +-# define restrict /* */ +-# endif +-#endif +- + + /* Function visibility */ + #ifndef PUBLIC +@@ -160,35 +121,10 @@ typedef unsigned char boolean; + #endif + + +-/* The __FUNCTION__ gcc variable is generally only used for debugging. +- * If we're not using gcc, define __FUNCTION__ as a cpp symbol here. +- */ ++/* XXX: Use standard `__func__` instead */ + #ifndef __FUNCTION__ +-# if !defined(__GNUC__) +-# if (__STDC_VERSION__ >= 199901L) /* C99 */ || \ +- (defined(__SUNPRO_C) && defined(__C99FEATURES__)) +-# define __FUNCTION__ __func__ +-# else +-# define __FUNCTION__ " " +-# endif +-# endif +-# if defined(_MSC_VER) && _MSC_VER < 1300 +-# define __FUNCTION__ " " +-# endif ++# define __FUNCTION__ __func__ + #endif +-#ifndef __func__ +-# if (__STDC_VERSION__ >= 199901L) || \ +- (defined(__SUNPRO_C) && defined(__C99FEATURES__)) +- /* __func__ is part of C99 */ +-# elif defined(_MSC_VER) +-# if _MSC_VER >= 1300 +-# define __func__ __FUNCTION__ +-# else +-# define __func__ " " +-# endif +-# endif +-#endif +- + + + /* This should match linux gcc cdecl semantics everywhere, so that we +diff --git a/src/gallium/state_trackers/egl/Makefile.am b/src/gallium/state_trackers/egl/Makefile.am +index e19e9a3..f78b36e 100644 +--- a/src/gallium/state_trackers/egl/Makefile.am ++++ b/src/gallium/state_trackers/egl/Makefile.am +@@ -27,7 +27,7 @@ include $(top_srcdir)/src/gallium/Automake.inc + AM_CFLAGS = $(GALLIUM_CFLAGS) + AM_CPPFLAGS = \ + -I$(top_srcdir)/src/egl/main \ +- -I$(top_srcdir)/src/egl/wayland/wayland-drm/ \ ++ -I$(top_builddir)/src/egl/wayland/wayland-drm/ \ + -I$(top_srcdir)/include + + noinst_LTLIBRARIES = libegl.la +diff --git a/src/gallium/state_trackers/xa/Makefile.am b/src/gallium/state_trackers/xa/Makefile.am +index 5b53ef9..57d55c4 100644 +--- a/src/gallium/state_trackers/xa/Makefile.am ++++ b/src/gallium/state_trackers/xa/Makefile.am +@@ -24,7 +24,9 @@ include $(top_srcdir)/src/gallium/Automake.inc + + AM_CFLAGS = \ + -Wall -pedantic \ +- $(GALLIUM_CFLAGS) ++ $(GALLIUM_CFLAGS) \ ++ $(VISIBILITY_CFLAGS) ++ + AM_CPPFLAGS = \ + -I$(top_srcdir)/src/gallium/ \ + -I$(top_srcdir)/src/gallium/winsys \ +diff --git a/src/gallium/winsys/svga/drm/Makefile.am b/src/gallium/winsys/svga/drm/Makefile.am +index 53bbcc2..d7ada3c 100644 +--- a/src/gallium/winsys/svga/drm/Makefile.am ++++ b/src/gallium/winsys/svga/drm/Makefile.am +@@ -31,6 +31,8 @@ AM_CPPFLAGS = \ + $(GALLIUM_CFLAGS) \ + $(LIBDRM_CFLAGS) + ++AM_CFLAGS = $(VISIBILITY_CFLAGS) ++ + #On some systems -std= must be added to CFLAGS to be the last -std= + CFLAGS += -std=gnu99 -D_FILE_OFFSET_BITS=64 + +diff --git a/src/glsl/glsl_types.cpp b/src/glsl/glsl_types.cpp +index 02d85b8..dee9709 100644 +--- a/src/glsl/glsl_types.cpp ++++ b/src/glsl/glsl_types.cpp +@@ -446,6 +446,8 @@ const glsl_type *glsl_type::get_scalar_type() const + return int_type; + case GLSL_TYPE_FLOAT: + return float_type; ++ case GLSL_TYPE_BOOL: ++ return bool_type; + default: + /* Handle everything else */ + return type; +diff --git a/src/glsl/ir_validate.cpp b/src/glsl/ir_validate.cpp +index d8cafd5..78ce13e 100644 +--- a/src/glsl/ir_validate.cpp ++++ b/src/glsl/ir_validate.cpp +@@ -695,6 +695,11 @@ check_node_type(ir_instruction *ir, void *data) + void + validate_ir_tree(exec_list *instructions) + { ++ /* We shouldn't have any reason to validate IR in a release build, ++ * and it's half composed of assert()s anyway which wouldn't do ++ * anything. ++ */ ++#ifdef DEBUG + ir_validate v; + + v.run(instructions); +@@ -704,4 +709,5 @@ validate_ir_tree(exec_list *instructions) + + visit_tree(ir, check_node_type, NULL); + } ++#endif + } +diff --git a/src/glsl/linker.cpp b/src/glsl/linker.cpp +index 57e7a9a..cf0420c 100644 +--- a/src/glsl/linker.cpp ++++ b/src/glsl/linker.cpp +@@ -1067,13 +1067,11 @@ link_intrastage_shaders(void *mem_ctx, + + free(linking_shaders); + +-#ifdef DEBUG + /* At this point linked should contain all of the linked IR, so + * validate it to make sure nothing went wrong. + */ + if (linked) + validate_ir_tree(linked->ir); +-#endif + + /* Make a pass over all variable declarations to ensure that arrays with + * unspecified sizes have a size specified. The size is inferred from the +diff --git a/src/glx/Makefile.am b/src/glx/Makefile.am +index 4aa900a..f01709b 100644 +--- a/src/glx/Makefile.am ++++ b/src/glx/Makefile.am +@@ -39,6 +39,7 @@ AM_CFLAGS = \ + -I$(top_srcdir)/src/mapi/glapi \ + -I$(top_builddir)/src/mapi \ + -I$(top_builddir)/src/mapi/glapi \ ++ $(VISIBILITY_CFLAGS) \ + $(SHARED_GLAPI_CFLAGS) \ + $(EXTRA_DEFINES_XF86VIDMODE) \ + -D_REENTRANT \ +diff --git a/src/mapi/glapi/gen/gl_x86-64_asm.py b/src/mapi/glapi/gen/gl_x86-64_asm.py +index a3548c2..19e0e15 100644 +--- a/src/mapi/glapi/gen/gl_x86-64_asm.py ++++ b/src/mapi/glapi/gen/gl_x86-64_asm.py +@@ -181,19 +181,6 @@ class PrintGenericStubs(gl_XML.gl_print_base): + + def printRealFooter(self): + print '' +- print '#if defined(GLX_USE_TLS) && defined(__linux__)' +- print ' .section ".note.ABI-tag", "a"' +- print ' .p2align 2' +- print ' .long 1f - 0f /* name length */' +- print ' .long 3f - 2f /* data length */' +- print ' .long 1 /* note length */' +- print '0: .asciz "GNU" /* vendor name */' +- print '1: .p2align 2' +- print '2: .long 0 /* note data: the ABI tag */' +- print ' .long 2,4,20 /* Minimum kernel version w/TLS */' +- print '3: .p2align 2 /* pad out section */' +- print '#endif /* GLX_USE_TLS */' +- print '' + print '#if defined (__ELF__) && defined (__linux__)' + print ' .section .note.GNU-stack,"",%progbits' + print '#endif' +diff --git a/src/mapi/glapi/gen/gl_x86_asm.py b/src/mapi/glapi/gen/gl_x86_asm.py +index 8b0f6ee..919bbc0 100644 +--- a/src/mapi/glapi/gen/gl_x86_asm.py ++++ b/src/mapi/glapi/gen/gl_x86_asm.py +@@ -189,19 +189,6 @@ class PrintGenericStubs(gl_XML.gl_print_base): + print '\t\tALIGNTEXT16' + print 'GLNAME(gl_dispatch_functions_end):' + print '' +- print '#if defined(GLX_USE_TLS) && defined(__linux__)' +- print ' .section ".note.ABI-tag", "a"' +- print ' .p2align 2' +- print ' .long 1f - 0f /* name length */' +- print ' .long 3f - 2f /* data length */' +- print ' .long 1 /* note length */' +- print '0: .asciz "GNU" /* vendor name */' +- print '1: .p2align 2' +- print '2: .long 0 /* note data: the ABI tag */' +- print ' .long 2,4,20 /* Minimum kernel version w/TLS */' +- print '3: .p2align 2 /* pad out section */' +- print '#endif /* GLX_USE_TLS */' +- print '' + print '#if defined (__ELF__) && defined (__linux__)' + print ' .section .note.GNU-stack,"",%progbits' + print '#endif' +diff --git a/src/mapi/mapi/entry_x86-64_tls.h b/src/mapi/mapi/entry_x86-64_tls.h +index 72d4125..36cad00 100644 +--- a/src/mapi/mapi/entry_x86-64_tls.h ++++ b/src/mapi/mapi/entry_x86-64_tls.h +@@ -28,19 +28,6 @@ + + #include "u_macros.h" + +-#ifdef __linux__ +-__asm__(".section .note.ABI-tag, \"a\"\n\t" +- ".p2align 2\n\t" +- ".long 1f - 0f\n\t" /* name length */ +- ".long 3f - 2f\n\t" /* data length */ +- ".long 1\n\t" /* note length */ +- "0: .asciz \"GNU\"\n\t" /* vendor name */ +- "1: .p2align 2\n\t" +- "2: .long 0\n\t" /* note data: the ABI tag */ +- ".long 2,4,20\n\t" /* Minimum kernel version w/TLS */ +- "3: .p2align 2\n\t"); /* pad out section */ +-#endif /* __linux__ */ +- + __asm__(".text\n" + ".balign 32\n" + "x86_64_entry_start:"); +diff --git a/src/mapi/mapi/entry_x86_tls.h b/src/mapi/mapi/entry_x86_tls.h +index de91812..58d09ca 100644 +--- a/src/mapi/mapi/entry_x86_tls.h ++++ b/src/mapi/mapi/entry_x86_tls.h +@@ -29,19 +29,6 @@ + #include + #include "u_macros.h" + +-#ifdef __linux__ +-__asm__(".section .note.ABI-tag, \"a\"\n\t" +- ".p2align 2\n\t" +- ".long 1f - 0f\n\t" /* name length */ +- ".long 3f - 2f\n\t" /* data length */ +- ".long 1\n\t" /* note length */ +- "0: .asciz \"GNU\"\n\t" /* vendor name */ +- "1: .p2align 2\n\t" +- "2: .long 0\n\t" /* note data: the ABI tag */ +- ".long 2,4,20\n\t" /* Minimum kernel version w/TLS */ +- "3: .p2align 2\n\t"); /* pad out section */ +-#endif /* __linux__ */ +- + __asm__(".text"); + + __asm__("x86_current_tls:\n\t" +diff --git a/src/mapi/mapi/u_compiler.h b/src/mapi/mapi/u_compiler.h +index 2b019ed..f376e97 100644 +--- a/src/mapi/mapi/u_compiler.h ++++ b/src/mapi/mapi/u_compiler.h +@@ -1,28 +1,10 @@ + #ifndef _U_COMPILER_H_ + #define _U_COMPILER_H_ + +-/* Function inlining */ +-#ifndef inline +-# ifdef __cplusplus +- /* C++ supports inline keyword */ +-# elif defined(__GNUC__) +-# define inline __inline__ +-# elif defined(_MSC_VER) +-# define inline __inline +-# elif defined(__ICL) +-# define inline __inline +-# elif defined(__INTEL_COMPILER) +- /* Intel compiler supports inline keyword */ +-# elif defined(__WATCOMC__) && (__WATCOMC__ >= 1100) +-# define inline __inline +-# elif defined(__SUNPRO_C) && defined(__C99FEATURES__) +- /* C99 supports inline keyword */ +-# elif (__STDC_VERSION__ >= 199901L) +- /* C99 supports inline keyword */ +-# else +-# define inline +-# endif +-#endif ++#include "c99_compat.h" /* inline, __func__, etc. */ ++ ++ ++/* XXX: Use standard `inline` keyword instead */ + #ifndef INLINE + # define INLINE inline + #endif +diff --git a/src/mesa/drivers/dri/i965/brw_clear.c b/src/mesa/drivers/dri/i965/brw_clear.c +index 53d8e54..cde1a06 100644 +--- a/src/mesa/drivers/dri/i965/brw_clear.c ++++ b/src/mesa/drivers/dri/i965/brw_clear.c +@@ -40,6 +40,8 @@ + #include "intel_mipmap_tree.h" + #include "intel_regions.h" + ++#include "brw_context.h" ++ + #define FILE_DEBUG_FLAG DEBUG_BLIT + + static const char *buffer_names[] = { +@@ -219,7 +221,8 @@ brw_fast_clear_depth(struct gl_context *ctx) + static void + brw_clear(struct gl_context *ctx, GLbitfield mask) + { +- struct intel_context *intel = intel_context(ctx); ++ struct brw_context *brw = brw_context(ctx); ++ struct intel_context *intel = &brw->intel; + + if (!_mesa_check_conditional_render(ctx)) + return; +@@ -229,6 +232,7 @@ brw_clear(struct gl_context *ctx, GLbitfield mask) + } + + intel_prepare_render(intel); ++ brw_workaround_depthstencil_alignment(brw); + + if (mask & BUFFER_BIT_DEPTH) { + if (brw_fast_clear_depth(ctx)) { +diff --git a/src/mesa/drivers/dri/i965/brw_defines.h b/src/mesa/drivers/dri/i965/brw_defines.h +index 79cc12f..4bcfb95 100644 +--- a/src/mesa/drivers/dri/i965/brw_defines.h ++++ b/src/mesa/drivers/dri/i965/brw_defines.h +@@ -437,6 +437,7 @@ + #define BRW_SURFACEFORMAT_B10G10R10A2_SSCALED 0x1B9 + #define BRW_SURFACEFORMAT_B10G10R10A2_UINT 0x1BA + #define BRW_SURFACEFORMAT_B10G10R10A2_SINT 0x1BB ++#define BRW_SURFACEFORMAT_RAW 0x1FF + #define BRW_SURFACE_FORMAT_SHIFT 18 + #define BRW_SURFACE_FORMAT_MASK INTEL_MASK(26, 18) + +@@ -857,6 +858,7 @@ enum brw_message_target { + GEN6_SFID_DATAPORT_CONSTANT_CACHE = 9, + + GEN7_SFID_DATAPORT_DATA_CACHE = 10, ++ HSW_SFID_DATAPORT_DATA_CACHE_1 = 12, + }; + + #define GEN7_MESSAGE_TARGET_DP_DATA_CACHE 10 +@@ -965,7 +967,44 @@ enum brw_message_target { + + /* GEN7 */ + #define GEN7_DATAPORT_WRITE_MESSAGE_OWORD_DUAL_BLOCK_WRITE 10 ++#define GEN7_DATAPORT_DC_OWORD_BLOCK_READ 0 ++#define GEN7_DATAPORT_DC_UNALIGNED_OWORD_BLOCK_READ 1 ++#define GEN7_DATAPORT_DC_OWORD_DUAL_BLOCK_READ 2 + #define GEN7_DATAPORT_DC_DWORD_SCATTERED_READ 3 ++#define GEN7_DATAPORT_DC_BYTE_SCATTERED_READ 4 ++#define GEN7_DATAPORT_DC_UNTYPED_SURFACE_READ 5 ++#define GEN7_DATAPORT_DC_UNTYPED_ATOMIC_OP 6 ++#define GEN7_DATAPORT_DC_MEMORY_FENCE 7 ++#define GEN7_DATAPORT_DC_OWORD_BLOCK_WRITE 8 ++#define GEN7_DATAPORT_DC_OWORD_DUAL_BLOCK_WRITE 10 ++#define GEN7_DATAPORT_DC_DWORD_SCATTERED_WRITE 11 ++#define GEN7_DATAPORT_DC_BYTE_SCATTERED_WRITE 12 ++#define GEN7_DATAPORT_DC_UNTYPED_SURFACE_WRITE 13 ++ ++/* HSW */ ++#define HSW_DATAPORT_DC_PORT0_OWORD_BLOCK_READ 0 ++#define HSW_DATAPORT_DC_PORT0_UNALIGNED_OWORD_BLOCK_READ 1 ++#define HSW_DATAPORT_DC_PORT0_OWORD_DUAL_BLOCK_READ 2 ++#define HSW_DATAPORT_DC_PORT0_DWORD_SCATTERED_READ 3 ++#define HSW_DATAPORT_DC_PORT0_BYTE_SCATTERED_READ 4 ++#define HSW_DATAPORT_DC_PORT0_MEMORY_FENCE 7 ++#define HSW_DATAPORT_DC_PORT0_OWORD_BLOCK_WRITE 8 ++#define HSW_DATAPORT_DC_PORT0_OWORD_DUAL_BLOCK_WRITE 10 ++#define HSW_DATAPORT_DC_PORT0_DWORD_SCATTERED_WRITE 11 ++#define HSW_DATAPORT_DC_PORT0_BYTE_SCATTERED_WRITE 12 ++ ++#define HSW_DATAPORT_DC_PORT1_UNTYPED_SURFACE_READ 1 ++#define HSW_DATAPORT_DC_PORT1_UNTYPED_ATOMIC_OP 2 ++#define HSW_DATAPORT_DC_PORT1_UNTYPED_ATOMIC_OP_SIMD4X2 3 ++#define HSW_DATAPORT_DC_PORT1_MEDIA_BLOCK_READ 4 ++#define HSW_DATAPORT_DC_PORT1_TYPED_SURFACE_READ 5 ++#define HSW_DATAPORT_DC_PORT1_TYPED_ATOMIC_OP 6 ++#define HSW_DATAPORT_DC_PORT1_TYPED_ATOMIC_OP_SIMD4X2 7 ++#define HSW_DATAPORT_DC_PORT1_UNTYPED_SURFACE_WRITE 9 ++#define HSW_DATAPORT_DC_PORT1_MEDIA_BLOCK_WRITE 10 ++#define HSW_DATAPORT_DC_PORT1_ATOMIC_COUNTER_OP 11 ++#define HSW_DATAPORT_DC_PORT1_ATOMIC_COUNTER_OP_SIMD4X2 12 ++#define HSW_DATAPORT_DC_PORT1_TYPED_SURFACE_WRITE 13 + + /* dataport atomic operations. */ + #define BRW_AOP_AND 1 +diff --git a/src/mesa/drivers/dri/i965/brw_eu_emit.c b/src/mesa/drivers/dri/i965/brw_eu_emit.c +index b34754a..40cae37 100644 +--- a/src/mesa/drivers/dri/i965/brw_eu_emit.c ++++ b/src/mesa/drivers/dri/i965/brw_eu_emit.c +@@ -2539,15 +2539,22 @@ void brw_shader_time_add(struct brw_compile *p, + brw_set_src0(p, send, brw_vec1_reg(BRW_MESSAGE_REGISTER_FILE, + base_mrf, 0)); + ++ uint32_t sfid, msg_type; ++ if (intel->is_haswell) { ++ sfid = HSW_SFID_DATAPORT_DATA_CACHE_1; ++ msg_type = HSW_DATAPORT_DC_PORT1_UNTYPED_ATOMIC_OP; ++ } else { ++ sfid = GEN7_SFID_DATAPORT_DATA_CACHE; ++ msg_type = GEN7_DATAPORT_DC_UNTYPED_ATOMIC_OP; ++ } ++ + bool header_present = false; + bool eot = false; + uint32_t mlen = 2; /* offset, value */ + uint32_t rlen = 0; +- brw_set_message_descriptor(p, send, +- GEN7_SFID_DATAPORT_DATA_CACHE, +- mlen, rlen, header_present, eot); ++ brw_set_message_descriptor(p, send, sfid, mlen, rlen, header_present, eot); + +- send->bits3.ud |= 6 << 14; /* untyped atomic op */ ++ send->bits3.ud |= msg_type << 14; + send->bits3.ud |= 0 << 13; /* no return data */ + send->bits3.ud |= 1 << 12; /* SIMD8 mode */ + send->bits3.ud |= BRW_AOP_ADD << 8; +diff --git a/src/mesa/drivers/dri/i965/brw_fs.cpp b/src/mesa/drivers/dri/i965/brw_fs.cpp +index f80219e..4924441 100644 +--- a/src/mesa/drivers/dri/i965/brw_fs.cpp ++++ b/src/mesa/drivers/dri/i965/brw_fs.cpp +@@ -2295,7 +2295,8 @@ clear_deps_for_inst_src(fs_inst *inst, int dispatch_width, bool *deps, + void + fs_visitor::insert_gen4_pre_send_dependency_workarounds(fs_inst *inst) + { +- int write_len = inst->regs_written() * dispatch_width / 8; ++ int reg_size = dispatch_width / 8; ++ int write_len = inst->regs_written() * reg_size; + int first_write_grf = inst->dst.reg; + bool needs_dep[BRW_MAX_MRF]; + assert(write_len < (int)sizeof(needs_dep) - 1); +@@ -2334,14 +2335,19 @@ fs_visitor::insert_gen4_pre_send_dependency_workarounds(fs_inst *inst) + * instruction but a MOV that might have left us an outstanding + * dependency has more latency than a MOV. + */ +- if (scan_inst->dst.file == GRF && +- scan_inst->dst.reg >= first_write_grf && +- scan_inst->dst.reg < first_write_grf + write_len && +- needs_dep[scan_inst->dst.reg - first_write_grf]) { +- inst->insert_before(DEP_RESOLVE_MOV(scan_inst->dst.reg)); +- needs_dep[scan_inst->dst.reg - first_write_grf] = false; +- if (scan_inst_16wide) +- needs_dep[scan_inst->dst.reg - first_write_grf + 1] = false; ++ if (scan_inst->dst.file == GRF) { ++ for (int i = 0; i < scan_inst->regs_written(); i++) { ++ int reg = scan_inst->dst.reg + i * reg_size; ++ ++ if (reg >= first_write_grf && ++ reg < first_write_grf + write_len && ++ needs_dep[reg - first_write_grf]) { ++ inst->insert_before(DEP_RESOLVE_MOV(reg)); ++ needs_dep[reg - first_write_grf] = false; ++ if (scan_inst_16wide) ++ needs_dep[reg - first_write_grf + 1] = false; ++ } ++ } + } + + /* Clear the flag for registers that actually got read (as expected). */ +@@ -2494,6 +2500,8 @@ fs_visitor::lower_uniform_pull_constant_loads() + inst->insert_before(setup2); + inst->opcode = FS_OPCODE_UNIFORM_PULL_CONSTANT_LOAD_GEN7; + inst->src[1] = payload; ++ ++ this->live_intervals_valid = false; + } else { + /* Before register allocation, we didn't tell the scheduler about the + * MRF we use. We know it's safe to use this MRF because nothing +diff --git a/src/mesa/drivers/dri/i965/brw_fs_live_variables.cpp b/src/mesa/drivers/dri/i965/brw_fs_live_variables.cpp +index db8f397..4c7991d 100644 +--- a/src/mesa/drivers/dri/i965/brw_fs_live_variables.cpp ++++ b/src/mesa/drivers/dri/i965/brw_fs_live_variables.cpp +@@ -190,6 +190,37 @@ fs_visitor::calculate_live_intervals() + int reg = inst->src[i].reg; + + use[reg] = ip; ++ ++ /* In most cases, a register can be written over safely by the ++ * same instruction that is its last use. For a single ++ * instruction, the sources are dereferenced before writing of the ++ * destination starts (naturally). This gets more complicated for ++ * simd16, because the instruction: ++ * ++ * mov(16) g4<1>F g4<8,8,1>F g6<8,8,1>F ++ * ++ * is actually decoded in hardware as: ++ * ++ * mov(8) g4<1>F g4<8,8,1>F g6<8,8,1>F ++ * mov(8) g5<1>F g5<8,8,1>F g7<8,8,1>F ++ * ++ * Which is safe. However, if we have uniform accesses ++ * happening, we get into trouble: ++ * ++ * mov(8) g4<1>F g4<0,1,0>F g6<8,8,1>F ++ * mov(8) g5<1>F g4<0,1,0>F g7<8,8,1>F ++ * ++ * Now our destination for the first instruction overwrote the ++ * second instruction's src0, and we get garbage for those 8 ++ * pixels. There's a similar issue for the pre-gen6 ++ * pixel_x/pixel_y, which are registers of 16-bit values and thus ++ * would get stomped by the first decode as well. ++ */ ++ if (dispatch_width == 16 && (inst->src[i].smear || ++ (this->pixel_x.reg == reg || ++ this->pixel_y.reg == reg))) { ++ use[reg]++; ++ } + } + } + +@@ -264,28 +295,5 @@ fs_visitor::virtual_grf_interferes(int a, int b) + int start = MAX2(a_def, b_def); + int end = MIN2(a_use, b_use); + +- /* If the register is used to store 16 values of less than float +- * size (only the case for pixel_[xy]), then we can't allocate +- * another dword-sized thing to that register that would be used in +- * the same instruction. This is because when the GPU decodes (for +- * example): +- * +- * (declare (in ) vec4 gl_FragCoord@0x97766a0) +- * add(16) g6<1>F g6<8,8,1>UW 0.5F { align1 compr }; +- * +- * it's actually processed as: +- * add(8) g6<1>F g6<8,8,1>UW 0.5F { align1 }; +- * add(8) g7<1>F g6.8<8,8,1>UW 0.5F { align1 sechalf }; +- * +- * so our second half values in g6 got overwritten in the first +- * half. +- */ +- if (dispatch_width == 16 && (this->pixel_x.reg == a || +- this->pixel_x.reg == b || +- this->pixel_y.reg == a || +- this->pixel_y.reg == b)) { +- return start <= end; +- } +- + return start < end; + } +diff --git a/src/mesa/drivers/dri/i965/brw_state.h b/src/mesa/drivers/dri/i965/brw_state.h +index ecc61c4..02ce57b 100644 +--- a/src/mesa/drivers/dri/i965/brw_state.h ++++ b/src/mesa/drivers/dri/i965/brw_state.h +@@ -216,6 +216,8 @@ void gen7_set_surface_mcs_info(struct brw_context *brw, + bool is_render_target); + void gen7_check_surface_setup(uint32_t *surf, bool is_render_target); + void gen7_init_vtable_surface_functions(struct brw_context *brw); ++void gen7_create_shader_time_surface(struct brw_context *brw, ++ uint32_t *out_offset); + + /* brw_wm_sampler_state.c */ + uint32_t translate_wrap_mode(GLenum wrap, bool using_nearest); +diff --git a/src/mesa/drivers/dri/i965/brw_vs_surface_state.c b/src/mesa/drivers/dri/i965/brw_vs_surface_state.c +index 4da7eaa..2aefc0c 100644 +--- a/src/mesa/drivers/dri/i965/brw_vs_surface_state.c ++++ b/src/mesa/drivers/dri/i965/brw_vs_surface_state.c +@@ -137,14 +137,11 @@ const struct brw_tracked_state brw_vs_ubo_surfaces = { + static void + brw_vs_upload_binding_table(struct brw_context *brw) + { +- struct intel_context *intel = &brw->intel; + uint32_t *bind; + int i; + + if (INTEL_DEBUG & DEBUG_SHADER_TIME) { +- intel->vtbl.create_constant_surface(brw, brw->shader_time.bo, 0, +- brw->shader_time.bo->size, +- &brw->vs.surf_offset[SURF_INDEX_VS_SHADER_TIME]); ++ gen7_create_shader_time_surface(brw, &brw->vs.surf_offset[SURF_INDEX_VS_SHADER_TIME]); + + assert(brw->vs.prog_data->num_surfaces <= SURF_INDEX_VS_SHADER_TIME); + brw->vs.prog_data->num_surfaces = SURF_INDEX_VS_SHADER_TIME; +diff --git a/src/mesa/drivers/dri/i965/brw_wm_surface_state.c b/src/mesa/drivers/dri/i965/brw_wm_surface_state.c +index 6ec7d71..657a56f 100644 +--- a/src/mesa/drivers/dri/i965/brw_wm_surface_state.c ++++ b/src/mesa/drivers/dri/i965/brw_wm_surface_state.c +@@ -770,7 +770,8 @@ brw_get_texture_swizzle(const struct gl_context *ctx, + case GL_RED: + case GL_RG: + case GL_RGB: +- swizzles[3] = SWIZZLE_ONE; ++ if (_mesa_get_format_bits(img->TexFormat, GL_ALPHA_BITS) > 0) ++ swizzles[3] = SWIZZLE_ONE; + break; + } + +@@ -1468,14 +1469,11 @@ const struct brw_tracked_state brw_wm_ubo_surfaces = { + static void + brw_upload_wm_binding_table(struct brw_context *brw) + { +- struct intel_context *intel = &brw->intel; + uint32_t *bind; + int i; + + if (INTEL_DEBUG & DEBUG_SHADER_TIME) { +- intel->vtbl.create_constant_surface(brw, brw->shader_time.bo, 0, +- brw->shader_time.bo->size, +- &brw->wm.surf_offset[SURF_INDEX_WM_SHADER_TIME]); ++ gen7_create_shader_time_surface(brw, &brw->wm.surf_offset[SURF_INDEX_WM_SHADER_TIME]); + } + + /* Might want to calculate nr_surfaces first, to avoid taking up so much +diff --git a/src/mesa/drivers/dri/i965/gen6_cc.c b/src/mesa/drivers/dri/i965/gen6_cc.c +index d32f636..7ac5d5f 100644 +--- a/src/mesa/drivers/dri/i965/gen6_cc.c ++++ b/src/mesa/drivers/dri/i965/gen6_cc.c +@@ -126,7 +126,7 @@ gen6_upload_blend_state(struct brw_context *brw) + * not read the alpha channel, but will instead use the correct + * implicit value for alpha. + */ +- if (!_mesa_base_format_has_channel(rb->_BaseFormat, GL_TEXTURE_ALPHA_TYPE)) ++ if (rb && !_mesa_base_format_has_channel(rb->_BaseFormat, GL_TEXTURE_ALPHA_TYPE)) + { + srcRGB = brw_fix_xRGB_alpha(srcRGB); + srcA = brw_fix_xRGB_alpha(srcA); +diff --git a/src/mesa/drivers/dri/i965/gen7_wm_surface_state.c b/src/mesa/drivers/dri/i965/gen7_wm_surface_state.c +index 24f1b9c..2913fc6 100644 +--- a/src/mesa/drivers/dri/i965/gen7_wm_surface_state.c ++++ b/src/mesa/drivers/dri/i965/gen7_wm_surface_state.c +@@ -413,6 +413,46 @@ gen7_create_constant_surface(struct brw_context *brw, + gen7_check_surface_setup(surf, false /* is_render_target */); + } + ++/** ++ * Create a surface for shader time. ++ */ ++void ++gen7_create_shader_time_surface(struct brw_context *brw, uint32_t *out_offset) ++{ ++ struct intel_context *intel = &brw->intel; ++ const int w = brw->shader_time.bo->size - 1; ++ ++ uint32_t *surf = brw_state_batch(brw, AUB_TRACE_SURFACE_STATE, ++ 8 * 4, 32, out_offset); ++ memset(surf, 0, 8 * 4); ++ ++ surf[0] = BRW_SURFACE_BUFFER << BRW_SURFACE_TYPE_SHIFT | ++ BRW_SURFACEFORMAT_RAW << BRW_SURFACE_FORMAT_SHIFT | ++ BRW_SURFACE_RC_READ_WRITE; ++ ++ surf[1] = brw->shader_time.bo->offset; /* reloc */ ++ ++ surf[2] = SET_FIELD(w & 0x7f, GEN7_SURFACE_WIDTH) | ++ SET_FIELD((w >> 7) & 0x1fff, GEN7_SURFACE_HEIGHT); ++ surf[3] = SET_FIELD((w >> 20) & 0x7f, BRW_SURFACE_DEPTH); ++ ++ /* Unlike texture or renderbuffer surfaces, we only do untyped operations ++ * on the shader_time surface, so there's no need to set HSW channel ++ * overrides. ++ */ ++ ++ /* Emit relocation to surface contents. Section 5.1.1 of the gen4 ++ * bspec ("Data Cache") says that the data cache does not exist as ++ * a separate cache and is just the sampler cache. ++ */ ++ drm_intel_bo_emit_reloc(intel->batch.bo, ++ *out_offset + 4, ++ brw->shader_time.bo, 0, ++ I915_GEM_DOMAIN_SAMPLER, 0); ++ ++ gen7_check_surface_setup(surf, false /* is_render_target */); ++} ++ + static void + gen7_update_null_renderbuffer_surface(struct brw_context *brw, unsigned unit) + { +diff --git a/src/mesa/drivers/dri/intel/intel_screen.c b/src/mesa/drivers/dri/intel/intel_screen.c +index 5ec93f1..4173c0f 100644 +--- a/src/mesa/drivers/dri/intel/intel_screen.c ++++ b/src/mesa/drivers/dri/intel/intel_screen.c +@@ -312,7 +312,7 @@ intel_create_image_from_name(__DRIscreen *screen, + cpp = _mesa_get_format_bytes(image->format); + image->region = intel_region_alloc_for_handle(intelScreen, + cpp, width, height, +- pitch, name, "image"); ++ pitch * cpp, name, "image"); + if (image->region == NULL) { + free(image); + return NULL; +diff --git a/src/mesa/drivers/dri/nouveau/nouveau_driver.c b/src/mesa/drivers/dri/nouveau/nouveau_driver.c +index f56b3b2..6c119d5 100644 +--- a/src/mesa/drivers/dri/nouveau/nouveau_driver.c ++++ b/src/mesa/drivers/dri/nouveau/nouveau_driver.c +@@ -69,7 +69,8 @@ nouveau_flush(struct gl_context *ctx) + __DRIdri2LoaderExtension *dri2 = screen->dri2.loader; + __DRIdrawable *drawable = nctx->dri_context->driDrawablePriv; + +- dri2->flushFrontBuffer(drawable, drawable->loaderPrivate); ++ if (drawable && drawable->loaderPrivate) ++ dri2->flushFrontBuffer(drawable, drawable->loaderPrivate); + } + } + +diff --git a/src/mesa/drivers/dri/nouveau/nv10_context.c b/src/mesa/drivers/dri/nouveau/nv10_context.c +index 7eda4e0..4ffc4ef 100644 +--- a/src/mesa/drivers/dri/nouveau/nv10_context.c ++++ b/src/mesa/drivers/dri/nouveau/nv10_context.c +@@ -469,7 +469,7 @@ nv10_context_create(struct nouveau_screen *screen, const struct gl_config *visua + goto fail; + + /* 3D engine. */ +- if (context_chipset(ctx) >= 0x17) ++ if (context_chipset(ctx) >= 0x17 && context_chipset(ctx) != 0x1a) + celsius_class = NV17_3D_CLASS; + else if (context_chipset(ctx) >= 0x11) + celsius_class = NV15_3D_CLASS; +diff --git a/src/mesa/drivers/osmesa/Makefile.am b/src/mesa/drivers/osmesa/Makefile.am +index c4b178b..2503401 100644 +--- a/src/mesa/drivers/osmesa/Makefile.am ++++ b/src/mesa/drivers/osmesa/Makefile.am +@@ -24,6 +24,7 @@ + AM_CPPFLAGS = \ + -I$(top_srcdir)/include \ + -I$(top_srcdir)/src/mapi \ ++ -I$(top_builddir)/src/mapi \ + -I$(top_srcdir)/src/mesa/ \ + $(DEFINES) \ + $(API_DEFINES) +diff --git a/src/mesa/main/compiler.h b/src/mesa/main/compiler.h +index b22b994..8b23665 100644 +--- a/src/mesa/main/compiler.h ++++ b/src/mesa/main/compiler.h +@@ -48,6 +48,8 @@ + #include + #include + ++#include "c99_compat.h" /* inline, __func__, etc. */ ++ + + #ifdef __cplusplus + extern "C" { +@@ -111,30 +113,7 @@ extern "C" { + + + +-/** +- * Function inlining +- */ +-#ifndef inline +-# ifdef __cplusplus +- /* C++ supports inline keyword */ +-# elif defined(__GNUC__) +-# define inline __inline__ +-# elif defined(_MSC_VER) +-# define inline __inline +-# elif defined(__ICL) +-# define inline __inline +-# elif defined(__INTEL_COMPILER) +- /* Intel compiler supports inline keyword */ +-# elif defined(__WATCOMC__) && (__WATCOMC__ >= 1100) +-# define inline __inline +-# elif defined(__SUNPRO_C) && defined(__C99FEATURES__) +- /* C99 supports inline keyword */ +-# elif (__STDC_VERSION__ >= 199901L) +- /* C99 supports inline keyword */ +-# else +-# define inline +-# endif +-#endif ++/* XXX: Use standard `inline` keyword instead */ + #ifndef INLINE + # define INLINE inline + #endif +@@ -177,35 +156,10 @@ extern "C" { + # endif + #endif + +-/** +- * The __FUNCTION__ gcc variable is generally only used for debugging. +- * If we're not using gcc, define __FUNCTION__ as a cpp symbol here. +- * Don't define it if using a newer Windows compiler. +- */ ++/* XXX: Use standard `__func__` instead */ + #ifndef __FUNCTION__ +-# if !defined(__GNUC__) && !defined(__xlC__) && \ +- (!defined(_MSC_VER) || _MSC_VER < 1300) +-# if (__STDC_VERSION__ >= 199901L) /* C99 */ || \ +- (defined(__SUNPRO_C) && defined(__C99FEATURES__)) +-# define __FUNCTION__ __func__ +-# else +-# define __FUNCTION__ " " +-# endif +-# endif ++# define __FUNCTION__ __func__ + #endif +-#ifndef __func__ +-# if (__STDC_VERSION__ >= 199901L) || \ +- (defined(__SUNPRO_C) && defined(__C99FEATURES__)) +- /* __func__ is part of C99 */ +-# elif defined(_MSC_VER) +-# if _MSC_VER >= 1300 +-# define __func__ __FUNCTION__ +-# else +-# define __func__ " " +-# endif +-# endif +-#endif +- + + /** + * Either define MESA_BIG_ENDIAN or MESA_LITTLE_ENDIAN, and CPU_TO_LE32. +@@ -353,8 +307,9 @@ static INLINE GLuint CPU_TO_LE32(GLuint x) + * USE_IEEE: Determine if we're using IEEE floating point + */ + #if defined(__i386__) || defined(__386__) || defined(__sparc__) || \ +- defined(__s390x__) || defined(__powerpc__) || \ ++ defined(__s390__) || defined(__s390x__) || defined(__powerpc__) || \ + defined(__x86_64__) || \ ++ defined(__m68k__) || \ + defined(ia64) || defined(__ia64__) || \ + defined(__hppa__) || defined(hpux) || \ + defined(__mips) || defined(_MIPS_ARCH) || \ +diff --git a/src/mesa/main/fbobject.c b/src/mesa/main/fbobject.c +index 257f839..61c1151 100644 +--- a/src/mesa/main/fbobject.c ++++ b/src/mesa/main/fbobject.c +@@ -3160,7 +3160,9 @@ _mesa_BlitFramebuffer(GLint srcX0, GLint srcY0, GLint srcX1, GLint srcY1, + } + } + +- if (!mask) { ++ if (!mask || ++ (srcX1 - srcX0) == 0 || (srcY1 - srcY0) == 0 || ++ (dstX1 - dstX0) == 0 || (dstY1 - dstY0) == 0) { + return; + } + +diff --git a/src/mesa/main/get.c b/src/mesa/main/get.c +index 5f4e2fa..6fb2f5d 100644 +--- a/src/mesa/main/get.c ++++ b/src/mesa/main/get.c +@@ -34,6 +34,7 @@ + #include "state.h" + #include "texcompress.h" + #include "framebuffer.h" ++#include "samplerobj.h" + + /* This is a table driven implemetation of the glGet*v() functions. + * The basic idea is that most getters just look up an int somewhere +@@ -823,7 +824,16 @@ find_custom_value(struct gl_context *ctx, const struct value_desc *d, union valu + { + struct gl_sampler_object *samp = + ctx->Texture.Unit[ctx->Texture.CurrentUnit].Sampler; +- v->value_int = samp ? samp->Name : 0; ++ ++ /* ++ * The sampler object may have been deleted on another context, ++ * so we try to lookup the sampler object before returning its Name. ++ */ ++ if (samp && _mesa_lookup_samplerobj(ctx, samp->Name)) { ++ v->value_int = samp->Name; ++ } else { ++ v->value_int = 0; ++ } + } + break; + /* GL_ARB_uniform_buffer_object */ +diff --git a/src/mesa/main/get_hash_params.py b/src/mesa/main/get_hash_params.py +index 9aab889..15c1c4d 100644 +--- a/src/mesa/main/get_hash_params.py ++++ b/src/mesa/main/get_hash_params.py +@@ -412,7 +412,7 @@ descriptor=[ + [ "DEPTH_SCALE", "CONTEXT_FLOAT(Pixel.DepthScale), NO_EXTRA" ], + [ "DOUBLEBUFFER", "BUFFER_INT(Visual.doubleBufferMode), NO_EXTRA" ], + [ "DRAW_BUFFER", "BUFFER_ENUM(ColorDrawBuffer[0]), NO_EXTRA" ], +- [ "EDGE_FLAG", "LOC_CUSTOM, TYPE_BOOLEAN, 0, NO_EXTRA" ], ++ [ "EDGE_FLAG", "LOC_CUSTOM, TYPE_BOOLEAN, 0, extra_flush_current" ], + [ "FEEDBACK_BUFFER_SIZE", "CONTEXT_INT(Feedback.BufferSize), NO_EXTRA" ], + [ "FEEDBACK_BUFFER_TYPE", "CONTEXT_ENUM(Feedback.Type), NO_EXTRA" ], + [ "FOG_INDEX", "CONTEXT_FLOAT(Fog.Index), NO_EXTRA" ], +diff --git a/src/mesa/main/mtypes.h b/src/mesa/main/mtypes.h +index 3369623..8f906ae 100644 +--- a/src/mesa/main/mtypes.h ++++ b/src/mesa/main/mtypes.h +@@ -1274,6 +1274,7 @@ struct gl_texture_object + GLfloat Priority; /**< in [0,1] */ + GLint BaseLevel; /**< min mipmap level, OpenGL 1.2 */ + GLint MaxLevel; /**< max mipmap level, OpenGL 1.2 */ ++ GLint ImmutableLevels; /**< ES 3.0 / ARB_texture_view */ + GLint _MaxLevel; /**< actual max mipmap level (q in the spec) */ + GLfloat _MaxLambda; /**< = _MaxLevel - BaseLevel (q - b in spec) */ + GLint CropRect[4]; /**< GL_OES_draw_texture */ +diff --git a/src/mesa/main/samplerobj.c b/src/mesa/main/samplerobj.c +index 319a444..5cff329 100644 +--- a/src/mesa/main/samplerobj.c ++++ b/src/mesa/main/samplerobj.c +@@ -40,7 +40,7 @@ + #include "main/samplerobj.h" + + +-static struct gl_sampler_object * ++struct gl_sampler_object * + _mesa_lookup_samplerobj(struct gl_context *ctx, GLuint name) + { + if (name == 0) +@@ -206,9 +206,19 @@ _mesa_DeleteSamplers(GLsizei count, const GLuint *samplers) + + for (i = 0; i < count; i++) { + if (samplers[i]) { ++ GLuint j; + struct gl_sampler_object *sampObj = + _mesa_lookup_samplerobj(ctx, samplers[i]); ++ + if (sampObj) { ++ /* If the sampler is currently bound, unbind it. */ ++ for (j = 0; j < ctx->Const.MaxCombinedTextureImageUnits; j++) { ++ if (ctx->Texture.Unit[j].Sampler == sampObj) { ++ FLUSH_VERTICES(ctx, _NEW_TEXTURE); ++ _mesa_reference_sampler_object(ctx, &ctx->Texture.Unit[j].Sampler, NULL); ++ } ++ } ++ + /* The ID is immediately freed for re-use */ + _mesa_HashRemove(ctx->Shared->SamplerObjects, samplers[i]); + /* But the object exists until its reference count goes to zero */ +diff --git a/src/mesa/main/samplerobj.h b/src/mesa/main/samplerobj.h +index 3114257..69e3899 100644 +--- a/src/mesa/main/samplerobj.h ++++ b/src/mesa/main/samplerobj.h +@@ -62,6 +62,8 @@ _mesa_reference_sampler_object(struct gl_context *ctx, + _mesa_reference_sampler_object_(ctx, ptr, samp); + } + ++extern struct gl_sampler_object * ++_mesa_lookup_samplerobj(struct gl_context *ctx, GLuint name); + + extern struct gl_sampler_object * + _mesa_new_sampler_object(struct gl_context *ctx, GLuint name); +diff --git a/src/mesa/main/tests/hash_table/Makefile.am b/src/mesa/main/tests/hash_table/Makefile.am +index 272c63a..f63841d 100644 +--- a/src/mesa/main/tests/hash_table/Makefile.am ++++ b/src/mesa/main/tests/hash_table/Makefile.am +@@ -19,6 +19,7 @@ + # CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + + AM_CPPFLAGS = \ ++ -I$(top_srcdir)/include \ + -I$(top_srcdir)/src/mesa/main \ + $(API_DEFINES) $(DEFINES) $(INCLUDE_DIRS) + +diff --git a/src/mesa/main/texgetimage.c b/src/mesa/main/texgetimage.c +index 7299a4b..74b09ef 100644 +--- a/src/mesa/main/texgetimage.c ++++ b/src/mesa/main/texgetimage.c +@@ -518,6 +518,7 @@ get_tex_rgba(struct gl_context *ctx, GLuint dimensions, + if (type_needs_clamping(type)) { + /* the returned image type can't have negative values */ + if (dataType == GL_FLOAT || ++ dataType == GL_HALF_FLOAT || + dataType == GL_SIGNED_NORMALIZED || + format == GL_LUMINANCE || + format == GL_LUMINANCE_ALPHA) { +diff --git a/src/mesa/main/teximage.c b/src/mesa/main/teximage.c +index 1b9525b..1b91b89 100644 +--- a/src/mesa/main/teximage.c ++++ b/src/mesa/main/teximage.c +@@ -1362,6 +1362,7 @@ _mesa_legal_texture_dimensions(struct gl_context *ctx, GLenum target, + return GL_FALSE; + return GL_TRUE; + ++ case GL_TEXTURE_CUBE_MAP: + case GL_TEXTURE_CUBE_MAP_POSITIVE_X: + case GL_TEXTURE_CUBE_MAP_NEGATIVE_X: + case GL_TEXTURE_CUBE_MAP_POSITIVE_Y: +@@ -3438,19 +3439,21 @@ copyteximage(struct gl_context *ctx, GLuint dims, + _mesa_init_teximage_fields(ctx, texImage, width, height, 1, + border, internalFormat, texFormat); + +- /* Allocate texture memory (no pixel data yet) */ +- ctx->Driver.AllocTextureImageBuffer(ctx, texImage); ++ if (width && height) { ++ /* Allocate texture memory (no pixel data yet) */ ++ ctx->Driver.AllocTextureImageBuffer(ctx, texImage); + +- if (_mesa_clip_copytexsubimage(ctx, &dstX, &dstY, &srcX, &srcY, +- &width, &height)) { +- struct gl_renderbuffer *srcRb = +- get_copy_tex_image_source(ctx, texImage->TexFormat); ++ if (_mesa_clip_copytexsubimage(ctx, &dstX, &dstY, &srcX, &srcY, ++ &width, &height)) { ++ struct gl_renderbuffer *srcRb = ++ get_copy_tex_image_source(ctx, texImage->TexFormat); + +- ctx->Driver.CopyTexSubImage(ctx, dims, texImage, dstX, dstY, dstZ, +- srcRb, srcX, srcY, width, height); +- } ++ ctx->Driver.CopyTexSubImage(ctx, dims, texImage, dstX, dstY, dstZ, ++ srcRb, srcX, srcY, width, height); ++ } + +- check_gen_mipmap(ctx, target, texObj, level); ++ check_gen_mipmap(ctx, target, texObj, level); ++ } + + _mesa_update_fbo_texture(ctx, texObj, face, level); + +diff --git a/src/mesa/main/texparam.c b/src/mesa/main/texparam.c +index 6f18ec6..dd67baa 100644 +--- a/src/mesa/main/texparam.c ++++ b/src/mesa/main/texparam.c +@@ -1432,6 +1432,12 @@ _mesa_GetTexParameterfv( GLenum target, GLenum pname, GLfloat *params ) + *params = (GLfloat) obj->Immutable; + break; + ++ case GL_TEXTURE_IMMUTABLE_LEVELS: ++ if (!_mesa_is_gles3(ctx)) ++ goto invalid_pname; ++ *params = (GLfloat) obj->ImmutableLevels; ++ break; ++ + case GL_REQUIRED_TEXTURE_IMAGE_UNITS_OES: + if (!_mesa_is_gles(ctx) || !ctx->Extensions.OES_EGL_image_external) + goto invalid_pname; +@@ -1609,6 +1615,12 @@ _mesa_GetTexParameteriv( GLenum target, GLenum pname, GLint *params ) + *params = (GLint) obj->Immutable; + break; + ++ case GL_TEXTURE_IMMUTABLE_LEVELS: ++ if (!_mesa_is_gles3(ctx)) ++ goto invalid_pname; ++ *params = obj->ImmutableLevels; ++ break; ++ + case GL_REQUIRED_TEXTURE_IMAGE_UNITS_OES: + if (!_mesa_is_gles(ctx) || !ctx->Extensions.OES_EGL_image_external) + goto invalid_pname; +diff --git a/src/mesa/main/texstorage.c b/src/mesa/main/texstorage.c +index 00f19ba..675fd74 100644 +--- a/src/mesa/main/texstorage.c ++++ b/src/mesa/main/texstorage.c +@@ -397,6 +397,7 @@ texstorage(GLuint dims, GLenum target, GLsizei levels, GLenum internalformat, + } + + texObj->Immutable = GL_TRUE; ++ texObj->ImmutableLevels = levels; + } + } + +diff --git a/src/mesa/vbo/vbo_save_draw.c b/src/mesa/vbo/vbo_save_draw.c +index efb386e..f5b5c41 100644 +--- a/src/mesa/vbo/vbo_save_draw.c ++++ b/src/mesa/vbo/vbo_save_draw.c +@@ -253,7 +253,7 @@ vbo_save_playback_vertex_list(struct gl_context *ctx, void *data) + struct vbo_save_context *save = &vbo_context(ctx)->save; + GLboolean remap_vertex_store = GL_FALSE; + +- if (save->vertex_store->buffer) { ++ if (save->vertex_store && save->vertex_store->buffer) { + /* The vertex store is currently mapped but we're about to replay + * a display list. This can happen when a nested display list is + * being build with GL_COMPILE_AND_EXECUTE. diff --git a/mesa.spec b/mesa.spec index cdf5126..7d9f01b 100644 --- a/mesa.spec +++ b/mesa.spec @@ -47,8 +47,8 @@ Summary: Mesa graphics libraries Name: mesa -Version: 9.1 -Release: 6%{?dist} +Version: 9.1.1 +Release: 1%{?dist} License: MIT Group: System Environment/Libraries URL: http://www.mesa3d.org @@ -64,7 +64,7 @@ Source3: make-git-snapshot.sh Source4: Mesa-MLAA-License-Clarification-Email.txt # git diff-tree -p mesa-9.1..origin/9.1 > `git describe origin/9.1`.patch -Patch0: mesa-9.1-53-gd0ccb5b.patch +Patch0: mesa-9.1.1-53-g3cff41c.patch Patch1: nv50-fix-build.patch Patch2: intel-revert-gl3.patch @@ -592,6 +592,9 @@ rm -rf $RPM_BUILD_ROOT %endif %changelog +* Sat Apr 27 2013 Dave Airlie 9.1.1-1 +- rebase to Mesa 9.1.1 + fixes from git + * Thu Apr 11 2013 Dave Airlie 9.1-6 - enable glx tls for glamor to work properly diff --git a/sources b/sources index 9dba37c..7ecd241 100644 --- a/sources +++ b/sources @@ -1 +1 @@ -d3891e02215422e120271d976ff1947e MesaLib-9.1.tar.bz2 +6ea2bdc3b7ecfb4257b39814b4182580 MesaLib-9.1.1.tar.bz2