diff --git a/configure.ac b/configure.ac
index 4a98996..1c9d606 100644
--- a/configure.ac
+++ b/configure.ac
@@ -452,6 +452,9 @@ if test "x$enable_asm" = xyes; then
         linux* | *freebsd* | dragonfly* | *netbsd*)
             test "x$enable_64bit" = xyes && asm_arch=x86_64 || asm_arch=x86
             ;;
+        gnu*)
+            asm_arch=x86
+            ;;
         esac
         ;;
     x86_64)
@@ -826,20 +829,6 @@ if test "x$enable_dri" = xyes; then
     fi
 fi
 
-dnl Find out if X is available.
-PKG_CHECK_MODULES([X11], [x11], [no_x=no], [no_x=yes])
-
-dnl Try to tell the user that the --x-* options are only used when
-dnl pkg-config is not available. This must be right after AC_PATH_XTRA.
-m4_divert_once([HELP_BEGIN],
-[These options are only used when the X libraries cannot be found by the
-pkg-config utility.])
-
-dnl We need X for xlib and dri, so bomb now if it's not found
-if test "x$enable_glx" = xyes -a "x$no_x" = xyes; then
-    AC_MSG_ERROR([X11 development libraries needed for GLX])
-fi
-
 dnl Direct rendering or just indirect rendering
 case "$host_os" in
 gnu*)
diff --git a/docs/relnotes-9.1.1.html b/docs/relnotes-9.1.1.html
index 8921c8f..a73c974 100644
--- a/docs/relnotes-9.1.1.html
+++ b/docs/relnotes-9.1.1.html
@@ -30,6 +30,9 @@ because GL_ARB_compatibility is not supported.
 
 <h2>MD5 checksums</h2>
 <pre>
+6508d9882d8dce7106717f365632700c  MesaLib-9.1.1.tar.gz
+6ea2bdc3b7ecfb4257b39814b4182580  MesaLib-9.1.1.tar.bz2
+3434c0eb47849a08c53cd32833d10d13  MesaLib-9.1.1.zip
 </pre>
 
 <h2>New features</h2>
diff --git a/include/c99_compat.h b/include/c99_compat.h
new file mode 100644
index 0000000..3a9f502
--- /dev/null
+++ b/include/c99_compat.h
@@ -0,0 +1,147 @@
+/**************************************************************************
+ *
+ * Copyright 2007-2013 VMware, Inc.
+ * All Rights Reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sub license, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the
+ * next paragraph) shall be included in all copies or substantial portions
+ * of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
+ * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
+ * IN NO EVENT SHALL VMWARE AND/OR ITS SUPPLIERS BE LIABLE FOR
+ * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
+ * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
+ * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ *
+ **************************************************************************/
+
+#ifndef _C99_COMPAT_H_
+#define _C99_COMPAT_H_
+
+
+/*
+ * MSVC hacks.
+ */
+#if defined(_MSC_VER)
+   /*
+    * Visual Studio 2012 will complain if we define the `inline` keyword, but
+    * actually it only supports the keyword on C++.
+    *
+    * We could skip this check by defining _ALLOW_KEYWORD_MACROS, but there is
+    * probably value in checking this for other keywords.  So simply include
+    * the checking before we define it below.
+    */
+#  if _MSC_VER >= 1700
+#    include <xkeycheck.h>
+#  endif
+
+   /*
+    * XXX: MSVC has a `__restrict` keyword, but it also has a
+    * `__declspec(restrict)` modifier, so it is impossible to define a
+    * `restrict` macro without interfering with the latter.  Furthermore the
+    * MSVC standard library uses __declspec(restrict) under the _CRTRESTRICT
+    * macro.  For now resolve this issue by redefining _CRTRESTRICT, but going
+    * forward we should probably stop using restrict, especially
+    * considering that our code does not obey strict aliasing rules anyway.
+    */
+#  include <crtdefs.h>
+#  undef _CRTRESTRICT
+#  define _CRTRESTRICT
+#endif
+
+
+/*
+ * C99 inline keyword
+ */
+#ifndef inline
+#  ifdef __cplusplus
+     /* C++ supports inline keyword */
+#  elif defined(__GNUC__)
+#    define inline __inline__
+#  elif defined(_MSC_VER)
+#    define inline __inline
+#  elif defined(__ICL)
+#    define inline __inline
+#  elif defined(__INTEL_COMPILER)
+     /* Intel compiler supports inline keyword */
+#  elif defined(__WATCOMC__) && (__WATCOMC__ >= 1100)
+#    define inline __inline
+#  elif defined(__SUNPRO_C) && defined(__C99FEATURES__)
+     /* C99 supports inline keyword */
+#  elif (__STDC_VERSION__ >= 199901L)
+     /* C99 supports inline keyword */
+#  else
+#    define inline
+#  endif
+#endif
+
+
+/*
+ * C99 restrict keyword
+ *
+ * See also:
+ * - http://cellperformance.beyond3d.com/articles/2006/05/demystifying-the-restrict-keyword.html
+ */
+#ifndef restrict
+#  if (__STDC_VERSION__ >= 199901L)
+     /* C99 */
+#  elif defined(__SUNPRO_C) && defined(__C99FEATURES__)
+     /* C99 */
+#  elif defined(__GNUC__)
+#    define restrict __restrict__
+#  elif defined(_MSC_VER)
+#    define restrict __restrict
+#  else
+#    define restrict /* */
+#  endif
+#endif
+
+
+/*
+ * C99 __func__ macro
+ */
+#ifndef __func__
+#  if (__STDC_VERSION__ >= 199901L)
+     /* C99 */
+#  elif defined(__SUNPRO_C) && defined(__C99FEATURES__)
+     /* C99 */
+#  elif defined(__GNUC__)
+#    if __GNUC__ >= 2
+#      define __func__ __FUNCTION__
+#    else
+#      define __func__ "<unknown>"
+#    endif
+#  elif defined(_MSC_VER)
+#    if _MSC_VER >= 1300
+#      define __func__ __FUNCTION__
+#    else
+#      define __func__ "<unknown>"
+#    endif
+#  else
+#    define __func__ "<unknown>"
+#  endif
+#endif
+
+
+/* Simple test case for debugging */
+#if 0
+static inline const char *
+test_c99_compat_h(const void * restrict a,
+                  const void * restrict b)
+{
+   return __func__;
+}
+#endif
+
+
+#endif /* _C99_COMPAT_H_ */
diff --git a/src/egl/main/eglcompiler.h b/src/egl/main/eglcompiler.h
index 9823693..2499172 100644
--- a/src/egl/main/eglcompiler.h
+++ b/src/egl/main/eglcompiler.h
@@ -31,6 +31,9 @@
 #define EGLCOMPILER_INCLUDED
 
 
+#include "c99_compat.h" /* inline, __func__, etc. */
+
+
 /**
  * Get standard integer types
  */
@@ -62,30 +65,7 @@
 #endif
 
 
-/**
- * Function inlining
- */
-#ifndef inline
-#  ifdef __cplusplus
-     /* C++ supports inline keyword */
-#  elif defined(__GNUC__)
-#    define inline __inline__
-#  elif defined(_MSC_VER)
-#    define inline __inline
-#  elif defined(__ICL)
-#    define inline __inline
-#  elif defined(__INTEL_COMPILER)
-     /* Intel compiler supports inline keyword */
-#  elif defined(__WATCOMC__) && (__WATCOMC__ >= 1100)
-#    define inline __inline
-#  elif defined(__SUNPRO_C) && defined(__C99FEATURES__)
-     /* C99 supports inline keyword */
-#  elif (__STDC_VERSION__ >= 199901L)
-     /* C99 supports inline keyword */
-#  else
-#    define inline
-#  endif
-#endif
+/* XXX: Use standard `inline` keyword instead */
 #ifndef INLINE
 #  define INLINE inline
 #endif
@@ -104,21 +84,9 @@
 #  endif
 #endif
 
-/**
- * The __FUNCTION__ gcc variable is generally only used for debugging.
- * If we're not using gcc, define __FUNCTION__ as a cpp symbol here.
- * Don't define it if using a newer Windows compiler.
- */
+/* XXX: Use standard `__func__` instead */
 #ifndef __FUNCTION__
-# if (!defined __GNUC__) && (!defined __xlC__) && \
-      (!defined(_MSC_VER) || _MSC_VER < 1300)
-#  if (__STDC_VERSION__ >= 199901L) /* C99 */ || \
-    (defined(__SUNPRO_C) && defined(__C99FEATURES__))
-#   define __FUNCTION__ __func__
-#  else
-#   define __FUNCTION__ "<unknown>"
-#  endif
-# endif
+#  define __FUNCTION__ __func__
 #endif
 
 #endif /* EGLCOMPILER_INCLUDED */
diff --git a/src/gallium/auxiliary/Makefile.am b/src/gallium/auxiliary/Makefile.am
index a4eee47..f14279b 100644
--- a/src/gallium/auxiliary/Makefile.am
+++ b/src/gallium/auxiliary/Makefile.am
@@ -7,7 +7,10 @@ noinst_LTLIBRARIES = libgallium.la
 
 AM_CFLAGS = \
 	-I$(top_srcdir)/src/gallium/auxiliary/util \
-	$(GALLIUM_CFLAGS)
+	$(GALLIUM_CFLAGS) \
+	$(VISIBILITY_CFLAGS)
+
+AM_CXXFLAGS = $(VISIBILITY_CXXFLAGS)
 
 libgallium_la_SOURCES = \
 	$(C_SOURCES) \
@@ -18,7 +21,7 @@ if HAVE_MESA_LLVM
 AM_CFLAGS += \
 	$(LLVM_CFLAGS)
 
-AM_CXXFLAGS = \
+AM_CXXFLAGS += \
 	$(GALLIUM_CFLAGS) \
 	$(LLVM_CXXFLAGS)
 
diff --git a/src/gallium/auxiliary/gallivm/lp_bld_tgsi.h b/src/gallium/auxiliary/gallivm/lp_bld_tgsi.h
index 4898849..5fb4a11 100644
--- a/src/gallium/auxiliary/gallivm/lp_bld_tgsi.h
+++ b/src/gallium/auxiliary/gallivm/lp_bld_tgsi.h
@@ -240,6 +240,7 @@ struct lp_exec_mask {
    struct lp_build_context *bld;
 
    boolean has_mask;
+   boolean ret_in_main;
 
    LLVMTypeRef int_vec_type;
 
diff --git a/src/gallium/auxiliary/gallivm/lp_bld_tgsi_soa.c b/src/gallium/auxiliary/gallivm/lp_bld_tgsi_soa.c
index 0621fb4..413a918 100644
--- a/src/gallium/auxiliary/gallivm/lp_bld_tgsi_soa.c
+++ b/src/gallium/auxiliary/gallivm/lp_bld_tgsi_soa.c
@@ -73,6 +73,7 @@ static void lp_exec_mask_init(struct lp_exec_mask *mask, struct lp_build_context
 
    mask->bld = bld;
    mask->has_mask = FALSE;
+   mask->ret_in_main = FALSE;
    mask->cond_stack_size = 0;
    mask->loop_stack_size = 0;
    mask->call_stack_size = 0;
@@ -108,7 +109,7 @@ static void lp_exec_mask_update(struct lp_exec_mask *mask)
    } else
       mask->exec_mask = mask->cond_mask;
 
-   if (mask->call_stack_size) {
+   if (mask->call_stack_size || mask->ret_in_main) {
       mask->exec_mask = LLVMBuildAnd(builder,
                                      mask->exec_mask,
                                      mask->ret_mask,
@@ -117,7 +118,8 @@ static void lp_exec_mask_update(struct lp_exec_mask *mask)
 
    mask->has_mask = (mask->cond_stack_size > 0 ||
                      mask->loop_stack_size > 0 ||
-                     mask->call_stack_size > 0);
+                     mask->call_stack_size > 0 ||
+                     mask->ret_in_main);
 }
 
 static void lp_exec_mask_cond_push(struct lp_exec_mask *mask,
@@ -348,11 +350,23 @@ static void lp_exec_mask_ret(struct lp_exec_mask *mask, int *pc)
    LLVMBuilderRef builder = mask->bld->gallivm->builder;
    LLVMValueRef exec_mask;
 
-   if (mask->call_stack_size == 0) {
+   if (mask->cond_stack_size == 0 &&
+       mask->loop_stack_size == 0 &&
+       mask->call_stack_size == 0) {
       /* returning from main() */
       *pc = -1;
       return;
    }
+
+   if (mask->call_stack_size == 0) {
+      /*
+       * This requires special handling since we need to ensure
+       * we don't drop the mask even if we have no call stack
+       * (e.g. after a ret in an if clause after the endif)
+       */
+      mask->ret_in_main = TRUE;
+   }
+
    exec_mask = LLVMBuildNot(builder,
                             mask->exec_mask,
                             "ret");
diff --git a/src/gallium/auxiliary/tgsi/tgsi_text.c b/src/gallium/auxiliary/tgsi/tgsi_text.c
index 1267e79..dc3a5fb 100644
--- a/src/gallium/auxiliary/tgsi/tgsi_text.c
+++ b/src/gallium/auxiliary/tgsi/tgsi_text.c
@@ -1569,7 +1569,7 @@ tgsi_text_translate(
    struct tgsi_token *tokens,
    uint num_tokens )
 {
-   struct translate_ctx ctx;
+   struct translate_ctx ctx = {0};
 
    ctx.text = text;
    ctx.cur = text;
diff --git a/src/gallium/drivers/Makefile.am b/src/gallium/drivers/Makefile.am
index 25d9533..3477fee 100644
--- a/src/gallium/drivers/Makefile.am
+++ b/src/gallium/drivers/Makefile.am
@@ -1,6 +1,7 @@
 AUTOMAKE_OPTIONS = subdir-objects
 
 AM_CPPFLAGS = \
+	-I$(top_srcdir)/include \
 	-I$(top_srcdir)/src/gallium/include \
 	-I$(top_srcdir)/src/gallium/auxiliary \
 	-I$(top_srcdir)/src/gallium/drivers \
diff --git a/src/gallium/drivers/llvmpipe/lp_scene.c b/src/gallium/drivers/llvmpipe/lp_scene.c
index 328c0f7..e145391 100644
--- a/src/gallium/drivers/llvmpipe/lp_scene.c
+++ b/src/gallium/drivers/llvmpipe/lp_scene.c
@@ -64,6 +64,28 @@ lp_scene_create( struct pipe_context *pipe )
 
    pipe_mutex_init(scene->mutex);
 
+#ifdef DEBUG
+   /* Do some scene limit sanity checks here */
+   {
+      size_t maxBins = TILES_X * TILES_Y;
+      size_t maxCommandBytes = sizeof(struct cmd_block) * maxBins;
+      size_t maxCommandPlusData = maxCommandBytes + DATA_BLOCK_SIZE;
+      /* We'll need at least one command block per bin.  Make sure that's
+       * less than the max allowed scene size.
+       */
+      assert(maxCommandBytes < LP_SCENE_MAX_SIZE);
+      /* We'll also need space for at least one other data block */
+      assert(maxCommandPlusData <= LP_SCENE_MAX_SIZE);
+
+      /* Ideally, the size of a cmd_block object will be a power of two
+       * in order to avoid wasting space when we allocate them from
+       * data blocks (which are power of two also).
+       */
+      assert(sizeof(struct cmd_block) ==
+             util_next_power_of_two(sizeof(struct cmd_block)));
+   }
+#endif
+
    return scene;
 }
 
diff --git a/src/gallium/drivers/llvmpipe/lp_scene.h b/src/gallium/drivers/llvmpipe/lp_scene.h
index b1db61b..801829d 100644
--- a/src/gallium/drivers/llvmpipe/lp_scene.h
+++ b/src/gallium/drivers/llvmpipe/lp_scene.h
@@ -49,12 +49,18 @@ struct lp_rast_state;
 #define TILES_Y (LP_MAX_HEIGHT / TILE_SIZE)
 
 
-#define CMD_BLOCK_MAX 128
+/* Commands per command block (ideally chosen so that sizeof(cmd_block) is
+ * a power of two.)
+ */
+#define CMD_BLOCK_MAX 29
+
+/* Bytes per data block.
+ */
 #define DATA_BLOCK_SIZE (64 * 1024)
 
 /* Scene temporary storage is clamped to this size:
  */
-#define LP_SCENE_MAX_SIZE (4*1024*1024)
+#define LP_SCENE_MAX_SIZE (9*1024*1024)
 
 /* The maximum amount of texture storage referenced by a scene is
  * clamped ot this size:
diff --git a/src/gallium/drivers/nv50/nv50_blit.h b/src/gallium/drivers/nv50/nv50_blit.h
index d409f21..bdd6a63 100644
--- a/src/gallium/drivers/nv50/nv50_blit.h
+++ b/src/gallium/drivers/nv50/nv50_blit.h
@@ -180,4 +180,44 @@ nv50_blit_eng2d_get_mask(const struct pipe_blit_info *info)
    return mask;
 }
 
+#if NOUVEAU_DRIVER == 0xc0
+# define nv50_format_table nvc0_format_table
+#endif
+
+/* return TRUE for formats that can be converted among each other by the 2D engine */
+static INLINE boolean
+nv50_2d_dst_format_faithful(enum pipe_format format)
+{
+   const uint64_t mask =
+       NV50_ENG2D_SUPPORTED_FORMATS &
+      ~NV50_ENG2D_NOCONVERT_FORMATS;
+   uint8_t id = nv50_format_table[format].rt;
+   return (id >= 0xc0) && (mask & (1ULL << (id - 0xc0)));
+}
+static INLINE boolean
+nv50_2d_src_format_faithful(enum pipe_format format)
+{
+   const uint64_t mask =
+      NV50_ENG2D_SUPPORTED_FORMATS &
+    ~(NV50_ENG2D_LUMINANCE_FORMATS | NV50_ENG2D_INTENSITY_FORMATS);
+   uint8_t id = nv50_format_table[format].rt;
+   return (id >= 0xc0) && (mask & (1ULL << (id - 0xc0)));
+}
+
+static INLINE boolean
+nv50_2d_format_supported(enum pipe_format format)
+{
+   uint8_t id = nv50_format_table[format].rt;
+   return (id >= 0xc0) &&
+      (NV50_ENG2D_SUPPORTED_FORMATS & (1ULL << (id - 0xc0)));
+}
+
+static INLINE boolean
+nv50_2d_dst_format_ops_supported(enum pipe_format format)
+{
+   uint8_t id = nv50_format_table[format].rt;
+   return (id >= 0xc0) &&
+      (NV50_ENG2D_OPERATION_FORMATS & (1ULL << (id - 0xc0)));
+}
+
 #endif /* __NV50_BLIT_H__ */
diff --git a/src/gallium/drivers/nv50/nv50_state_validate.c b/src/gallium/drivers/nv50/nv50_state_validate.c
index a95e96d..f5e7b36 100644
--- a/src/gallium/drivers/nv50/nv50_state_validate.c
+++ b/src/gallium/drivers/nv50/nv50_state_validate.c
@@ -9,6 +9,7 @@ nv50_validate_fb(struct nv50_context *nv50)
    struct pipe_framebuffer_state *fb = &nv50->framebuffer;
    unsigned i;
    unsigned ms_mode = NV50_3D_MULTISAMPLE_MODE_MS1;
+   uint32_t array_size = 0xffff, array_mode = 0;
 
    nouveau_bufctx_reset(nv50->bufctx_3d, NV50_BIND_FB);
 
@@ -23,6 +24,13 @@ nv50_validate_fb(struct nv50_context *nv50)
       struct nv50_surface *sf = nv50_surface(fb->cbufs[i]);
       struct nouveau_bo *bo = mt->base.bo;
 
+      array_size = MIN2(array_size, sf->depth);
+      if (mt->layout_3d)
+         array_mode = NV50_3D_RT_ARRAY_MODE_MODE_3D; /* 1 << 16 */
+
+      /* can't mix 3D with ARRAY or have RTs of different depth/array_size */
+      assert(mt->layout_3d || !array_mode || array_size == 1);
+
       BEGIN_NV04(push, NV50_3D(RT_ADDRESS_HIGH(i)), 5);
       PUSH_DATAh(push, bo->offset + sf->offset);
       PUSH_DATA (push, bo->offset + sf->offset);
@@ -34,7 +42,7 @@ nv50_validate_fb(struct nv50_context *nv50)
          PUSH_DATA (push, sf->width);
          PUSH_DATA (push, sf->height);
          BEGIN_NV04(push, NV50_3D(RT_ARRAY_MODE), 1);
-         PUSH_DATA (push, sf->depth);
+         PUSH_DATA (push, array_mode | array_size);
       } else {
          PUSH_DATA (push, 0);
          PUSH_DATA (push, 0);
@@ -63,7 +71,7 @@ nv50_validate_fb(struct nv50_context *nv50)
       struct nv50_miptree *mt = nv50_miptree(fb->zsbuf->texture);
       struct nv50_surface *sf = nv50_surface(fb->zsbuf);
       struct nouveau_bo *bo = mt->base.bo;
-      int unk = mt->base.base.target == PIPE_TEXTURE_2D;
+      int unk = mt->base.base.target == PIPE_TEXTURE_3D || sf->depth == 1;
 
       BEGIN_NV04(push, NV50_3D(ZETA_ADDRESS_HIGH), 5);
       PUSH_DATAh(push, bo->offset + sf->offset);
diff --git a/src/gallium/drivers/nv50/nv50_surface.c b/src/gallium/drivers/nv50/nv50_surface.c
index 7a0470c..3a780f6 100644
--- a/src/gallium/drivers/nv50/nv50_surface.c
+++ b/src/gallium/drivers/nv50/nv50_surface.c
@@ -35,25 +35,22 @@
 
 #include "nv50_context.h"
 #include "nv50_resource.h"
-#include "nv50_blit.h"
 
 #include "nv50_defs.xml.h"
 #include "nv50_texture.xml.h"
 
+/* these are used in nv50_blit.h */
 #define NV50_ENG2D_SUPPORTED_FORMATS 0xff0843e080608409ULL
+#define NV50_ENG2D_NOCONVERT_FORMATS 0x0008402000000000ULL
+#define NV50_ENG2D_LUMINANCE_FORMATS 0x0008402000000000ULL
+#define NV50_ENG2D_INTENSITY_FORMATS 0x0000000000000000ULL
+#define NV50_ENG2D_OPERATION_FORMATS 0x060001c000608000ULL
 
-/* return TRUE for formats that can be converted among each other by NV50_2D */
-static INLINE boolean
-nv50_2d_format_faithful(enum pipe_format format)
-{
-   uint8_t id = nv50_format_table[format].rt;
-
-   return (id >= 0xc0) &&
-      (NV50_ENG2D_SUPPORTED_FORMATS & (1ULL << (id - 0xc0)));
-}
+#define NOUVEAU_DRIVER 0x50
+#include "nv50_blit.h"
 
 static INLINE uint8_t
-nv50_2d_format(enum pipe_format format)
+nv50_2d_format(enum pipe_format format, boolean dst, boolean dst_src_equal)
 {
    uint8_t id = nv50_format_table[format].rt;
 
@@ -62,6 +59,7 @@ nv50_2d_format(enum pipe_format format)
     */
    if ((id >= 0xc0) && (NV50_ENG2D_SUPPORTED_FORMATS & (1ULL << (id - 0xc0))))
       return id;
+   assert(dst_src_equal);
 
    switch (util_format_get_blocksize(format)) {
    case 1:
@@ -78,7 +76,7 @@ nv50_2d_format(enum pipe_format format)
 static int
 nv50_2d_texture_set(struct nouveau_pushbuf *push, int dst,
                     struct nv50_miptree *mt, unsigned level, unsigned layer,
-                    enum pipe_format pformat)
+                    enum pipe_format pformat, boolean dst_src_pformat_equal)
 {
    struct nouveau_bo *bo = mt->base.bo;
    uint32_t width, height, depth;
@@ -86,7 +84,7 @@ nv50_2d_texture_set(struct nouveau_pushbuf *push, int dst,
    uint32_t mthd = dst ? NV50_2D_DST_FORMAT : NV50_2D_SRC_FORMAT;
    uint32_t offset = mt->level[level].offset;
 
-   format = nv50_2d_format(pformat);
+   format = nv50_2d_format(pformat, dst, dst_src_pformat_equal);
    if (!format) {
       NOUVEAU_ERR("invalid/unsupported surface format: %s\n",
                   util_format_name(pformat));
@@ -155,15 +153,16 @@ nv50_2d_texture_do_copy(struct nouveau_pushbuf *push,
    const enum pipe_format dfmt = dst->base.base.format;
    const enum pipe_format sfmt = src->base.base.format;
    int ret;
+   boolean eqfmt = dfmt == sfmt;
 
    if (!PUSH_SPACE(push, 2 * 16 + 32))
       return PIPE_ERROR;
 
-   ret = nv50_2d_texture_set(push, 1, dst, dst_level, dz, dfmt);
+   ret = nv50_2d_texture_set(push, 1, dst, dst_level, dz, dfmt, eqfmt);
    if (ret)
       return ret;
 
-   ret = nv50_2d_texture_set(push, 0, src, src_level, sz, sfmt);
+   ret = nv50_2d_texture_set(push, 0, src, src_level, sz, sfmt, eqfmt);
    if (ret)
       return ret;
 
@@ -243,8 +242,8 @@ nv50_resource_copy_region(struct pipe_context *pipe,
    }
 
    assert((src->format == dst->format) ||
-          (nv50_2d_format_faithful(src->format) &&
-           nv50_2d_format_faithful(dst->format)));
+          (nv50_2d_src_format_faithful(src->format) &&
+           nv50_2d_dst_format_faithful(dst->format)));
 
    BCTX_REFN(nv50->bufctx, 2D, nv04_resource(src), RD);
    BCTX_REFN(nv50->bufctx, 2D, nv04_resource(dst), WR);
@@ -936,7 +935,7 @@ nv50_blit_3d(struct nv50_context *nv50, const struct pipe_blit_info *info)
    nv50_blit_select_fp(blit, info);
    nv50_blitctx_pre_blit(blit);
 
-   nv50_blit_set_dst(blit, dst, info->dst.level,  0, info->dst.format);
+   nv50_blit_set_dst(blit, dst, info->dst.level, -1, info->dst.format);
    nv50_blit_set_src(blit, src, info->src.level, -1, info->src.format,
                      blit->filter);
 
@@ -977,6 +976,8 @@ nv50_blit_3d(struct nv50_context *nv50, const struct pipe_blit_info *info)
 
    BEGIN_NV04(push, NV50_3D(VIEWPORT_TRANSFORM_EN), 1);
    PUSH_DATA (push, 0);
+   BEGIN_NV04(push, NV50_3D(VIEW_VOLUME_CLIP_CTRL), 1);
+   PUSH_DATA (push, 0x1);
 
    /* Draw a large triangle in screen coordinates covering the whole
     * render target, with scissors defining the destination region.
@@ -1059,7 +1060,8 @@ nv50_blit_eng2d(struct nv50_context *nv50, const struct pipe_blit_info *info)
    int64_t du_dx, dv_dy;
    int i;
    uint32_t mode;
-   const uint32_t mask = nv50_blit_eng2d_get_mask(info);
+   uint32_t mask = nv50_blit_eng2d_get_mask(info);
+   boolean b;
 
    mode = nv50_blit_get_filter(info) ?
       NV50_2D_BLIT_CONTROL_FILTER_BILINEAR :
@@ -1070,8 +1072,9 @@ nv50_blit_eng2d(struct nv50_context *nv50, const struct pipe_blit_info *info)
    du_dx = ((int64_t)info->src.box.width << 32) / info->dst.box.width;
    dv_dy = ((int64_t)info->src.box.height << 32) / info->dst.box.height;
 
-   nv50_2d_texture_set(push, 1, dst, info->dst.level, dz, info->dst.format);
-   nv50_2d_texture_set(push, 0, src, info->src.level, sz, info->src.format);
+   b = info->dst.format == info->src.format;
+   nv50_2d_texture_set(push, 1, dst, info->dst.level, dz, info->dst.format, b);
+   nv50_2d_texture_set(push, 0, src, info->src.level, sz, info->src.format, b);
 
    if (info->scissor_enable) {
       BEGIN_NV04(push, NV50_2D(CLIP_X), 5);
@@ -1094,6 +1097,17 @@ nv50_blit_eng2d(struct nv50_context *nv50, const struct pipe_blit_info *info)
       PUSH_DATA (push, 0xffffffff);
       BEGIN_NV04(push, NV50_2D(OPERATION), 1);
       PUSH_DATA (push, NV50_2D_OPERATION_ROP);
+   } else
+   if (info->src.format != info->dst.format) {
+      if (info->src.format == PIPE_FORMAT_R8_UNORM ||
+          info->src.format == PIPE_FORMAT_R16_UNORM ||
+          info->src.format == PIPE_FORMAT_R16_FLOAT ||
+          info->src.format == PIPE_FORMAT_R32_FLOAT) {
+         mask = 0xffff0000; /* also makes condition for OPERATION reset true */
+         BEGIN_NV04(push, NV50_2D(BETA4), 2);
+         PUSH_DATA (push, mask);
+         PUSH_DATA (push, NV50_2D_OPERATION_SRCCOPY_PREMULT);
+      }
    }
 
    if (src->ms_x > dst->ms_x || src->ms_y > dst->ms_y) {
@@ -1224,10 +1238,25 @@ nv50_blit(struct pipe_context *pipe, const struct pipe_blit_info *info)
       debug_printf("blit: cannot filter array or cube textures in z direction");
    }
 
-   if (!eng3d && info->dst.format != info->src.format)
-      if (!nv50_2d_format_faithful(info->dst.format) ||
-          !nv50_2d_format_faithful(info->src.format))
+   if (!eng3d && info->dst.format != info->src.format) {
+      /* a non-faithful dst always needs the 3D engine; src is refined below */
+      if (!nv50_2d_dst_format_faithful(info->dst.format)) {
+         eng3d = TRUE;
+      } else
+      if (!nv50_2d_src_format_faithful(info->src.format)) {
+         if (!util_format_is_luminance(info->src.format)) {
+            if (util_format_is_intensity(info->src.format))
+               eng3d = TRUE;
+            else
+            if (!nv50_2d_dst_format_ops_supported(info->dst.format))
+               eng3d = TRUE;
+            else
+               eng3d = !nv50_2d_format_supported(info->src.format);
+         }
+      } else
+      if (util_format_is_luminance_alpha(info->src.format))
+         eng3d = TRUE;
+   }
 
    if (info->src.resource->nr_samples == 8 &&
        info->dst.resource->nr_samples <= 1)
diff --git a/src/gallium/drivers/nvc0/nvc0_3d.xml.h b/src/gallium/drivers/nvc0/nvc0_3d.xml.h
index 1cf1f96..bd3de58 100644
--- a/src/gallium/drivers/nvc0/nvc0_3d.xml.h
+++ b/src/gallium/drivers/nvc0/nvc0_3d.xml.h
@@ -1041,7 +1041,7 @@ WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
 #define NVC0_3D_VIEWPORT_TRANSFORM_EN				0x0000192c
 
 #define NVC0_3D_VIEW_VOLUME_CLIP_CTRL				0x0000193c
-#define NVC0_3D_VIEW_VOLUME_CLIP_CTRL_UNK0			0x00000001
+#define NVC0_3D_VIEW_VOLUME_CLIP_CTRL_DEPTH_RANGE_0_1		0x00000001
 #define NVC0_3D_VIEW_VOLUME_CLIP_CTRL_UNK1__MASK		0x00000006
 #define NVC0_3D_VIEW_VOLUME_CLIP_CTRL_UNK1__SHIFT		1
 #define NVC0_3D_VIEW_VOLUME_CLIP_CTRL_UNK1_UNK0			0x00000000
diff --git a/src/gallium/drivers/nvc0/nvc0_surface.c b/src/gallium/drivers/nvc0/nvc0_surface.c
index 281d740..66154a4 100644
--- a/src/gallium/drivers/nvc0/nvc0_surface.c
+++ b/src/gallium/drivers/nvc0/nvc0_surface.c
@@ -36,29 +36,32 @@
 
 #include "nv50/nv50_defs.xml.h"
 #include "nv50/nv50_texture.xml.h"
-#include "nv50/nv50_blit.h"
 
-#define NVC0_ENG2D_SUPPORTED_FORMATS 0xff9ccfe1cce3ccc9ULL
+/* these are used in nv50_blit.h */
+#define NV50_ENG2D_SUPPORTED_FORMATS 0xff9ccfe1cce3ccc9ULL
+#define NV50_ENG2D_NOCONVERT_FORMATS 0x009cc02000000000ULL
+#define NV50_ENG2D_LUMINANCE_FORMATS 0x001cc02000000000ULL
+#define NV50_ENG2D_INTENSITY_FORMATS 0x0080000000000000ULL
+#define NV50_ENG2D_OPERATION_FORMATS 0x060001c000638000ULL
 
-/* return TRUE for formats that can be converted among each other by NVC0_2D */
-static INLINE boolean
-nvc0_2d_format_faithful(enum pipe_format format)
-{
-   uint8_t id = nvc0_format_table[format].rt;
-
-   return (id >= 0xc0) && (NVC0_ENG2D_SUPPORTED_FORMATS & (1ULL << (id - 0xc0)));
-}
+#define NOUVEAU_DRIVER 0xc0
+#include "nv50/nv50_blit.h"
 
 static INLINE uint8_t
-nvc0_2d_format(enum pipe_format format)
+nvc0_2d_format(enum pipe_format format, boolean dst, boolean dst_src_equal)
 {
    uint8_t id = nvc0_format_table[format].rt;
 
+   /* A8_UNORM is treated as I8_UNORM as far as the 2D engine is concerned. */
+   if (!dst && unlikely(format == PIPE_FORMAT_I8_UNORM) && !dst_src_equal)
+      return NV50_SURFACE_FORMAT_A8_UNORM;
+
    /* Hardware values for color formats range from 0xc0 to 0xff,
     * but the 2D engine doesn't support all of them.
     */
-   if (nvc0_2d_format_faithful(format))
+   if (nv50_2d_format_supported(format))
       return id;
+   assert(dst_src_equal);
 
    switch (util_format_get_blocksize(format)) {
    case 1:
@@ -72,6 +75,7 @@ nvc0_2d_format(enum pipe_format format)
    case 16:
       return NV50_SURFACE_FORMAT_RGBA32_FLOAT;
    default:
+      assert(0);
       return 0;
    }
 }
@@ -79,7 +83,7 @@ nvc0_2d_format(enum pipe_format format)
 static int
 nvc0_2d_texture_set(struct nouveau_pushbuf *push, boolean dst,
                     struct nv50_miptree *mt, unsigned level, unsigned layer,
-                    enum pipe_format pformat)
+                    enum pipe_format pformat, boolean dst_src_pformat_equal)
 {
    struct nouveau_bo *bo = mt->base.bo;
    uint32_t width, height, depth;
@@ -87,7 +91,7 @@ nvc0_2d_texture_set(struct nouveau_pushbuf *push, boolean dst,
    uint32_t mthd = dst ? NVC0_2D_DST_FORMAT : NVC0_2D_SRC_FORMAT;
    uint32_t offset = mt->level[level].offset;
 
-   format = nvc0_2d_format(pformat);
+   format = nvc0_2d_format(pformat, dst, dst_src_pformat_equal);
    if (!format) {
       NOUVEAU_ERR("invalid/unsupported surface format: %s\n",
                   util_format_name(pformat));
@@ -157,15 +161,16 @@ nvc0_2d_texture_do_copy(struct nouveau_pushbuf *push,
    const enum pipe_format dfmt = dst->base.base.format;
    const enum pipe_format sfmt = src->base.base.format;
    int ret;
+   boolean eqfmt = dfmt == sfmt;
 
    if (!PUSH_SPACE(push, 2 * 16 + 32))
       return PIPE_ERROR;
 
-   ret = nvc0_2d_texture_set(push, TRUE, dst, dst_level, dz, dfmt);
+   ret = nvc0_2d_texture_set(push, TRUE, dst, dst_level, dz, dfmt, eqfmt);
    if (ret)
       return ret;
 
-   ret = nvc0_2d_texture_set(push, FALSE, src, src_level, sz, sfmt);
+   ret = nvc0_2d_texture_set(push, FALSE, src, src_level, sz, sfmt, eqfmt);
    if (ret)
       return ret;
 
@@ -243,8 +248,8 @@ nvc0_resource_copy_region(struct pipe_context *pipe,
       return;
    }
 
-   assert(nvc0_2d_format_faithful(src->format));
-   assert(nvc0_2d_format_faithful(dst->format));
+   assert(nv50_2d_dst_format_faithful(dst->format));
+   assert(nv50_2d_src_format_faithful(src->format));
 
    BCTX_REFN(nvc0->bufctx, 2D, nv04_resource(src), RD);
    BCTX_REFN(nvc0->bufctx, 2D, nv04_resource(dst), WR);
@@ -490,19 +495,19 @@ nvc0_blitter_make_vp(struct nvc0_blitter *blit)
 {
    static const uint32_t code_nvc0[] =
    {
-      0xfff01c66, 0x06000080, /* vfetch b128 { $r0 $r1 $r2 $r3 } a[0x80] */
-      0xfff11c26, 0x06000090, /* vfetch b96 { $r4 $r5 $r6 } a[0x90]*/
-      0x03f01c66, 0x0a7e0070, /* export b128 o[0x70] { $r0 $r1 $r2 $r3 } */
-      0x13f01c26, 0x0a7e0080, /* export b96 o[0x80] { $r4 $r5 $r6 } */
+      0xfff11c26, 0x06000080, /* vfetch b64 $r4:$r5 a[0x80] */
+      0xfff01c46, 0x06000090, /* vfetch b96 $r0:$r1:$r2 a[0x90] */
+      0x13f01c26, 0x0a7e0070, /* export b64 o[0x70] $r4:$r5 */
+      0x03f01c46, 0x0a7e0080, /* export b96 o[0x80] $r0:$r1:$r2 */
       0x00001de7, 0x80000000, /* exit */
    };
    static const uint32_t code_nve4[] =
    {
       0x00000007, 0x20000000, /* sched */
-      0xfff01c66, 0x06000080, /* vfetch b128 { $r0 $r1 $r2 $r3 } a[0x80] */
-      0xfff11c46, 0x06000090, /* vfetch b96 { $r4 $r5 $r6 } a[0x90]*/
-      0x03f01c66, 0x0a7e0070, /* export b128 o[0x70] { $r0 $r1 $r2 $r3 } */
-      0x13f01c46, 0x0a7e0080, /* export b96 o[0x80] { $r4 $r5 $r6 } */
+      0xfff11c26, 0x06000080, /* vfetch b64 $r4:$r5 a[0x80] */
+      0xfff01c46, 0x06000090, /* vfetch b96 $r0:$r1:$r2 a[0x90] */
+      0x13f01c26, 0x0a7e0070, /* export b64 o[0x70] $r4:$r5 */
+      0x03f01c46, 0x0a7e0080, /* export b96 o[0x80] $r0:$r1:$r2 */
       0x00001de7, 0x80000000, /* exit */
    };
 
@@ -515,13 +520,13 @@ nvc0_blitter_make_vp(struct nvc0_blitter *blit)
       blit->vp.code = (uint32_t *)code_nvc0; /* const_cast */
       blit->vp.code_size = sizeof(code_nvc0);
    }
-   blit->vp.max_gpr = 7;
+   blit->vp.max_gpr = 6;
    blit->vp.vp.edgeflag = PIPE_MAX_ATTRIBS;
 
    blit->vp.hdr[0]  = 0x00020461; /* vertprog magic */
    blit->vp.hdr[4]  = 0x000ff000; /* no outputs read */
-   blit->vp.hdr[6]  = 0x0000003f; /* a[0x80], a[0x90] */
-   blit->vp.hdr[13] = 0x0003f000; /* o[0x70], o[0x80] */
+   blit->vp.hdr[6]  = 0x00000073; /* a[0x80].xy, a[0x90].xyz */
+   blit->vp.hdr[13] = 0x00073000; /* o[0x70].xy, o[0x80].xyz */
 }
 
 static void
@@ -820,7 +825,7 @@ nvc0_blit_3d(struct nvc0_context *nvc0, const struct pipe_blit_info *info)
    nvc0_blit_select_fp(blit, info);
    nvc0_blitctx_pre_blit(blit);
 
-   nvc0_blit_set_dst(blit, dst, info->dst.level,  0, info->dst.format);
+   nvc0_blit_set_dst(blit, dst, info->dst.level, -1, info->dst.format);
    nvc0_blit_set_src(blit, src, info->src.level, -1, info->src.format,
                      blit->filter);
 
@@ -859,6 +864,8 @@ nvc0_blit_3d(struct nvc0_context *nvc0, const struct pipe_blit_info *info)
       z += 0.5f * dz;
 
    IMMED_NVC0(push, NVC0_3D(VIEWPORT_TRANSFORM_EN), 0);
+   IMMED_NVC0(push, NVC0_3D(VIEW_VOLUME_CLIP_CTRL), 0x2 |
+              NVC0_3D_VIEW_VOLUME_CLIP_CTRL_DEPTH_RANGE_0_1);
    BEGIN_NVC0(push, NVC0_3D(VIEWPORT_HORIZ(0)), 2);
    PUSH_DATA (push, nvc0->framebuffer.width << 16);
    PUSH_DATA (push, nvc0->framebuffer.height << 16);
@@ -925,11 +932,14 @@ nvc0_blit_3d(struct nvc0_context *nvc0, const struct pipe_blit_info *info)
    if (info->dst.box.z + info->dst.box.depth - 1)
       IMMED_NVC0(push, NVC0_3D(LAYER), 0);
 
-   /* re-enable normally constant state */
+   nvc0_blitctx_post_blit(blit);
 
-   IMMED_NVC0(push, NVC0_3D(VIEWPORT_TRANSFORM_EN), 1);
+   /* restore viewport */
 
-   nvc0_blitctx_post_blit(blit);
+   BEGIN_NVC0(push, NVC0_3D(VIEWPORT_HORIZ(0)), 2);
+   PUSH_DATA (push, nvc0->framebuffer.width << 16);
+   PUSH_DATA (push, nvc0->framebuffer.height << 16);
+   IMMED_NVC0(push, NVC0_3D(VIEWPORT_TRANSFORM_EN), 1);
 }
 
 static void
@@ -948,7 +958,8 @@ nvc0_blit_eng2d(struct nvc0_context *nvc0, const struct pipe_blit_info *info)
    int64_t du_dx, dv_dy;
    int i;
    uint32_t mode;
-   const uint32_t mask = nv50_blit_eng2d_get_mask(info);
+   uint32_t mask = nv50_blit_eng2d_get_mask(info);
+   boolean b;
 
    mode = nv50_blit_get_filter(info) ?
       NVC0_2D_BLIT_CONTROL_FILTER_BILINEAR :
@@ -959,8 +970,9 @@ nvc0_blit_eng2d(struct nvc0_context *nvc0, const struct pipe_blit_info *info)
    du_dx = ((int64_t)info->src.box.width << 32) / info->dst.box.width;
    dv_dy = ((int64_t)info->src.box.height << 32) / info->dst.box.height;
 
-   nvc0_2d_texture_set(push, 1, dst, info->dst.level, dz, info->dst.format);
-   nvc0_2d_texture_set(push, 0, src, info->src.level, sz, info->src.format);
+   b = info->dst.format == info->src.format;
+   nvc0_2d_texture_set(push, 1, dst, info->dst.level, dz, info->dst.format, b);
+   nvc0_2d_texture_set(push, 0, src, info->src.level, sz, info->src.format, b);
 
    if (info->scissor_enable) {
       BEGIN_NVC0(push, NVC0_2D(CLIP_X), 5);
@@ -981,6 +993,25 @@ nvc0_blit_eng2d(struct nvc0_context *nvc0, const struct pipe_blit_info *info)
       PUSH_DATA (push, 0xffffffff);
       PUSH_DATA (push, 0xffffffff);
       IMMED_NVC0(push, NVC0_2D(OPERATION), NVC0_2D_OPERATION_ROP);
+   } else
+   if (info->src.format != info->dst.format) {
+      if (info->src.format == PIPE_FORMAT_R8_UNORM ||
+          info->src.format == PIPE_FORMAT_R8_SNORM ||
+          info->src.format == PIPE_FORMAT_R16_UNORM ||
+          info->src.format == PIPE_FORMAT_R16_SNORM ||
+          info->src.format == PIPE_FORMAT_R16_FLOAT ||
+          info->src.format == PIPE_FORMAT_R32_FLOAT) {
+         mask = 0xffff0000; /* also makes condition for OPERATION reset true */
+         BEGIN_NVC0(push, NVC0_2D(BETA4), 2);
+         PUSH_DATA (push, mask);
+         PUSH_DATA (push, NVC0_2D_OPERATION_SRCCOPY_PREMULT);
+      } else
+      if (info->src.format == PIPE_FORMAT_A8_UNORM) {
+         mask = 0xff000000;
+         BEGIN_NVC0(push, NVC0_2D(BETA4), 2);
+         PUSH_DATA (push, mask);
+         PUSH_DATA (push, NVC0_2D_OPERATION_SRCCOPY_PREMULT);
+      }
    }
 
    if (src->ms_x > dst->ms_x || src->ms_y > dst->ms_y) {
@@ -1106,10 +1137,24 @@ nvc0_blit(struct pipe_context *pipe, const struct pipe_blit_info *info)
       debug_printf("blit: cannot filter array or cube textures in z direction");
    }
 
-   if (!eng3d && info->dst.format != info->src.format)
-      if (!nvc0_2d_format_faithful(info->dst.format) ||
-          !nvc0_2d_format_faithful(info->src.format))
+   if (!eng3d && info->dst.format != info->src.format) {
+      if (!nv50_2d_dst_format_faithful(info->dst.format)) {
+         eng3d = TRUE;
+      } else
+      if (!nv50_2d_src_format_faithful(info->src.format)) {
+         if (!util_format_is_luminance(info->src.format)) {
+            if (util_format_is_intensity(info->src.format))
+               eng3d = info->src.format != PIPE_FORMAT_I8_UNORM;
+            else
+            if (!nv50_2d_dst_format_ops_supported(info->dst.format))
+               eng3d = TRUE;
+            else
+               eng3d = !nv50_2d_format_supported(info->src.format);
+         }
+      } else
+      if (util_format_is_luminance_alpha(info->src.format))
          eng3d = TRUE;
+   }
 
    if (info->src.resource->nr_samples == 8 &&
        info->dst.resource->nr_samples <= 1)
diff --git a/src/gallium/drivers/r300/compiler/radeon_optimize.c b/src/gallium/drivers/r300/compiler/radeon_optimize.c
index 734c7f2..74afd6f 100644
--- a/src/gallium/drivers/r300/compiler/radeon_optimize.c
+++ b/src/gallium/drivers/r300/compiler/radeon_optimize.c
@@ -708,6 +708,7 @@ static int peephole_mul_omod(
 	struct rc_list * writer_list;
 	struct rc_variable * var;
 	struct peephole_mul_cb_data cb_data;
+	unsigned writemask_sum;
 
 	for (i = 0; i < 2; i++) {
 		unsigned int j;
@@ -815,10 +816,11 @@ static int peephole_mul_omod(
 	}
 
 	/* Rewrite the instructions */
+	writemask_sum = rc_variable_writemask_sum(writer_list->Item);
 	for (var = writer_list->Item; var; var = var->Friend) {
 		struct rc_variable * writer = var;
 		unsigned conversion_swizzle = rc_make_conversion_swizzle(
-					writer->Inst->U.I.DstReg.WriteMask,
+					writemask_sum,
 					inst_mul->U.I.DstReg.WriteMask);
 		writer->Inst->U.I.Omod = omod_op;
 		writer->Inst->U.I.DstReg.File = inst_mul->U.I.DstReg.File;
diff --git a/src/gallium/drivers/r600/r600_pipe.c b/src/gallium/drivers/r600/r600_pipe.c
index a7973a5..80b859f 100644
--- a/src/gallium/drivers/r600/r600_pipe.c
+++ b/src/gallium/drivers/r600/r600_pipe.c
@@ -1157,7 +1157,7 @@ struct pipe_screen *r600_screen_create(struct radeon_winsys *ws)
 	 * case were triggering lockup quickly such as :
 	 * piglit/bin/depthstencil-render-miplevels 1024 d=s=z24_s8
 	 */
-	rscreen->use_hyperz = debug_get_bool_option("R600_HYPERZ", TRUE);
+	rscreen->use_hyperz = debug_get_bool_option("R600_HYPERZ", FALSE);
 	rscreen->use_hyperz = rscreen->info.drm_minor >= 26 ? rscreen->use_hyperz : FALSE;
 
 	rscreen->global_pool = compute_memory_pool_new(rscreen);
diff --git a/src/gallium/drivers/r600/r600_query.c b/src/gallium/drivers/r600/r600_query.c
index 0335189..782ad26 100644
--- a/src/gallium/drivers/r600/r600_query.c
+++ b/src/gallium/drivers/r600/r600_query.c
@@ -186,10 +186,11 @@ static void r600_emit_query_end(struct r600_context *ctx, struct r600_query *que
 	case PIPE_QUERY_PRIMITIVES_GENERATED:
 	case PIPE_QUERY_SO_STATISTICS:
 	case PIPE_QUERY_SO_OVERFLOW_PREDICATE:
+		va += query->buffer.results_end + query->result_size/2;
 		cs->buf[cs->cdw++] = PKT3(PKT3_EVENT_WRITE, 2, 0);
 		cs->buf[cs->cdw++] = EVENT_TYPE(EVENT_TYPE_SAMPLE_STREAMOUTSTATS) | EVENT_INDEX(3);
-		cs->buf[cs->cdw++] = query->buffer.results_end + query->result_size/2;
-		cs->buf[cs->cdw++] = 0;
+		cs->buf[cs->cdw++] = va;
+		cs->buf[cs->cdw++] = (va >> 32UL) & 0xFF;
 		break;
 	case PIPE_QUERY_TIME_ELAPSED:
 		va += query->buffer.results_end + query->result_size/2;
diff --git a/src/gallium/drivers/radeon/Makefile.am b/src/gallium/drivers/radeon/Makefile.am
index e6eb241..a3a7b74 100644
--- a/src/gallium/drivers/radeon/Makefile.am
+++ b/src/gallium/drivers/radeon/Makefile.am
@@ -1,11 +1,14 @@
 include Makefile.sources
 include $(top_srcdir)/src/gallium/Automake.inc
 
+LIBGALLIUM_LIBS=
+
 if HAVE_GALLIUM_R600
 if HAVE_GALLIUM_RADEONSI
 lib_LTLIBRARIES = libllvmradeon@VERSION@.la
 libllvmradeon@VERSION@_la_LDFLAGS = -Wl, -shared -avoid-version \
 	$(LLVM_LDFLAGS)
+LIBGALLIUM_LIBS += $(top_builddir)/src/gallium/auxiliary/libgallium.la
 else
 noinst_LTLIBRARIES = libllvmradeon@VERSION@.la
 endif
@@ -26,5 +29,6 @@ libllvmradeon@VERSION@_la_SOURCES = \
 	$(C_FILES)
 
 libllvmradeon@VERSION@_la_LIBADD = \
+	$(LIBGALLIUM_LIBS) \
 	$(CLOCK_LIB) \
 	$(LLVM_LIBS)
diff --git a/src/gallium/drivers/radeonsi/si_state_draw.c b/src/gallium/drivers/radeonsi/si_state_draw.c
index 8c35625..93766a3 100644
--- a/src/gallium/drivers/radeonsi/si_state_draw.c
+++ b/src/gallium/drivers/radeonsi/si_state_draw.c
@@ -401,6 +401,11 @@ static void si_update_derived_state(struct r600_context *rctx)
 	}
 
 	if (si_pm4_state_changed(rctx, ps) || si_pm4_state_changed(rctx, vs)) {
+		/* XXX: Emitting the PS state even when only the VS changed
+		 * fixes random failures with piglit glsl-max-varyings.
+		 * Not sure why...
+		 */
+		rctx->emitted.named.ps = NULL;
 		si_update_spi_map(rctx);
 	}
 }
diff --git a/src/gallium/drivers/rbug/Makefile.am b/src/gallium/drivers/rbug/Makefile.am
index 655bfe1..3c1a8b5 100644
--- a/src/gallium/drivers/rbug/Makefile.am
+++ b/src/gallium/drivers/rbug/Makefile.am
@@ -30,6 +30,7 @@ noinst_LTLIBRARIES = librbug.la
 # preprocessor is determined by the ordering of the -I flags.
 AM_CFLAGS = \
 	$(GALLIUM_CFLAGS) \
+	$(VISIBILITY_CFLAGS) \
 	-I$(top_srcdir)/src/gallium/drivers \
 	-I$(top_srcdir)/include
 
diff --git a/src/gallium/drivers/svga/Makefile.am b/src/gallium/drivers/svga/Makefile.am
index fdaa3c8..7eacd90 100644
--- a/src/gallium/drivers/svga/Makefile.am
+++ b/src/gallium/drivers/svga/Makefile.am
@@ -29,6 +29,8 @@ AM_CPPFLAGS = \
 	-I$(top_srcdir)/include \
 	$(GALLIUM_CFLAGS)
 
+AM_CFLAGS = $(VISIBILITY_CFLAGS)
+
 #On some systems -std= must be added to CFLAGS to be the last -std=
 CFLAGS += -std=gnu99
 
diff --git a/src/gallium/drivers/trace/Makefile.am b/src/gallium/drivers/trace/Makefile.am
index a9e1457..984ead4 100644
--- a/src/gallium/drivers/trace/Makefile.am
+++ b/src/gallium/drivers/trace/Makefile.am
@@ -1,7 +1,8 @@
 include $(top_srcdir)/src/gallium/Automake.inc
 
 AM_CFLAGS = \
-	$(GALLIUM_CFLAGS)
+	$(GALLIUM_CFLAGS) \
+	$(VISIBILITY_CFLAGS)
 
 noinst_LTLIBRARIES = libtrace.la
 
diff --git a/src/gallium/include/pipe/p_compiler.h b/src/gallium/include/pipe/p_compiler.h
index 5958333..a131969 100644
--- a/src/gallium/include/pipe/p_compiler.h
+++ b/src/gallium/include/pipe/p_compiler.h
@@ -29,6 +29,8 @@
 #define P_COMPILER_H
 
 
+#include "c99_compat.h" /* inline, __func__, etc. */
+
 #include "p_config.h"
 
 #include <stdlib.h>
@@ -90,28 +92,7 @@ typedef unsigned char boolean;
 #endif
 #endif
 
-/* Function inlining */
-#ifndef inline
-#  ifdef __cplusplus
-     /* C++ supports inline keyword */
-#  elif defined(__GNUC__)
-#    define inline __inline__
-#  elif defined(_MSC_VER)
-#    define inline __inline
-#  elif defined(__ICL)
-#    define inline __inline
-#  elif defined(__INTEL_COMPILER)
-     /* Intel compiler supports inline keyword */
-#  elif defined(__WATCOMC__) && (__WATCOMC__ >= 1100)
-#    define inline __inline
-#  elif defined(__SUNPRO_C) && defined(__C99FEATURES__)
-     /* C99 supports inline keyword */
-#  elif (__STDC_VERSION__ >= 199901L)
-     /* C99 supports inline keyword */
-#  else
-#    define inline
-#  endif
-#endif
+/* XXX: Use standard `inline` keyword instead */
 #ifndef INLINE
 #  define INLINE inline
 #endif
@@ -127,26 +108,6 @@ typedef unsigned char boolean;
 #  endif
 #endif
 
-/*
- * Define the C99 restrict keyword.
- *
- * See also:
- * - http://cellperformance.beyond3d.com/articles/2006/05/demystifying-the-restrict-keyword.html
- */
-#ifndef restrict
-#  if (__STDC_VERSION__ >= 199901L)
-     /* C99 */
-#  elif defined(__SUNPRO_C) && defined(__C99FEATURES__)
-     /* C99 */
-#  elif defined(__GNUC__)
-#    define restrict __restrict__
-#  elif defined(_MSC_VER)
-#    define restrict __restrict
-#  else
-#    define restrict /* */
-#  endif
-#endif
-
 
 /* Function visibility */
 #ifndef PUBLIC
@@ -160,35 +121,10 @@ typedef unsigned char boolean;
 #endif
 
 
-/* The __FUNCTION__ gcc variable is generally only used for debugging.
- * If we're not using gcc, define __FUNCTION__ as a cpp symbol here.
- */
+/* XXX: Use standard `__func__` instead */
 #ifndef __FUNCTION__
-# if !defined(__GNUC__)
-#  if (__STDC_VERSION__ >= 199901L) /* C99 */ || \
-    (defined(__SUNPRO_C) && defined(__C99FEATURES__))
-#   define __FUNCTION__ __func__
-#  else
-#   define __FUNCTION__ "<unknown>"
-#  endif
-# endif
-# if defined(_MSC_VER) && _MSC_VER < 1300
-#  define __FUNCTION__ "<unknown>"
-# endif
+#  define __FUNCTION__ __func__
 #endif
-#ifndef __func__
-#  if (__STDC_VERSION__ >= 199901L) || \
-      (defined(__SUNPRO_C) && defined(__C99FEATURES__))
-       /* __func__ is part of C99 */
-#  elif defined(_MSC_VER)
-#    if _MSC_VER >= 1300
-#      define __func__ __FUNCTION__
-#    else
-#      define __func__ "<unknown>"
-#    endif
-#  endif
-#endif
-
 
 
 /* This should match linux gcc cdecl semantics everywhere, so that we
diff --git a/src/gallium/state_trackers/egl/Makefile.am b/src/gallium/state_trackers/egl/Makefile.am
index e19e9a3..f78b36e 100644
--- a/src/gallium/state_trackers/egl/Makefile.am
+++ b/src/gallium/state_trackers/egl/Makefile.am
@@ -27,7 +27,7 @@ include $(top_srcdir)/src/gallium/Automake.inc
 AM_CFLAGS = $(GALLIUM_CFLAGS)
 AM_CPPFLAGS = \
 	-I$(top_srcdir)/src/egl/main \
-	-I$(top_srcdir)/src/egl/wayland/wayland-drm/ \
+	-I$(top_builddir)/src/egl/wayland/wayland-drm/ \
 	-I$(top_srcdir)/include
 
 noinst_LTLIBRARIES = libegl.la
diff --git a/src/gallium/state_trackers/xa/Makefile.am b/src/gallium/state_trackers/xa/Makefile.am
index 5b53ef9..57d55c4 100644
--- a/src/gallium/state_trackers/xa/Makefile.am
+++ b/src/gallium/state_trackers/xa/Makefile.am
@@ -24,7 +24,9 @@ include $(top_srcdir)/src/gallium/Automake.inc
 
 AM_CFLAGS = \
 	-Wall -pedantic \
-	$(GALLIUM_CFLAGS)
+	$(GALLIUM_CFLAGS) \
+	$(VISIBILITY_CFLAGS)
+
 AM_CPPFLAGS = \
 	-I$(top_srcdir)/src/gallium/ \
 	-I$(top_srcdir)/src/gallium/winsys \
diff --git a/src/gallium/winsys/svga/drm/Makefile.am b/src/gallium/winsys/svga/drm/Makefile.am
index 53bbcc2..d7ada3c 100644
--- a/src/gallium/winsys/svga/drm/Makefile.am
+++ b/src/gallium/winsys/svga/drm/Makefile.am
@@ -31,6 +31,8 @@ AM_CPPFLAGS = \
 	$(GALLIUM_CFLAGS) \
 	$(LIBDRM_CFLAGS)
 
+AM_CFLAGS = $(VISIBILITY_CFLAGS)
+
 #On some systems -std= must be added to CFLAGS to be the last -std=
 CFLAGS += -std=gnu99 -D_FILE_OFFSET_BITS=64
 
diff --git a/src/glsl/glsl_types.cpp b/src/glsl/glsl_types.cpp
index 02d85b8..dee9709 100644
--- a/src/glsl/glsl_types.cpp
+++ b/src/glsl/glsl_types.cpp
@@ -446,6 +446,8 @@ const glsl_type *glsl_type::get_scalar_type() const
       return int_type;
    case GLSL_TYPE_FLOAT:
       return float_type;
+   case GLSL_TYPE_BOOL:
+      return bool_type;
    default:
       /* Handle everything else */
       return type;
diff --git a/src/glsl/ir_validate.cpp b/src/glsl/ir_validate.cpp
index d8cafd5..78ce13e 100644
--- a/src/glsl/ir_validate.cpp
+++ b/src/glsl/ir_validate.cpp
@@ -695,6 +695,11 @@ check_node_type(ir_instruction *ir, void *data)
 void
 validate_ir_tree(exec_list *instructions)
 {
+   /* We shouldn't have any reason to validate IR in a release build,
+    * and it's half composed of assert()s anyway which wouldn't do
+    * anything.
+    */
+#ifdef DEBUG
    ir_validate v;
 
    v.run(instructions);
@@ -704,4 +709,5 @@ validate_ir_tree(exec_list *instructions)
 
       visit_tree(ir, check_node_type, NULL);
    }
+#endif
 }
diff --git a/src/glsl/linker.cpp b/src/glsl/linker.cpp
index 57e7a9a..cf0420c 100644
--- a/src/glsl/linker.cpp
+++ b/src/glsl/linker.cpp
@@ -1067,13 +1067,11 @@ link_intrastage_shaders(void *mem_ctx,
 
    free(linking_shaders);
 
-#ifdef DEBUG
    /* At this point linked should contain all of the linked IR, so
     * validate it to make sure nothing went wrong.
     */
    if (linked)
       validate_ir_tree(linked->ir);
-#endif
 
    /* Make a pass over all variable declarations to ensure that arrays with
     * unspecified sizes have a size specified.  The size is inferred from the
diff --git a/src/glx/Makefile.am b/src/glx/Makefile.am
index 4aa900a..f01709b 100644
--- a/src/glx/Makefile.am
+++ b/src/glx/Makefile.am
@@ -39,6 +39,7 @@ AM_CFLAGS = \
 	-I$(top_srcdir)/src/mapi/glapi \
 	-I$(top_builddir)/src/mapi \
 	-I$(top_builddir)/src/mapi/glapi \
+	$(VISIBILITY_CFLAGS) \
 	$(SHARED_GLAPI_CFLAGS) \
 	$(EXTRA_DEFINES_XF86VIDMODE) \
 	-D_REENTRANT \
diff --git a/src/mapi/glapi/gen/gl_x86-64_asm.py b/src/mapi/glapi/gen/gl_x86-64_asm.py
index a3548c2..19e0e15 100644
--- a/src/mapi/glapi/gen/gl_x86-64_asm.py
+++ b/src/mapi/glapi/gen/gl_x86-64_asm.py
@@ -181,19 +181,6 @@ class PrintGenericStubs(gl_XML.gl_print_base):
 
     def printRealFooter(self):
         print ''
-        print '#if defined(GLX_USE_TLS) && defined(__linux__)'
-        print '	.section ".note.ABI-tag", "a"'
-        print '	.p2align 2'
-        print '	.long	1f - 0f   /* name length */'
-        print '	.long	3f - 2f   /* data length */'
-        print '	.long	1         /* note length */'
-        print '0:	.asciz "GNU"      /* vendor name */'
-        print '1:	.p2align 2'
-        print '2:	.long	0         /* note data: the ABI tag */'
-        print '	.long	2,4,20    /* Minimum kernel version w/TLS */'
-        print '3:	.p2align 2        /* pad out section */'
-        print '#endif /* GLX_USE_TLS */'
-        print ''
         print '#if defined (__ELF__) && defined (__linux__)'
         print '	.section .note.GNU-stack,"",%progbits'
         print '#endif'
diff --git a/src/mapi/glapi/gen/gl_x86_asm.py b/src/mapi/glapi/gen/gl_x86_asm.py
index 8b0f6ee..919bbc0 100644
--- a/src/mapi/glapi/gen/gl_x86_asm.py
+++ b/src/mapi/glapi/gen/gl_x86_asm.py
@@ -189,19 +189,6 @@ class PrintGenericStubs(gl_XML.gl_print_base):
         print '\t\tALIGNTEXT16'
         print 'GLNAME(gl_dispatch_functions_end):'
         print ''
-        print '#if defined(GLX_USE_TLS) && defined(__linux__)'
-        print '	.section ".note.ABI-tag", "a"'
-        print '	.p2align 2'
-        print '	.long	1f - 0f   /* name length */'
-        print '	.long	3f - 2f   /* data length */'
-        print '	.long	1         /* note length */'
-        print '0:	.asciz "GNU"      /* vendor name */'
-        print '1:	.p2align 2'
-        print '2:	.long	0         /* note data: the ABI tag */'
-        print '	.long	2,4,20    /* Minimum kernel version w/TLS */'
-        print '3:	.p2align 2        /* pad out section */'
-        print '#endif /* GLX_USE_TLS */'
-        print ''
         print '#if defined (__ELF__) && defined (__linux__)'
         print '	.section .note.GNU-stack,"",%progbits'
         print '#endif'
diff --git a/src/mapi/mapi/entry_x86-64_tls.h b/src/mapi/mapi/entry_x86-64_tls.h
index 72d4125..36cad00 100644
--- a/src/mapi/mapi/entry_x86-64_tls.h
+++ b/src/mapi/mapi/entry_x86-64_tls.h
@@ -28,19 +28,6 @@
 
 #include "u_macros.h"
 
-#ifdef __linux__
-__asm__(".section .note.ABI-tag, \"a\"\n\t"
-        ".p2align 2\n\t"
-        ".long 1f - 0f\n\t"      /* name length */
-        ".long 3f - 2f\n\t"      /* data length */
-        ".long 1\n\t"            /* note length */
-        "0: .asciz \"GNU\"\n\t"  /* vendor name */
-        "1: .p2align 2\n\t"
-        "2: .long 0\n\t"         /* note data: the ABI tag */
-        ".long 2,4,20\n\t"       /* Minimum kernel version w/TLS */
-        "3: .p2align 2\n\t");    /* pad out section */
-#endif /* __linux__ */
-
 __asm__(".text\n"
         ".balign 32\n"
         "x86_64_entry_start:");
diff --git a/src/mapi/mapi/entry_x86_tls.h b/src/mapi/mapi/entry_x86_tls.h
index de91812..58d09ca 100644
--- a/src/mapi/mapi/entry_x86_tls.h
+++ b/src/mapi/mapi/entry_x86_tls.h
@@ -29,19 +29,6 @@
 #include <string.h>
 #include "u_macros.h"
 
-#ifdef __linux__
-__asm__(".section .note.ABI-tag, \"a\"\n\t"
-        ".p2align 2\n\t"
-        ".long 1f - 0f\n\t"      /* name length */
-        ".long 3f - 2f\n\t"      /* data length */
-        ".long 1\n\t"            /* note length */
-        "0: .asciz \"GNU\"\n\t"  /* vendor name */
-        "1: .p2align 2\n\t"
-        "2: .long 0\n\t"         /* note data: the ABI tag */
-        ".long 2,4,20\n\t"       /* Minimum kernel version w/TLS */
-        "3: .p2align 2\n\t");    /* pad out section */
-#endif /* __linux__ */
-
 __asm__(".text");
 
 __asm__("x86_current_tls:\n\t"
diff --git a/src/mapi/mapi/u_compiler.h b/src/mapi/mapi/u_compiler.h
index 2b019ed..f376e97 100644
--- a/src/mapi/mapi/u_compiler.h
+++ b/src/mapi/mapi/u_compiler.h
@@ -1,28 +1,10 @@
 #ifndef _U_COMPILER_H_
 #define _U_COMPILER_H_
 
-/* Function inlining */
-#ifndef inline
-#  ifdef __cplusplus
-     /* C++ supports inline keyword */
-#  elif defined(__GNUC__)
-#    define inline __inline__
-#  elif defined(_MSC_VER)
-#    define inline __inline
-#  elif defined(__ICL)
-#    define inline __inline
-#  elif defined(__INTEL_COMPILER)
-     /* Intel compiler supports inline keyword */
-#  elif defined(__WATCOMC__) && (__WATCOMC__ >= 1100)
-#    define inline __inline
-#  elif defined(__SUNPRO_C) && defined(__C99FEATURES__)
-     /* C99 supports inline keyword */
-#  elif (__STDC_VERSION__ >= 199901L)
-     /* C99 supports inline keyword */
-#  else
-#    define inline
-#  endif
-#endif
+#include "c99_compat.h" /* inline, __func__, etc. */
+
+
+/* XXX: Use standard `inline` keyword instead */
 #ifndef INLINE
 #  define INLINE inline
 #endif
diff --git a/src/mesa/drivers/dri/i965/brw_clear.c b/src/mesa/drivers/dri/i965/brw_clear.c
index 53d8e54..cde1a06 100644
--- a/src/mesa/drivers/dri/i965/brw_clear.c
+++ b/src/mesa/drivers/dri/i965/brw_clear.c
@@ -40,6 +40,8 @@
 #include "intel_mipmap_tree.h"
 #include "intel_regions.h"
 
+#include "brw_context.h"
+
 #define FILE_DEBUG_FLAG DEBUG_BLIT
 
 static const char *buffer_names[] = {
@@ -219,7 +221,8 @@ brw_fast_clear_depth(struct gl_context *ctx)
 static void
 brw_clear(struct gl_context *ctx, GLbitfield mask)
 {
-   struct intel_context *intel = intel_context(ctx);
+   struct brw_context *brw = brw_context(ctx);
+   struct intel_context *intel = &brw->intel;
 
    if (!_mesa_check_conditional_render(ctx))
       return;
@@ -229,6 +232,7 @@ brw_clear(struct gl_context *ctx, GLbitfield mask)
    }
 
    intel_prepare_render(intel);
+   brw_workaround_depthstencil_alignment(brw);
 
    if (mask & BUFFER_BIT_DEPTH) {
       if (brw_fast_clear_depth(ctx)) {
diff --git a/src/mesa/drivers/dri/i965/brw_defines.h b/src/mesa/drivers/dri/i965/brw_defines.h
index 79cc12f..4bcfb95 100644
--- a/src/mesa/drivers/dri/i965/brw_defines.h
+++ b/src/mesa/drivers/dri/i965/brw_defines.h
@@ -437,6 +437,7 @@
 #define BRW_SURFACEFORMAT_B10G10R10A2_SSCALED            0x1B9
 #define BRW_SURFACEFORMAT_B10G10R10A2_UINT               0x1BA
 #define BRW_SURFACEFORMAT_B10G10R10A2_SINT               0x1BB
+#define BRW_SURFACEFORMAT_RAW                            0x1FF
 #define BRW_SURFACE_FORMAT_SHIFT	18
 #define BRW_SURFACE_FORMAT_MASK		INTEL_MASK(26, 18)
 
@@ -857,6 +858,7 @@ enum brw_message_target {
    GEN6_SFID_DATAPORT_CONSTANT_CACHE = 9,
 
    GEN7_SFID_DATAPORT_DATA_CACHE     = 10,
+   HSW_SFID_DATAPORT_DATA_CACHE_1    = 12,
 };
 
 #define GEN7_MESSAGE_TARGET_DP_DATA_CACHE     10
@@ -965,7 +967,44 @@ enum brw_message_target {
 
 /* GEN7 */
 #define GEN7_DATAPORT_WRITE_MESSAGE_OWORD_DUAL_BLOCK_WRITE          10
+#define GEN7_DATAPORT_DC_OWORD_BLOCK_READ                           0
+#define GEN7_DATAPORT_DC_UNALIGNED_OWORD_BLOCK_READ                 1
+#define GEN7_DATAPORT_DC_OWORD_DUAL_BLOCK_READ                      2
 #define GEN7_DATAPORT_DC_DWORD_SCATTERED_READ                       3
+#define GEN7_DATAPORT_DC_BYTE_SCATTERED_READ                        4
+#define GEN7_DATAPORT_DC_UNTYPED_SURFACE_READ                       5
+#define GEN7_DATAPORT_DC_UNTYPED_ATOMIC_OP                          6
+#define GEN7_DATAPORT_DC_MEMORY_FENCE                               7
+#define GEN7_DATAPORT_DC_OWORD_BLOCK_WRITE                          8
+#define GEN7_DATAPORT_DC_OWORD_DUAL_BLOCK_WRITE                     10
+#define GEN7_DATAPORT_DC_DWORD_SCATTERED_WRITE                      11
+#define GEN7_DATAPORT_DC_BYTE_SCATTERED_WRITE                       12
+#define GEN7_DATAPORT_DC_UNTYPED_SURFACE_WRITE                      13
+
+/* HSW */
+#define HSW_DATAPORT_DC_PORT0_OWORD_BLOCK_READ                      0
+#define HSW_DATAPORT_DC_PORT0_UNALIGNED_OWORD_BLOCK_READ            1
+#define HSW_DATAPORT_DC_PORT0_OWORD_DUAL_BLOCK_READ                 2
+#define HSW_DATAPORT_DC_PORT0_DWORD_SCATTERED_READ                  3
+#define HSW_DATAPORT_DC_PORT0_BYTE_SCATTERED_READ                   4
+#define HSW_DATAPORT_DC_PORT0_MEMORY_FENCE                          7
+#define HSW_DATAPORT_DC_PORT0_OWORD_BLOCK_WRITE                     8
+#define HSW_DATAPORT_DC_PORT0_OWORD_DUAL_BLOCK_WRITE                10
+#define HSW_DATAPORT_DC_PORT0_DWORD_SCATTERED_WRITE                 11
+#define HSW_DATAPORT_DC_PORT0_BYTE_SCATTERED_WRITE                  12
+
+#define HSW_DATAPORT_DC_PORT1_UNTYPED_SURFACE_READ                  1
+#define HSW_DATAPORT_DC_PORT1_UNTYPED_ATOMIC_OP                     2
+#define HSW_DATAPORT_DC_PORT1_UNTYPED_ATOMIC_OP_SIMD4X2             3
+#define HSW_DATAPORT_DC_PORT1_MEDIA_BLOCK_READ                      4
+#define HSW_DATAPORT_DC_PORT1_TYPED_SURFACE_READ                    5
+#define HSW_DATAPORT_DC_PORT1_TYPED_ATOMIC_OP                       6
+#define HSW_DATAPORT_DC_PORT1_TYPED_ATOMIC_OP_SIMD4X2               7
+#define HSW_DATAPORT_DC_PORT1_UNTYPED_SURFACE_WRITE                 9
+#define HSW_DATAPORT_DC_PORT1_MEDIA_BLOCK_WRITE                     10
+#define HSW_DATAPORT_DC_PORT1_ATOMIC_COUNTER_OP                     11
+#define HSW_DATAPORT_DC_PORT1_ATOMIC_COUNTER_OP_SIMD4X2             12
+#define HSW_DATAPORT_DC_PORT1_TYPED_SURFACE_WRITE                   13
 
 /* dataport atomic operations. */
 #define BRW_AOP_AND                   1
diff --git a/src/mesa/drivers/dri/i965/brw_eu_emit.c b/src/mesa/drivers/dri/i965/brw_eu_emit.c
index b34754a..40cae37 100644
--- a/src/mesa/drivers/dri/i965/brw_eu_emit.c
+++ b/src/mesa/drivers/dri/i965/brw_eu_emit.c
@@ -2539,15 +2539,22 @@ void brw_shader_time_add(struct brw_compile *p,
    brw_set_src0(p, send, brw_vec1_reg(BRW_MESSAGE_REGISTER_FILE,
                                       base_mrf, 0));
 
+   uint32_t sfid, msg_type;
+   if (intel->is_haswell) {
+      sfid = HSW_SFID_DATAPORT_DATA_CACHE_1;
+      msg_type = HSW_DATAPORT_DC_PORT1_UNTYPED_ATOMIC_OP;
+   } else {
+      sfid = GEN7_SFID_DATAPORT_DATA_CACHE;
+      msg_type = GEN7_DATAPORT_DC_UNTYPED_ATOMIC_OP;
+   }
+
    bool header_present = false;
    bool eot = false;
    uint32_t mlen = 2; /* offset, value */
    uint32_t rlen = 0;
-   brw_set_message_descriptor(p, send,
-                              GEN7_SFID_DATAPORT_DATA_CACHE,
-                              mlen, rlen, header_present, eot);
+   brw_set_message_descriptor(p, send, sfid, mlen, rlen, header_present, eot);
 
-   send->bits3.ud |= 6 << 14; /* untyped atomic op */
+   send->bits3.ud |= msg_type << 14;
    send->bits3.ud |= 0 << 13; /* no return data */
    send->bits3.ud |= 1 << 12; /* SIMD8 mode */
    send->bits3.ud |= BRW_AOP_ADD << 8;
diff --git a/src/mesa/drivers/dri/i965/brw_fs.cpp b/src/mesa/drivers/dri/i965/brw_fs.cpp
index f80219e..4924441 100644
--- a/src/mesa/drivers/dri/i965/brw_fs.cpp
+++ b/src/mesa/drivers/dri/i965/brw_fs.cpp
@@ -2295,7 +2295,8 @@ clear_deps_for_inst_src(fs_inst *inst, int dispatch_width, bool *deps,
 void
 fs_visitor::insert_gen4_pre_send_dependency_workarounds(fs_inst *inst)
 {
-   int write_len = inst->regs_written() * dispatch_width / 8;
+   int reg_size = dispatch_width / 8;
+   int write_len = inst->regs_written() * reg_size;
    int first_write_grf = inst->dst.reg;
    bool needs_dep[BRW_MAX_MRF];
    assert(write_len < (int)sizeof(needs_dep) - 1);
@@ -2334,14 +2335,19 @@ fs_visitor::insert_gen4_pre_send_dependency_workarounds(fs_inst *inst)
        * instruction but a MOV that might have left us an outstanding
        * dependency has more latency than a MOV.
        */
-      if (scan_inst->dst.file == GRF &&
-          scan_inst->dst.reg >= first_write_grf &&
-          scan_inst->dst.reg < first_write_grf + write_len &&
-          needs_dep[scan_inst->dst.reg - first_write_grf]) {
-         inst->insert_before(DEP_RESOLVE_MOV(scan_inst->dst.reg));
-         needs_dep[scan_inst->dst.reg - first_write_grf] = false;
-         if (scan_inst_16wide)
-            needs_dep[scan_inst->dst.reg - first_write_grf + 1] = false;
+      if (scan_inst->dst.file == GRF) {
+         for (int i = 0; i < scan_inst->regs_written(); i++) {
+            int reg = scan_inst->dst.reg + i * reg_size;
+
+            if (reg >= first_write_grf &&
+                reg < first_write_grf + write_len &&
+                needs_dep[reg - first_write_grf]) {
+               inst->insert_before(DEP_RESOLVE_MOV(reg));
+               needs_dep[reg - first_write_grf] = false;
+               if (scan_inst_16wide)
+                  needs_dep[reg - first_write_grf + 1] = false;
+            }
+         }
       }
 
       /* Clear the flag for registers that actually got read (as expected). */
@@ -2494,6 +2500,8 @@ fs_visitor::lower_uniform_pull_constant_loads()
          inst->insert_before(setup2);
          inst->opcode = FS_OPCODE_UNIFORM_PULL_CONSTANT_LOAD_GEN7;
          inst->src[1] = payload;
+
+         this->live_intervals_valid = false;
       } else {
          /* Before register allocation, we didn't tell the scheduler about the
           * MRF we use.  We know it's safe to use this MRF because nothing
diff --git a/src/mesa/drivers/dri/i965/brw_fs_live_variables.cpp b/src/mesa/drivers/dri/i965/brw_fs_live_variables.cpp
index db8f397..4c7991d 100644
--- a/src/mesa/drivers/dri/i965/brw_fs_live_variables.cpp
+++ b/src/mesa/drivers/dri/i965/brw_fs_live_variables.cpp
@@ -190,6 +190,37 @@ fs_visitor::calculate_live_intervals()
 	    int reg = inst->src[i].reg;
 
 	    use[reg] = ip;
+
+            /* In most cases, a register can be written over safely by the
+             * same instruction that is its last use.  For a single
+             * instruction, the sources are dereferenced before writing of the
+             * destination starts (naturally).  This gets more complicated for
+             * simd16, because the instruction:
+             *
+             * mov(16)      g4<1>F      g4<8,8,1>F   g6<8,8,1>F
+             *
+             * is actually decoded in hardware as:
+             *
+             * mov(8)       g4<1>F      g4<8,8,1>F   g6<8,8,1>F
+             * mov(8)       g5<1>F      g5<8,8,1>F   g7<8,8,1>F
+             *
+             * Which is safe.  However, if we have uniform accesses
+             * happening, we get into trouble:
+             *
+             * mov(8)       g4<1>F      g4<0,1,0>F   g6<8,8,1>F
+             * mov(8)       g5<1>F      g4<0,1,0>F   g7<8,8,1>F
+             *
+             * Now our destination for the first instruction overwrote the
+             * second instruction's src0, and we get garbage for those 8
+             * pixels.  There's a similar issue for the pre-gen6
+             * pixel_x/pixel_y, which are registers of 16-bit values and thus
+             * would get stomped by the first decode as well.
+             */
+            if (dispatch_width == 16 && (inst->src[i].smear ||
+                                         (this->pixel_x.reg == reg ||
+                                          this->pixel_y.reg == reg))) {
+               use[reg]++;
+            }
 	 }
       }
 
@@ -264,28 +295,5 @@ fs_visitor::virtual_grf_interferes(int a, int b)
    int start = MAX2(a_def, b_def);
    int end = MIN2(a_use, b_use);
 
-   /* If the register is used to store 16 values of less than float
-    * size (only the case for pixel_[xy]), then we can't allocate
-    * another dword-sized thing to that register that would be used in
-    * the same instruction.  This is because when the GPU decodes (for
-    * example):
-    *
-    * (declare (in ) vec4 gl_FragCoord@0x97766a0)
-    * add(16)         g6<1>F          g6<8,8,1>UW     0.5F { align1 compr };
-    *
-    * it's actually processed as:
-    * add(8)         g6<1>F          g6<8,8,1>UW     0.5F { align1 };
-    * add(8)         g7<1>F          g6.8<8,8,1>UW   0.5F { align1 sechalf };
-    *
-    * so our second half values in g6 got overwritten in the first
-    * half.
-    */
-   if (dispatch_width == 16 && (this->pixel_x.reg == a ||
-				this->pixel_x.reg == b ||
-				this->pixel_y.reg == a ||
-				this->pixel_y.reg == b)) {
-      return start <= end;
-   }
-
    return start < end;
 }
diff --git a/src/mesa/drivers/dri/i965/brw_state.h b/src/mesa/drivers/dri/i965/brw_state.h
index ecc61c4..02ce57b 100644
--- a/src/mesa/drivers/dri/i965/brw_state.h
+++ b/src/mesa/drivers/dri/i965/brw_state.h
@@ -216,6 +216,8 @@ void gen7_set_surface_mcs_info(struct brw_context *brw,
                                bool is_render_target);
 void gen7_check_surface_setup(uint32_t *surf, bool is_render_target);
 void gen7_init_vtable_surface_functions(struct brw_context *brw);
+void gen7_create_shader_time_surface(struct brw_context *brw,
+                                     uint32_t *out_offset);
 
 /* brw_wm_sampler_state.c */
 uint32_t translate_wrap_mode(GLenum wrap, bool using_nearest);
diff --git a/src/mesa/drivers/dri/i965/brw_vs_surface_state.c b/src/mesa/drivers/dri/i965/brw_vs_surface_state.c
index 4da7eaa..2aefc0c 100644
--- a/src/mesa/drivers/dri/i965/brw_vs_surface_state.c
+++ b/src/mesa/drivers/dri/i965/brw_vs_surface_state.c
@@ -137,14 +137,11 @@ const struct brw_tracked_state brw_vs_ubo_surfaces = {
 static void
 brw_vs_upload_binding_table(struct brw_context *brw)
 {
-   struct intel_context *intel = &brw->intel;
    uint32_t *bind;
    int i;
 
    if (INTEL_DEBUG & DEBUG_SHADER_TIME) {
-      intel->vtbl.create_constant_surface(brw, brw->shader_time.bo, 0,
-                                          brw->shader_time.bo->size,
-                                          &brw->vs.surf_offset[SURF_INDEX_VS_SHADER_TIME]);
+      gen7_create_shader_time_surface(brw, &brw->vs.surf_offset[SURF_INDEX_VS_SHADER_TIME]);
 
       assert(brw->vs.prog_data->num_surfaces <= SURF_INDEX_VS_SHADER_TIME);
       brw->vs.prog_data->num_surfaces = SURF_INDEX_VS_SHADER_TIME;
diff --git a/src/mesa/drivers/dri/i965/brw_wm_surface_state.c b/src/mesa/drivers/dri/i965/brw_wm_surface_state.c
index 6ec7d71..657a56f 100644
--- a/src/mesa/drivers/dri/i965/brw_wm_surface_state.c
+++ b/src/mesa/drivers/dri/i965/brw_wm_surface_state.c
@@ -770,7 +770,8 @@ brw_get_texture_swizzle(const struct gl_context *ctx,
    case GL_RED:
    case GL_RG:
    case GL_RGB:
-      swizzles[3] = SWIZZLE_ONE;
+      if (_mesa_get_format_bits(img->TexFormat, GL_ALPHA_BITS) > 0)
+         swizzles[3] = SWIZZLE_ONE;
       break;
    }
 
@@ -1468,14 +1469,11 @@ const struct brw_tracked_state brw_wm_ubo_surfaces = {
 static void
 brw_upload_wm_binding_table(struct brw_context *brw)
 {
-   struct intel_context *intel = &brw->intel;
    uint32_t *bind;
    int i;
 
    if (INTEL_DEBUG & DEBUG_SHADER_TIME) {
-      intel->vtbl.create_constant_surface(brw, brw->shader_time.bo, 0,
-                                          brw->shader_time.bo->size,
-                                          &brw->wm.surf_offset[SURF_INDEX_WM_SHADER_TIME]);
+      gen7_create_shader_time_surface(brw, &brw->wm.surf_offset[SURF_INDEX_WM_SHADER_TIME]);
    }
 
    /* Might want to calculate nr_surfaces first, to avoid taking up so much
diff --git a/src/mesa/drivers/dri/i965/gen6_cc.c b/src/mesa/drivers/dri/i965/gen6_cc.c
index d32f636..7ac5d5f 100644
--- a/src/mesa/drivers/dri/i965/gen6_cc.c
+++ b/src/mesa/drivers/dri/i965/gen6_cc.c
@@ -126,7 +126,7 @@ gen6_upload_blend_state(struct brw_context *brw)
           * not read the alpha channel, but will instead use the correct
           * implicit value for alpha.
           */
-         if (!_mesa_base_format_has_channel(rb->_BaseFormat, GL_TEXTURE_ALPHA_TYPE))
+         if (rb && !_mesa_base_format_has_channel(rb->_BaseFormat, GL_TEXTURE_ALPHA_TYPE))
          {
             srcRGB = brw_fix_xRGB_alpha(srcRGB);
             srcA = brw_fix_xRGB_alpha(srcA);
diff --git a/src/mesa/drivers/dri/i965/gen7_wm_surface_state.c b/src/mesa/drivers/dri/i965/gen7_wm_surface_state.c
index 24f1b9c..2913fc6 100644
--- a/src/mesa/drivers/dri/i965/gen7_wm_surface_state.c
+++ b/src/mesa/drivers/dri/i965/gen7_wm_surface_state.c
@@ -413,6 +413,46 @@ gen7_create_constant_surface(struct brw_context *brw,
    gen7_check_surface_setup(surf, false /* is_render_target */);
 }
 
+/**
+ * Create a RAW buffer surface for shader_time.bo; offset goes in *out_offset.
+ */
+void
+gen7_create_shader_time_surface(struct brw_context *brw, uint32_t *out_offset)
+{
+   struct intel_context *intel = &brw->intel;
+   const int w = brw->shader_time.bo->size - 1; /* packed into W/H/D below */
+
+   uint32_t *surf = brw_state_batch(brw, AUB_TRACE_SURFACE_STATE,
+                                    8 * 4, 32, out_offset);
+   memset(surf, 0, 8 * 4); /* clear all 8 dwords */
+
+   surf[0] = BRW_SURFACE_BUFFER << BRW_SURFACE_TYPE_SHIFT |
+             BRW_SURFACEFORMAT_RAW << BRW_SURFACE_FORMAT_SHIFT |
+             BRW_SURFACE_RC_READ_WRITE;
+
+   surf[1] = brw->shader_time.bo->offset; /* reloc */
+
+   surf[2] = SET_FIELD(w & 0x7f, GEN7_SURFACE_WIDTH) | /* w bits 6:0 */
+             SET_FIELD((w >> 7) & 0x1fff, GEN7_SURFACE_HEIGHT); /* 19:7 */
+   surf[3] = SET_FIELD((w >> 20) & 0x7f, BRW_SURFACE_DEPTH); /* 26:20 */
+
+   /* Unlike texture or renderbuffer surfaces, we only do untyped operations
+    * on the shader_time surface, so there's no need to set HSW channel
+    * overrides.
+    */
+
+   /* Emit relocation to surface contents.  Section 5.1.1 of the gen4
+    * bspec ("Data Cache") says that the data cache does not exist as
+    * a separate cache and is just the sampler cache.
+    */
+   drm_intel_bo_emit_reloc(intel->batch.bo,
+                           *out_offset + 4, /* byte offset of surf[1] */
+                           brw->shader_time.bo, 0,
+                           I915_GEM_DOMAIN_SAMPLER, 0);
+
+   gen7_check_surface_setup(surf, false /* is_render_target */);
+}
+
 static void
 gen7_update_null_renderbuffer_surface(struct brw_context *brw, unsigned unit)
 {
diff --git a/src/mesa/drivers/dri/intel/intel_screen.c b/src/mesa/drivers/dri/intel/intel_screen.c
index 5ec93f1..4173c0f 100644
--- a/src/mesa/drivers/dri/intel/intel_screen.c
+++ b/src/mesa/drivers/dri/intel/intel_screen.c
@@ -312,7 +312,7 @@ intel_create_image_from_name(__DRIscreen *screen,
        cpp = _mesa_get_format_bytes(image->format);
     image->region = intel_region_alloc_for_handle(intelScreen,
 						  cpp, width, height,
-						  pitch, name, "image");
+						  pitch * cpp, name, "image");
     if (image->region == NULL) {
        free(image);
        return NULL;
diff --git a/src/mesa/drivers/dri/nouveau/nouveau_driver.c b/src/mesa/drivers/dri/nouveau/nouveau_driver.c
index f56b3b2..6c119d5 100644
--- a/src/mesa/drivers/dri/nouveau/nouveau_driver.c
+++ b/src/mesa/drivers/dri/nouveau/nouveau_driver.c
@@ -69,7 +69,8 @@ nouveau_flush(struct gl_context *ctx)
 		__DRIdri2LoaderExtension *dri2 = screen->dri2.loader;
 		__DRIdrawable *drawable = nctx->dri_context->driDrawablePriv;
 
-		dri2->flushFrontBuffer(drawable, drawable->loaderPrivate);
+		if (drawable && drawable->loaderPrivate)
+			dri2->flushFrontBuffer(drawable, drawable->loaderPrivate);
 	}
 }
 
diff --git a/src/mesa/drivers/dri/nouveau/nv10_context.c b/src/mesa/drivers/dri/nouveau/nv10_context.c
index 7eda4e0..4ffc4ef 100644
--- a/src/mesa/drivers/dri/nouveau/nv10_context.c
+++ b/src/mesa/drivers/dri/nouveau/nv10_context.c
@@ -469,7 +469,7 @@ nv10_context_create(struct nouveau_screen *screen, const struct gl_config *visua
 		goto fail;
 
 	/* 3D engine. */
-	if (context_chipset(ctx) >= 0x17)
+	if (context_chipset(ctx) >= 0x17 && context_chipset(ctx) != 0x1a)
 		celsius_class = NV17_3D_CLASS;
 	else if (context_chipset(ctx) >= 0x11)
 		celsius_class = NV15_3D_CLASS;
diff --git a/src/mesa/drivers/osmesa/Makefile.am b/src/mesa/drivers/osmesa/Makefile.am
index c4b178b..2503401 100644
--- a/src/mesa/drivers/osmesa/Makefile.am
+++ b/src/mesa/drivers/osmesa/Makefile.am
@@ -24,6 +24,7 @@
 AM_CPPFLAGS = \
 	-I$(top_srcdir)/include \
 	-I$(top_srcdir)/src/mapi \
+	-I$(top_builddir)/src/mapi \
 	-I$(top_srcdir)/src/mesa/ \
 	$(DEFINES) \
 	$(API_DEFINES)
diff --git a/src/mesa/main/compiler.h b/src/mesa/main/compiler.h
index b22b994..8b23665 100644
--- a/src/mesa/main/compiler.h
+++ b/src/mesa/main/compiler.h
@@ -48,6 +48,8 @@
 #include <float.h>
 #include <stdarg.h>
 
+#include "c99_compat.h" /* inline, __func__, etc. */
+
 
 #ifdef __cplusplus
 extern "C" {
@@ -111,30 +113,7 @@ extern "C" {
 
 
 
-/**
- * Function inlining
- */
-#ifndef inline
-#  ifdef __cplusplus
-     /* C++ supports inline keyword */
-#  elif defined(__GNUC__)
-#    define inline __inline__
-#  elif defined(_MSC_VER)
-#    define inline __inline
-#  elif defined(__ICL)
-#    define inline __inline
-#  elif defined(__INTEL_COMPILER)
-     /* Intel compiler supports inline keyword */
-#  elif defined(__WATCOMC__) && (__WATCOMC__ >= 1100)
-#    define inline __inline
-#  elif defined(__SUNPRO_C) && defined(__C99FEATURES__)
-     /* C99 supports inline keyword */
-#  elif (__STDC_VERSION__ >= 199901L)
-     /* C99 supports inline keyword */
-#  else
-#    define inline
-#  endif
-#endif
+/* XXX: Use standard `inline` keyword instead */
 #ifndef INLINE
 #  define INLINE inline
 #endif
@@ -177,35 +156,10 @@ extern "C" {
 #  endif
 #endif
 
-/**
- * The __FUNCTION__ gcc variable is generally only used for debugging.
- * If we're not using gcc, define __FUNCTION__ as a cpp symbol here.
- * Don't define it if using a newer Windows compiler.
- */
+/* XXX: Use standard `__func__` instead */
 #ifndef __FUNCTION__
-# if !defined(__GNUC__) && !defined(__xlC__) &&	\
-      (!defined(_MSC_VER) || _MSC_VER < 1300)
-#  if (__STDC_VERSION__ >= 199901L) /* C99 */ || \
-    (defined(__SUNPRO_C) && defined(__C99FEATURES__))
-#   define __FUNCTION__ __func__
-#  else
-#   define __FUNCTION__ "<unknown>"
-#  endif
-# endif
+#  define __FUNCTION__ __func__
 #endif
-#ifndef __func__
-#  if (__STDC_VERSION__ >= 199901L) || \
-      (defined(__SUNPRO_C) && defined(__C99FEATURES__))
-       /* __func__ is part of C99 */
-#  elif defined(_MSC_VER)
-#    if _MSC_VER >= 1300
-#      define __func__ __FUNCTION__
-#    else
-#      define __func__ "<unknown>"
-#    endif
-#  endif
-#endif
-
 
 /**
  * Either define MESA_BIG_ENDIAN or MESA_LITTLE_ENDIAN, and CPU_TO_LE32.
@@ -353,8 +307,9 @@ static INLINE GLuint CPU_TO_LE32(GLuint x)
  * USE_IEEE: Determine if we're using IEEE floating point
  */
 #if defined(__i386__) || defined(__386__) || defined(__sparc__) || \
-    defined(__s390x__) || defined(__powerpc__) || \
+    defined(__s390__) || defined(__s390x__) || defined(__powerpc__) || \
     defined(__x86_64__) || \
+    defined(__m68k__) || \
     defined(ia64) || defined(__ia64__) || \
     defined(__hppa__) || defined(hpux) || \
     defined(__mips) || defined(_MIPS_ARCH) || \
diff --git a/src/mesa/main/fbobject.c b/src/mesa/main/fbobject.c
index 257f839..61c1151 100644
--- a/src/mesa/main/fbobject.c
+++ b/src/mesa/main/fbobject.c
@@ -3160,7 +3160,9 @@ _mesa_BlitFramebuffer(GLint srcX0, GLint srcY0, GLint srcX1, GLint srcY1,
       }
    }
 
-   if (!mask) {
+   if (!mask ||
+       (srcX1 - srcX0) == 0 || (srcY1 - srcY0) == 0 ||
+       (dstX1 - dstX0) == 0 || (dstY1 - dstY0) == 0) {
       return;
    }
 
diff --git a/src/mesa/main/get.c b/src/mesa/main/get.c
index 5f4e2fa..6fb2f5d 100644
--- a/src/mesa/main/get.c
+++ b/src/mesa/main/get.c
@@ -34,6 +34,7 @@
 #include "state.h"
 #include "texcompress.h"
 #include "framebuffer.h"
+#include "samplerobj.h"
 
 /* This is a table driven implemetation of the glGet*v() functions.
  * The basic idea is that most getters just look up an int somewhere
@@ -823,7 +824,16 @@ find_custom_value(struct gl_context *ctx, const struct value_desc *d, union valu
       {
          struct gl_sampler_object *samp =
             ctx->Texture.Unit[ctx->Texture.CurrentUnit].Sampler;
-         v->value_int = samp ? samp->Name : 0;
+
+         /*
+          * The sampler object may have been deleted on another context,
+          * so we try to look up the sampler object before returning its Name.
+          */
+         if (samp && _mesa_lookup_samplerobj(ctx, samp->Name)) {
+            v->value_int = samp->Name;
+         } else {
+            v->value_int = 0;
+         }
       }
       break;
    /* GL_ARB_uniform_buffer_object */
diff --git a/src/mesa/main/get_hash_params.py b/src/mesa/main/get_hash_params.py
index 9aab889..15c1c4d 100644
--- a/src/mesa/main/get_hash_params.py
+++ b/src/mesa/main/get_hash_params.py
@@ -412,7 +412,7 @@ descriptor=[
   [ "DEPTH_SCALE", "CONTEXT_FLOAT(Pixel.DepthScale), NO_EXTRA" ],
   [ "DOUBLEBUFFER", "BUFFER_INT(Visual.doubleBufferMode), NO_EXTRA" ],
   [ "DRAW_BUFFER", "BUFFER_ENUM(ColorDrawBuffer[0]), NO_EXTRA" ],
-  [ "EDGE_FLAG", "LOC_CUSTOM, TYPE_BOOLEAN, 0, NO_EXTRA" ],
+  [ "EDGE_FLAG", "LOC_CUSTOM, TYPE_BOOLEAN, 0, extra_flush_current" ],
   [ "FEEDBACK_BUFFER_SIZE", "CONTEXT_INT(Feedback.BufferSize), NO_EXTRA" ],
   [ "FEEDBACK_BUFFER_TYPE", "CONTEXT_ENUM(Feedback.Type), NO_EXTRA" ],
   [ "FOG_INDEX", "CONTEXT_FLOAT(Fog.Index), NO_EXTRA" ],
diff --git a/src/mesa/main/mtypes.h b/src/mesa/main/mtypes.h
index 3369623..8f906ae 100644
--- a/src/mesa/main/mtypes.h
+++ b/src/mesa/main/mtypes.h
@@ -1274,6 +1274,7 @@ struct gl_texture_object
    GLfloat Priority;		/**< in [0,1] */
    GLint BaseLevel;		/**< min mipmap level, OpenGL 1.2 */
    GLint MaxLevel;		/**< max mipmap level, OpenGL 1.2 */
+   GLint ImmutableLevels;       /**< ES 3.0 / ARB_texture_view */
    GLint _MaxLevel;		/**< actual max mipmap level (q in the spec) */
    GLfloat _MaxLambda;		/**< = _MaxLevel - BaseLevel (q - b in spec) */
    GLint CropRect[4];           /**< GL_OES_draw_texture */
diff --git a/src/mesa/main/samplerobj.c b/src/mesa/main/samplerobj.c
index 319a444..5cff329 100644
--- a/src/mesa/main/samplerobj.c
+++ b/src/mesa/main/samplerobj.c
@@ -40,7 +40,7 @@
 #include "main/samplerobj.h"
 
 
-static struct gl_sampler_object *
+struct gl_sampler_object *
 _mesa_lookup_samplerobj(struct gl_context *ctx, GLuint name)
 {
    if (name == 0)
@@ -206,9 +206,19 @@ _mesa_DeleteSamplers(GLsizei count, const GLuint *samplers)
 
    for (i = 0; i < count; i++) {
       if (samplers[i]) {
+         GLuint j;
          struct gl_sampler_object *sampObj =
             _mesa_lookup_samplerobj(ctx, samplers[i]);
+   
          if (sampObj) {
+            /* If the sampler is currently bound, unbind it. */
+            for (j = 0; j < ctx->Const.MaxCombinedTextureImageUnits; j++) {
+               if (ctx->Texture.Unit[j].Sampler == sampObj) {
+                  FLUSH_VERTICES(ctx, _NEW_TEXTURE);
+                  _mesa_reference_sampler_object(ctx, &ctx->Texture.Unit[j].Sampler, NULL);
+               }
+            }
+
             /* The ID is immediately freed for re-use */
             _mesa_HashRemove(ctx->Shared->SamplerObjects, samplers[i]);
             /* But the object exists until its reference count goes to zero */
diff --git a/src/mesa/main/samplerobj.h b/src/mesa/main/samplerobj.h
index 3114257..69e3899 100644
--- a/src/mesa/main/samplerobj.h
+++ b/src/mesa/main/samplerobj.h
@@ -62,6 +62,8 @@ _mesa_reference_sampler_object(struct gl_context *ctx,
       _mesa_reference_sampler_object_(ctx, ptr, samp);
 }
 
+extern struct gl_sampler_object *
+_mesa_lookup_samplerobj(struct gl_context *ctx, GLuint name);
 
 extern struct gl_sampler_object *
 _mesa_new_sampler_object(struct gl_context *ctx, GLuint name);
diff --git a/src/mesa/main/tests/hash_table/Makefile.am b/src/mesa/main/tests/hash_table/Makefile.am
index 272c63a..f63841d 100644
--- a/src/mesa/main/tests/hash_table/Makefile.am
+++ b/src/mesa/main/tests/hash_table/Makefile.am
@@ -19,6 +19,7 @@
 #  CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
 
 AM_CPPFLAGS = \
+	-I$(top_srcdir)/include \
 	-I$(top_srcdir)/src/mesa/main \
 	$(API_DEFINES) $(DEFINES) $(INCLUDE_DIRS)
 
diff --git a/src/mesa/main/texgetimage.c b/src/mesa/main/texgetimage.c
index 7299a4b..74b09ef 100644
--- a/src/mesa/main/texgetimage.c
+++ b/src/mesa/main/texgetimage.c
@@ -518,6 +518,7 @@ get_tex_rgba(struct gl_context *ctx, GLuint dimensions,
    if (type_needs_clamping(type)) {
       /* the returned image type can't have negative values */
       if (dataType == GL_FLOAT ||
+          dataType == GL_HALF_FLOAT ||
           dataType == GL_SIGNED_NORMALIZED ||
           format == GL_LUMINANCE ||
           format == GL_LUMINANCE_ALPHA) {
diff --git a/src/mesa/main/teximage.c b/src/mesa/main/teximage.c
index 1b9525b..1b91b89 100644
--- a/src/mesa/main/teximage.c
+++ b/src/mesa/main/teximage.c
@@ -1362,6 +1362,7 @@ _mesa_legal_texture_dimensions(struct gl_context *ctx, GLenum target,
          return GL_FALSE;
       return GL_TRUE;
 
+   case GL_TEXTURE_CUBE_MAP:
    case GL_TEXTURE_CUBE_MAP_POSITIVE_X:
    case GL_TEXTURE_CUBE_MAP_NEGATIVE_X:
    case GL_TEXTURE_CUBE_MAP_POSITIVE_Y:
@@ -3438,19 +3439,21 @@ copyteximage(struct gl_context *ctx, GLuint dims,
          _mesa_init_teximage_fields(ctx, texImage, width, height, 1,
                                     border, internalFormat, texFormat);
 
-         /* Allocate texture memory (no pixel data yet) */
-         ctx->Driver.AllocTextureImageBuffer(ctx, texImage);
+         if (width && height) {
+            /* Allocate texture memory (no pixel data yet) */
+            ctx->Driver.AllocTextureImageBuffer(ctx, texImage);
 
-         if (_mesa_clip_copytexsubimage(ctx, &dstX, &dstY, &srcX, &srcY,
-                                        &width, &height)) {
-            struct gl_renderbuffer *srcRb =
-               get_copy_tex_image_source(ctx, texImage->TexFormat);
+            if (_mesa_clip_copytexsubimage(ctx, &dstX, &dstY, &srcX, &srcY,
+                                           &width, &height)) {
+               struct gl_renderbuffer *srcRb =
+                  get_copy_tex_image_source(ctx, texImage->TexFormat);
 
-            ctx->Driver.CopyTexSubImage(ctx, dims, texImage, dstX, dstY, dstZ,
-                                        srcRb, srcX, srcY, width, height);
-         }
+               ctx->Driver.CopyTexSubImage(ctx, dims, texImage, dstX, dstY, dstZ,
+                                           srcRb, srcX, srcY, width, height);
+            }
 
-         check_gen_mipmap(ctx, target, texObj, level);
+            check_gen_mipmap(ctx, target, texObj, level);
+         }
 
          _mesa_update_fbo_texture(ctx, texObj, face, level);
 
diff --git a/src/mesa/main/texparam.c b/src/mesa/main/texparam.c
index 6f18ec6..dd67baa 100644
--- a/src/mesa/main/texparam.c
+++ b/src/mesa/main/texparam.c
@@ -1432,6 +1432,12 @@ _mesa_GetTexParameterfv( GLenum target, GLenum pname, GLfloat *params )
          *params = (GLfloat) obj->Immutable;
          break;
 
+      case GL_TEXTURE_IMMUTABLE_LEVELS:
+         if (!_mesa_is_gles3(ctx))
+            goto invalid_pname;
+         *params = (GLfloat) obj->ImmutableLevels;
+         break;
+
       case GL_REQUIRED_TEXTURE_IMAGE_UNITS_OES:
          if (!_mesa_is_gles(ctx) || !ctx->Extensions.OES_EGL_image_external)
             goto invalid_pname;
@@ -1609,6 +1615,12 @@ _mesa_GetTexParameteriv( GLenum target, GLenum pname, GLint *params )
          *params = (GLint) obj->Immutable;
          break;
 
+      case GL_TEXTURE_IMMUTABLE_LEVELS:
+         if (!_mesa_is_gles3(ctx))
+            goto invalid_pname;
+         *params = obj->ImmutableLevels;
+         break;
+
       case GL_REQUIRED_TEXTURE_IMAGE_UNITS_OES:
          if (!_mesa_is_gles(ctx) || !ctx->Extensions.OES_EGL_image_external)
             goto invalid_pname;
diff --git a/src/mesa/main/texstorage.c b/src/mesa/main/texstorage.c
index 00f19ba..675fd74 100644
--- a/src/mesa/main/texstorage.c
+++ b/src/mesa/main/texstorage.c
@@ -397,6 +397,7 @@ texstorage(GLuint dims, GLenum target, GLsizei levels, GLenum internalformat,
       }
 
       texObj->Immutable = GL_TRUE;
+      texObj->ImmutableLevels = levels;
    }
 }
 
diff --git a/src/mesa/vbo/vbo_save_draw.c b/src/mesa/vbo/vbo_save_draw.c
index efb386e..f5b5c41 100644
--- a/src/mesa/vbo/vbo_save_draw.c
+++ b/src/mesa/vbo/vbo_save_draw.c
@@ -253,7 +253,7 @@ vbo_save_playback_vertex_list(struct gl_context *ctx, void *data)
    struct vbo_save_context *save = &vbo_context(ctx)->save;
    GLboolean remap_vertex_store = GL_FALSE;
 
-   if (save->vertex_store->buffer) {
+   if (save->vertex_store && save->vertex_store->buffer) {
       /* The vertex store is currently mapped but we're about to replay
        * a display list.  This can happen when a nested display list is
        * being build with GL_COMPILE_AND_EXECUTE.