1975 lines
76 KiB
Diff
1975 lines
76 KiB
Diff
diff --git a/bin/get-pick-list.sh b/bin/get-pick-list.sh
|
||
index a141afe..d3ac511 100755
|
||
--- a/bin/get-pick-list.sh
|
||
+++ b/bin/get-pick-list.sh
|
||
@@ -8,7 +8,7 @@ git log --reverse --grep="cherry picked from commit" origin/master..HEAD |\
|
||
sed -e 's/^[[:space:]]*(cherry picked from commit[[:space:]]*//' -e 's/)//' > already_picked
|
||
|
||
# Grep for commits that were marked as a candidate for the stable tree.
|
||
-git log --reverse --pretty=%H -i --grep='^[[:space:]]*NOTE: This is a candidate' HEAD..origin/master |\
|
||
+git log --reverse --pretty=%H -i --grep='^[[:space:]]*NOTE: .*[Cc]andidate' HEAD..origin/master |\
|
||
while read sha
|
||
do
|
||
# Check to see whether the patch is on the ignore list.
|
||
diff --git a/common.py b/common.py
|
||
index 6ff9608..1d618e6 100644
|
||
--- a/common.py
|
||
+++ b/common.py
|
||
@@ -100,4 +100,4 @@ def AddOptions(opts):
|
||
opts.Add(BoolOption('quiet', 'DEPRECATED: profile build', 'yes'))
|
||
opts.Add(BoolOption('texture_float', 'enable floating-point textures and renderbuffers', 'no'))
|
||
if host_platform == 'windows':
|
||
- opts.Add(EnumOption('MSVS_VERSION', 'MS Visual C++ version', None, allowed_values=('7.1', '8.0', '9.0')))
|
||
+ opts.Add(EnumOption('MSVC_VERSION', 'MS Visual C++ version', None, allowed_values=('7.1', '8.0', '9.0', '10.0', '11.0')))
|
||
diff --git a/configure.ac b/configure.ac
|
||
index 5701f8a..d75cf65 100644
|
||
--- a/configure.ac
|
||
+++ b/configure.ac
|
||
@@ -1682,6 +1682,9 @@ if test "x$enable_gallium_llvm" = xyes; then
|
||
if $LLVM_CONFIG --components | grep -q '\<mcjit\>'; then
|
||
LLVM_COMPONENTS="${LLVM_COMPONENTS} mcjit"
|
||
fi
|
||
+ if $LLVM_CONFIG --components | grep -q '\<oprofilejit\>'; then
|
||
+ LLVM_COMPONENTS="${LLVM_COMPONENTS} oprofilejit"
|
||
+ fi
|
||
|
||
if test "x$enable_opencl" = xyes; then
|
||
LLVM_COMPONENTS="${LLVM_COMPONENTS} ipo linker instrumentation"
|
||
diff --git a/docs/index.html b/docs/index.html
|
||
index 5c92204..5d7229d 100644
|
||
--- a/docs/index.html
|
||
+++ b/docs/index.html
|
||
@@ -16,6 +16,23 @@
|
||
|
||
<h1>News</h1>
|
||
|
||
+<h2>February 22, 2013</h2>
|
||
+
|
||
+<p>
|
||
+<a href="relnotes-9.1.html">Mesa 9.1</a> is released.
|
||
+This is a new development release.
|
||
+See the release notes for more information about the release.
|
||
+</p>
|
||
+
|
||
+
|
||
+<h2>February 21, 2013</h2>
|
||
+
|
||
+<p>
|
||
+<a href="relnotes-9.0.3.html">Mesa 9.0.3</a> is released.
|
||
+This is a bug fix release.
|
||
+</p>
|
||
+
|
||
+
|
||
<h2>January 22, 2013</h2>
|
||
|
||
<p>
|
||
diff --git a/docs/relnotes-9.1.html b/docs/relnotes-9.1.html
|
||
index 24ba9f9..8232ab8 100644
|
||
--- a/docs/relnotes-9.1.html
|
||
+++ b/docs/relnotes-9.1.html
|
||
@@ -14,7 +14,7 @@
|
||
<iframe src="contents.html"></iframe>
|
||
<div class="content">
|
||
|
||
-<h1>Mesa 9.1 Release Notes / date February 22, 2013</h1>
|
||
+<h1>Mesa 9.1 Release Notes / February 22, 2013</h1>
|
||
|
||
<p>
|
||
Mesa 9.1 is a new development release.
|
||
@@ -33,7 +33,9 @@ because GL_ARB_compatibility is not supported.
|
||
|
||
<h2>MD5 checksums</h2>
|
||
<pre>
|
||
-tbd
|
||
+86d40f3056f89949368764bf84aff55e MesaLib-9.1.tar.gz
|
||
+d3891e02215422e120271d976ff1947e MesaLib-9.1.tar.bz2
|
||
+01645f28f53351c23b0beb6c688911d8 MesaLib-9.1.zip
|
||
</pre>
|
||
|
||
|
||
diff --git a/docs/relnotes.html b/docs/relnotes.html
|
||
index e373091..2e11bc4 100644
|
||
--- a/docs/relnotes.html
|
||
+++ b/docs/relnotes.html
|
||
@@ -22,6 +22,7 @@ The release notes summarize what's new or changed in each Mesa release.
|
||
|
||
<ul>
|
||
<li><a href="relnotes-9.1.html">9.1 release notes</a>
|
||
+<li><a href="relnotes-9.0.3.html">9.0.3 release notes</a>
|
||
<li><a href="relnotes-9.0.2.html">9.0.2 release notes</a>
|
||
<li><a href="relnotes-9.0.1.html">9.0.1 release notes</a>
|
||
<li><a href="relnotes-9.0.html">9.0 release notes</a>
|
||
diff --git a/include/pci_ids/i965_pci_ids.h b/include/pci_ids/i965_pci_ids.h
|
||
index 09dca5b..1e388f8 100644
|
||
--- a/include/pci_ids/i965_pci_ids.h
|
||
+++ b/include/pci_ids/i965_pci_ids.h
|
||
@@ -53,12 +53,12 @@ CHIPSET(0x0A26, HASWELL_ULT_M_GT2_PLUS, hsw_gt2)
|
||
CHIPSET(0x0A0A, HASWELL_ULT_S_GT1, hsw_gt1)
|
||
CHIPSET(0x0A1A, HASWELL_ULT_S_GT2, hsw_gt2)
|
||
CHIPSET(0x0A2A, HASWELL_ULT_S_GT2_PLUS, hsw_gt2)
|
||
-CHIPSET(0x0D12, HASWELL_CRW_GT1, hsw_gt1)
|
||
-CHIPSET(0x0D22, HASWELL_CRW_GT2, hsw_gt2)
|
||
-CHIPSET(0x0D32, HASWELL_CRW_GT2_PLUS, hsw_gt2)
|
||
-CHIPSET(0x0D16, HASWELL_CRW_M_GT1, hsw_gt1)
|
||
-CHIPSET(0x0D26, HASWELL_CRW_M_GT2, hsw_gt2)
|
||
-CHIPSET(0x0D36, HASWELL_CRW_M_GT2_PLUS, hsw_gt2)
|
||
-CHIPSET(0x0D1A, HASWELL_CRW_S_GT1, hsw_gt1)
|
||
-CHIPSET(0x0D2A, HASWELL_CRW_S_GT2, hsw_gt2)
|
||
-CHIPSET(0x0D3A, HASWELL_CRW_S_GT2_PLUS, hsw_gt2)
|
||
+CHIPSET(0x0D02, HASWELL_CRW_GT1, hsw_gt1)
|
||
+CHIPSET(0x0D12, HASWELL_CRW_GT2, hsw_gt2)
|
||
+CHIPSET(0x0D22, HASWELL_CRW_GT2_PLUS, hsw_gt2)
|
||
+CHIPSET(0x0D06, HASWELL_CRW_M_GT1, hsw_gt1)
|
||
+CHIPSET(0x0D16, HASWELL_CRW_M_GT2, hsw_gt2)
|
||
+CHIPSET(0x0D26, HASWELL_CRW_M_GT2_PLUS, hsw_gt2)
|
||
+CHIPSET(0x0D0A, HASWELL_CRW_S_GT1, hsw_gt1)
|
||
+CHIPSET(0x0D1A, HASWELL_CRW_S_GT2, hsw_gt2)
|
||
+CHIPSET(0x0D2A, HASWELL_CRW_S_GT2_PLUS, hsw_gt2)
|
||
diff --git a/include/pci_ids/r600_pci_ids.h b/include/pci_ids/r600_pci_ids.h
|
||
index 7ceb820..9c9bab2 100644
|
||
--- a/include/pci_ids/r600_pci_ids.h
|
||
+++ b/include/pci_ids/r600_pci_ids.h
|
||
@@ -298,6 +298,10 @@ CHIPSET(0x9907, ARUBA_9907, ARUBA)
|
||
CHIPSET(0x9908, ARUBA_9908, ARUBA)
|
||
CHIPSET(0x9909, ARUBA_9909, ARUBA)
|
||
CHIPSET(0x990A, ARUBA_990A, ARUBA)
|
||
+CHIPSET(0x990B, ARUBA_990B, ARUBA)
|
||
+CHIPSET(0x990C, ARUBA_990C, ARUBA)
|
||
+CHIPSET(0x990D, ARUBA_990D, ARUBA)
|
||
+CHIPSET(0x990E, ARUBA_990E, ARUBA)
|
||
CHIPSET(0x990F, ARUBA_990F, ARUBA)
|
||
CHIPSET(0x9910, ARUBA_9910, ARUBA)
|
||
CHIPSET(0x9913, ARUBA_9913, ARUBA)
|
||
@@ -309,6 +313,13 @@ CHIPSET(0x9991, ARUBA_9991, ARUBA)
|
||
CHIPSET(0x9992, ARUBA_9992, ARUBA)
|
||
CHIPSET(0x9993, ARUBA_9993, ARUBA)
|
||
CHIPSET(0x9994, ARUBA_9994, ARUBA)
|
||
+CHIPSET(0x9995, ARUBA_9995, ARUBA)
|
||
+CHIPSET(0x9996, ARUBA_9996, ARUBA)
|
||
+CHIPSET(0x9997, ARUBA_9997, ARUBA)
|
||
+CHIPSET(0x9998, ARUBA_9998, ARUBA)
|
||
+CHIPSET(0x9999, ARUBA_9999, ARUBA)
|
||
+CHIPSET(0x999A, ARUBA_999A, ARUBA)
|
||
+CHIPSET(0x999B, ARUBA_999B, ARUBA)
|
||
CHIPSET(0x99A0, ARUBA_99A0, ARUBA)
|
||
CHIPSET(0x99A2, ARUBA_99A2, ARUBA)
|
||
CHIPSET(0x99A4, ARUBA_99A4, ARUBA)
|
||
diff --git a/scons/gallium.py b/scons/gallium.py
|
||
index 4b51b6e..b28be5d 100755
|
||
--- a/scons/gallium.py
|
||
+++ b/scons/gallium.py
|
||
@@ -289,6 +289,7 @@ def generate(env):
|
||
'_CRT_SECURE_NO_DEPRECATE',
|
||
'_SCL_SECURE_NO_WARNINGS',
|
||
'_SCL_SECURE_NO_DEPRECATE',
|
||
+ '_ALLOW_KEYWORD_MACROS',
|
||
]
|
||
if env['build'] in ('debug', 'checked'):
|
||
cppdefines += ['_DEBUG']
|
||
@@ -401,6 +402,8 @@ def generate(env):
|
||
'/Oi', # enable intrinsic functions
|
||
]
|
||
else:
|
||
+ if distutils.version.LooseVersion(env['MSVC_VERSION']) < distutils.version.LooseVersion('11.0'):
|
||
+ print 'scons: warning: Visual Studio versions prior to 2012 are known to produce incorrect code when optimizations are enabled ( https://bugs.freedesktop.org/show_bug.cgi?id=58718 )'
|
||
ccflags += [
|
||
'/O2', # optimize for speed
|
||
]
|
||
diff --git a/scons/llvm.py b/scons/llvm.py
|
||
index e1ed760..7f00c6c 100644
|
||
--- a/scons/llvm.py
|
||
+++ b/scons/llvm.py
|
||
@@ -92,7 +92,19 @@ def generate(env):
|
||
'HAVE_STDINT_H',
|
||
])
|
||
env.Prepend(LIBPATH = [os.path.join(llvm_dir, 'lib')])
|
||
- if llvm_version >= distutils.version.LooseVersion('3.0'):
|
||
+ if llvm_version >= distutils.version.LooseVersion('3.2'):
|
||
+ # 3.2
|
||
+ env.Prepend(LIBS = [
|
||
+ 'LLVMBitWriter', 'LLVMX86Disassembler', 'LLVMX86AsmParser',
|
||
+ 'LLVMX86CodeGen', 'LLVMX86Desc', 'LLVMSelectionDAG',
|
||
+ 'LLVMAsmPrinter', 'LLVMMCParser', 'LLVMX86AsmPrinter',
|
||
+ 'LLVMX86Utils', 'LLVMX86Info', 'LLVMJIT',
|
||
+ 'LLVMExecutionEngine', 'LLVMCodeGen', 'LLVMScalarOpts',
|
||
+ 'LLVMInstCombine', 'LLVMTransformUtils', 'LLVMipa',
|
||
+ 'LLVMAnalysis', 'LLVMTarget', 'LLVMMC', 'LLVMCore',
|
||
+ 'LLVMSupport', 'LLVMRuntimeDyld', 'LLVMObject'
|
||
+ ])
|
||
+ elif llvm_version >= distutils.version.LooseVersion('3.0'):
|
||
# 3.0
|
||
env.Prepend(LIBS = [
|
||
'LLVMBitWriter', 'LLVMX86Disassembler', 'LLVMX86AsmParser',
|
||
diff --git a/src/egl/drivers/dri2/egl_dri2.c b/src/egl/drivers/dri2/egl_dri2.c
|
||
index 351fbf4..e17d5be 100644
|
||
--- a/src/egl/drivers/dri2/egl_dri2.c
|
||
+++ b/src/egl/drivers/dri2/egl_dri2.c
|
||
@@ -195,7 +195,14 @@ dri2_add_config(_EGLDisplay *disp, const __DRIconfig *dri_config, int id,
|
||
for (i = 0; attr_list[i] != EGL_NONE; i += 2)
|
||
_eglSetConfigKey(&base, attr_list[i], attr_list[i+1]);
|
||
|
||
- if (depth > 0 && depth != base.BufferSize)
|
||
+ /* Allow a 24-bit RGB visual to match a 32-bit RGBA EGLConfig. Otherwise
|
||
+ * it will only match a 32-bit RGBA visual. On a composited window manager
|
||
+ * on X11, this will make all of the EGLConfigs with destination alpha get
|
||
+ * blended by the compositor. This is probably not what the application
|
||
+ * wants... especially on drivers that only have 32-bit RGBA EGLConfigs!
|
||
+ */
|
||
+ if (depth > 0 && depth != base.BufferSize
|
||
+ && !(depth == 24 && base.BufferSize == 32))
|
||
return NULL;
|
||
|
||
if (rgba_masks && memcmp(rgba_masks, dri_masks, sizeof(dri_masks)))
|
||
diff --git a/src/gallium/auxiliary/draw/draw_pipe_clip.c b/src/gallium/auxiliary/draw/draw_pipe_clip.c
|
||
index 7b879c4..3110809 100644
|
||
--- a/src/gallium/auxiliary/draw/draw_pipe_clip.c
|
||
+++ b/src/gallium/auxiliary/draw/draw_pipe_clip.c
|
||
@@ -167,12 +167,17 @@ static void interp( const struct clip_stage *clip,
|
||
{
|
||
int k;
|
||
t_nopersp = t;
|
||
- for (k = 0; k < 2; k++)
|
||
+ /* find either in.x != out.x or in.y != out.y */
|
||
+ for (k = 0; k < 2; k++) {
|
||
if (in->clip[k] != out->clip[k]) {
|
||
- t_nopersp = (dst->clip[k] - out->clip[k]) /
|
||
- (in->clip[k] - out->clip[k]);
|
||
+ /* do divide by W, then compute linear interpolation factor */
|
||
+ float in_coord = in->clip[k] / in->clip[3];
|
||
+ float out_coord = out->clip[k] / out->clip[3];
|
||
+ float dst_coord = dst->clip[k] / dst->clip[3];
|
||
+ t_nopersp = (dst_coord - out_coord) / (in_coord - out_coord);
|
||
break;
|
||
}
|
||
+ }
|
||
}
|
||
|
||
/* Other attributes
|
||
diff --git a/src/gallium/auxiliary/draw/draw_pipe_offset.c b/src/gallium/auxiliary/draw/draw_pipe_offset.c
|
||
index 3da52b1..3578525 100644
|
||
--- a/src/gallium/auxiliary/draw/draw_pipe_offset.c
|
||
+++ b/src/gallium/auxiliary/draw/draw_pipe_offset.c
|
||
@@ -127,10 +127,44 @@ static void offset_first_tri( struct draw_stage *stage,
|
||
struct prim_header *header )
|
||
{
|
||
struct offset_stage *offset = offset_stage(stage);
|
||
+ const struct pipe_rasterizer_state *rast = stage->draw->rasterizer;
|
||
+ unsigned fill_mode = rast->fill_front;
|
||
+ boolean do_offset;
|
||
+
|
||
+ if (rast->fill_back != rast->fill_front) {
|
||
+ /* Need to check for back-facing triangle */
|
||
+ boolean ccw = header->det < 0.0f;
|
||
+ if (ccw != rast->front_ccw)
|
||
+ fill_mode = rast->fill_back;
|
||
+ }
|
||
+
|
||
+ /* Now determine if we need to do offsetting for the point/line/fill mode */
|
||
+ switch (fill_mode) {
|
||
+ case PIPE_POLYGON_MODE_FILL:
|
||
+ do_offset = rast->offset_tri;
|
||
+ break;
|
||
+ case PIPE_POLYGON_MODE_LINE:
|
||
+ do_offset = rast->offset_line;
|
||
+ break;
|
||
+ case PIPE_POLYGON_MODE_POINT:
|
||
+ do_offset = rast->offset_point;
|
||
+ break;
|
||
+ default:
|
||
+ assert(!"invalid fill_mode in offset_first_tri()");
|
||
+ do_offset = rast->offset_tri;
|
||
+ }
|
||
+
|
||
+ if (do_offset) {
|
||
+ offset->scale = rast->offset_scale;
|
||
+ offset->clamp = rast->offset_clamp;
|
||
+ offset->units = (float) (rast->offset_units * stage->draw->mrd);
|
||
+ }
|
||
+ else {
|
||
+ offset->scale = 0.0f;
|
||
+ offset->clamp = 0.0f;
|
||
+ offset->units = 0.0f;
|
||
+ }
|
||
|
||
- offset->units = (float) (stage->draw->rasterizer->offset_units * stage->draw->mrd);
|
||
- offset->scale = stage->draw->rasterizer->offset_scale;
|
||
- offset->clamp = stage->draw->rasterizer->offset_clamp;
|
||
|
||
stage->tri = offset_tri;
|
||
stage->tri( stage, header );
|
||
diff --git a/src/gallium/auxiliary/util/u_range.h b/src/gallium/auxiliary/util/u_range.h
|
||
new file mode 100644
|
||
index 0000000..4b1d0d1
|
||
--- /dev/null
|
||
+++ b/src/gallium/auxiliary/util/u_range.h
|
||
@@ -0,0 +1,89 @@
|
||
+/*
|
||
+ * Copyright 2013 Marek Olšák <maraeo@gmail.com>
|
||
+ *
|
||
+ * Permission is hereby granted, free of charge, to any person obtaining a
|
||
+ * copy of this software and associated documentation files (the "Software"),
|
||
+ * to deal in the Software without restriction, including without limitation
|
||
+ * on the rights to use, copy, modify, merge, publish, distribute, sub
|
||
+ * license, and/or sell copies of the Software, and to permit persons to whom
|
||
+ * the Software is furnished to do so, subject to the following conditions:
|
||
+ *
|
||
+ * The above copyright notice and this permission notice (including the next
|
||
+ * paragraph) shall be included in all copies or substantial portions of the
|
||
+ * Software.
|
||
+ *
|
||
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||
+ * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL
|
||
+ * THE AUTHOR(S) AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM,
|
||
+ * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
|
||
+ * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
|
||
+ * USE OR OTHER DEALINGS IN THE SOFTWARE. */
|
||
+
|
||
+/**
|
||
+ * @file
|
||
+ * 1D integer range, capable of the union and intersection operations.
|
||
+ *
|
||
+ * It only maintains a single interval which is extended when the union is
|
||
+ * done. This implementation is partially thread-safe (readers are not
|
||
+ * protected by a lock).
|
||
+ *
|
||
+ * @author Marek Olšák
|
||
+ */
|
||
+
|
||
+#ifndef U_RANGE_H
|
||
+#define U_RANGE_H
|
||
+
|
||
+#include "os/os_thread.h"
|
||
+
|
||
+struct util_range {
|
||
+ unsigned start; /* inclusive */
|
||
+ unsigned end; /* exclusive */
|
||
+
|
||
+ /* for the range to be consistent with multiple contexts: */
|
||
+ pipe_mutex write_mutex;
|
||
+};
|
||
+
|
||
+
|
||
+static INLINE void
|
||
+util_range_set_empty(struct util_range *range)
|
||
+{
|
||
+ range->start = ~0;
|
||
+ range->end = 0;
|
||
+}
|
||
+
|
||
+/* This is like a union of two sets. */
|
||
+static INLINE void
|
||
+util_range_add(struct util_range *range, unsigned start, unsigned end)
|
||
+{
|
||
+ if (start < range->start || end > range->end) {
|
||
+ pipe_mutex_lock(range->write_mutex);
|
||
+ range->start = MIN2(start, range->start);
|
||
+ range->end = MAX2(end, range->end);
|
||
+ pipe_mutex_unlock(range->write_mutex);
|
||
+ }
|
||
+}
|
||
+
|
||
+static INLINE boolean
|
||
+util_ranges_intersect(struct util_range *range, unsigned start, unsigned end)
|
||
+{
|
||
+ return MAX2(start, range->start) < MIN2(end, range->end);
|
||
+}
|
||
+
|
||
+
|
||
+/* Init/deinit */
|
||
+
|
||
+static INLINE void
|
||
+util_range_init(struct util_range *range)
|
||
+{
|
||
+ pipe_mutex_init(range->write_mutex);
|
||
+ util_range_set_empty(range);
|
||
+}
|
||
+
|
||
+static INLINE void
|
||
+util_range_destroy(struct util_range *range)
|
||
+{
|
||
+ pipe_mutex_destroy(range->write_mutex);
|
||
+}
|
||
+
|
||
+#endif
|
||
diff --git a/src/gallium/drivers/llvmpipe/lp_state_rasterizer.c b/src/gallium/drivers/llvmpipe/lp_state_rasterizer.c
|
||
index 40ccaf6..ca8df71 100644
|
||
--- a/src/gallium/drivers/llvmpipe/lp_state_rasterizer.c
|
||
+++ b/src/gallium/drivers/llvmpipe/lp_state_rasterizer.c
|
||
@@ -46,6 +46,10 @@ clear_flags(struct pipe_rasterizer_state *rast)
|
||
{
|
||
rast->light_twoside = 0;
|
||
rast->offset_tri = 0;
|
||
+ rast->offset_line = 0;
|
||
+ rast->offset_point = 0;
|
||
+ rast->offset_units = 0.0f;
|
||
+ rast->offset_scale = 0.0f;
|
||
}
|
||
|
||
|
||
@@ -74,6 +78,8 @@ llvmpipe_create_rasterizer_state(struct pipe_context *pipe,
|
||
*/
|
||
need_pipeline = (rast->fill_front != PIPE_POLYGON_MODE_FILL ||
|
||
rast->fill_back != PIPE_POLYGON_MODE_FILL ||
|
||
+ rast->offset_point ||
|
||
+ rast->offset_line ||
|
||
rast->point_smooth ||
|
||
rast->line_smooth ||
|
||
rast->line_stipple_enable ||
|
||
diff --git a/src/gallium/drivers/llvmpipe/lp_texture.c b/src/gallium/drivers/llvmpipe/lp_texture.c
|
||
index 2e9c6bf..f17a04a 100644
|
||
--- a/src/gallium/drivers/llvmpipe/lp_texture.c
|
||
+++ b/src/gallium/drivers/llvmpipe/lp_texture.c
|
||
@@ -295,7 +295,9 @@ llvmpipe_resource_create(struct pipe_screen *_screen,
|
||
/* assert(lpr->base.bind); */
|
||
|
||
if (resource_is_texture(&lpr->base)) {
|
||
- if (lpr->base.bind & PIPE_BIND_DISPLAY_TARGET) {
|
||
+ if (lpr->base.bind & (PIPE_BIND_DISPLAY_TARGET |
|
||
+ PIPE_BIND_SCANOUT |
|
||
+ PIPE_BIND_SHARED)) {
|
||
/* displayable surface */
|
||
if (!llvmpipe_displaytarget_layout(screen, lpr))
|
||
goto fail;
|
||
diff --git a/src/gallium/drivers/r600/evergreen_hw_context.c b/src/gallium/drivers/r600/evergreen_hw_context.c
|
||
index bb47530..bb43353 100644
|
||
--- a/src/gallium/drivers/r600/evergreen_hw_context.c
|
||
+++ b/src/gallium/drivers/r600/evergreen_hw_context.c
|
||
@@ -283,4 +283,7 @@ void evergreen_dma_copy(struct r600_context *rctx,
|
||
src_offset += csize << shift;
|
||
size -= csize;
|
||
}
|
||
+
|
||
+ util_range_add(&rdst->valid_buffer_range, dst_offset,
|
||
+ dst_offset + size);
|
||
}
|
||
diff --git a/src/gallium/drivers/r600/evergreen_state.c b/src/gallium/drivers/r600/evergreen_state.c
|
||
index 389ad3c..804c037 100644
|
||
--- a/src/gallium/drivers/r600/evergreen_state.c
|
||
+++ b/src/gallium/drivers/r600/evergreen_state.c
|
||
@@ -808,6 +808,7 @@ static void *evergreen_create_dsa_state(struct pipe_context *ctx,
|
||
dsa->valuemask[1] = state->stencil[1].valuemask;
|
||
dsa->writemask[0] = state->stencil[0].writemask;
|
||
dsa->writemask[1] = state->stencil[1].writemask;
|
||
+ dsa->zwritemask = state->depth.writemask;
|
||
|
||
db_depth_control = S_028800_Z_ENABLE(state->depth.enabled) |
|
||
S_028800_Z_WRITE_ENABLE(state->depth.writemask) |
|
||
@@ -1321,6 +1322,10 @@ void evergreen_init_color_surface_rat(struct r600_context *rctx,
|
||
* elements. */
|
||
surf->cb_color_dim = pipe_buffer->width0;
|
||
|
||
+ /* Set the buffer range the GPU will have access to: */
|
||
+ util_range_add(&r600_resource(pipe_buffer)->valid_buffer_range,
|
||
+ 0, pipe_buffer->width0);
|
||
+
|
||
surf->cb_color_cmask = surf->cb_color_base;
|
||
surf->cb_color_cmask_slice = 0;
|
||
surf->cb_color_fmask = surf->cb_color_base;
|
||
@@ -1405,10 +1410,15 @@ void evergreen_init_color_surface(struct r600_context *rctx,
|
||
S_028C74_NON_DISP_TILING_ORDER(non_disp_tiling) |
|
||
S_028C74_FMASK_BANK_HEIGHT(fmask_bankh);
|
||
|
||
- if (rctx->chip_class == CAYMAN && rtex->resource.b.b.nr_samples > 1) {
|
||
- unsigned log_samples = util_logbase2(rtex->resource.b.b.nr_samples);
|
||
- color_attrib |= S_028C74_NUM_SAMPLES(log_samples) |
|
||
- S_028C74_NUM_FRAGMENTS(log_samples);
|
||
+ if (rctx->chip_class == CAYMAN) {
|
||
+ color_attrib |= S_028C74_FORCE_DST_ALPHA_1(desc->swizzle[3] ==
|
||
+ UTIL_FORMAT_SWIZZLE_1);
|
||
+
|
||
+ if (rtex->resource.b.b.nr_samples > 1) {
|
||
+ unsigned log_samples = util_logbase2(rtex->resource.b.b.nr_samples);
|
||
+ color_attrib |= S_028C74_NUM_SAMPLES(log_samples) |
|
||
+ S_028C74_NUM_FRAGMENTS(log_samples);
|
||
+ }
|
||
}
|
||
|
||
ntype = V_028C70_NUMBER_UNORM;
|
||
@@ -1647,6 +1657,11 @@ static void evergreen_set_framebuffer_state(struct pipe_context *ctx,
|
||
}
|
||
if (rctx->framebuffer.state.zsbuf) {
|
||
rctx->flags |= R600_CONTEXT_WAIT_3D_IDLE | R600_CONTEXT_FLUSH_AND_INV;
|
||
+
|
||
+ rtex = (struct r600_texture*)rctx->framebuffer.state.zsbuf->texture;
|
||
+ if (rtex->htile) {
|
||
+ rctx->flags |= R600_CONTEXT_FLUSH_AND_INV_DB_META;
|
||
+ }
|
||
}
|
||
|
||
util_copy_framebuffer_state(&rctx->framebuffer.state, state);
|
||
@@ -2222,7 +2237,14 @@ static void evergreen_emit_db_misc_state(struct r600_context *rctx, struct r600_
|
||
}
|
||
db_render_override |= S_02800C_NOOP_CULL_DISABLE(1);
|
||
}
|
||
- if (rctx->db_state.rsurf && rctx->db_state.rsurf->htile_enabled) {
|
||
+ /* FIXME we should be able to use hyperz even if we are not writing to
|
||
+ * zbuffer but somehow this triggers a GPU lockup. See:
|
||
+ *
|
||
+ * https://bugs.freedesktop.org/show_bug.cgi?id=60848
|
||
+ *
|
||
+ * Disable hyperz for now if not writing to zbuffer.
|
||
+ */
|
||
+ if (rctx->db_state.rsurf && rctx->db_state.rsurf->htile_enabled && rctx->zwritemask) {
|
||
/* FORCE_OFF means HiZ/HiS are determined by DB_SHADER_CONTROL */
|
||
db_render_override |= S_02800C_FORCE_HIZ_ENABLE(V_02800C_FORCE_OFF);
|
||
/* This is to fix a lockup when hyperz and alpha test are enabled at
|
||
@@ -3654,6 +3676,17 @@ boolean evergreen_dma_blit(struct pipe_context *ctx,
|
||
return FALSE;
|
||
}
|
||
|
||
+ /* 128 bpp surfaces require non_disp_tiling for both
|
||
+ * tiled and linear buffers on cayman. However, async
|
||
+ * DMA only supports it on the tiled side. As such
|
||
+ * the tile order is backwards after a L2T/T2L packet.
|
||
+ */
|
||
+ if ((rctx->chip_class == CAYMAN) &&
|
||
+ (src_mode != dst_mode) &&
|
||
+ (util_format_get_blocksize(src->format) >= 16)) {
|
||
+ return FALSE;
|
||
+ }
|
||
+
|
||
if (src_mode == dst_mode) {
|
||
uint64_t dst_offset, src_offset;
|
||
/* simple dma blit would do NOTE code here assume :
|
||
diff --git a/src/gallium/drivers/r600/r600.h b/src/gallium/drivers/r600/r600.h
|
||
index 11dbb3b..0115293 100644
|
||
--- a/src/gallium/drivers/r600/r600.h
|
||
+++ b/src/gallium/drivers/r600/r600.h
|
||
@@ -28,6 +28,7 @@
|
||
|
||
#include "../../winsys/radeon/drm/radeon_winsys.h"
|
||
#include "util/u_double_list.h"
|
||
+#include "util/u_range.h"
|
||
#include "util/u_transfer.h"
|
||
|
||
#define R600_ERR(fmt, args...) \
|
||
@@ -50,6 +51,16 @@ struct r600_resource {
|
||
|
||
/* Resource state. */
|
||
unsigned domains;
|
||
+
|
||
+ /* The buffer range which is initialized (with a write transfer,
|
||
+ * streamout, DMA, or as a random access target). The rest of
|
||
+ * the buffer is considered invalid and can be mapped unsynchronized.
|
||
+ *
|
||
+ * This allows unsynchronized mapping of a buffer range which hasn't
|
||
+ * been used yet. It's for applications which forget to use
|
||
+ * the unsynchronized map flag and expect the driver to figure it out.
|
||
+ */
|
||
+ struct util_range valid_buffer_range;
|
||
};
|
||
|
||
#define R600_BLOCK_MAX_BO 32
|
||
@@ -152,6 +163,7 @@ struct r600_so_target {
|
||
#define R600_CONTEXT_FLUSH_AND_INV (1 << 4)
|
||
#define R600_CONTEXT_FLUSH_AND_INV_CB_META (1 << 5)
|
||
#define R600_CONTEXT_PS_PARTIAL_FLUSH (1 << 6)
|
||
+#define R600_CONTEXT_FLUSH_AND_INV_DB_META (1 << 7)
|
||
|
||
struct r600_context;
|
||
struct r600_screen;
|
||
diff --git a/src/gallium/drivers/r600/r600_asm.c b/src/gallium/drivers/r600/r600_asm.c
|
||
index f25c6aa..bda425c 100644
|
||
--- a/src/gallium/drivers/r600/r600_asm.c
|
||
+++ b/src/gallium/drivers/r600/r600_asm.c
|
||
@@ -322,6 +322,7 @@ int r600_bytecode_add_output(struct r600_bytecode *bc, const struct r600_bytecod
|
||
output->swizzle_y == bc->cf_last->output.swizzle_y &&
|
||
output->swizzle_z == bc->cf_last->output.swizzle_z &&
|
||
output->swizzle_w == bc->cf_last->output.swizzle_w &&
|
||
+ output->comp_mask == bc->cf_last->output.comp_mask &&
|
||
(output->burst_count + bc->cf_last->output.burst_count) <= 16) {
|
||
|
||
if ((output->gpr + output->burst_count) == bc->cf_last->output.gpr &&
|
||
@@ -873,12 +874,6 @@ static int check_and_set_bank_swizzle(struct r600_bytecode *bc,
|
||
bank_swizzle[4] = SQ_ALU_SCL_210;
|
||
while(bank_swizzle[4] <= SQ_ALU_SCL_221) {
|
||
|
||
- if (max_slots == 4) {
|
||
- for (i = 0; i < max_slots; i++) {
|
||
- if (bank_swizzle[i] == SQ_ALU_VEC_210)
|
||
- return -1;
|
||
- }
|
||
- }
|
||
init_bank_swizzle(&bs);
|
||
if (scalar_only == false) {
|
||
for (i = 0; i < 4; i++) {
|
||
@@ -910,8 +905,10 @@ static int check_and_set_bank_swizzle(struct r600_bytecode *bc,
|
||
bank_swizzle[i]++;
|
||
if (bank_swizzle[i] <= SQ_ALU_VEC_210)
|
||
break;
|
||
- else
|
||
+ else if (i < max_slots - 1)
|
||
bank_swizzle[i] = SQ_ALU_VEC_012;
|
||
+ else
|
||
+ return -1;
|
||
}
|
||
}
|
||
}
|
||
diff --git a/src/gallium/drivers/r600/r600_buffer.c b/src/gallium/drivers/r600/r600_buffer.c
|
||
index 6df0d91..bb85fc1 100644
|
||
--- a/src/gallium/drivers/r600/r600_buffer.c
|
||
+++ b/src/gallium/drivers/r600/r600_buffer.c
|
||
@@ -34,6 +34,7 @@ static void r600_buffer_destroy(struct pipe_screen *screen,
|
||
{
|
||
struct r600_resource *rbuffer = r600_resource(buf);
|
||
|
||
+ util_range_destroy(&rbuffer->valid_buffer_range);
|
||
pb_reference(&rbuffer->buf, NULL);
|
||
FREE(rbuffer);
|
||
}
|
||
@@ -98,6 +99,14 @@ static void *r600_buffer_transfer_map(struct pipe_context *ctx,
|
||
|
||
assert(box->x + box->width <= resource->width0);
|
||
|
||
+ /* See if the buffer range being mapped has never been initialized,
|
||
+ * in which case it can be mapped unsynchronized. */
|
||
+ if (!(usage & PIPE_TRANSFER_UNSYNCHRONIZED) &&
|
||
+ usage & PIPE_TRANSFER_WRITE &&
|
||
+ !util_ranges_intersect(&rbuffer->valid_buffer_range, box->x, box->x + box->width)) {
|
||
+ usage |= PIPE_TRANSFER_UNSYNCHRONIZED;
|
||
+ }
|
||
+
|
||
if (usage & PIPE_TRANSFER_DISCARD_WHOLE_RESOURCE &&
|
||
!(usage & PIPE_TRANSFER_UNSYNCHRONIZED)) {
|
||
assert(usage & PIPE_TRANSFER_WRITE);
|
||
@@ -178,6 +187,7 @@ static void r600_buffer_transfer_unmap(struct pipe_context *pipe,
|
||
{
|
||
struct r600_context *rctx = (struct r600_context*)pipe;
|
||
struct r600_transfer *rtransfer = (struct r600_transfer*)transfer;
|
||
+ struct r600_resource *rbuffer = r600_resource(transfer->resource);
|
||
|
||
if (rtransfer->staging) {
|
||
struct pipe_resource *dst, *src;
|
||
@@ -189,7 +199,7 @@ static void r600_buffer_transfer_unmap(struct pipe_context *pipe,
|
||
doffset = transfer->box.x;
|
||
soffset = rtransfer->offset + transfer->box.x % R600_MAP_BUFFER_ALIGNMENT;
|
||
/* Copy the staging buffer into the original one. */
|
||
- if (rctx->rings.dma.cs && !(size % 4) && !(doffset % 4) && !(soffset)) {
|
||
+ if (rctx->rings.dma.cs && !(size % 4) && !(doffset % 4) && !(soffset % 4)) {
|
||
if (rctx->screen->chip_class >= EVERGREEN) {
|
||
evergreen_dma_copy(rctx, dst, src, doffset, soffset, size);
|
||
} else {
|
||
@@ -203,6 +213,11 @@ static void r600_buffer_transfer_unmap(struct pipe_context *pipe,
|
||
}
|
||
pipe_resource_reference((struct pipe_resource**)&rtransfer->staging, NULL);
|
||
}
|
||
+
|
||
+ if (transfer->usage & PIPE_TRANSFER_WRITE) {
|
||
+ util_range_add(&rbuffer->valid_buffer_range, transfer->box.x,
|
||
+ transfer->box.x + transfer->box.width);
|
||
+ }
|
||
util_slab_free(&rctx->pool_transfers, transfer);
|
||
}
|
||
|
||
@@ -259,6 +274,7 @@ bool r600_init_resource(struct r600_screen *rscreen,
|
||
|
||
res->cs_buf = rscreen->ws->buffer_get_cs_handle(res->buf);
|
||
res->domains = domains;
|
||
+ util_range_set_empty(&res->valid_buffer_range);
|
||
return true;
|
||
}
|
||
|
||
@@ -275,6 +291,7 @@ struct pipe_resource *r600_buffer_create(struct pipe_screen *screen,
|
||
pipe_reference_init(&rbuffer->b.b.reference, 1);
|
||
rbuffer->b.b.screen = screen;
|
||
rbuffer->b.vtbl = &r600_buffer_vtbl;
|
||
+ util_range_init(&rbuffer->valid_buffer_range);
|
||
|
||
if (!r600_init_resource(rscreen, rbuffer, templ->width0, alignment, TRUE, templ->usage)) {
|
||
FREE(rbuffer);
|
||
diff --git a/src/gallium/drivers/r600/r600_hw_context.c b/src/gallium/drivers/r600/r600_hw_context.c
|
||
index 9091ec0..322381a 100644
|
||
--- a/src/gallium/drivers/r600/r600_hw_context.c
|
||
+++ b/src/gallium/drivers/r600/r600_hw_context.c
|
||
@@ -648,6 +648,12 @@ void r600_flush_emit(struct r600_context *rctx)
|
||
cs->buf[cs->cdw++] = EVENT_TYPE(EVENT_TYPE_FLUSH_AND_INV_CB_META) | EVENT_INDEX(0);
|
||
}
|
||
|
||
+ if (rctx->chip_class >= R700 &&
|
||
+ (rctx->flags & R600_CONTEXT_FLUSH_AND_INV_DB_META)) {
|
||
+ cs->buf[cs->cdw++] = PKT3(PKT3_EVENT_WRITE, 0, 0);
|
||
+ cs->buf[cs->cdw++] = EVENT_TYPE(EVENT_TYPE_FLUSH_AND_INV_DB_META) | EVENT_INDEX(0);
|
||
+ }
|
||
+
|
||
if (rctx->flags & R600_CONTEXT_FLUSH_AND_INV) {
|
||
cs->buf[cs->cdw++] = PKT3(PKT3_EVENT_WRITE, 0, 0);
|
||
cs->buf[cs->cdw++] = EVENT_TYPE(EVENT_TYPE_CACHE_FLUSH_AND_INV_EVENT) | EVENT_INDEX(0);
|
||
@@ -742,6 +748,7 @@ void r600_context_flush(struct r600_context *ctx, unsigned flags)
|
||
*/
|
||
ctx->flags |= R600_CONTEXT_FLUSH_AND_INV |
|
||
R600_CONTEXT_FLUSH_AND_INV_CB_META |
|
||
+ R600_CONTEXT_FLUSH_AND_INV_DB_META |
|
||
R600_CONTEXT_WAIT_3D_IDLE |
|
||
R600_CONTEXT_WAIT_CP_DMA_IDLE;
|
||
|
||
@@ -1119,6 +1126,7 @@ void r600_cp_dma_copy_buffer(struct r600_context *rctx,
|
||
rctx->flags |= R600_CONTEXT_INVAL_READ_CACHES |
|
||
R600_CONTEXT_FLUSH_AND_INV |
|
||
R600_CONTEXT_FLUSH_AND_INV_CB_META |
|
||
+ R600_CONTEXT_FLUSH_AND_INV_DB_META |
|
||
R600_CONTEXT_STREAMOUT_FLUSH |
|
||
R600_CONTEXT_WAIT_3D_IDLE;
|
||
|
||
@@ -1164,6 +1172,9 @@ void r600_cp_dma_copy_buffer(struct r600_context *rctx,
|
||
|
||
/* Invalidate the read caches. */
|
||
rctx->flags |= R600_CONTEXT_INVAL_READ_CACHES;
|
||
+
|
||
+ util_range_add(&r600_resource(dst)->valid_buffer_range, dst_offset,
|
||
+ dst_offset + size);
|
||
}
|
||
|
||
void r600_need_dma_space(struct r600_context *ctx, unsigned num_dw)
|
||
@@ -1210,4 +1221,7 @@ void r600_dma_copy(struct r600_context *rctx,
|
||
src_offset += csize << shift;
|
||
size -= csize;
|
||
}
|
||
+
|
||
+ util_range_add(&rdst->valid_buffer_range, dst_offset,
|
||
+ dst_offset + size);
|
||
}
|
||
diff --git a/src/gallium/drivers/r600/r600_hw_context_priv.h b/src/gallium/drivers/r600/r600_hw_context_priv.h
|
||
index 692e6ec..3b50f68 100644
|
||
--- a/src/gallium/drivers/r600/r600_hw_context_priv.h
|
||
+++ b/src/gallium/drivers/r600/r600_hw_context_priv.h
|
||
@@ -29,7 +29,7 @@
|
||
#include "r600_pipe.h"
|
||
|
||
/* the number of CS dwords for flushing and drawing */
|
||
-#define R600_MAX_FLUSH_CS_DWORDS 12
|
||
+#define R600_MAX_FLUSH_CS_DWORDS 16
|
||
#define R600_MAX_DRAW_CS_DWORDS 34
|
||
#define R600_TRACE_CS_DWORDS 7
|
||
|
||
diff --git a/src/gallium/drivers/r600/r600_llvm.c b/src/gallium/drivers/r600/r600_llvm.c
|
||
index fa66fcc..7a41688 100644
|
||
--- a/src/gallium/drivers/r600/r600_llvm.c
|
||
+++ b/src/gallium/drivers/r600/r600_llvm.c
|
||
@@ -38,8 +38,12 @@ static LLVMValueRef llvm_fetch_const(
|
||
LLVMValueRef index = LLVMBuildLoad(bld_base->base.gallivm->builder, bld->addr[reg->Indirect.Index][reg->Indirect.SwizzleX], "");
|
||
offset[1] = LLVMBuildAdd(bld_base->base.gallivm->builder, offset[1], index, "");
|
||
}
|
||
+ unsigned ConstantAddressSpace = CONSTANT_BUFFER_0_ADDR_SPACE ;
|
||
+ if (reg->Register.Dimension) {
|
||
+ ConstantAddressSpace += reg->Dimension.Index;
|
||
+ }
|
||
LLVMTypeRef const_ptr_type = LLVMPointerType(LLVMArrayType(LLVMVectorType(bld_base->base.elem_type, 4), 1024),
|
||
- CONSTANT_BUFFER_0_ADDR_SPACE);
|
||
+ ConstantAddressSpace);
|
||
LLVMValueRef const_ptr = LLVMBuildIntToPtr(bld_base->base.gallivm->builder, lp_build_const_int32(bld_base->base.gallivm, 0), const_ptr_type, "");
|
||
LLVMValueRef ptr = LLVMBuildGEP(bld_base->base.gallivm->builder, const_ptr, offset, 2, "");
|
||
LLVMValueRef cvecval = LLVMBuildLoad(bld_base->base.gallivm->builder, ptr, "");
|
||
diff --git a/src/gallium/drivers/r600/r600_pipe.c b/src/gallium/drivers/r600/r600_pipe.c
|
||
index a59578d..a7973a5 100644
|
||
--- a/src/gallium/drivers/r600/r600_pipe.c
|
||
+++ b/src/gallium/drivers/r600/r600_pipe.c
|
||
@@ -22,6 +22,7 @@
|
||
*/
|
||
#include "r600_pipe.h"
|
||
#include "r600_public.h"
|
||
+#include "r600d.h"
|
||
|
||
#include <errno.h>
|
||
#include "pipe/p_shader_tokens.h"
|
||
@@ -165,12 +166,23 @@ static void r600_flush_gfx_ring(void *ctx, unsigned flags)
|
||
static void r600_flush_dma_ring(void *ctx, unsigned flags)
|
||
{
|
||
struct r600_context *rctx = (struct r600_context *)ctx;
|
||
+ struct radeon_winsys_cs *cs = rctx->rings.dma.cs;
|
||
+ unsigned padding_dw, i;
|
||
|
||
- if (!rctx->rings.dma.cs->cdw) {
|
||
+ if (!cs->cdw) {
|
||
return;
|
||
}
|
||
+
|
||
+ /* Pad the DMA CS to a multiple of 8 dwords. */
|
||
+ padding_dw = 8 - cs->cdw % 8;
|
||
+ if (padding_dw < 8) {
|
||
+ for (i = 0; i < padding_dw; i++) {
|
||
+ cs->buf[cs->cdw++] = DMA_PACKET(DMA_PACKET_NOP, 0, 0, 0);
|
||
+ }
|
||
+ }
|
||
+
|
||
rctx->rings.dma.flushing = true;
|
||
- rctx->ws->cs_flush(rctx->rings.dma.cs, flags);
|
||
+ rctx->ws->cs_flush(cs, flags);
|
||
rctx->rings.dma.flushing = false;
|
||
}
|
||
|
||
diff --git a/src/gallium/drivers/r600/r600_pipe.h b/src/gallium/drivers/r600/r600_pipe.h
|
||
index ec59c92..1be4321 100644
|
||
--- a/src/gallium/drivers/r600/r600_pipe.h
|
||
+++ b/src/gallium/drivers/r600/r600_pipe.h
|
||
@@ -298,7 +298,8 @@ struct r600_dsa_state {
|
||
unsigned alpha_ref;
|
||
ubyte valuemask[2];
|
||
ubyte writemask[2];
|
||
- unsigned sx_alpha_test_control;
|
||
+ unsigned zwritemask;
|
||
+ unsigned sx_alpha_test_control;
|
||
};
|
||
|
||
struct r600_pipe_shader;
|
||
@@ -513,6 +514,7 @@ struct r600_context {
|
||
bool alpha_to_one;
|
||
bool force_blend_disable;
|
||
boolean dual_src_blend;
|
||
+ unsigned zwritemask;
|
||
|
||
/* Index buffer. */
|
||
struct pipe_index_buffer index_buffer;
|
||
diff --git a/src/gallium/drivers/r600/r600_state.c b/src/gallium/drivers/r600/r600_state.c
|
||
index 3f165f7..70232fd 100644
|
||
--- a/src/gallium/drivers/r600/r600_state.c
|
||
+++ b/src/gallium/drivers/r600/r600_state.c
|
||
@@ -802,6 +802,7 @@ static void *r600_create_dsa_state(struct pipe_context *ctx,
|
||
dsa->valuemask[1] = state->stencil[1].valuemask;
|
||
dsa->writemask[0] = state->stencil[0].writemask;
|
||
dsa->writemask[1] = state->stencil[1].writemask;
|
||
+ dsa->zwritemask = state->depth.writemask;
|
||
|
||
db_depth_control = S_028800_Z_ENABLE(state->depth.enabled) |
|
||
S_028800_Z_WRITE_ENABLE(state->depth.writemask) |
|
||
@@ -1515,6 +1516,11 @@ static void r600_set_framebuffer_state(struct pipe_context *ctx,
|
||
}
|
||
if (rctx->framebuffer.state.zsbuf) {
|
||
rctx->flags |= R600_CONTEXT_WAIT_3D_IDLE | R600_CONTEXT_FLUSH_AND_INV;
|
||
+
|
||
+ rtex = (struct r600_texture*)rctx->framebuffer.state.zsbuf->texture;
|
||
+ if (rctx->chip_class >= R700 && rtex->htile) {
|
||
+ rctx->flags |= R600_CONTEXT_FLUSH_AND_INV_DB_META;
|
||
+ }
|
||
}
|
||
|
||
/* Set the new state. */
|
||
diff --git a/src/gallium/drivers/r600/r600_state_common.c b/src/gallium/drivers/r600/r600_state_common.c
|
||
index 88bb62b..f0e9de3 100644
|
||
--- a/src/gallium/drivers/r600/r600_state_common.c
|
||
+++ b/src/gallium/drivers/r600/r600_state_common.c
|
||
@@ -284,6 +284,16 @@ static void r600_bind_dsa_state(struct pipe_context *ctx, void *state)
|
||
ref.valuemask[1] = dsa->valuemask[1];
|
||
ref.writemask[0] = dsa->writemask[0];
|
||
ref.writemask[1] = dsa->writemask[1];
|
||
+ if (rctx->zwritemask != dsa->zwritemask) {
|
||
+ rctx->zwritemask = dsa->zwritemask;
|
||
+ if (rctx->chip_class >= EVERGREEN) {
|
||
+ /* work around some issue when not writing to zbuffer
|
||
+ * we are having lockup on evergreen so do not enable
|
||
+ * hyperz when not writing zbuffer
|
||
+ */
|
||
+ rctx->db_misc_state.atom.dirty = true;
|
||
+ }
|
||
+ }
|
||
|
||
r600_set_stencil_ref(ctx, &ref);
|
||
|
||
@@ -972,6 +982,7 @@ r600_create_so_target(struct pipe_context *ctx,
|
||
{
|
||
struct r600_context *rctx = (struct r600_context *)ctx;
|
||
struct r600_so_target *t;
|
||
+ struct r600_resource *rbuffer = (struct r600_resource*)buffer;
|
||
|
||
t = CALLOC_STRUCT(r600_so_target);
|
||
if (!t) {
|
||
@@ -991,6 +1002,9 @@ r600_create_so_target(struct pipe_context *ctx,
|
||
pipe_resource_reference(&t->b.buffer, buffer);
|
||
t->b.buffer_offset = buffer_offset;
|
||
t->b.buffer_size = buffer_size;
|
||
+
|
||
+ util_range_add(&rbuffer->valid_buffer_range, buffer_offset,
|
||
+ buffer_offset + buffer_size);
|
||
return &t->b;
|
||
}
|
||
|
||
diff --git a/src/gallium/drivers/r600/r600d.h b/src/gallium/drivers/r600/r600d.h
|
||
index 621e7a1..81e5a6c 100644
|
||
--- a/src/gallium/drivers/r600/r600d.h
|
||
+++ b/src/gallium/drivers/r600/r600d.h
|
||
@@ -119,6 +119,7 @@
|
||
#define EVENT_TYPE_CACHE_FLUSH_AND_INV_EVENT 0x16
|
||
#define EVENT_TYPE_SO_VGTSTREAMOUT_FLUSH 0x1f
|
||
#define EVENT_TYPE_SAMPLE_STREAMOUTSTATS 0x20
|
||
+#define EVENT_TYPE_FLUSH_AND_INV_DB_META 0x2c /* supported on r700+ */
|
||
#define EVENT_TYPE_FLUSH_AND_INV_CB_META 46 /* supported on r700+ */
|
||
#define EVENT_TYPE(x) ((x) << 0)
|
||
#define EVENT_INDEX(x) ((x) << 8)
|
||
diff --git a/src/gallium/drivers/radeon/radeon_setup_tgsi_llvm.c b/src/gallium/drivers/radeon/radeon_setup_tgsi_llvm.c
|
||
index 0f90991..8902ae4 100644
|
||
--- a/src/gallium/drivers/radeon/radeon_setup_tgsi_llvm.c
|
||
+++ b/src/gallium/drivers/radeon/radeon_setup_tgsi_llvm.c
|
||
@@ -766,6 +766,22 @@ static void emit_icmp(
|
||
emit_data->output[emit_data->chan] = v;
|
||
}
|
||
|
||
+static void emit_ucmp(
|
||
+ const struct lp_build_tgsi_action * action,
|
||
+ struct lp_build_tgsi_context * bld_base,
|
||
+ struct lp_build_emit_data * emit_data)
|
||
+{
|
||
+ unsigned pred;
|
||
+ LLVMBuilderRef builder = bld_base->base.gallivm->builder;
|
||
+ LLVMContextRef context = bld_base->base.gallivm->context;
|
||
+
|
||
+
|
||
+ LLVMValueRef v = LLVMBuildFCmp(builder, LLVMRealUGE,
|
||
+ emit_data->args[0], lp_build_const_float(bld_base->base.gallivm, 0.), "");
|
||
+
|
||
+ emit_data->output[emit_data->chan] = LLVMBuildSelect(builder, v, emit_data->args[2], emit_data->args[1], "");
|
||
+}
|
||
+
|
||
static void emit_cmp(
|
||
const struct lp_build_tgsi_action *action,
|
||
struct lp_build_tgsi_context * bld_base,
|
||
@@ -1241,6 +1257,7 @@ void radeon_llvm_context_init(struct radeon_llvm_context * ctx)
|
||
bld_base->op_actions[TGSI_OPCODE_USNE].emit = emit_icmp;
|
||
bld_base->op_actions[TGSI_OPCODE_U2F].emit = emit_u2f;
|
||
bld_base->op_actions[TGSI_OPCODE_XOR].emit = emit_xor;
|
||
+ bld_base->op_actions[TGSI_OPCODE_UCMP].emit = emit_ucmp;
|
||
|
||
bld_base->rsq_action.emit = build_tgsi_intrinsic_nomem;
|
||
bld_base->rsq_action.intr_name = "llvm.AMDGPU.rsq";
|
||
diff --git a/src/gallium/drivers/radeonsi/radeonsi_shader.c b/src/gallium/drivers/radeonsi/radeonsi_shader.c
|
||
index 2545634..7922928 100644
|
||
--- a/src/gallium/drivers/radeonsi/radeonsi_shader.c
|
||
+++ b/src/gallium/drivers/radeonsi/radeonsi_shader.c
|
||
@@ -309,14 +309,8 @@ static void declare_input_fs(
|
||
/* XXX: Handle all possible interpolation modes */
|
||
switch (decl->Interp.Interpolate) {
|
||
case TGSI_INTERPOLATE_COLOR:
|
||
- /* XXX: Flat shading hangs the GPU */
|
||
- if (si_shader_ctx->rctx->queued.named.rasterizer &&
|
||
- si_shader_ctx->rctx->queued.named.rasterizer->flatshade) {
|
||
-#if 0
|
||
+ if (si_shader_ctx->key.flatshade) {
|
||
intr_name = "llvm.SI.fs.interp.constant";
|
||
-#else
|
||
- intr_name = "llvm.SI.fs.interp.linear.center";
|
||
-#endif
|
||
} else {
|
||
if (decl->Interp.Centroid)
|
||
intr_name = "llvm.SI.fs.interp.persp.centroid";
|
||
@@ -325,11 +319,8 @@ static void declare_input_fs(
|
||
}
|
||
break;
|
||
case TGSI_INTERPOLATE_CONSTANT:
|
||
- /* XXX: Flat shading hangs the GPU */
|
||
-#if 0
|
||
intr_name = "llvm.SI.fs.interp.constant";
|
||
break;
|
||
-#endif
|
||
case TGSI_INTERPOLATE_LINEAR:
|
||
if (decl->Interp.Centroid)
|
||
intr_name = "llvm.SI.fs.interp.linear.centroid";
|
||
diff --git a/src/gallium/drivers/radeonsi/radeonsi_shader.h b/src/gallium/drivers/radeonsi/radeonsi_shader.h
|
||
index 07b2f9f..f54f67c 100644
|
||
--- a/src/gallium/drivers/radeonsi/radeonsi_shader.h
|
||
+++ b/src/gallium/drivers/radeonsi/radeonsi_shader.h
|
||
@@ -82,6 +82,7 @@ struct si_shader_key {
|
||
unsigned nr_cbufs:4;
|
||
unsigned color_two_side:1;
|
||
unsigned alpha_func:3;
|
||
+ unsigned flatshade:1;
|
||
float alpha_ref;
|
||
};
|
||
|
||
diff --git a/src/gallium/drivers/radeonsi/si_state.c b/src/gallium/drivers/radeonsi/si_state.c
|
||
index a6b1983..39817fb 100644
|
||
--- a/src/gallium/drivers/radeonsi/si_state.c
|
||
+++ b/src/gallium/drivers/radeonsi/si_state.c
|
||
@@ -421,8 +421,7 @@ static void *si_create_rs_state(struct pipe_context *ctx,
|
||
rs->offset_units = state->offset_units;
|
||
rs->offset_scale = state->offset_scale * 12.0f;
|
||
|
||
- /* XXX: Flat shading hangs the GPU */
|
||
- tmp = S_0286D4_FLAT_SHADE_ENA(0);
|
||
+ tmp = S_0286D4_FLAT_SHADE_ENA(1);
|
||
if (state->sprite_coord_enable) {
|
||
tmp |= S_0286D4_PNT_SPRITE_ENA(1) |
|
||
S_0286D4_PNT_SPRITE_OVRD_X(V_0286D4_SPI_PNT_SPRITE_SEL_S) |
|
||
@@ -1859,7 +1858,7 @@ static INLINE struct si_shader_key si_shader_selector_key(struct pipe_context *c
|
||
key.export_16bpc = rctx->export_16bpc;
|
||
if (rctx->queued.named.rasterizer) {
|
||
key.color_two_side = rctx->queued.named.rasterizer->two_side;
|
||
- /*key.flatshade = rctx->queued.named.rasterizer->flatshade;*/
|
||
+ key.flatshade = rctx->queued.named.rasterizer->flatshade;
|
||
}
|
||
if (rctx->queued.named.dsa) {
|
||
key.alpha_func = rctx->queued.named.dsa->alpha_func;
|
||
diff --git a/src/gallium/drivers/radeonsi/si_state_draw.c b/src/gallium/drivers/radeonsi/si_state_draw.c
|
||
index 3704410..8c35625 100644
|
||
--- a/src/gallium/drivers/radeonsi/si_state_draw.c
|
||
+++ b/src/gallium/drivers/radeonsi/si_state_draw.c
|
||
@@ -128,11 +128,6 @@ static void si_pipe_shader_ps(struct pipe_context *ctx, struct si_pipe_shader *s
|
||
continue;
|
||
}
|
||
|
||
- /* XXX: Flat shading hangs the GPU */
|
||
- if (shader->shader.input[i].interpolate == TGSI_INTERPOLATE_CONSTANT ||
|
||
- (shader->shader.input[i].interpolate == TGSI_INTERPOLATE_COLOR &&
|
||
- rctx->queued.named.rasterizer->flatshade))
|
||
- have_linear = TRUE;
|
||
if (shader->shader.input[i].interpolate == TGSI_INTERPOLATE_LINEAR)
|
||
have_linear = TRUE;
|
||
if (shader->shader.input[i].interpolate == TGSI_INTERPOLATE_PERSPECTIVE)
|
||
@@ -327,15 +322,12 @@ static void si_update_spi_map(struct r600_context *rctx)
|
||
bcolor:
|
||
tmp = 0;
|
||
|
||
-#if 0
|
||
- /* XXX: Flat shading hangs the GPU */
|
||
if (name == TGSI_SEMANTIC_POSITION ||
|
||
ps->input[i].interpolate == TGSI_INTERPOLATE_CONSTANT ||
|
||
(ps->input[i].interpolate == TGSI_INTERPOLATE_COLOR &&
|
||
- rctx->rasterizer && rctx->rasterizer->flatshade)) {
|
||
+ rctx->ps_shader->current->key.flatshade)) {
|
||
tmp |= S_028644_FLAT_SHADE(1);
|
||
}
|
||
-#endif
|
||
|
||
if (name == TGSI_SEMANTIC_GENERIC &&
|
||
rctx->sprite_coord_enable & (1 << ps->input[i].sid)) {
|
||
@@ -453,8 +445,14 @@ static void si_vertex_buffer_update(struct r600_context *rctx)
|
||
si_pm4_sh_data_add(pm4, va & 0xFFFFFFFF);
|
||
si_pm4_sh_data_add(pm4, (S_008F04_BASE_ADDRESS_HI(va >> 32) |
|
||
S_008F04_STRIDE(vb->stride)));
|
||
- si_pm4_sh_data_add(pm4, (vb->buffer->width0 - vb->buffer_offset) /
|
||
- MAX2(vb->stride, 1));
|
||
+ if (vb->stride)
|
||
+ /* Round up by rounding down and adding 1 */
|
||
+ si_pm4_sh_data_add(pm4,
|
||
+ (vb->buffer->width0 - offset -
|
||
+ util_format_get_blocksize(ve->src_format)) /
|
||
+ vb->stride + 1);
|
||
+ else
|
||
+ si_pm4_sh_data_add(pm4, vb->buffer->width0 - offset);
|
||
si_pm4_sh_data_add(pm4, rctx->vertex_elements->rsrc_word3[i]);
|
||
|
||
if (!bound[ve->vertex_buffer_index]) {
|
||
diff --git a/src/gallium/state_trackers/glx/xlib/xm_api.c b/src/gallium/state_trackers/glx/xlib/xm_api.c
|
||
index 607584f..021175c 100644
|
||
--- a/src/gallium/state_trackers/glx/xlib/xm_api.c
|
||
+++ b/src/gallium/state_trackers/glx/xlib/xm_api.c
|
||
@@ -438,7 +438,6 @@ create_xmesa_buffer(Drawable d, BufferType type,
|
||
{
|
||
XMesaDisplay xmdpy = xmesa_init_display(vis->display);
|
||
XMesaBuffer b;
|
||
- uint width, height;
|
||
|
||
ASSERT(type == WINDOW || type == PIXMAP || type == PBUFFER);
|
||
|
||
@@ -457,7 +456,7 @@ create_xmesa_buffer(Drawable d, BufferType type,
|
||
b->type = type;
|
||
b->cmap = cmap;
|
||
|
||
- get_drawable_size(vis->display, d, &width, &height);
|
||
+ get_drawable_size(vis->display, d, &b->width, &b->height);
|
||
|
||
/*
|
||
* Create framebuffer, but we'll plug in our own renderbuffers below.
|
||
diff --git a/src/gallium/targets/dri-vmwgfx/Makefile.am b/src/gallium/targets/dri-vmwgfx/Makefile.am
|
||
index 06ebf88..ca7df65 100644
|
||
--- a/src/gallium/targets/dri-vmwgfx/Makefile.am
|
||
+++ b/src/gallium/targets/dri-vmwgfx/Makefile.am
|
||
@@ -58,17 +58,13 @@ vmwgfx_dri_la_LIBADD = \
|
||
$(top_builddir)/src/gallium/drivers/svga/libsvga.la \
|
||
$(GALLIUM_DRI_LIB_DEPS)
|
||
|
||
-if HAVE_MESA_LLVM
|
||
vmwgfx_dri_la_LINK = $(CXXLINK) $(vmwgfx_dri_la_LDFLAGS)
|
||
# Mention a dummy pure C++ file to trigger generation of the $(LINK) variable
|
||
nodist_EXTRA_vmwgfx_dri_la_SOURCES = dummy-cpp.cpp
|
||
|
||
+if HAVE_MESA_LLVM
|
||
vmwgfx_dri_la_LDFLAGS += $(LLVM_LDFLAGS)
|
||
vmwgfx_dri_la_LIBADD += $(LLVM_LIBS)
|
||
-else
|
||
-vmwgfx_dri_la_LINK = $(LINK) $(vmwgfx_dri_la_LDFLAGS)
|
||
-# Mention a dummy pure C file to trigger generation of the $(LINK) variable
|
||
-nodist_EXTRA_vmwgfx_dri_la_SOURCES = dummy-c.c
|
||
endif
|
||
|
||
# Provide compatibility with scripts for the old Mesa build system for
|
||
diff --git a/src/gallium/targets/vdpau-softpipe/Makefile.am b/src/gallium/targets/vdpau-softpipe/Makefile.am
|
||
index 3372b5c..7bde2f8 100644
|
||
--- a/src/gallium/targets/vdpau-softpipe/Makefile.am
|
||
+++ b/src/gallium/targets/vdpau-softpipe/Makefile.am
|
||
@@ -35,7 +35,7 @@ vdpaudir = $(VDPAU_LIB_INSTALL_DIR)
|
||
vdpau_LTLIBRARIES = libvdpau_softpipe.la
|
||
|
||
libvdpau_softpipe_la_SOURCES = \
|
||
- $(top_srcdir)/src/gallium/auxiliary/vl/vl_winsys_dri.c
|
||
+ $(top_srcdir)/src/gallium/auxiliary/vl/vl_winsys_xsp.c
|
||
|
||
libvdpau_softpipe_la_LDFLAGS = \
|
||
-module \
|
||
diff --git a/src/gallium/winsys/radeon/drm/radeon_drm_bo.c b/src/gallium/winsys/radeon/drm/radeon_drm_bo.c
|
||
index 2d41c26..f4ac526 100644
|
||
--- a/src/gallium/winsys/radeon/drm/radeon_drm_bo.c
|
||
+++ b/src/gallium/winsys/radeon/drm/radeon_drm_bo.c
|
||
@@ -957,16 +957,16 @@ static boolean radeon_winsys_bo_get_handle(struct pb_buffer *buffer,
|
||
|
||
bo->flinked = TRUE;
|
||
bo->flink = flink.name;
|
||
+
|
||
+ pipe_mutex_lock(bo->mgr->bo_handles_mutex);
|
||
+ util_hash_table_set(bo->mgr->bo_handles, (void*)(uintptr_t)bo->flink, bo);
|
||
+ pipe_mutex_unlock(bo->mgr->bo_handles_mutex);
|
||
}
|
||
whandle->handle = bo->flink;
|
||
} else if (whandle->type == DRM_API_HANDLE_TYPE_KMS) {
|
||
whandle->handle = bo->handle;
|
||
}
|
||
|
||
- pipe_mutex_lock(bo->mgr->bo_handles_mutex);
|
||
- util_hash_table_set(bo->mgr->bo_handles, (void*)(uintptr_t)whandle->handle, bo);
|
||
- pipe_mutex_unlock(bo->mgr->bo_handles_mutex);
|
||
-
|
||
whandle->stride = stride;
|
||
return TRUE;
|
||
}
|
||
diff --git a/src/gbm/backends/dri/gbm_dri.c b/src/gbm/backends/dri/gbm_dri.c
|
||
index 519929e..a3a0530 100644
|
||
--- a/src/gbm/backends/dri/gbm_dri.c
|
||
+++ b/src/gbm/backends/dri/gbm_dri.c
|
||
@@ -481,6 +481,7 @@ create_dumb(struct gbm_device *gbm,
|
||
bo->base.base.width = width;
|
||
bo->base.base.height = height;
|
||
bo->base.base.stride = create_arg.pitch;
|
||
+ bo->base.base.format = format;
|
||
bo->base.base.handle.u32 = create_arg.handle;
|
||
bo->handle = create_arg.handle;
|
||
bo->size = create_arg.size;
|
||
@@ -529,6 +530,7 @@ gbm_dri_bo_create(struct gbm_device *gbm,
|
||
bo->base.base.gbm = gbm;
|
||
bo->base.base.width = width;
|
||
bo->base.base.height = height;
|
||
+ bo->base.base.format = format;
|
||
|
||
switch (format) {
|
||
case GBM_FORMAT_RGB565:
|
||
diff --git a/src/mesa/drivers/common/meta.c b/src/mesa/drivers/common/meta.c
|
||
index 4e32b50..29a209e 100644
|
||
--- a/src/mesa/drivers/common/meta.c
|
||
+++ b/src/mesa/drivers/common/meta.c
|
||
@@ -1910,6 +1910,14 @@ _mesa_meta_BlitFramebuffer(struct gl_context *ctx,
|
||
GLuint *tmp = malloc(srcW * srcH * sizeof(GLuint));
|
||
|
||
if (tmp) {
|
||
+
|
||
+ newTex = alloc_texture(depthTex, srcW, srcH, GL_DEPTH_COMPONENT);
|
||
+ _mesa_ReadPixels(srcX, srcY, srcW, srcH, GL_DEPTH_COMPONENT,
|
||
+ GL_UNSIGNED_INT, tmp);
|
||
+ setup_drawpix_texture(ctx, depthTex, newTex, GL_DEPTH_COMPONENT,
|
||
+ srcW, srcH, GL_DEPTH_COMPONENT,
|
||
+ GL_UNSIGNED_INT, tmp);
|
||
+
|
||
/* texcoords (after texture allocation!) */
|
||
{
|
||
verts[0].s = 0.0F;
|
||
@@ -1928,15 +1936,6 @@ _mesa_meta_BlitFramebuffer(struct gl_context *ctx,
|
||
if (!blit->DepthFP)
|
||
init_blit_depth_pixels(ctx);
|
||
|
||
- /* maybe change tex format here */
|
||
- newTex = alloc_texture(depthTex, srcW, srcH, GL_DEPTH_COMPONENT);
|
||
-
|
||
- _mesa_ReadPixels(srcX, srcY, srcW, srcH,
|
||
- GL_DEPTH_COMPONENT, GL_UNSIGNED_INT, tmp);
|
||
-
|
||
- setup_drawpix_texture(ctx, depthTex, newTex, GL_DEPTH_COMPONENT, srcW, srcH,
|
||
- GL_DEPTH_COMPONENT, GL_UNSIGNED_INT, tmp);
|
||
-
|
||
_mesa_BindProgramARB(GL_FRAGMENT_PROGRAM_ARB, blit->DepthFP);
|
||
_mesa_set_enable(ctx, GL_FRAGMENT_PROGRAM_ARB, GL_TRUE);
|
||
_mesa_ColorMask(GL_FALSE, GL_FALSE, GL_FALSE, GL_FALSE);
|
||
diff --git a/src/mesa/drivers/dri/i965/Makefile.am b/src/mesa/drivers/dri/i965/Makefile.am
|
||
index dc140df..77670ef 100644
|
||
--- a/src/mesa/drivers/dri/i965/Makefile.am
|
||
+++ b/src/mesa/drivers/dri/i965/Makefile.am
|
||
@@ -62,6 +62,7 @@ TEST_LIBS = \
|
||
../common/libdri_test_stubs.la
|
||
|
||
i965_dri_la_SOURCES =
|
||
+nodist_EXTRA_i965_dri_la_SOURCES = dummy2.cpp
|
||
i965_dri_la_LIBADD = $(COMMON_LIBS)
|
||
i965_dri_la_LDFLAGS = -module -avoid-version -shared
|
||
|
||
diff --git a/src/mesa/drivers/dri/i965/brw_fs.cpp b/src/mesa/drivers/dri/i965/brw_fs.cpp
|
||
index 8dab431..f80219e 100644
|
||
--- a/src/mesa/drivers/dri/i965/brw_fs.cpp
|
||
+++ b/src/mesa/drivers/dri/i965/brw_fs.cpp
|
||
@@ -258,6 +258,26 @@ fs_visitor::VARYING_PULL_CONSTANT_LOAD(fs_reg dst, fs_reg surf_index,
|
||
return instructions;
|
||
}
|
||
|
||
+/**
|
||
+ * A helper for MOV generation for fixing up broken hardware SEND dependency
|
||
+ * handling.
|
||
+ */
|
||
+fs_inst *
|
||
+fs_visitor::DEP_RESOLVE_MOV(int grf)
|
||
+{
|
||
+ fs_inst *inst = MOV(brw_null_reg(), fs_reg(GRF, grf, BRW_REGISTER_TYPE_F));
|
||
+
|
||
+ inst->ir = NULL;
|
||
+ inst->annotation = "send dependency resolve";
|
||
+
|
||
+ /* The caller always wants uncompressed to emit the minimal extra
|
||
+ * dependencies, and to avoid having to deal with aligning its regs to 2.
|
||
+ */
|
||
+ inst->force_uncompressed = true;
|
||
+
|
||
+ return inst;
|
||
+}
|
||
+
|
||
bool
|
||
fs_inst::equals(fs_inst *inst)
|
||
{
|
||
@@ -1690,8 +1710,6 @@ fs_visitor::setup_pull_constants()
|
||
dst, index, offset);
|
||
pull->ir = inst->ir;
|
||
pull->annotation = inst->annotation;
|
||
- pull->base_mrf = 14;
|
||
- pull->mlen = 1;
|
||
|
||
inst->insert_before(pull);
|
||
|
||
@@ -1911,6 +1929,7 @@ fs_visitor::register_coalesce()
|
||
|
||
bool has_source_modifiers = (inst->src[0].abs ||
|
||
inst->src[0].negate ||
|
||
+ inst->src[0].smear != -1 ||
|
||
inst->src[0].file == UNIFORM);
|
||
|
||
/* Found a move of a GRF to a GRF. Let's see if we can coalesce
|
||
@@ -2228,6 +2247,265 @@ fs_visitor::remove_duplicate_mrf_writes()
|
||
return progress;
|
||
}
|
||
|
||
+static void
|
||
+clear_deps_for_inst_src(fs_inst *inst, int dispatch_width, bool *deps,
|
||
+ int first_grf, int grf_len)
|
||
+{
|
||
+ bool inst_16wide = (dispatch_width > 8 &&
|
||
+ !inst->force_uncompressed &&
|
||
+ !inst->force_sechalf);
|
||
+
|
||
+ /* Clear the flag for registers that actually got read (as expected). */
|
||
+ for (int i = 0; i < 3; i++) {
|
||
+ int grf;
|
||
+ if (inst->src[i].file == GRF) {
|
||
+ grf = inst->src[i].reg;
|
||
+ } else if (inst->src[i].file == FIXED_HW_REG &&
|
||
+ inst->src[i].fixed_hw_reg.file == BRW_GENERAL_REGISTER_FILE) {
|
||
+ grf = inst->src[i].fixed_hw_reg.nr;
|
||
+ } else {
|
||
+ continue;
|
||
+ }
|
||
+
|
||
+ if (grf >= first_grf &&
|
||
+ grf < first_grf + grf_len) {
|
||
+ deps[grf - first_grf] = false;
|
||
+ if (inst_16wide)
|
||
+ deps[grf - first_grf + 1] = false;
|
||
+ }
|
||
+ }
|
||
+}
|
||
+
|
||
+/**
|
||
+ * Implements this workaround for the original 965:
|
||
+ *
|
||
+ * "[DevBW, DevCL] Implementation Restrictions: As the hardware does not
|
||
+ * check for post destination dependencies on this instruction, software
|
||
+ * must ensure that there is no destination hazard for the case of ‘write
|
||
+ * followed by a posted write’ shown in the following example.
|
||
+ *
|
||
+ * 1. mov r3 0
|
||
+ * 2. send r3.xy <rest of send instruction>
|
||
+ * 3. mov r2 r3
|
||
+ *
|
||
+ * Due to no post-destination dependency check on the ‘send’, the above
|
||
+ * code sequence could have two instructions (1 and 2) in flight at the
|
||
+ * same time that both consider ‘r3’ as the target of their final writes."
|
||
+ */
|
||
+void
|
||
+fs_visitor::insert_gen4_pre_send_dependency_workarounds(fs_inst *inst)
|
||
+{
|
||
+ int write_len = inst->regs_written() * dispatch_width / 8;
|
||
+ int first_write_grf = inst->dst.reg;
|
||
+ bool needs_dep[BRW_MAX_MRF];
|
||
+ assert(write_len < (int)sizeof(needs_dep) - 1);
|
||
+
|
||
+ memset(needs_dep, false, sizeof(needs_dep));
|
||
+ memset(needs_dep, true, write_len);
|
||
+
|
||
+ clear_deps_for_inst_src(inst, dispatch_width,
|
||
+ needs_dep, first_write_grf, write_len);
|
||
+
|
||
+ /* Walk backwards looking for writes to registers we're writing which
|
||
+ * aren't read since being written. If we hit the start of the program,
|
||
+ * we assume that there are no outstanding dependencies on entry to the
|
||
+ * program.
|
||
+ */
|
||
+ for (fs_inst *scan_inst = (fs_inst *)inst->prev;
|
||
+ scan_inst != NULL;
|
||
+ scan_inst = (fs_inst *)scan_inst->prev) {
|
||
+
|
||
+ /* If we hit control flow, assume that there *are* outstanding
|
||
+ * dependencies, and force their cleanup before our instruction.
|
||
+ */
|
||
+ if (scan_inst->is_control_flow()) {
|
||
+ for (int i = 0; i < write_len; i++) {
|
||
+ if (needs_dep[i]) {
|
||
+ inst->insert_before(DEP_RESOLVE_MOV(first_write_grf + i));
|
||
+ }
|
||
+ }
|
||
+ }
|
||
+
|
||
+ bool scan_inst_16wide = (dispatch_width > 8 &&
|
||
+ !scan_inst->force_uncompressed &&
|
||
+ !scan_inst->force_sechalf);
|
||
+
|
||
+ /* We insert our reads as late as possible on the assumption that any
|
||
+ * instruction but a MOV that might have left us an outstanding
|
||
+ * dependency has more latency than a MOV.
|
||
+ */
|
||
+ if (scan_inst->dst.file == GRF &&
|
||
+ scan_inst->dst.reg >= first_write_grf &&
|
||
+ scan_inst->dst.reg < first_write_grf + write_len &&
|
||
+ needs_dep[scan_inst->dst.reg - first_write_grf]) {
|
||
+ inst->insert_before(DEP_RESOLVE_MOV(scan_inst->dst.reg));
|
||
+ needs_dep[scan_inst->dst.reg - first_write_grf] = false;
|
||
+ if (scan_inst_16wide)
|
||
+ needs_dep[scan_inst->dst.reg - first_write_grf + 1] = false;
|
||
+ }
|
||
+
|
||
+ /* Clear the flag for registers that actually got read (as expected). */
|
||
+ clear_deps_for_inst_src(scan_inst, dispatch_width,
|
||
+ needs_dep, first_write_grf, write_len);
|
||
+
|
||
+ /* Continue the loop only if we haven't resolved all the dependencies */
|
||
+ int i;
|
||
+ for (i = 0; i < write_len; i++) {
|
||
+ if (needs_dep[i])
|
||
+ break;
|
||
+ }
|
||
+ if (i == write_len)
|
||
+ return;
|
||
+ }
|
||
+}
|
||
+
|
||
+/**
|
||
+ * Implements this workaround for the original 965:
|
||
+ *
|
||
+ * "[DevBW, DevCL] Errata: A destination register from a send can not be
|
||
+ * used as a destination register until after it has been sourced by an
|
||
+ * instruction with a different destination register."
|
||
+ */
|
||
+void
|
||
+fs_visitor::insert_gen4_post_send_dependency_workarounds(fs_inst *inst)
|
||
+{
|
||
+ int write_len = inst->regs_written() * dispatch_width / 8;
|
||
+ int first_write_grf = inst->dst.reg;
|
||
+ bool needs_dep[BRW_MAX_MRF];
|
||
+ assert(write_len < (int)sizeof(needs_dep) - 1);
|
||
+
|
||
+ memset(needs_dep, false, sizeof(needs_dep));
|
||
+ memset(needs_dep, true, write_len);
|
||
+ /* Walk forwards looking for writes to registers we're writing which aren't
|
||
+ * read before being written.
|
||
+ */
|
||
+ for (fs_inst *scan_inst = (fs_inst *)inst->next;
|
||
+ !scan_inst->is_tail_sentinel();
|
||
+ scan_inst = (fs_inst *)scan_inst->next) {
|
||
+ /* If we hit control flow, force resolve all remaining dependencies. */
|
||
+ if (scan_inst->is_control_flow()) {
|
||
+ for (int i = 0; i < write_len; i++) {
|
||
+ if (needs_dep[i])
|
||
+ scan_inst->insert_before(DEP_RESOLVE_MOV(first_write_grf + i));
|
||
+ }
|
||
+ }
|
||
+
|
||
+ /* Clear the flag for registers that actually got read (as expected). */
|
||
+ clear_deps_for_inst_src(scan_inst, dispatch_width,
|
||
+ needs_dep, first_write_grf, write_len);
|
||
+
|
||
+ /* We insert our reads as late as possible since they're reading the
|
||
+ * result of a SEND, which has massive latency.
|
||
+ */
|
||
+ if (scan_inst->dst.file == GRF &&
|
||
+ scan_inst->dst.reg >= first_write_grf &&
|
||
+ scan_inst->dst.reg < first_write_grf + write_len &&
|
||
+ needs_dep[scan_inst->dst.reg - first_write_grf]) {
|
||
+ scan_inst->insert_before(DEP_RESOLVE_MOV(scan_inst->dst.reg));
|
||
+ needs_dep[scan_inst->dst.reg - first_write_grf] = false;
|
||
+ }
|
||
+
|
||
+ /* Continue the loop only if we haven't resolved all the dependencies */
|
||
+ int i;
|
||
+ for (i = 0; i < write_len; i++) {
|
||
+ if (needs_dep[i])
|
||
+ break;
|
||
+ }
|
||
+ if (i == write_len)
|
||
+ return;
|
||
+ }
|
||
+
|
||
+ /* If we hit the end of the program, resolve all remaining dependencies out
|
||
+ * of paranoia.
|
||
+ */
|
||
+ fs_inst *last_inst = (fs_inst *)this->instructions.get_tail();
|
||
+ assert(last_inst->eot);
|
||
+ for (int i = 0; i < write_len; i++) {
|
||
+ if (needs_dep[i])
|
||
+ last_inst->insert_before(DEP_RESOLVE_MOV(first_write_grf + i));
|
||
+ }
|
||
+}
|
||
+
|
||
+void
|
||
+fs_visitor::insert_gen4_send_dependency_workarounds()
|
||
+{
|
||
+ if (intel->gen != 4 || intel->is_g4x)
|
||
+ return;
|
||
+
|
||
+ /* Note that we're done with register allocation, so GRF fs_regs always
|
||
+ * have a .reg_offset of 0.
|
||
+ */
|
||
+
|
||
+ foreach_list_safe(node, &this->instructions) {
|
||
+ fs_inst *inst = (fs_inst *)node;
|
||
+
|
||
+ if (inst->mlen != 0 && inst->dst.file == GRF) {
|
||
+ insert_gen4_pre_send_dependency_workarounds(inst);
|
||
+ insert_gen4_post_send_dependency_workarounds(inst);
|
||
+ }
|
||
+ }
|
||
+}
|
||
+
|
||
+/**
|
||
+ * Turns the generic expression-style uniform pull constant load instruction
|
||
+ * into a hardware-specific series of instructions for loading a pull
|
||
+ * constant.
|
||
+ *
|
||
+ * The expression style allows the CSE pass before this to optimize out
|
||
+ * repeated loads from the same offset, and gives the pre-register-allocation
|
||
+ * scheduling full flexibility, while the conversion to native instructions
|
||
+ * allows the post-register-allocation scheduler the best information
|
||
+ * possible.
|
||
+ */
|
||
+void
|
||
+fs_visitor::lower_uniform_pull_constant_loads()
|
||
+{
|
||
+ foreach_list(node, &this->instructions) {
|
||
+ fs_inst *inst = (fs_inst *)node;
|
||
+
|
||
+ if (inst->opcode != FS_OPCODE_UNIFORM_PULL_CONSTANT_LOAD)
|
||
+ continue;
|
||
+
|
||
+ if (intel->gen >= 7) {
|
||
+ fs_reg const_offset_reg = inst->src[1];
|
||
+ assert(const_offset_reg.file == IMM &&
|
||
+ const_offset_reg.type == BRW_REGISTER_TYPE_UD);
|
||
+ const_offset_reg.imm.u /= 16;
|
||
+ fs_reg payload = fs_reg(this, glsl_type::uint_type);
|
||
+ struct brw_reg g0 = retype(brw_vec8_grf(0, 0),
|
||
+ BRW_REGISTER_TYPE_UD);
|
||
+
|
||
+ fs_inst *setup1 = MOV(payload, fs_reg(g0));
|
||
+ setup1->force_writemask_all = true;
|
||
+ /* We don't need the second half of this vgrf to be filled with g1
|
||
+ * in the 16-wide case, but if we use force_uncompressed then live
|
||
+ * variable analysis won't consider this a def!
|
||
+ */
|
||
+
|
||
+ fs_inst *setup2 = new(mem_ctx) fs_inst(FS_OPCODE_SET_GLOBAL_OFFSET,
|
||
+ payload, payload,
|
||
+ const_offset_reg);
|
||
+
|
||
+ setup1->ir = inst->ir;
|
||
+ setup1->annotation = inst->annotation;
|
||
+ inst->insert_before(setup1);
|
||
+ setup2->ir = inst->ir;
|
||
+ setup2->annotation = inst->annotation;
|
||
+ inst->insert_before(setup2);
|
||
+ inst->opcode = FS_OPCODE_UNIFORM_PULL_CONSTANT_LOAD_GEN7;
|
||
+ inst->src[1] = payload;
|
||
+ } else {
|
||
+ /* Before register allocation, we didn't tell the scheduler about the
|
||
+ * MRF we use. We know it's safe to use this MRF because nothing
|
||
+ * else does except for register spill/unspill, which generates and
|
||
+ * uses its MRF within a single IR instruction.
|
||
+ */
|
||
+ inst->base_mrf = 14;
|
||
+ inst->mlen = 1;
|
||
+ }
|
||
+ }
|
||
+}
|
||
+
|
||
void
|
||
fs_visitor::dump_instruction(fs_inst *inst)
|
||
{
|
||
@@ -2500,6 +2778,8 @@ fs_visitor::run()
|
||
|
||
schedule_instructions(false);
|
||
|
||
+ lower_uniform_pull_constant_loads();
|
||
+
|
||
assign_curb_setup();
|
||
assign_urb_setup();
|
||
|
||
@@ -2522,6 +2802,12 @@ fs_visitor::run()
|
||
assert(force_uncompressed_stack == 0);
|
||
assert(force_sechalf_stack == 0);
|
||
|
||
+ /* This must come after all optimization and register allocation, since
|
||
+ * it inserts dead code that happens to have side effects, and it does
|
||
+ * so based on the actual physical registers in use.
|
||
+ */
|
||
+ insert_gen4_send_dependency_workarounds();
|
||
+
|
||
if (failed)
|
||
return false;
|
||
|
||
diff --git a/src/mesa/drivers/dri/i965/brw_fs.h b/src/mesa/drivers/dri/i965/brw_fs.h
|
||
index 88fecb9..d1bb111 100644
|
||
--- a/src/mesa/drivers/dri/i965/brw_fs.h
|
||
+++ b/src/mesa/drivers/dri/i965/brw_fs.h
|
||
@@ -285,6 +285,7 @@ public:
|
||
fs_inst *IF(fs_reg src0, fs_reg src1, uint32_t condition);
|
||
fs_inst *CMP(fs_reg dst, fs_reg src0, fs_reg src1,
|
||
uint32_t condition);
|
||
+ fs_inst *DEP_RESOLVE_MOV(int grf);
|
||
|
||
int type_size(const struct glsl_type *type);
|
||
fs_inst *get_instruction_generating_reg(fs_inst *start,
|
||
@@ -329,7 +330,11 @@ public:
|
||
bool remove_duplicate_mrf_writes();
|
||
bool virtual_grf_interferes(int a, int b);
|
||
void schedule_instructions(bool post_reg_alloc);
|
||
+ void insert_gen4_send_dependency_workarounds();
|
||
+ void insert_gen4_pre_send_dependency_workarounds(fs_inst *inst);
|
||
+ void insert_gen4_post_send_dependency_workarounds(fs_inst *inst);
|
||
void fail(const char *msg, ...);
|
||
+ void lower_uniform_pull_constant_loads();
|
||
|
||
void push_force_uncompressed();
|
||
void pop_force_uncompressed();
|
||
diff --git a/src/mesa/drivers/dri/i965/brw_fs_copy_propagation.cpp b/src/mesa/drivers/dri/i965/brw_fs_copy_propagation.cpp
|
||
index c4ec1d9..194ed07 100644
|
||
--- a/src/mesa/drivers/dri/i965/brw_fs_copy_propagation.cpp
|
||
+++ b/src/mesa/drivers/dri/i965/brw_fs_copy_propagation.cpp
|
||
@@ -223,7 +223,8 @@ fs_visitor::try_copy_propagate(fs_inst *inst, int arg, acp_entry *entry)
|
||
inst->src[arg].file = entry->src.file;
|
||
inst->src[arg].reg = entry->src.reg;
|
||
inst->src[arg].reg_offset = entry->src.reg_offset;
|
||
- inst->src[arg].smear = entry->src.smear;
|
||
+ if (entry->src.smear != -1)
|
||
+ inst->src[arg].smear = entry->src.smear;
|
||
|
||
if (!inst->src[arg].abs) {
|
||
inst->src[arg].abs = entry->src.abs;
|
||
diff --git a/src/mesa/drivers/dri/i965/brw_fs_cse.cpp b/src/mesa/drivers/dri/i965/brw_fs_cse.cpp
|
||
index 70c143a..a13ca36 100644
|
||
--- a/src/mesa/drivers/dri/i965/brw_fs_cse.cpp
|
||
+++ b/src/mesa/drivers/dri/i965/brw_fs_cse.cpp
|
||
@@ -105,7 +105,8 @@ fs_visitor::opt_cse_local(bblock_t *block, exec_list *aeb)
|
||
/* Match current instruction's expression against those in AEB. */
|
||
if (inst->opcode == entry->generator->opcode &&
|
||
inst->saturate == entry->generator->saturate &&
|
||
- operands_match(entry->generator->src, inst->src)) {
|
||
+ inst->dst.type == entry->generator->dst.type &&
|
||
+ operands_match(entry->generator->src, inst->src)) {
|
||
|
||
found = true;
|
||
progress = true;
|
||
diff --git a/src/mesa/drivers/dri/i965/brw_fs_emit.cpp b/src/mesa/drivers/dri/i965/brw_fs_emit.cpp
|
||
index 45072da..365a2ec 100644
|
||
--- a/src/mesa/drivers/dri/i965/brw_fs_emit.cpp
|
||
+++ b/src/mesa/drivers/dri/i965/brw_fs_emit.cpp
|
||
@@ -604,29 +604,8 @@ fs_generator::generate_unspill(fs_inst *inst, struct brw_reg dst)
|
||
{
|
||
assert(inst->mlen != 0);
|
||
|
||
- /* Clear any post destination dependencies that would be ignored by
|
||
- * the block read. See the B-Spec for pre-gen5 send instruction.
|
||
- *
|
||
- * This could use a better solution, since texture sampling and
|
||
- * math reads could potentially run into it as well -- anywhere
|
||
- * that we have a SEND with a destination that is a register that
|
||
- * was written but not read within the last N instructions (what's
|
||
- * N? unsure). This is rare because of dead code elimination, but
|
||
- * not impossible.
|
||
- */
|
||
- if (intel->gen == 4 && !intel->is_g4x)
|
||
- brw_MOV(p, brw_null_reg(), dst);
|
||
-
|
||
brw_oword_block_read_scratch(p, dst, brw_message_reg(inst->base_mrf), 1,
|
||
inst->offset);
|
||
-
|
||
- if (intel->gen == 4 && !intel->is_g4x) {
|
||
- /* gen4 errata: destination from a send can't be used as a
|
||
- * destination until it's been read. Just read it so we don't
|
||
- * have to worry.
|
||
- */
|
||
- brw_MOV(p, brw_null_reg(), dst);
|
||
- }
|
||
}
|
||
|
||
void
|
||
@@ -637,19 +616,6 @@ fs_generator::generate_uniform_pull_constant_load(fs_inst *inst,
|
||
{
|
||
assert(inst->mlen != 0);
|
||
|
||
- /* Clear any post destination dependencies that would be ignored by
|
||
- * the block read. See the B-Spec for pre-gen5 send instruction.
|
||
- *
|
||
- * This could use a better solution, since texture sampling and
|
||
- * math reads could potentially run into it as well -- anywhere
|
||
- * that we have a SEND with a destination that is a register that
|
||
- * was written but not read within the last N instructions (what's
|
||
- * N? unsure). This is rare because of dead code elimination, but
|
||
- * not impossible.
|
||
- */
|
||
- if (intel->gen == 4 && !intel->is_g4x)
|
||
- brw_MOV(p, brw_null_reg(), dst);
|
||
-
|
||
assert(index.file == BRW_IMMEDIATE_VALUE &&
|
||
index.type == BRW_REGISTER_TYPE_UD);
|
||
uint32_t surf_index = index.dw1.ud;
|
||
@@ -660,14 +626,6 @@ fs_generator::generate_uniform_pull_constant_load(fs_inst *inst,
|
||
|
||
brw_oword_block_read(p, dst, brw_message_reg(inst->base_mrf),
|
||
read_offset, surf_index);
|
||
-
|
||
- if (intel->gen == 4 && !intel->is_g4x) {
|
||
- /* gen4 errata: destination from a send can't be used as a
|
||
- * destination until it's been read. Just read it so we don't
|
||
- * have to worry.
|
||
- */
|
||
- brw_MOV(p, brw_null_reg(), dst);
|
||
- }
|
||
}
|
||
|
||
void
|
||
diff --git a/src/mesa/drivers/dri/i965/brw_fs_visitor.cpp b/src/mesa/drivers/dri/i965/brw_fs_visitor.cpp
|
||
index d4f6fc9..573921c 100644
|
||
--- a/src/mesa/drivers/dri/i965/brw_fs_visitor.cpp
|
||
+++ b/src/mesa/drivers/dri/i965/brw_fs_visitor.cpp
|
||
@@ -597,31 +597,9 @@ fs_visitor::visit(ir_expression *ir)
|
||
fs_reg packed_consts = fs_reg(this, glsl_type::float_type);
|
||
packed_consts.type = result.type;
|
||
|
||
- if (intel->gen >= 7) {
|
||
- fs_reg const_offset_reg = fs_reg(const_offset->value.u[0] / 16);
|
||
- fs_reg payload = fs_reg(this, glsl_type::uint_type);
|
||
- struct brw_reg g0 = retype(brw_vec8_grf(0, 0),
|
||
- BRW_REGISTER_TYPE_UD);
|
||
- fs_inst *setup = emit(MOV(payload, fs_reg(g0)));
|
||
- setup->force_writemask_all = true;
|
||
- /* We don't need the second half of this vgrf to be filled with g1
|
||
- * in the 16-wide case, but if we use force_uncompressed then live
|
||
- * variable analysis won't consider this a def!
|
||
- */
|
||
-
|
||
- emit(FS_OPCODE_SET_GLOBAL_OFFSET, payload,
|
||
- payload, const_offset_reg);
|
||
- emit(FS_OPCODE_UNIFORM_PULL_CONSTANT_LOAD_GEN7, packed_consts,
|
||
- surf_index, payload);
|
||
- } else {
|
||
- fs_reg const_offset_reg = fs_reg(const_offset->value.u[0]);
|
||
- fs_inst *pull = emit(fs_inst(FS_OPCODE_UNIFORM_PULL_CONSTANT_LOAD,
|
||
- packed_consts,
|
||
- surf_index,
|
||
- const_offset_reg));
|
||
- pull->base_mrf = 14;
|
||
- pull->mlen = 1;
|
||
- }
|
||
+ fs_reg const_offset_reg = fs_reg(const_offset->value.u[0] & ~15);
|
||
+ emit(fs_inst(FS_OPCODE_UNIFORM_PULL_CONSTANT_LOAD,
|
||
+ packed_consts, surf_index, const_offset_reg));
|
||
|
||
packed_consts.smear = const_offset->value.u[0] % 16 / 4;
|
||
for (int i = 0; i < ir->type->vector_elements; i++) {
|
||
diff --git a/src/mesa/drivers/dri/i965/brw_vs_constval.c b/src/mesa/drivers/dri/i965/brw_vs_constval.c
|
||
index 3d53843..48635c5 100644
|
||
--- a/src/mesa/drivers/dri/i965/brw_vs_constval.c
|
||
+++ b/src/mesa/drivers/dri/i965/brw_vs_constval.c
|
||
@@ -238,6 +238,23 @@ static void calc_wm_input_sizes( struct brw_context *brw )
|
||
|
||
calc_sizes(&t);
|
||
|
||
+ /* _NEW_POINT
|
||
+ *
|
||
+ * If the SF will be replacing the vertex output with a reference to
|
||
+ * gl_PointCoord, then tell the fragment shader that the value actually
|
||
+ * does vary.
|
||
+ */
|
||
+ if (ctx->Point.PointSprite) {
|
||
+ for (int i = 0; i < 8; i++) {
|
||
+ if (ctx->Point.CoordReplace[i]) {
|
||
+ t.size_masks[4-1] |= FRAG_BIT_TEX(i);
|
||
+ t.size_masks[3-1] |= FRAG_BIT_TEX(i);
|
||
+ t.size_masks[2-1] |= FRAG_BIT_TEX(i);
|
||
+ t.size_masks[1-1] |= FRAG_BIT_TEX(i);
|
||
+ }
|
||
+ }
|
||
+ }
|
||
+
|
||
if (memcmp(brw->wm.input_size_masks, t.size_masks, sizeof(t.size_masks)) != 0) {
|
||
memcpy(brw->wm.input_size_masks, t.size_masks, sizeof(t.size_masks));
|
||
brw->state.dirty.brw |= BRW_NEW_WM_INPUT_DIMENSIONS;
|
||
@@ -246,7 +263,7 @@ static void calc_wm_input_sizes( struct brw_context *brw )
|
||
|
||
const struct brw_tracked_state brw_wm_input_sizes = {
|
||
.dirty = {
|
||
- .mesa = _NEW_LIGHT | _NEW_PROGRAM,
|
||
+ .mesa = _NEW_LIGHT | _NEW_PROGRAM | _NEW_POINT,
|
||
.brw = BRW_NEW_VERTEX_PROGRAM | BRW_NEW_INPUT_DIMENSIONS,
|
||
.cache = 0
|
||
},
|
||
diff --git a/src/mesa/drivers/dri/intel/intel_chipset.h b/src/mesa/drivers/dri/intel/intel_chipset.h
|
||
index 9c00ba8..885f6c2 100644
|
||
--- a/src/mesa/drivers/dri/intel/intel_chipset.h
|
||
+++ b/src/mesa/drivers/dri/intel/intel_chipset.h
|
||
@@ -114,15 +114,15 @@
|
||
#define PCI_CHIP_HASWELL_ULT_S_GT1 0x0A0A /* Server */
|
||
#define PCI_CHIP_HASWELL_ULT_S_GT2 0x0A1A
|
||
#define PCI_CHIP_HASWELL_ULT_S_GT2_PLUS 0x0A2A
|
||
-#define PCI_CHIP_HASWELL_CRW_GT1 0x0D12 /* Desktop */
|
||
-#define PCI_CHIP_HASWELL_CRW_GT2 0x0D22
|
||
-#define PCI_CHIP_HASWELL_CRW_GT2_PLUS 0x0D32
|
||
-#define PCI_CHIP_HASWELL_CRW_M_GT1 0x0D16 /* Mobile */
|
||
-#define PCI_CHIP_HASWELL_CRW_M_GT2 0x0D26
|
||
-#define PCI_CHIP_HASWELL_CRW_M_GT2_PLUS 0x0D36
|
||
-#define PCI_CHIP_HASWELL_CRW_S_GT1 0x0D1A /* Server */
|
||
-#define PCI_CHIP_HASWELL_CRW_S_GT2 0x0D2A
|
||
-#define PCI_CHIP_HASWELL_CRW_S_GT2_PLUS 0x0D3A
|
||
+#define PCI_CHIP_HASWELL_CRW_GT1 0x0D02 /* Desktop */
|
||
+#define PCI_CHIP_HASWELL_CRW_GT2 0x0D12
|
||
+#define PCI_CHIP_HASWELL_CRW_GT2_PLUS 0x0D22
|
||
+#define PCI_CHIP_HASWELL_CRW_M_GT1 0x0D06 /* Mobile */
|
||
+#define PCI_CHIP_HASWELL_CRW_M_GT2 0x0D16
|
||
+#define PCI_CHIP_HASWELL_CRW_M_GT2_PLUS 0x0D26
|
||
+#define PCI_CHIP_HASWELL_CRW_S_GT1 0x0D0A /* Server */
|
||
+#define PCI_CHIP_HASWELL_CRW_S_GT2 0x0D1A
|
||
+#define PCI_CHIP_HASWELL_CRW_S_GT2_PLUS 0x0D2A
|
||
|
||
#define IS_MOBILE(devid) (devid == PCI_CHIP_I855_GM || \
|
||
devid == PCI_CHIP_I915_GM || \
|
||
diff --git a/src/mesa/main/attrib.c b/src/mesa/main/attrib.c
|
||
index a951283..6d91534 100644
|
||
--- a/src/mesa/main/attrib.c
|
||
+++ b/src/mesa/main/attrib.c
|
||
@@ -130,6 +130,9 @@ struct gl_enable_attrib
|
||
GLboolean VertexProgramPointSize;
|
||
GLboolean VertexProgramTwoSide;
|
||
|
||
+ /* GL_ARB_fragment_program */
|
||
+ GLboolean FragmentProgram;
|
||
+
|
||
/* GL_ARB_point_sprite / GL_NV_point_sprite */
|
||
GLboolean PointSprite;
|
||
GLboolean FragmentShaderATI;
|
||
@@ -316,6 +319,10 @@ _mesa_PushAttrib(GLbitfield mask)
|
||
attr->VertexProgram = ctx->VertexProgram.Enabled;
|
||
attr->VertexProgramPointSize = ctx->VertexProgram.PointSizeEnabled;
|
||
attr->VertexProgramTwoSide = ctx->VertexProgram.TwoSideEnabled;
|
||
+
|
||
+ /* GL_ARB_fragment_program */
|
||
+ attr->FragmentProgram = ctx->FragmentProgram.Enabled;
|
||
+
|
||
save_attrib_data(&head, GL_ENABLE_BIT, attr);
|
||
|
||
/* GL_ARB_framebuffer_sRGB / GL_EXT_framebuffer_sRGB */
|
||
@@ -607,6 +614,11 @@ pop_enable_group(struct gl_context *ctx, const struct gl_enable_attrib *enable)
|
||
enable->VertexProgramTwoSide,
|
||
GL_VERTEX_PROGRAM_TWO_SIDE_ARB);
|
||
|
||
+ /* GL_ARB_fragment_program */
|
||
+ TEST_AND_UPDATE(ctx->FragmentProgram.Enabled,
|
||
+ enable->FragmentProgram,
|
||
+ GL_FRAGMENT_PROGRAM_ARB);
|
||
+
|
||
/* GL_ARB_framebuffer_sRGB / GL_EXT_framebuffer_sRGB */
|
||
TEST_AND_UPDATE(ctx->Color.sRGBEnabled, enable->sRGBEnabled,
|
||
GL_FRAMEBUFFER_SRGB);
|
||
diff --git a/src/mesa/main/context.c b/src/mesa/main/context.c
|
||
index 5e9e539..df57b76 100644
|
||
--- a/src/mesa/main/context.c
|
||
+++ b/src/mesa/main/context.c
|
||
@@ -1072,7 +1072,6 @@ _mesa_initialize_context(struct gl_context *ctx,
|
||
case API_OPENGLES2:
|
||
ctx->FragmentProgram._MaintainTexEnvProgram = GL_TRUE;
|
||
ctx->VertexProgram._MaintainTnlProgram = GL_TRUE;
|
||
- ctx->Point.PointSprite = GL_TRUE; /* always on for ES 2.x */
|
||
break;
|
||
}
|
||
|
||
diff --git a/src/mesa/main/glformats.c b/src/mesa/main/glformats.c
|
||
index 8728540..c1e1658 100644
|
||
--- a/src/mesa/main/glformats.c
|
||
+++ b/src/mesa/main/glformats.c
|
||
@@ -917,7 +917,7 @@ _mesa_is_compressed_format(struct gl_context *ctx, GLenum format)
|
||
case GL_COMPRESSED_SIGNED_RG11_EAC:
|
||
case GL_COMPRESSED_RGB8_PUNCHTHROUGH_ALPHA1_ETC2:
|
||
case GL_COMPRESSED_SRGB8_PUNCHTHROUGH_ALPHA1_ETC2:
|
||
- return _mesa_is_gles3(ctx);
|
||
+ return _mesa_is_gles3(ctx) || ctx->Extensions.ARB_ES3_compatibility;
|
||
case GL_PALETTE4_RGB8_OES:
|
||
case GL_PALETTE4_RGBA8_OES:
|
||
case GL_PALETTE4_R5_G6_B5_OES:
|
||
diff --git a/src/mesa/main/points.c b/src/mesa/main/points.c
|
||
index 1778640..c925d4c 100644
|
||
--- a/src/mesa/main/points.c
|
||
+++ b/src/mesa/main/points.c
|
||
@@ -253,7 +253,8 @@ _mesa_init_point(struct gl_context *ctx)
|
||
* In a core context, the state will default to true, and the setters and
|
||
* getters are disabled.
|
||
*/
|
||
- ctx->Point.PointSprite = (ctx->API == API_OPENGL_CORE);
|
||
+ ctx->Point.PointSprite = (ctx->API == API_OPENGL_CORE ||
|
||
+ ctx->API == API_OPENGLES2);
|
||
|
||
ctx->Point.SpriteRMode = GL_ZERO; /* GL_NV_point_sprite (only!) */
|
||
ctx->Point.SpriteOrigin = GL_UPPER_LEFT; /* GL_ARB_point_sprite */
|
||
diff --git a/src/mesa/main/teximage.c b/src/mesa/main/teximage.c
|
||
index d1723b8..1b9525b 100644
|
||
--- a/src/mesa/main/teximage.c
|
||
+++ b/src/mesa/main/teximage.c
|
||
@@ -520,7 +520,7 @@ _mesa_base_tex_format( struct gl_context *ctx, GLint internalFormat )
|
||
}
|
||
}
|
||
|
||
- if (_mesa_is_gles3(ctx)) {
|
||
+ if (_mesa_is_gles3(ctx) || ctx->Extensions.ARB_ES3_compatibility) {
|
||
switch (internalFormat) {
|
||
case GL_COMPRESSED_RGB8_ETC2:
|
||
case GL_COMPRESSED_SRGB8_ETC2:
|
||
@@ -3187,6 +3187,12 @@ _mesa_EGLImageTargetTexture2DOES (GLenum target, GLeglImageOES image)
|
||
return;
|
||
}
|
||
|
||
+ if (!image) {
|
||
+ _mesa_error(ctx, GL_INVALID_OPERATION,
|
||
+ "glEGLImageTargetTexture2D(image=%p)", image);
|
||
+ return;
|
||
+ }
|
||
+
|
||
if (ctx->NewState & _NEW_PIXEL)
|
||
_mesa_update_state(ctx);
|
||
|
||
diff --git a/src/mesa/main/texparam.c b/src/mesa/main/texparam.c
|
||
index 52ede13..6f18ec6 100644
|
||
--- a/src/mesa/main/texparam.c
|
||
+++ b/src/mesa/main/texparam.c
|
||
@@ -1432,6 +1432,12 @@ _mesa_GetTexParameterfv( GLenum target, GLenum pname, GLfloat *params )
|
||
*params = (GLfloat) obj->Immutable;
|
||
break;
|
||
|
||
+ case GL_REQUIRED_TEXTURE_IMAGE_UNITS_OES:
|
||
+ if (!_mesa_is_gles(ctx) || !ctx->Extensions.OES_EGL_image_external)
|
||
+ goto invalid_pname;
|
||
+ *params = obj->RequiredTextureImageUnits;
|
||
+ break;
|
||
+
|
||
case GL_TEXTURE_SRGB_DECODE_EXT:
|
||
if (!ctx->Extensions.EXT_texture_sRGB_decode)
|
||
goto invalid_pname;
|
||
diff --git a/src/mesa/state_tracker/st_atom_rasterizer.c b/src/mesa/state_tracker/st_atom_rasterizer.c
|
||
index f20df9e..7fdfa72 100644
|
||
--- a/src/mesa/state_tracker/st_atom_rasterizer.c
|
||
+++ b/src/mesa/state_tracker/st_atom_rasterizer.c
|
||
@@ -135,16 +135,12 @@ static void update_raster_state( struct st_context *st )
|
||
|
||
/* _NEW_POLYGON
|
||
*/
|
||
- if (ctx->Polygon.OffsetUnits != 0.0 ||
|
||
- ctx->Polygon.OffsetFactor != 0.0) {
|
||
- raster->offset_point = ctx->Polygon.OffsetPoint;
|
||
- raster->offset_line = ctx->Polygon.OffsetLine;
|
||
- raster->offset_tri = ctx->Polygon.OffsetFill;
|
||
- }
|
||
-
|
||
if (ctx->Polygon.OffsetPoint ||
|
||
ctx->Polygon.OffsetLine ||
|
||
ctx->Polygon.OffsetFill) {
|
||
+ raster->offset_point = ctx->Polygon.OffsetPoint;
|
||
+ raster->offset_line = ctx->Polygon.OffsetLine;
|
||
+ raster->offset_tri = ctx->Polygon.OffsetFill;
|
||
raster->offset_units = ctx->Polygon.OffsetUnits;
|
||
raster->offset_scale = ctx->Polygon.OffsetFactor;
|
||
}
|
||
diff --git a/src/mesa/state_tracker/st_cb_bitmap.c b/src/mesa/state_tracker/st_cb_bitmap.c
|
||
index 63dbdb2..36fffe9 100644
|
||
--- a/src/mesa/state_tracker/st_cb_bitmap.c
|
||
+++ b/src/mesa/state_tracker/st_cb_bitmap.c
|
||
@@ -675,11 +675,12 @@ st_flush_bitmap_cache(struct st_context *st)
|
||
* \return GL_TRUE for success, GL_FALSE if bitmap is too large, etc.
|
||
*/
|
||
static GLboolean
|
||
-accum_bitmap(struct st_context *st,
|
||
+accum_bitmap(struct gl_context *ctx,
|
||
GLint x, GLint y, GLsizei width, GLsizei height,
|
||
const struct gl_pixelstore_attrib *unpack,
|
||
const GLubyte *bitmap )
|
||
{
|
||
+ struct st_context *st = ctx->st;
|
||
struct bitmap_cache *cache = st->bitmap.cache;
|
||
int px = -999, py = -999;
|
||
const GLfloat z = st->ctx->Current.RasterPos[2];
|
||
@@ -729,9 +730,17 @@ accum_bitmap(struct st_context *st,
|
||
/* create the transfer if needed */
|
||
create_cache_trans(st);
|
||
|
||
+ /* PBO source... */
|
||
+ bitmap = _mesa_map_pbo_source(ctx, unpack, bitmap);
|
||
+ if (!bitmap) {
|
||
+ return FALSE;
|
||
+ }
|
||
+
|
||
unpack_bitmap(st, px, py, width, height, unpack, bitmap,
|
||
cache->buffer, BITMAP_CACHE_WIDTH);
|
||
|
||
+ _mesa_unmap_pbo_source(ctx, unpack);
|
||
+
|
||
return GL_TRUE; /* accumulated */
|
||
}
|
||
|
||
@@ -764,7 +773,7 @@ st_Bitmap(struct gl_context *ctx, GLint x, GLint y,
|
||
semantic_indexes);
|
||
}
|
||
|
||
- if (UseBitmapCache && accum_bitmap(st, x, y, width, height, unpack, bitmap))
|
||
+ if (UseBitmapCache && accum_bitmap(ctx, x, y, width, height, unpack, bitmap))
|
||
return;
|
||
|
||
pt = make_bitmap_texture(ctx, width, height, unpack, bitmap);
|
||
diff --git a/src/mesa/state_tracker/st_draw.c b/src/mesa/state_tracker/st_draw.c
|
||
index de62264..bff8d9b 100644
|
||
--- a/src/mesa/state_tracker/st_draw.c
|
||
+++ b/src/mesa/state_tracker/st_draw.c
|
||
@@ -283,7 +283,7 @@ st_draw_vbo(struct gl_context *ctx,
|
||
/* don't trim, restarts might be inside index list */
|
||
cso_draw_vbo(st->cso_context, &info);
|
||
}
|
||
- else if (u_trim_pipe_prim(info.mode, &info.count))
|
||
+ else if (u_trim_pipe_prim(prims[i].mode, &info.count))
|
||
cso_draw_vbo(st->cso_context, &info);
|
||
}
|
||
|
||
diff --git a/src/mesa/state_tracker/st_program.c b/src/mesa/state_tracker/st_program.c
|
||
index a9111b5..f56f7cb 100644
|
||
--- a/src/mesa/state_tracker/st_program.c
|
||
+++ b/src/mesa/state_tracker/st_program.c
|
||
@@ -1142,7 +1142,7 @@ st_print_shaders(struct gl_context *ctx)
|
||
static void
|
||
destroy_program_variants(struct st_context *st, struct gl_program *program)
|
||
{
|
||
- if (!program)
|
||
+ if (!program || program == &_mesa_DummyProgram)
|
||
return;
|
||
|
||
switch (program->Target) {
|