1
0
forked from rpms/mesa
mesa/arm-vc4-neon.patch

362 lines
13 KiB
Diff
Raw Normal View History

2017-04-24 22:36:52 +00:00
From patchwork Mon Apr 24 21:59:48 2017
Content-Type: text/plain; charset="utf-8"
MIME-Version: 1.0
Content-Transfer-Encoding: 7bit
Subject: [Mesa-dev,1/4] vc4: Only build the NEON code on arm32.
From: Eric Anholt <eric@anholt.net>
X-Patchwork-Id: 152503
Message-Id: <20170424215951.21544-1-eric@anholt.net>
To: mesa-dev@lists.freedesktop.org
Cc: mesa-stable@lists.freedesktop.org
Date: Mon, 24 Apr 2017 14:59:48 -0700
NEON is sufficiently different on arm64 that we can't just reuse this
code. Disable it on arm64 for now.
v2: Use PIPE_ARCH_ARM instead, as __ARM_ARCH may be 8 for a 32-bit build
for a v8 CPU.
Signed-off-by: Eric Anholt <eric@anholt.net>
Cc: <mesa-stable@lists.freedesktop.org>
---
src/gallium/drivers/vc4/vc4_tiling_lt.c | 4 ++--
1 file changed, 2 insertions(+), 2 deletions(-)
diff --git a/src/gallium/drivers/vc4/vc4_tiling_lt.c b/src/gallium/drivers/vc4/vc4_tiling_lt.c
index c9cbc65e2dbc..f37a92e9390e 100644
--- a/src/gallium/drivers/vc4/vc4_tiling_lt.c
+++ b/src/gallium/drivers/vc4/vc4_tiling_lt.c
@@ -61,7 +61,7 @@ static void
vc4_load_utile(void *cpu, void *gpu, uint32_t cpu_stride, uint32_t cpp)
{
uint32_t gpu_stride = vc4_utile_stride(cpp);
-#if defined(VC4_BUILD_NEON) && defined(__ARM_ARCH)
+#if defined(VC4_BUILD_NEON) && defined(PIPE_ARCH_ARM)
if (gpu_stride == 8) {
__asm__ volatile (
/* Load from the GPU in one shot, no interleave, to
@@ -118,7 +118,7 @@ vc4_store_utile(void *gpu, void *cpu, uint32_t cpu_stride, uint32_t cpp)
{
uint32_t gpu_stride = vc4_utile_stride(cpp);
-#if defined(VC4_BUILD_NEON) && defined(__ARM_ARCH)
+#if defined(VC4_BUILD_NEON) && defined(PIPE_ARCH_ARM)
if (gpu_stride == 8) {
__asm__ volatile (
/* Load each 8-byte line from cpu-side source,
From patchwork Mon Apr 24 21:59:49 2017
Content-Type: text/plain; charset="utf-8"
MIME-Version: 1.0
Content-Transfer-Encoding: 7bit
Subject: [Mesa-dev,2/4] gallium: Enable ARM NEON CPU detection.
From: Eric Anholt <eric@anholt.net>
X-Patchwork-Id: 152500
Message-Id: <20170424215951.21544-2-eric@anholt.net>
To: mesa-dev@lists.freedesktop.org
Date: Mon, 24 Apr 2017 14:59:49 -0700
I wrote this code with reference to pixman, though I've only decided to
cover Linux (what I'm testing) and Android (seems obvious enough). Linux
has getauxval() as a cleaner interface to the /proc entry, but it's more
glibc-specific and I didn't want to add detection for that.
This will be used to enable NEON at runtime on ARMv6 builds of vc4.
v2: Actually initialize the temp vars in the Android path (noticed by
daniels)
v3: Actually pull in the cpufeatures library (change by robher).
Use O_CLOEXEC. Break out of the loop when we find our feature.
v4: Drop VFP code, which was confused about what it was detecting and not
actually used yet.
---
src/gallium/auxiliary/util/u_cpu_detect.c | 43 +++++++++++++++++++++++++++++++
src/gallium/auxiliary/util/u_cpu_detect.h | 1 +
2 files changed, 44 insertions(+)
diff --git a/src/gallium/auxiliary/util/u_cpu_detect.c b/src/gallium/auxiliary/util/u_cpu_detect.c
index 845fc6b34d5c..76115bf8d55d 100644
--- a/src/gallium/auxiliary/util/u_cpu_detect.c
+++ b/src/gallium/auxiliary/util/u_cpu_detect.c
@@ -59,12 +59,18 @@
#if defined(PIPE_OS_LINUX)
#include <signal.h>
+#include <fcntl.h>
+#include <elf.h>
#endif
#ifdef PIPE_OS_UNIX
#include <unistd.h>
#endif
+#if defined(PIPE_OS_ANDROID)
+#include <cpu-features.h>
+#endif
+
#if defined(PIPE_OS_WINDOWS)
#include <windows.h>
#if defined(PIPE_CC_MSVC)
@@ -294,6 +300,38 @@ PIPE_ALIGN_STACK static inline boolean sse2_has_daz(void)
#endif /* X86 or X86_64 */
+#if defined(PIPE_ARCH_ARM)
+static void
+check_os_arm_support(void) /* Fills util_cpu_caps.has_neon from what the OS reports (Android or Linux builds only). */
+{
+#if defined(PIPE_OS_ANDROID)
+ AndroidCpuFamily cpu_family = android_getCpuFamily(); /* Android path: ask the cpufeatures library */
+ uint64_t cpu_features = android_getCpuFeatures();
+
+ if (cpu_family == ANDROID_CPU_FAMILY_ARM) { /* feature bits are only examined for the ARM family */
+ if (cpu_features & ANDROID_CPU_ARM_FEATURE_NEON)
+ util_cpu_caps.has_neon = 1;
+ }
+#elif defined(PIPE_OS_LINUX)
+ Elf32_auxv_t aux;
+ int fd;
+
+ fd = open("/proc/self/auxv", O_RDONLY | O_CLOEXEC); /* Linux path: scan this process's auxiliary vector */
+ if (fd >= 0) { /* if the open fails, has_neon is left untouched */
+ while (read(fd, &aux, sizeof(Elf32_auxv_t)) == sizeof(Elf32_auxv_t)) { /* one record per read; stops on EOF, error or short read */
+ if (aux.a_type == AT_HWCAP) {
+ uint32_t hwcap = aux.a_un.a_val;
+
+ util_cpu_caps.has_neon = (hwcap >> 12) & 1; /* bit 12 is taken as the NEON flag; presumably HWCAP_NEON from <asm/hwcap.h> -- confirm */
+ break; /* first AT_HWCAP record decides; no need to read further */
+ }
+ }
+ close (fd);
+ }
+#endif /* PIPE_OS_LINUX */
+}
+#endif /* PIPE_ARCH_ARM */
+
void
util_cpu_detect(void)
{
@@ -443,6 +481,10 @@ util_cpu_detect(void)
}
#endif /* PIPE_ARCH_X86 || PIPE_ARCH_X86_64 */
+#if defined(PIPE_ARCH_ARM)
+ check_os_arm_support();
+#endif
+
#if defined(PIPE_ARCH_PPC)
check_os_altivec_support();
#endif /* PIPE_ARCH_PPC */
@@ -471,6 +513,7 @@ util_cpu_detect(void)
debug_printf("util_cpu_caps.has_3dnow_ext = %u\n", util_cpu_caps.has_3dnow_ext);
debug_printf("util_cpu_caps.has_xop = %u\n", util_cpu_caps.has_xop);
debug_printf("util_cpu_caps.has_altivec = %u\n", util_cpu_caps.has_altivec);
+ debug_printf("util_cpu_caps.has_neon = %u\n", util_cpu_caps.has_neon);
debug_printf("util_cpu_caps.has_daz = %u\n", util_cpu_caps.has_daz);
debug_printf("util_cpu_caps.has_avx512f = %u\n", util_cpu_caps.has_avx512f);
debug_printf("util_cpu_caps.has_avx512dq = %u\n", util_cpu_caps.has_avx512dq);
diff --git a/src/gallium/auxiliary/util/u_cpu_detect.h b/src/gallium/auxiliary/util/u_cpu_detect.h
index 3bd7294f0759..4a34ac4d9a63 100644
--- a/src/gallium/auxiliary/util/u_cpu_detect.h
+++ b/src/gallium/auxiliary/util/u_cpu_detect.h
@@ -72,6 +72,7 @@ struct util_cpu_caps {
unsigned has_xop:1;
unsigned has_altivec:1;
unsigned has_daz:1;
+ unsigned has_neon:1;
unsigned has_avx512f:1;
unsigned has_avx512dq:1;
From patchwork Mon Apr 24 21:59:50 2017
Content-Type: text/plain; charset="utf-8"
MIME-Version: 1.0
Content-Transfer-Encoding: 8bit
Subject: [Mesa-dev,
3/4] vc4: Use a wrapper file to set VC4_BUILD_NEON instead of CFLAGS.
From: Eric Anholt <eric@anholt.net>
X-Patchwork-Id: 152502
Message-Id: <20170424215951.21544-3-eric@anholt.net>
To: mesa-dev@lists.freedesktop.org
Date: Mon, 24 Apr 2017 14:59:50 -0700
Android.mk was setting the flag across the entire driver, so we didn't
have non-NEON versions getting built. This was going to be a problem with
the next commit, when I start auto-detecting NEON support and use the
non-NEON version when appropriate.
---
Rob: I'm happy to just drop this patch if you'd rather go the other
route for the Android build. I do think this makes for a slightly
faster and simpler build, due to not having the intermediate lib.
src/gallium/drivers/vc4/Makefile.am | 6 ------
src/gallium/drivers/vc4/Makefile.sources | 1 +
src/gallium/drivers/vc4/vc4_tiling_lt_neon.c | 30 ++++++++++++++++++++++++++++
3 files changed, 31 insertions(+), 6 deletions(-)
create mode 100644 src/gallium/drivers/vc4/vc4_tiling_lt_neon.c
diff --git a/src/gallium/drivers/vc4/Makefile.am b/src/gallium/drivers/vc4/Makefile.am
index b361a0c588a8..0ed49b128b2d 100644
--- a/src/gallium/drivers/vc4/Makefile.am
+++ b/src/gallium/drivers/vc4/Makefile.am
@@ -41,10 +41,4 @@ libvc4_la_SOURCES = $(C_SOURCES)
libvc4_la_LIBADD = $(SIM_LIB) $(VC4_LIBS)
libvc4_la_LDFLAGS = $(SIM_LDFLAGS)
-noinst_LTLIBRARIES += libvc4_neon.la
-libvc4_la_LIBADD += libvc4_neon.la
-
-libvc4_neon_la_SOURCES = vc4_tiling_lt.c
-libvc4_neon_la_CFLAGS = $(AM_CFLAGS) -DVC4_BUILD_NEON
-
EXTRA_DIST = kernel/README
diff --git a/src/gallium/drivers/vc4/Makefile.sources b/src/gallium/drivers/vc4/Makefile.sources
index 10de34361260..442d7a561782 100644
--- a/src/gallium/drivers/vc4/Makefile.sources
+++ b/src/gallium/drivers/vc4/Makefile.sources
@@ -56,6 +56,7 @@ C_SOURCES := \
vc4_state.c \
vc4_tiling.c \
vc4_tiling_lt.c \
+ vc4_tiling_lt_neon.c \
vc4_tiling.h \
vc4_uniforms.c \
$()
diff --git a/src/gallium/drivers/vc4/vc4_tiling_lt_neon.c b/src/gallium/drivers/vc4/vc4_tiling_lt_neon.c
new file mode 100644
index 000000000000..7ba66ae4cdf4
--- /dev/null
+++ b/src/gallium/drivers/vc4/vc4_tiling_lt_neon.c
@@ -0,0 +1,30 @@
+/*
+ * Copyright © 2017 Broadcom
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
+ * IN THE SOFTWARE.
+ */
+
+/* Wrapper file for building vc4_tiling_lt.c with the "build NEON assembly if
+ * possible" flag set, since Android.mk doesn't have a way to set CFLAGS for a
+ * single file.
+ */
+
+#define VC4_BUILD_NEON
+#include "vc4_tiling_lt.c"
From patchwork Mon Apr 24 21:59:51 2017
Content-Type: text/plain; charset="utf-8"
MIME-Version: 1.0
Content-Transfer-Encoding: 7bit
Subject: [Mesa-dev,
4/4] vc4: Use runtime CPU detection for whether NEON is available.
From: Eric Anholt <eric@anholt.net>
X-Patchwork-Id: 152501
Message-Id: <20170424215951.21544-4-eric@anholt.net>
To: mesa-dev@lists.freedesktop.org
Date: Mon, 24 Apr 2017 14:59:51 -0700
This will allow Raspbian's ARMv6 builds to take advantage of the new NEON
code, and could prevent problems if vc4 ends up getting used on a v7 CPU
without NEON.
v2: Drop dead NEON_SUFFIX (noted by Erik Faye-Lund)
---
src/gallium/drivers/vc4/vc4_screen.c | 3 +++
src/gallium/drivers/vc4/vc4_tiling.h | 27 +++++++++++++--------------
2 files changed, 16 insertions(+), 14 deletions(-)
diff --git a/src/gallium/drivers/vc4/vc4_screen.c b/src/gallium/drivers/vc4/vc4_screen.c
index b5b1ced49fd5..ce6a9dbaa6cc 100644
--- a/src/gallium/drivers/vc4/vc4_screen.c
+++ b/src/gallium/drivers/vc4/vc4_screen.c
@@ -27,6 +27,7 @@
#include "pipe/p_screen.h"
#include "pipe/p_state.h"
+#include "util/u_cpu_detect.h"
#include "util/u_debug.h"
#include "util/u_memory.h"
#include "util/u_format.h"
@@ -628,6 +629,8 @@ vc4_screen_create(int fd)
if (!vc4_get_chip_info(screen))
goto fail;
+ util_cpu_detect();
+
slab_create_parent(&screen->transfer_pool, sizeof(struct vc4_transfer), 16);
vc4_fence_init(screen);
diff --git a/src/gallium/drivers/vc4/vc4_tiling.h b/src/gallium/drivers/vc4/vc4_tiling.h
index ba1ad6fb3f7d..3168ec20a606 100644
--- a/src/gallium/drivers/vc4/vc4_tiling.h
+++ b/src/gallium/drivers/vc4/vc4_tiling.h
@@ -27,6 +27,7 @@
#include <stdbool.h>
#include <stdint.h>
#include "util/macros.h"
+#include "util/u_cpu_detect.h"
/** Return the width in pixels of a 64-byte microtile. */
static inline uint32_t
@@ -83,23 +84,18 @@ void vc4_store_tiled_image(void *dst, uint32_t dst_stride,
uint8_t tiling_format, int cpp,
const struct pipe_box *box);
-/* If we're building for ARMv7 (Pi 2+), assume it has NEON. For Raspbian we
- * should extend this to have some runtime detection of being built for ARMv6
- * on a Pi 2+.
- */
-#if defined(__ARM_ARCH) && __ARM_ARCH == 7
-#define NEON_SUFFIX(x) x ## _neon
-#else
-#define NEON_SUFFIX(x) x ## _base
-#endif
-
static inline void
vc4_load_lt_image(void *dst, uint32_t dst_stride,
void *src, uint32_t src_stride,
int cpp, const struct pipe_box *box)
{
- NEON_SUFFIX(vc4_load_lt_image)(dst, dst_stride, src, src_stride,
+ if (util_cpu_caps.has_neon) {
+ vc4_load_lt_image_neon(dst, dst_stride, src, src_stride,
cpp, box);
+ } else {
+ vc4_load_lt_image_base(dst, dst_stride, src, src_stride,
+ cpp, box);
+ }
}
static inline void
@@ -107,10 +103,13 @@ vc4_store_lt_image(void *dst, uint32_t dst_stride,
void *src, uint32_t src_stride,
int cpp, const struct pipe_box *box)
{
- NEON_SUFFIX(vc4_store_lt_image)(dst, dst_stride, src, src_stride,
+ if (util_cpu_caps.has_neon) {
+ vc4_store_lt_image_neon(dst, dst_stride, src, src_stride,
cpp, box);
+ } else {
+ vc4_store_lt_image_base(dst, dst_stride, src, src_stride,
+ cpp, box);
+ }
}
-#undef NEON_SUFFIX
-
#endif /* VC4_TILING_H */