123 lines
4.8 KiB
Diff
123 lines
4.8 KiB
Diff
From 1488b4c4efcf51c212fb4d1553964e4bf52e6aa5 Mon Sep 17 00:00:00 2001
|
|
From: Siarhei Siamashka <siarhei.siamashka@gmail.com>
|
|
Date: Mon, 25 Jun 2012 02:00:27 +0300
|
|
Subject: [PATCH] Use OpenMP for bilinear scaled fast paths
|
|
|
|
Does it actually make sense? I remember somebody was strongly opposing
|
|
the idea of spawning threads in pixman in the past, but I can't find
|
|
that e-mail right now.
|
|
|
|
Even if using multithreaded rendering is acceptable, the next question is
|
|
whether to rely on OpenMP for it. Currently OpenMP is disabled in the Android
|
|
toolchain by default:
|
|
https://groups.google.com/forum/#!topic/android-ndk/pUfqxURgNbQ
|
|
Clang/LLVM does not support OpenMP either.
|
|
|
|
Some benchmarks with cairo-perf-trace (gcc 4.7.1, CFLAGS="-O2 -fopenmp"):
|
|
|
|
=== Core i7 860 @2.8GHz ===
|
|
|
|
before patch:
|
|
[ 0] image firefox-fishtank 66.912 66.931 0.13% 3/3
|
|
|
|
export OMP_NUM_THREADS=1
|
|
[ 0] image firefox-fishtank 67.285 67.393 0.12% 3/3
|
|
|
|
export OMP_NUM_THREADS=2
|
|
[ 0] image firefox-fishtank 40.156 40.192 0.07% 3/3
|
|
|
|
export OMP_NUM_THREADS=3
|
|
[ 0] image firefox-fishtank 31.152 31.241 0.21% 3/3
|
|
|
|
export OMP_NUM_THREADS=4
|
|
[ 0] image firefox-fishtank 26.507 26.540 0.15% 3/3
|
|
|
|
=== Radeon HD 6770 (xf86-video-ati-6.14.4, Mesa 8.1-devel (git-6e7756d)) ====
|
|
|
|
[ 0] xlib firefox-fishtank 34.135 34.156 0.23% 3/3
|
|
[ 0] gl firefox-fishtank 5.671 5.755 0.89% 3/3
|
|
---
|
|
pixman/Makefile.am | 2 +-
|
|
pixman/pixman-inlines.h | 24 +++++++++++++++---------
|
|
2 files changed, 16 insertions(+), 10 deletions(-)
|
|
|
|
diff --git a/pixman/Makefile.am b/pixman/Makefile.am
|
|
index 1b232ad..d098169 100644
|
|
--- a/pixman/Makefile.am
|
|
+++ b/pixman/Makefile.am
|
|
@@ -52,7 +52,7 @@ if USE_SSE2
|
|
noinst_LTLIBRARIES += libpixman-sse2.la
|
|
libpixman_sse2_la_SOURCES = \
|
|
pixman-sse2.c
|
|
-libpixman_sse2_la_CFLAGS = $(DEP_CFLAGS) $(SSE2_CFLAGS)
|
|
+libpixman_sse2_la_CFLAGS = $(DEP_CFLAGS) $(SSE2_CFLAGS) $(OPENMP_CFLAGS)
|
|
libpixman_sse2_la_LIBADD = $(DEP_LIBS)
|
|
libpixman_1_la_LDFLAGS += $(SSE2_LDFLAGS)
|
|
libpixman_1_la_LIBADD += libpixman-sse2.la
|
|
diff --git a/pixman/pixman-inlines.h b/pixman/pixman-inlines.h
|
|
index 3532867..7ba0d09 100644
|
|
--- a/pixman/pixman-inlines.h
|
|
+++ b/pixman/pixman-inlines.h
|
|
@@ -765,6 +765,14 @@ bilinear_pad_repeat_get_scanline_bounds (int32_t source_image_width,
|
|
* range and can fit into unsigned byte or be used with 8-bit SIMD
|
|
* multiplication instructions.
|
|
*/
|
|
+
|
|
#define OMP_BILINEAR_PARALLEL_FOR _Pragma("omp parallel for default(none) \
|
|
+ firstprivate(height,dst_line,dst_stride,unit_y,unit_x,src_first_line, \
|
|
+ src_stride,max_vx,right_pad,left_pad,left_tz,right_tz,src_width, \
|
|
+ src_width_fixed,src_image,need_src_extension,mask_line, \
|
|
+ mask_stride,v,vy,width,mask) \
|
|
+ private(vx,y1,y2) schedule(static) if(height > 1)")
|
|
+
|
|
#define FAST_BILINEAR_MAINLOOP_INT(scale_func_name, scanline_func, src_type_t, mask_type_t, \
|
|
dst_type_t, repeat_mode, flags) \
|
|
static void \
|
|
@@ -782,7 +790,7 @@ fast_composite_scaled_bilinear ## scale_func_name (pixman_implementation_t *imp,
|
|
pixman_fixed_t unit_x, unit_y; \
|
|
int32_t left_pad, left_tz, right_tz, right_pad; \
|
|
\
|
|
- dst_type_t *dst; \
|
|
+ int i; \
|
|
mask_type_t solid_mask; \
|
|
const mask_type_t *mask = &solid_mask; \
|
|
int src_stride, mask_stride, dst_stride; \
|
|
@@ -864,20 +872,19 @@ fast_composite_scaled_bilinear ## scale_func_name (pixman_implementation_t *imp,
|
|
src_width_fixed = pixman_int_to_fixed (src_width); \
|
|
} \
|
|
\
|
|
- while (--height >= 0) \
|
|
+ OMP_BILINEAR_PARALLEL_FOR \
|
|
+ for (i = 0; i < height; i++) \
|
|
{ \
|
|
int weight1, weight2; \
|
|
- dst = dst_line; \
|
|
- dst_line += dst_stride; \
|
|
+ dst_type_t *dst = dst_line + (uintptr_t)dst_stride * i; \
|
|
vx = v.vector[0]; \
|
|
if (flags & FLAG_HAVE_NON_SOLID_MASK) \
|
|
{ \
|
|
- mask = mask_line; \
|
|
- mask_line += mask_stride; \
|
|
+ mask = mask_line + (uintptr_t)mask_stride * i; \
|
|
} \
|
|
\
|
|
- y1 = pixman_fixed_to_int (vy); \
|
|
- weight2 = (vy >> 8) & 0xff; \
|
|
+ y1 = pixman_fixed_to_int (vy + unit_y * i); \
|
|
+ weight2 = ((vy + unit_y * i) >> 8) & 0xff; \
|
|
if (weight2) \
|
|
{ \
|
|
/* normal case, both row weights are in 0-255 range and fit unsigned byte */ \
|
|
@@ -890,7 +897,6 @@ fast_composite_scaled_bilinear ## scale_func_name (pixman_implementation_t *imp,
|
|
y2 = y1; \
|
|
weight1 = weight2 = 128; \
|
|
} \
|
|
- vy += unit_y; \
|
|
if (PIXMAN_REPEAT_ ## repeat_mode == PIXMAN_REPEAT_PAD) \
|
|
{ \
|
|
src_type_t *src1, *src2; \
|
|
--
|
|
1.7.4
|
|
|