From 1488b4c4efcf51c212fb4d1553964e4bf52e6aa5 Mon Sep 17 00:00:00 2001 From: Siarhei Siamashka Date: Mon, 25 Jun 2012 02:00:27 +0300 Subject: [PATCH] Use OpenMP for bilinear scaled fast paths Does it actually make sense? I remember somebody was strongly opposing the idea of spawning threads in pixman in the past, but can't find that e-mail right now. Even if using multithreaded rendering is acceptable, the next question is whether to rely on OpenMP for it. Currently OpenMP is disabled in the Android toolchain by default: https://groups.google.com/forum/#!topic/android-ndk/pUfqxURgNbQ Clang/LLVM does not support OpenMP either. Some benchmarks with cairo-perf-trace (gcc 4.7.1, CFLAGS="-O2 -fopenmp"): === Core i7 860 @2.8GHz === before patch: [ 0] image firefox-fishtank 66.912 66.931 0.13% 3/3 export OMP_NUM_THREADS=1 [ 0] image firefox-fishtank 67.285 67.393 0.12% 3/3 export OMP_NUM_THREADS=2 [ 0] image firefox-fishtank 40.156 40.192 0.07% 3/3 export OMP_NUM_THREADS=3 [ 0] image firefox-fishtank 31.152 31.241 0.21% 3/3 export OMP_NUM_THREADS=4 [ 0] image firefox-fishtank 26.507 26.540 0.15% 3/3 === Radeon HD 6770 (xf86-video-ati-6.14.4, Mesa 8.1-devel (git-6e7756d)) ==== [ 0] xlib firefox-fishtank 34.135 34.156 0.23% 3/3 [ 0] gl firefox-fishtank 5.671 5.755 0.89% 3/3 --- pixman/Makefile.am | 2 +- pixman/pixman-inlines.h | 24 +++++++++++++++--------- 2 files changed, 16 insertions(+), 10 deletions(-) diff --git a/pixman/Makefile.am b/pixman/Makefile.am index 1b232ad..d098169 100644 --- a/pixman/Makefile.am +++ b/pixman/Makefile.am @@ -52,7 +52,7 @@ if USE_SSE2 noinst_LTLIBRARIES += libpixman-sse2.la libpixman_sse2_la_SOURCES = \ pixman-sse2.c -libpixman_sse2_la_CFLAGS = $(DEP_CFLAGS) $(SSE2_CFLAGS) +libpixman_sse2_la_CFLAGS = $(DEP_CFLAGS) $(SSE2_CFLAGS) $(OPENMP_CFLAGS) libpixman_sse2_la_LIBADD = $(DEP_LIBS) libpixman_1_la_LDFLAGS += $(SSE2_LDFLAGS) libpixman_1_la_LIBADD += libpixman-sse2.la diff --git a/pixman/pixman-inlines.h b/pixman/pixman-inlines.h index 
3532867..7ba0d09 100644 --- a/pixman/pixman-inlines.h +++ b/pixman/pixman-inlines.h @@ -765,6 +765,14 @@ bilinear_pad_repeat_get_scanline_bounds (int32_t source_image_width, * range and can fit into unsigned byte or be used with 8-bit SIMD * multiplication instructions. */ +/* OpenMP loop pragma; 'mask' is firstprivate as it is preset to &solid_mask. */ +#define OMP_BILINEAR_PARALLEL_FOR _Pragma("omp parallel for default(none) \ + firstprivate(height,dst_line,dst_stride,unit_y,unit_x,src_first_line, \ + src_stride,max_vx,right_pad,left_pad,left_tz,right_tz,src_width, \ + src_width_fixed,src_image,need_src_extension,mask_line, \ + mask_stride,v,vy,width,mask) \ + private(vx,y1,y2) schedule(static) if(height > 1)") + #define FAST_BILINEAR_MAINLOOP_INT(scale_func_name, scanline_func, src_type_t, mask_type_t, \ dst_type_t, repeat_mode, flags) \ static void \ @@ -782,7 +790,7 @@ fast_composite_scaled_bilinear ## scale_func_name (pixman_implementation_t *imp, pixman_fixed_t unit_x, unit_y; \ int32_t left_pad, left_tz, right_tz, right_pad; \ \ - dst_type_t *dst; \ + int i; \ mask_type_t solid_mask; \ const mask_type_t *mask = &solid_mask; \ int src_stride, mask_stride, dst_stride; \ @@ -864,20 +872,19 @@ fast_composite_scaled_bilinear ## scale_func_name (pixman_implementation_t *imp, src_width_fixed = pixman_int_to_fixed (src_width); \ } \ \ - while (--height >= 0) \ + OMP_BILINEAR_PARALLEL_FOR \ + for (i = 0; i < height; i++) \ { \ int weight1, weight2; \ - dst = dst_line; \ - dst_line += dst_stride; \ + dst_type_t *dst = dst_line + (intptr_t)dst_stride * i; \ vx = v.vector[0]; \ if (flags & FLAG_HAVE_NON_SOLID_MASK) \ { \ - mask = mask_line; \ - mask_line += mask_stride; \ + mask = mask_line + (intptr_t)mask_stride * i; \ } \ \ - y1 = pixman_fixed_to_int (vy); \ - weight2 = (vy >> 8) & 0xff; \ + y1 = pixman_fixed_to_int (vy + unit_y * i); \ + weight2 = ((vy + unit_y * i) >> 8) & 0xff; \ if (weight2) \ { \ /* normal case, both row weights are in 0-255 range and fit unsigned byte */ \ @@ -890,7 +897,6 @@ fast_composite_scaled_bilinear ## 
scale_func_name (pixman_implementation_t *imp, y2 = y1; \ weight1 = weight2 = 128; \ } \ - vy += unit_y; \ if (PIXMAN_REPEAT_ ## repeat_mode == PIXMAN_REPEAT_PAD) \ { \ src_type_t *src1, *src2; \ -- 1.7.4