pixman 0.21.2: updates
author: Koen Kooi <koen@openembedded.org>
Sat, 4 Dec 2010 12:38:13 +0000 (13:38 +0100)
committer: Koen Kooi <koen@openembedded.org>
Sat, 4 Dec 2010 12:38:13 +0000 (13:38 +0100)
* add 12 more NEON patches
* add 1 C fast path
* 1 autofoo fix
* one copyright fix
* rediff existing patches

Signed-off-by: Koen Kooi <koen@openembedded.org>
24 files changed:
recipes/xorg-lib/pixman-0.21.2/0002-Fix-argument-quoting-for-AC_INIT.patch [new file with mode: 0644]
recipes/xorg-lib/pixman-0.21.2/0003-Sun-s-copyrights-belong-to-Oracle-now.patch [new file with mode: 0644]
recipes/xorg-lib/pixman-0.21.2/0004-C-fast-path-for-a1-fill-operation.patch [new file with mode: 0644]
recipes/xorg-lib/pixman-0.21.2/0005-ARM-added-neon_composite_over_n_8_8-fast-path.patch [new file with mode: 0644]
recipes/xorg-lib/pixman-0.21.2/0006-ARM-introduced-fetch_mask_pixblock-macro-to-simplify.patch [new file with mode: 0644]
recipes/xorg-lib/pixman-0.21.2/0007-ARM-better-NEON-instructions-scheduling-for-over_n_8.patch [new file with mode: 0644]
recipes/xorg-lib/pixman-0.21.2/0008-ARM-added-neon_composite_over_8888_n_0565-fast-path.patch [new file with mode: 0644]
recipes/xorg-lib/pixman-0.21.2/0009-ARM-reuse-common-NEON-code-for-over_-n_8-8888_n-8888.patch [new file with mode: 0644]
recipes/xorg-lib/pixman-0.21.2/0010-ARM-added-neon_composite_over_0565_n_0565-fast-path.patch [new file with mode: 0644]
recipes/xorg-lib/pixman-0.21.2/0011-ARM-added-neon_composite_add_8888_8_8888-fast-path.patch [new file with mode: 0644]
recipes/xorg-lib/pixman-0.21.2/0012-ARM-better-NEON-instructions-scheduling-for-add_8888.patch [new file with mode: 0644]
recipes/xorg-lib/pixman-0.21.2/0013-ARM-added-neon_composite_add_n_8_8888-fast-path.patch [new file with mode: 0644]
recipes/xorg-lib/pixman-0.21.2/0014-ARM-added-neon_composite_add_8888_n_8888-fast-path.patch [new file with mode: 0644]
recipes/xorg-lib/pixman-0.21.2/0015-ARM-added-flags-parameter-to-some-asm-fast-path-wrap.patch [new file with mode: 0644]
recipes/xorg-lib/pixman-0.21.2/0016-ARM-added-neon_composite_in_n_8-fast-path.patch [new file with mode: 0644]
recipes/xorg-lib/pixman-0.21.2/0017-add-_pixman_bits_override_accessors.patch [moved from recipes/xorg-lib/pixman-0.21.2/0000-Add-pixman_bits_override_accessors.patch with 94% similarity]
recipes/xorg-lib/pixman-0.21.2/0018-Generic-C-implementation-of-pixman_blt-with-overlapp.patch [moved from recipes/xorg-lib/pixman-0.21.2/0001-Generic-C-implementation-of-pixman_blt-with-overlapp.patch with 95% similarity]
recipes/xorg-lib/pixman-0.21.2/0019-Support-of-overlapping-src-dst-for-pixman_blt_mmx.patch [moved from recipes/xorg-lib/pixman-0.21.2/0002-Support-of-overlapping-src-dst-for-pixman_blt_mmx.patch with 94% similarity]
recipes/xorg-lib/pixman-0.21.2/0020-Support-of-overlapping-src-dst-for-pixman_blt_sse2.patch [moved from recipes/xorg-lib/pixman-0.21.2/0003-Support-of-overlapping-src-dst-for-pixman_blt_sse2.patch with 94% similarity]
recipes/xorg-lib/pixman-0.21.2/0021-Support-of-overlapping-src-dst-for-pixman_blt_neon.patch [moved from recipes/xorg-lib/pixman-0.21.2/0004-Support-of-overlapping-src-dst-for-pixman_blt_neon.patch with 91% similarity]
recipes/xorg-lib/pixman-0.21.2/0022-ARM-added-NEON-optimizations-for-fetch-store-r5g6b5-.patch [moved from recipes/xorg-lib/pixman-0.21.2/0005-ARM-added-NEON-optimizations-for-fetch-store-r5g6b5-.patch with 92% similarity]
recipes/xorg-lib/pixman-0.21.2/0023-ARM-added-NEON-optimizations-for-fetch-store-a8-scan.patch [moved from recipes/xorg-lib/pixman-0.21.2/0006-ARM-added-NEON-optimizations-for-fetch-store-a8-scan.patch with 93% similarity]
recipes/xorg-lib/pixman-0.21.2/0024-ARM-added-NEON-optimizations-for-fetching-x8r8g8b8-s.patch [moved from recipes/xorg-lib/pixman-0.21.2/0007-ARM-added-NEON-optimizations-for-fetching-x8r8g8b8-s.patch with 86% similarity]
recipes/xorg-lib/pixman_0.21.2.bb

diff --git a/recipes/xorg-lib/pixman-0.21.2/0002-Fix-argument-quoting-for-AC_INIT.patch b/recipes/xorg-lib/pixman-0.21.2/0002-Fix-argument-quoting-for-AC_INIT.patch
new file mode 100644 (file)
index 0000000..ebf6eaf
--- /dev/null
@@ -0,0 +1,35 @@
+From e7ee43c39d2370716a4d011afa8f5067eced9899 Mon Sep 17 00:00:00 2001
+From: Cyril Brulebois <kibi@debian.org>
+Date: Wed, 17 Nov 2010 16:16:56 +0100
+Subject: [PATCH 02/24] Fix argument quoting for AC_INIT.
+
+One gets rid of this accordingly:
+| autoreconf -vfi
+| autoreconf: Entering directory `.'
+| autoreconf: configure.ac: not using Gettext
+| autoreconf: running: aclocal --force
+| configure.ac:61: warning: AC_INIT: not a literal: "pixman@lists.freedesktop.org"
+| autoreconf: configure.ac: tracing
+| configure.ac:61: warning: AC_INIT: not a literal: "pixman@lists.freedesktop.org"
+
+Signed-off-by: Cyril Brulebois <kibi@debian.org>
+---
+ configure.ac |    2 +-
+ 1 files changed, 1 insertions(+), 1 deletions(-)
+
+diff --git a/configure.ac b/configure.ac
+index db1da21..147e1bf 100644
+--- a/configure.ac
++++ b/configure.ac
+@@ -58,7 +58,7 @@ m4_define([pixman_micro], 3)
+ m4_define([pixman_version],[pixman_major.pixman_minor.pixman_micro])
+-AC_INIT(pixman, pixman_version, "pixman@lists.freedesktop.org", pixman)
++AC_INIT(pixman, pixman_version, [pixman@lists.freedesktop.org], pixman)
+ AM_INIT_AUTOMAKE([foreign dist-bzip2])
+ # Suppress verbose compile lines
+-- 
+1.6.6.1
+
diff --git a/recipes/xorg-lib/pixman-0.21.2/0003-Sun-s-copyrights-belong-to-Oracle-now.patch b/recipes/xorg-lib/pixman-0.21.2/0003-Sun-s-copyrights-belong-to-Oracle-now.patch
new file mode 100644 (file)
index 0000000..e48a2b3
--- /dev/null
@@ -0,0 +1,39 @@
+From 654961efe405ad1a7e54a77548ca8af322ecc1f8 Mon Sep 17 00:00:00 2001
+From: Alan Coopersmith <alan.coopersmith@oracle.com>
+Date: Sun, 21 Nov 2010 11:42:22 -0800
+Subject: [PATCH 03/24] Sun's copyrights belong to Oracle now
+
+Signed-off-by: Alan Coopersmith <alan.coopersmith@oracle.com>
+---
+ COPYING                      |    2 +-
+ pixman/solaris-hwcap.mapfile |    2 +-
+ 2 files changed, 2 insertions(+), 2 deletions(-)
+
+diff --git a/COPYING b/COPYING
+index 3092a34..15f9517 100644
+--- a/COPYING
++++ b/COPYING
+@@ -18,7 +18,7 @@ possible. They may also add themselves to the list below.
+  * Copyright 2008 André Tupinambá
+  * Copyright 2008 Mozilla Corporation
+  * Copyright 2008 Frederic Plourde
+- * Copyright 2009 Sun Microsystems, Inc.
++ * Copyright 2009, Oracle and/or its affiliates. All rights reserved.
+  *
+  * Permission is hereby granted, free of charge, to any person obtaining a
+  * copy of this software and associated documentation files (the "Software"),
+diff --git a/pixman/solaris-hwcap.mapfile b/pixman/solaris-hwcap.mapfile
+index 3605ca7..87efce1 100644
+--- a/pixman/solaris-hwcap.mapfile
++++ b/pixman/solaris-hwcap.mapfile
+@@ -1,6 +1,6 @@
+ ###############################################################################
+ #
+-# Copyright 2009 Sun Microsystems, Inc.  All rights reserved.
++# Copyright 2009, Oracle and/or its affiliates. All rights reserved.
+ #
+ # Permission is hereby granted, free of charge, to any person obtaining a
+ # copy of this software and associated documentation files (the "Software"),
+-- 
+1.6.6.1
+
diff --git a/recipes/xorg-lib/pixman-0.21.2/0004-C-fast-path-for-a1-fill-operation.patch b/recipes/xorg-lib/pixman-0.21.2/0004-C-fast-path-for-a1-fill-operation.patch
new file mode 100644 (file)
index 0000000..75eaac7
--- /dev/null
@@ -0,0 +1,159 @@
+From 4b5b5a2a832cd67f2a0ec231f75a2825b45571fa Mon Sep 17 00:00:00 2001
+From: Siarhei Siamashka <siarhei.siamashka@nokia.com>
+Date: Mon, 15 Nov 2010 18:26:43 +0200
+Subject: [PATCH 04/24] C fast path for a1 fill operation
+
+Can be used as one of the solutions to fix bug
+https://bugs.freedesktop.org/show_bug.cgi?id=31604
+---
+ pixman/pixman-fast-path.c |   87 ++++++++++++++++++++++++++++++++++++++++++++-
+ pixman/pixman.c           |    7 +++-
+ 2 files changed, 91 insertions(+), 3 deletions(-)
+
+diff --git a/pixman/pixman-fast-path.c b/pixman/pixman-fast-path.c
+index 5d5fa95..37dfbae 100644
+--- a/pixman/pixman-fast-path.c
++++ b/pixman/pixman-fast-path.c
+@@ -1334,7 +1334,11 @@ fast_composite_solid_fill (pixman_implementation_t *imp,
+     src = _pixman_image_get_solid (src_image, dst_image->bits.format);
+-    if (dst_image->bits.format == PIXMAN_a8)
++    if (dst_image->bits.format == PIXMAN_a1)
++    {
++      src = src >> 31;
++    }
++    else if (dst_image->bits.format == PIXMAN_a8)
+     {
+       src = src >> 24;
+     }
+@@ -1655,6 +1659,7 @@ static const pixman_fast_path_t c_fast_paths[] =
+     PIXMAN_STD_FAST_PATH (SRC, solid, null, x8r8g8b8, fast_composite_solid_fill),
+     PIXMAN_STD_FAST_PATH (SRC, solid, null, a8b8g8r8, fast_composite_solid_fill),
+     PIXMAN_STD_FAST_PATH (SRC, solid, null, x8b8g8r8, fast_composite_solid_fill),
++    PIXMAN_STD_FAST_PATH (SRC, solid, null, a1, fast_composite_solid_fill),
+     PIXMAN_STD_FAST_PATH (SRC, solid, null, a8, fast_composite_solid_fill),
+     PIXMAN_STD_FAST_PATH (SRC, solid, null, r5g6b5, fast_composite_solid_fill),
+     PIXMAN_STD_FAST_PATH (SRC, x8r8g8b8, null, a8r8g8b8, fast_composite_src_x888_8888),
+@@ -1733,6 +1738,82 @@ static const pixman_fast_path_t c_fast_paths[] =
+     {   PIXMAN_OP_NONE        },
+ };
++#ifdef WORDS_BIGENDIAN
++#define A1_FILL_MASK(n, offs) (((1 << (n)) - 1) << (32 - (offs) - (n)))
++#else
++#define A1_FILL_MASK(n, offs) (((1 << (n)) - 1) << (offs))
++#endif
++
++static force_inline void
++pixman_fill1_line (uint32_t *dst, int offs, int width, int v)
++{
++    if (offs)
++    {
++      int leading_pixels = 32 - offs;
++      if (leading_pixels >= width)
++      {
++          if (v)
++              *dst |= A1_FILL_MASK (width, offs);
++          else
++              *dst &= ~A1_FILL_MASK (width, offs);
++          return;
++      }
++      else
++      {
++          if (v)
++              *dst++ |= A1_FILL_MASK (leading_pixels, offs);
++          else
++              *dst++ &= ~A1_FILL_MASK (leading_pixels, offs);
++          width -= leading_pixels;
++      }
++    }
++    while (width >= 32)
++    {
++      if (v)
++          *dst++ = 0xFFFFFFFF;
++      else
++          *dst++ = 0;
++      width -= 32;
++    }
++    if (width > 0)
++    {
++      if (v)
++          *dst |= A1_FILL_MASK (width, 0);
++      else
++          *dst &= ~A1_FILL_MASK (width, 0);
++    }
++}
++
++static void
++pixman_fill1 (uint32_t *bits,
++              int       stride,
++              int       x,
++              int       y,
++              int       width,
++              int       height,
++              uint32_t  xor)
++{
++    uint32_t *dst = bits + y * stride + (x >> 5);
++    int offs = x & 31;
++
++    if (xor & 1)
++    {
++      while (height--)
++      {
++          pixman_fill1_line (dst, offs, width, 1);
++          dst += stride;
++      }
++    }
++    else
++    {
++      while (height--)
++      {
++          pixman_fill1_line (dst, offs, width, 0);
++          dst += stride;
++      }
++    }
++}
++
+ static void
+ pixman_fill8 (uint32_t *bits,
+               int       stride,
+@@ -1819,6 +1900,10 @@ fast_path_fill (pixman_implementation_t *imp,
+ {
+     switch (bpp)
+     {
++    case 1:
++      pixman_fill1 (bits, stride, x, y, width, height, xor);
++      break;
++
+     case 8:
+       pixman_fill8 (bits, stride, x, y, width, height, xor);
+       break;
+diff --git a/pixman/pixman.c b/pixman/pixman.c
+index 045c556..ec565f9 100644
+--- a/pixman/pixman.c
++++ b/pixman/pixman.c
+@@ -875,7 +875,8 @@ color_to_pixel (pixman_color_t *     color,
+           format == PIXMAN_b8g8r8x8     ||
+           format == PIXMAN_r5g6b5       ||
+           format == PIXMAN_b5g6r5       ||
+-          format == PIXMAN_a8))
++          format == PIXMAN_a8           ||
++          format == PIXMAN_a1))
+     {
+       return FALSE;
+     }
+@@ -895,7 +896,9 @@ color_to_pixel (pixman_color_t *     color,
+           ((c & 0x000000ff) << 24);
+     }
+-    if (format == PIXMAN_a8)
++    if (format == PIXMAN_a1)
++      c = c >> 31;
++    else if (format == PIXMAN_a8)
+       c = c >> 24;
+     else if (format == PIXMAN_r5g6b5 ||
+              format == PIXMAN_b5g6r5)
+-- 
+1.6.6.1
+
diff --git a/recipes/xorg-lib/pixman-0.21.2/0005-ARM-added-neon_composite_over_n_8_8-fast-path.patch b/recipes/xorg-lib/pixman-0.21.2/0005-ARM-added-neon_composite_over_n_8_8-fast-path.patch
new file mode 100644 (file)
index 0000000..a7a9b11
--- /dev/null
@@ -0,0 +1,113 @@
+From 98d08b37f17a3379d0ceff8bb7de8f943873fbd8 Mon Sep 17 00:00:00 2001
+From: Siarhei Siamashka <siarhei.siamashka@nokia.com>
+Date: Fri, 26 Nov 2010 08:55:49 +0200
+Subject: [PATCH 05/24] ARM: added 'neon_composite_over_n_8_8' fast path
+
+---
+ pixman/pixman-arm-neon-asm.S |   68 ++++++++++++++++++++++++++++++++++++++++++
+ pixman/pixman-arm-neon.c     |    3 ++
+ 2 files changed, 71 insertions(+), 0 deletions(-)
+
+diff --git a/pixman/pixman-arm-neon-asm.S b/pixman/pixman-arm-neon-asm.S
+index 91ec27d..a3875ee 100644
+--- a/pixman/pixman-arm-neon-asm.S
++++ b/pixman/pixman-arm-neon-asm.S
+@@ -1203,6 +1203,74 @@ generate_composite_function \
+ /******************************************************************************/
++.macro pixman_composite_over_n_8_8_process_pixblock_head
++    vmull.u8    q0,  d24, d8
++    vmull.u8    q1,  d25, d8
++    vmull.u8    q6,  d26, d8
++    vmull.u8    q7,  d27, d8
++    vrshr.u16   q10, q0,  #8
++    vrshr.u16   q11, q1,  #8
++    vrshr.u16   q12, q6,  #8
++    vrshr.u16   q13, q7,  #8
++    vraddhn.u16 d0,  q0,  q10
++    vraddhn.u16 d1,  q1,  q11
++    vraddhn.u16 d2,  q6,  q12
++    vraddhn.u16 d3,  q7,  q13
++    vmvn.8      q12, q0
++    vmvn.8      q13, q1
++    vmull.u8    q8,  d24, d4
++    vmull.u8    q9,  d25, d5
++    vmull.u8    q10, d26, d6
++    vmull.u8    q11, d27, d7
++.endm
++
++.macro pixman_composite_over_n_8_8_process_pixblock_tail
++    vrshr.u16   q14, q8,  #8
++    vrshr.u16   q15, q9,  #8
++    vrshr.u16   q12, q10, #8
++    vrshr.u16   q13, q11, #8
++    vraddhn.u16 d28, q14, q8
++    vraddhn.u16 d29, q15, q9
++    vraddhn.u16 d30, q12, q10
++    vraddhn.u16 d31, q13, q11
++    vqadd.u8    q14, q0,  q14
++    vqadd.u8    q15, q1,  q15
++.endm
++
++/* TODO: expand macros and do better instructions scheduling */
++.macro pixman_composite_over_n_8_8_process_pixblock_tail_head
++    vld1.8      {d4, d5, d6, d7}, [DST_R, :128]!
++    pixman_composite_over_n_8_8_process_pixblock_tail
++    vld1.8      {d24, d25, d26, d27}, [MASK]!
++    cache_preload 32, 32
++    vst1.8      {d28, d29, d30, d31}, [DST_W, :128]!
++    pixman_composite_over_n_8_8_process_pixblock_head
++.endm
++
++.macro pixman_composite_over_n_8_8_init
++    add         DUMMY, sp, #ARGS_STACK_OFFSET
++    vpush       {d8-d15}
++    vld1.32     {d8[0]}, [DUMMY]
++    vdup.8      d8, d8[3]
++.endm
++
++.macro pixman_composite_over_n_8_8_cleanup
++    vpop        {d8-d15}
++.endm
++
++generate_composite_function \
++    pixman_composite_over_n_8_8_asm_neon, 0, 8, 8, \
++    FLAG_DST_READWRITE, \
++    32, /* number of pixels, processed in a single block */ \
++    5, /* prefetch distance */ \
++    pixman_composite_over_n_8_8_init, \
++    pixman_composite_over_n_8_8_cleanup, \
++    pixman_composite_over_n_8_8_process_pixblock_head, \
++    pixman_composite_over_n_8_8_process_pixblock_tail, \
++    pixman_composite_over_n_8_8_process_pixblock_tail_head
++
++/******************************************************************************/
++
+ .macro pixman_composite_over_n_8888_8888_ca_process_pixblock_head
+     /*
+      * 'combine_mask_ca' replacement
+diff --git a/pixman/pixman-arm-neon.c b/pixman/pixman-arm-neon.c
+index 2f82069..72ef75e 100644
+--- a/pixman/pixman-arm-neon.c
++++ b/pixman/pixman-arm-neon.c
+@@ -76,6 +76,8 @@ PIXMAN_ARM_BIND_FAST_PATH_N_MASK_DST (neon, over_n_8_8888,
+                                       uint8_t, 1, uint32_t, 1)
+ PIXMAN_ARM_BIND_FAST_PATH_N_MASK_DST (neon, over_n_8888_8888_ca,
+                                       uint32_t, 1, uint32_t, 1)
++PIXMAN_ARM_BIND_FAST_PATH_N_MASK_DST (neon, over_n_8_8,
++                                      uint8_t, 1, uint8_t, 1)
+ PIXMAN_ARM_BIND_FAST_PATH_N_MASK_DST (neon, add_n_8_8,
+                                       uint8_t, 1, uint8_t, 1)
+@@ -235,6 +237,7 @@ static const pixman_fast_path_t arm_neon_fast_paths[] =
+     PIXMAN_STD_FAST_PATH (SRC,  b8g8r8,   null,     x8r8g8b8, neon_composite_src_0888_8888_rev),
+     PIXMAN_STD_FAST_PATH (SRC,  b8g8r8,   null,     r5g6b5,   neon_composite_src_0888_0565_rev),
+     PIXMAN_STD_FAST_PATH (SRC,  pixbuf,   pixbuf,   a8r8g8b8, neon_composite_src_pixbuf_8888),
++    PIXMAN_STD_FAST_PATH (OVER, solid,    a8,       a8,       neon_composite_over_n_8_8),
+     PIXMAN_STD_FAST_PATH (OVER, solid,    a8,       r5g6b5,   neon_composite_over_n_8_0565),
+     PIXMAN_STD_FAST_PATH (OVER, solid,    a8,       b5g6r5,   neon_composite_over_n_8_0565),
+     PIXMAN_STD_FAST_PATH (OVER, solid,    a8,       a8r8g8b8, neon_composite_over_n_8_8888),
+-- 
+1.6.6.1
+
diff --git a/recipes/xorg-lib/pixman-0.21.2/0006-ARM-introduced-fetch_mask_pixblock-macro-to-simplify.patch b/recipes/xorg-lib/pixman-0.21.2/0006-ARM-introduced-fetch_mask_pixblock-macro-to-simplify.patch
new file mode 100644 (file)
index 0000000..71a41a7
--- /dev/null
@@ -0,0 +1,157 @@
+From 3be86a92ccab240859062a541cdb871d81c9501a Mon Sep 17 00:00:00 2001
+From: Siarhei Siamashka <siarhei.siamashka@nokia.com>
+Date: Sun, 28 Nov 2010 21:45:06 +0200
+Subject: [PATCH 06/24] ARM: introduced 'fetch_mask_pixblock' macro to simplify code
+
+This macro hides the implementation details of pixels fetching
+for the mask image just like 'fetch_src_pixblock' does for the
+source image. This provides more possibilities for reusing the
+same code blocks in different compositing functions.
+
+This patch does not introduce any functional changes and the
+resulting code in the compiled object file is exactly the same.
+---
+ pixman/pixman-arm-neon-asm.S |   26 +++++++++++++-------------
+ pixman/pixman-arm-neon-asm.h |    5 +++++
+ 2 files changed, 18 insertions(+), 13 deletions(-)
+
+diff --git a/pixman/pixman-arm-neon-asm.S b/pixman/pixman-arm-neon-asm.S
+index a3875ee..155a236 100644
+--- a/pixman/pixman-arm-neon-asm.S
++++ b/pixman/pixman-arm-neon-asm.S
+@@ -841,7 +841,7 @@ generate_composite_function \
+     pixman_composite_over_n_8_0565_process_pixblock_tail
+     vst1.16     {d28, d29}, [DST_W, :128]!
+     vld1.16     {d4, d5}, [DST_R, :128]!
+-    vld1.8      {d24}, [MASK]!
++    fetch_mask_pixblock
+     cache_preload 8, 8
+     pixman_composite_over_n_8_0565_process_pixblock_head
+ .endm
+@@ -889,7 +889,7 @@ generate_composite_function \
+     pixman_composite_over_n_8_0565_process_pixblock_tail
+     fetch_src_pixblock
+     cache_preload 8, 8
+-    vld1.8      {d24}, [MASK]!
++    fetch_mask_pixblock
+     pixman_composite_over_n_8_0565_process_pixblock_head
+     vst1.16     {d28, d29}, [DST_W, :128]!
+ .endm
+@@ -1171,7 +1171,7 @@ generate_composite_function \
+     pixman_composite_over_n_8_8888_process_pixblock_tail
+     vst4.8      {d28, d29, d30, d31}, [DST_W, :128]!
+     vld4.8      {d4, d5, d6, d7}, [DST_R, :128]!
+-    vld1.8      {d24}, [MASK]!
++    fetch_mask_pixblock
+     cache_preload 8, 8
+     pixman_composite_over_n_8_8888_process_pixblock_head
+ .endm
+@@ -1241,7 +1241,7 @@ generate_composite_function \
+ .macro pixman_composite_over_n_8_8_process_pixblock_tail_head
+     vld1.8      {d4, d5, d6, d7}, [DST_R, :128]!
+     pixman_composite_over_n_8_8_process_pixblock_tail
+-    vld1.8      {d24, d25, d26, d27}, [MASK]!
++    fetch_mask_pixblock
+     cache_preload 32, 32
+     vst1.8      {d28, d29, d30, d31}, [DST_W, :128]!
+     pixman_composite_over_n_8_8_process_pixblock_head
+@@ -1341,7 +1341,7 @@ generate_composite_function \
+         vraddhn.u16 d29, q15, q9
+         vraddhn.u16 d30, q6, q10
+         vraddhn.u16 d31, q7, q11
+-    vld4.8      {d24, d25, d26, d27}, [MASK]!
++    fetch_mask_pixblock
+         vqadd.u8    q14, q0, q14
+         vqadd.u8    q15, q1, q15
+     cache_preload 8, 8
+@@ -1405,7 +1405,7 @@ generate_composite_function \
+     pixman_composite_add_n_8_8_process_pixblock_tail
+     vst1.8      {d28, d29, d30, d31}, [DST_W, :128]!
+     vld1.8      {d4, d5, d6, d7}, [DST_R, :128]!
+-    vld1.8      {d24, d25, d26, d27}, [MASK]!
++    fetch_mask_pixblock
+     cache_preload 32, 32
+     pixman_composite_add_n_8_8_process_pixblock_head
+ .endm
+@@ -1462,7 +1462,7 @@ generate_composite_function \
+     pixman_composite_add_8_8_8_process_pixblock_tail
+     vst1.8      {d28, d29, d30, d31}, [DST_W, :128]!
+     vld1.8      {d4, d5, d6, d7}, [DST_R, :128]!
+-    vld1.8      {d24, d25, d26, d27}, [MASK]!
++    fetch_mask_pixblock
+     fetch_src_pixblock
+     cache_preload 32, 32
+     pixman_composite_add_8_8_8_process_pixblock_head
+@@ -1515,7 +1515,7 @@ generate_composite_function \
+     pixman_composite_add_8888_8888_8888_process_pixblock_tail
+     vst4.8      {d28, d29, d30, d31}, [DST_W, :128]!
+     vld4.8      {d4, d5, d6, d7}, [DST_R, :128]!
+-    vld4.8      {d24, d25, d26, d27}, [MASK]!
++    fetch_mask_pixblock
+     fetch_src_pixblock
+     cache_preload 8, 8
+     pixman_composite_add_8888_8888_8888_process_pixblock_head
+@@ -1587,7 +1587,7 @@ generate_composite_function_single_scanline \
+     pixman_composite_out_reverse_8888_n_8888_process_pixblock_tail
+     fetch_src_pixblock
+     cache_preload 8, 8
+-    vld4.8     {d12, d13, d14, d15}, [MASK]!
++    fetch_mask_pixblock
+     pixman_composite_out_reverse_8888_n_8888_process_pixblock_head
+     vst4.8     {d28, d29, d30, d31}, [DST_W, :128]!
+ .endm
+@@ -1658,7 +1658,7 @@ generate_composite_function \
+     pixman_composite_over_8888_n_8888_process_pixblock_tail
+     fetch_src_pixblock
+     cache_preload 8, 8
+-    vld4.8     {d12, d13, d14, d15}, [MASK]!
++    fetch_mask_pixblock
+     pixman_composite_over_8888_n_8888_process_pixblock_head
+     vst4.8     {d28, d29, d30, d31}, [DST_W, :128]!
+ .endm
+@@ -1700,7 +1700,7 @@ generate_composite_function_single_scanline \
+     pixman_composite_over_8888_n_8888_process_pixblock_tail
+     fetch_src_pixblock
+     cache_preload 8, 8
+-    vld1.8     {d15}, [MASK]!
++    fetch_mask_pixblock
+     pixman_composite_over_8888_n_8888_process_pixblock_head
+     vst4.8     {d28, d29, d30, d31}, [DST_W, :128]!
+ .endm
+@@ -1917,7 +1917,7 @@ generate_composite_function \
+ /* TODO: expand macros and do better instructions scheduling */
+ .macro pixman_composite_over_0565_8_0565_process_pixblock_tail_head
+-    vld1.8     {d15}, [MASK]!
++    fetch_mask_pixblock
+     pixman_composite_over_0565_8_0565_process_pixblock_tail
+     fetch_src_pixblock
+     vld1.16    {d10, d11}, [DST_R, :128]!
+@@ -1969,7 +1969,7 @@ generate_composite_function \
+ /* TODO: expand macros and do better instructions scheduling */
+ .macro pixman_composite_add_0565_8_0565_process_pixblock_tail_head
+-    vld1.8     {d15}, [MASK]!
++    fetch_mask_pixblock
+     pixman_composite_add_0565_8_0565_process_pixblock_tail
+     fetch_src_pixblock
+     vld1.16    {d10, d11}, [DST_R, :128]!
+diff --git a/pixman/pixman-arm-neon-asm.h b/pixman/pixman-arm-neon-asm.h
+index c75bdc3..24fa361 100644
+--- a/pixman/pixman-arm-neon-asm.h
++++ b/pixman/pixman-arm-neon-asm.h
+@@ -431,6 +431,11 @@
+ .endif
+ .endm
++.macro fetch_mask_pixblock
++    pixld       pixblock_size, mask_bpp, \
++                (mask_basereg - pixblock_size * mask_bpp / 64), MASK
++.endm
++
+ /*
+  * Macro which is used to process leading pixels until destination
+  * pointer is properly aligned (at 16 bytes boundary). When destination
+-- 
+1.6.6.1
+
diff --git a/recipes/xorg-lib/pixman-0.21.2/0007-ARM-better-NEON-instructions-scheduling-for-over_n_8.patch b/recipes/xorg-lib/pixman-0.21.2/0007-ARM-better-NEON-instructions-scheduling-for-over_n_8.patch
new file mode 100644 (file)
index 0000000..acdfdf8
--- /dev/null
@@ -0,0 +1,170 @@
+From e6814837a6ccd3e4db329e0131eaf2055d2c864b Mon Sep 17 00:00:00 2001
+From: Siarhei Siamashka <siarhei.siamashka@nokia.com>
+Date: Fri, 26 Nov 2010 17:06:58 +0200
+Subject: [PATCH 07/24] ARM: better NEON instructions scheduling for over_n_8_0565
+
+Code rearranged to get better instructions scheduling for ARM Cortex-A8/A9.
+Now it is ~30% faster for the pixel data in L1 cache and makes better use
+of memory bandwidth when running at lower clock frequencies (ex. 500MHz).
+Also register d24 (pixels from the mask image) is now not clobbered by
+supplementary macros, which allows reusing them for the other variants
+of compositing operations later.
+
+Benchmark from ARM Cortex-A8 @500MHz:
+
+== before ==
+
+    over_n_8_0565 =  L1:  63.90  L2:  63.15  M: 60.97 ( 73.53%)
+                     HT:  28.89  VT:  24.14  R: 21.33  RT:  6.78 (  67Kops/s)
+
+== after ==
+
+    over_n_8_0565 =  L1:  82.64  L2:  75.19  M: 71.52 ( 84.14%)
+                     HT:  30.49  VT:  25.56  R: 22.36  RT:  6.89 (  68Kops/s)
+---
+ pixman/pixman-arm-neon-asm.S |  120 +++++++++++++++++++++++++++---------------
+ 1 files changed, 77 insertions(+), 43 deletions(-)
+
+diff --git a/pixman/pixman-arm-neon-asm.S b/pixman/pixman-arm-neon-asm.S
+index 155a236..ffffc1c 100644
+--- a/pixman/pixman-arm-neon-asm.S
++++ b/pixman/pixman-arm-neon-asm.S
+@@ -792,58 +792,92 @@ generate_composite_function \
+ /******************************************************************************/
+ .macro pixman_composite_over_n_8_0565_process_pixblock_head
+-    /* in */
+-    vmull.u8    q0, d24, d8
+-    vmull.u8    q1, d24, d9
+-    vmull.u8    q6, d24, d10
+-    vmull.u8    q7, d24, d11
+-    vrshr.u16   q10, q0, #8
+-    vrshr.u16   q11, q1, #8
+-    vrshr.u16   q12, q6, #8
+-    vrshr.u16   q13, q7, #8
+-    vraddhn.u16 d0, q0, q10
+-    vraddhn.u16 d1, q1, q11
+-    vraddhn.u16 d2, q6, q12
+-    vraddhn.u16 d3, q7, q13
+-
+-    vshrn.u16   d6, q2, #8
+-    vshrn.u16   d7, q2, #3
+-    vsli.u16    q2, q2, #5
+-    vsri.u8     d6, d6, #5
+-    vmvn.8      d3, d3
+-    vsri.u8     d7, d7, #6
+-    vshrn.u16   d30, q2, #2
+-    /* now do alpha blending */
+-    vmull.u8    q10, d3, d6
+-    vmull.u8    q11, d3, d7
+-    vmull.u8    q12, d3, d30
+-    vrshr.u16   q13, q10, #8
+-    vrshr.u16   q3, q11, #8
+-    vrshr.u16   q15, q12, #8
+-    vraddhn.u16 d20, q10, q13
+-    vraddhn.u16 d23, q11, q3
+-    vraddhn.u16 d22, q12, q15
++    vmull.u8    q0,  d24, d8    /* IN for SRC pixels (part1) */
++    vmull.u8    q1,  d24, d9
++    vmull.u8    q6,  d24, d10
++    vmull.u8    q7,  d24, d11
++        vshrn.u16   d6,  q2, #8 /* convert DST_R data to 32-bpp (part1) */
++        vshrn.u16   d7,  q2, #3
++        vsli.u16    q2,  q2, #5
++    vrshr.u16   q8,  q0,  #8    /* IN for SRC pixels (part2) */
++    vrshr.u16   q9,  q1,  #8
++    vrshr.u16   q10, q6,  #8
++    vrshr.u16   q11, q7,  #8
++    vraddhn.u16 d0,  q0,  q8
++    vraddhn.u16 d1,  q1,  q9
++    vraddhn.u16 d2,  q6,  q10
++    vraddhn.u16 d3,  q7,  q11
++        vsri.u8     d6,  d6, #5 /* convert DST_R data to 32-bpp (part2) */
++        vsri.u8     d7,  d7, #6
++    vmvn.8      d3,  d3
++        vshrn.u16   d30, q2, #2
++    vmull.u8    q8,  d3, d6     /* now do alpha blending */
++    vmull.u8    q9,  d3, d7
++    vmull.u8    q10, d3, d30
+ .endm
+ .macro pixman_composite_over_n_8_0565_process_pixblock_tail
+-    vqadd.u8    d16, d2, d20
+-    vqadd.u8    q9, q0, q11
+-    /* convert to r5g6b5 */
+-    vshll.u8    q14, d16, #8
+-    vshll.u8    q8, d19, #8
+-    vshll.u8    q9, d18, #8
+-    vsri.u16    q14, q8, #5
+-    vsri.u16    q14, q9, #11
++    /* 3 cycle bubble (after vmull.u8) */
++    vrshr.u16   q13, q8,  #8
++    vrshr.u16   q11, q9,  #8
++    vrshr.u16   q15, q10, #8
++    vraddhn.u16 d16, q8,  q13
++    vraddhn.u16 d27, q9,  q11
++    vraddhn.u16 d26, q10, q15
++    vqadd.u8    d16, d2,  d16
++    /* 1 cycle bubble */
++    vqadd.u8    q9,  q0,  q13
++    vshll.u8    q14, d16, #8    /* convert to 16bpp */
++    vshll.u8    q8,  d19, #8
++    vshll.u8    q9,  d18, #8
++    vsri.u16    q14, q8,  #5
++    /* 1 cycle bubble */
++    vsri.u16    q14, q9,  #11
+ .endm
+-/* TODO: expand macros and do better instructions scheduling */
+ .macro pixman_composite_over_n_8_0565_process_pixblock_tail_head
+-    pixman_composite_over_n_8_0565_process_pixblock_tail
+-    vst1.16     {d28, d29}, [DST_W, :128]!
+     vld1.16     {d4, d5}, [DST_R, :128]!
++    vshrn.u16   d6,  q2,  #8
+     fetch_mask_pixblock
++    vshrn.u16   d7,  q2,  #3
++    fetch_src_pixblock
++    vmull.u8    q6,  d24, d10
++        vrshr.u16   q13, q8,  #8
++        vrshr.u16   q11, q9,  #8
++        vrshr.u16   q15, q10, #8
++        vraddhn.u16 d16, q8,  q13
++        vraddhn.u16 d27, q9,  q11
++        vraddhn.u16 d26, q10, q15
++        vqadd.u8    d16, d2,  d16
++    vmull.u8    q1,  d24, d9
++        vqadd.u8    q9,  q0,  q13
++        vshll.u8    q14, d16, #8
++    vmull.u8    q0,  d24, d8
++        vshll.u8    q8,  d19, #8
++        vshll.u8    q9,  d18, #8
++        vsri.u16    q14, q8,  #5
++    vmull.u8    q7,  d24, d11
++        vsri.u16    q14, q9,  #11
++
+     cache_preload 8, 8
+-    pixman_composite_over_n_8_0565_process_pixblock_head
++
++    vsli.u16    q2,  q2,  #5
++    vrshr.u16   q8,  q0,  #8
++    vrshr.u16   q9,  q1,  #8
++    vrshr.u16   q10, q6,  #8
++    vrshr.u16   q11, q7,  #8
++    vraddhn.u16 d0,  q0,  q8
++    vraddhn.u16 d1,  q1,  q9
++    vraddhn.u16 d2,  q6,  q10
++    vraddhn.u16 d3,  q7,  q11
++    vsri.u8     d6,  d6,  #5
++    vsri.u8     d7,  d7,  #6
++    vmvn.8      d3,  d3
++    vshrn.u16   d30, q2,  #2
++    vst1.16     {d28, d29}, [DST_W, :128]!
++    vmull.u8    q8,  d3,  d6
++    vmull.u8    q9,  d3,  d7
++    vmull.u8    q10, d3,  d30
+ .endm
+ /*
+-- 
+1.6.6.1
+
diff --git a/recipes/xorg-lib/pixman-0.21.2/0008-ARM-added-neon_composite_over_8888_n_0565-fast-path.patch b/recipes/xorg-lib/pixman-0.21.2/0008-ARM-added-neon_composite_over_8888_n_0565-fast-path.patch
new file mode 100644 (file)
index 0000000..4c5bf8d
--- /dev/null
@@ -0,0 +1,74 @@
+From a7c36681c0c1955ff9110b81f1789e56abb10a95 Mon Sep 17 00:00:00 2001
+From: Siarhei Siamashka <siarhei.siamashka@nokia.com>
+Date: Sat, 27 Nov 2010 03:53:12 +0200
+Subject: [PATCH 08/24] ARM: added 'neon_composite_over_8888_n_0565' fast path
+
+---
+ pixman/pixman-arm-neon-asm.S |   28 ++++++++++++++++++++++++++++
+ pixman/pixman-arm-neon.c     |    4 ++++
+ 2 files changed, 32 insertions(+), 0 deletions(-)
+
+diff --git a/pixman/pixman-arm-neon-asm.S b/pixman/pixman-arm-neon-asm.S
+index ffffc1c..3e52a49 100644
+--- a/pixman/pixman-arm-neon-asm.S
++++ b/pixman/pixman-arm-neon-asm.S
+@@ -917,6 +917,34 @@ generate_composite_function \
+ /******************************************************************************/
++.macro pixman_composite_over_8888_n_0565_init
++    add         DUMMY, sp, #(ARGS_STACK_OFFSET + 8)
++    vpush       {d8-d15}
++    vld1.32     {d24[0]}, [DUMMY]
++    vdup.8      d24, d24[3]
++.endm
++
++.macro pixman_composite_over_8888_n_0565_cleanup
++    vpop        {d8-d15}
++.endm
++
++generate_composite_function \
++    pixman_composite_over_8888_n_0565_asm_neon, 32, 0, 16, \
++    FLAG_DST_READWRITE | FLAG_DEINTERLEAVE_32BPP, \
++    8, /* number of pixels, processed in a single block */ \
++    5, /* prefetch distance */ \
++    pixman_composite_over_8888_n_0565_init, \
++    pixman_composite_over_8888_n_0565_cleanup, \
++    pixman_composite_over_n_8_0565_process_pixblock_head, \
++    pixman_composite_over_n_8_0565_process_pixblock_tail, \
++    pixman_composite_over_n_8_0565_process_pixblock_tail_head, \
++    28, /* dst_w_basereg */ \
++    4,  /* dst_r_basereg */ \
++    8,  /* src_basereg   */ \
++    24  /* mask_basereg  */
++
++/******************************************************************************/
++
+ /* TODO: expand macros and do better instructions scheduling */
+ .macro pixman_composite_over_8888_8_0565_process_pixblock_tail_head
+     vld1.16     {d4, d5}, [DST_R, :128]!
+diff --git a/pixman/pixman-arm-neon.c b/pixman/pixman-arm-neon.c
+index 72ef75e..8156bbb 100644
+--- a/pixman/pixman-arm-neon.c
++++ b/pixman/pixman-arm-neon.c
+@@ -83,6 +83,8 @@ PIXMAN_ARM_BIND_FAST_PATH_N_MASK_DST (neon, add_n_8_8,
+ PIXMAN_ARM_BIND_FAST_PATH_SRC_N_DST (neon, over_8888_n_8888,
+                                      uint32_t, 1, uint32_t, 1)
++PIXMAN_ARM_BIND_FAST_PATH_SRC_N_DST (neon, over_8888_n_0565,
++                                     uint32_t, 1, uint16_t, 1)
+ PIXMAN_ARM_BIND_FAST_PATH_SRC_MASK_DST (neon, add_8_8_8,
+                                         uint8_t, 1, uint8_t, 1, uint8_t, 1)
+@@ -253,6 +255,8 @@ static const pixman_fast_path_t arm_neon_fast_paths[] =
+     PIXMAN_STD_FAST_PATH_CA (OVER, solid, a8b8g8r8, x8b8g8r8, neon_composite_over_n_8888_8888_ca),
+     PIXMAN_STD_FAST_PATH (OVER, a8r8g8b8, solid,    a8r8g8b8, neon_composite_over_8888_n_8888),
+     PIXMAN_STD_FAST_PATH (OVER, a8r8g8b8, solid,    x8r8g8b8, neon_composite_over_8888_n_8888),
++    PIXMAN_STD_FAST_PATH (OVER, a8r8g8b8, solid,    r5g6b5,   neon_composite_over_8888_n_0565),
++    PIXMAN_STD_FAST_PATH (OVER, a8b8g8r8, solid,    b5g6r5,   neon_composite_over_8888_n_0565),
+     PIXMAN_STD_FAST_PATH (OVER, a8r8g8b8, a8,       a8r8g8b8, neon_composite_over_8888_8_8888),
+     PIXMAN_STD_FAST_PATH (OVER, a8r8g8b8, a8,       x8r8g8b8, neon_composite_over_8888_8_8888),
+     PIXMAN_STD_FAST_PATH (OVER, a8b8g8r8, a8,       a8b8g8r8, neon_composite_over_8888_8_8888),
+-- 
+1.6.6.1
+
diff --git a/recipes/xorg-lib/pixman-0.21.2/0009-ARM-reuse-common-NEON-code-for-over_-n_8-8888_n-8888.patch b/recipes/xorg-lib/pixman-0.21.2/0009-ARM-reuse-common-NEON-code-for-over_-n_8-8888_n-8888.patch
new file mode 100644 (file)
index 0000000..b45671e
--- /dev/null
@@ -0,0 +1,139 @@
+From 3990931bf6197eff1cec06cf24bce53ddf9a539a Mon Sep 17 00:00:00 2001
+From: Siarhei Siamashka <siarhei.siamashka@nokia.com>
+Date: Sat, 27 Nov 2010 04:47:39 +0200
+Subject: [PATCH 09/24] ARM: reuse common NEON code for over_{n_8|8888_n|8888_8}_0565
+
+Renamed supplementary macros from 'over_n_8_0565' to 'over_8888_8_0565',
+because they can actually support all variants of this operation:
+over_8888_8_0565/over_n_8_0565/over_8888_n_0565.
+
+Also 'over_8888_8_0565' now uses more optimized common code instead of its
+own variant, improving performance a bit. Even though this operation is
+still memory bandwidth limited, scaled variants of these fast paths may
+put more stress on CPU later.
+
+Benchmarked on ARM Cortex-A8 @500MHz:
+
+== before ==
+
+    over_8888_8_0565 =  L1:  67.10  L2:  53.82  M: 44.70 (105.17%)
+                        HT:  18.73  VT:  16.91  R: 14.25  RT:  4.80 (52Kops/s)
+
+== after ==
+
+    over_8888_8_0565 =  L1:  77.83  L2:  58.14  M: 44.82 (105.52%)
+                        HT:  20.58  VT:  17.44  R: 15.05  RT:  4.88 (52Kops/s)
+---
+ pixman/pixman-arm-neon-asm.S |   61 +++++++++++++++++------------------------
+ 1 files changed, 25 insertions(+), 36 deletions(-)
+
+diff --git a/pixman/pixman-arm-neon-asm.S b/pixman/pixman-arm-neon-asm.S
+index 3e52a49..4175144 100644
+--- a/pixman/pixman-arm-neon-asm.S
++++ b/pixman/pixman-arm-neon-asm.S
+@@ -791,7 +791,7 @@ generate_composite_function \
+ /******************************************************************************/
+-.macro pixman_composite_over_n_8_0565_process_pixblock_head
++.macro pixman_composite_over_8888_8_0565_process_pixblock_head
+     vmull.u8    q0,  d24, d8    /* IN for SRC pixels (part1) */
+     vmull.u8    q1,  d24, d9
+     vmull.u8    q6,  d24, d10
+@@ -816,7 +816,7 @@ generate_composite_function \
+     vmull.u8    q10, d3, d30
+ .endm
+-.macro pixman_composite_over_n_8_0565_process_pixblock_tail
++.macro pixman_composite_over_8888_8_0565_process_pixblock_tail
+     /* 3 cycle bubble (after vmull.u8) */
+     vrshr.u16   q13, q8,  #8
+     vrshr.u16   q11, q9,  #8
+@@ -835,7 +835,7 @@ generate_composite_function \
+     vsri.u16    q14, q9,  #11
+ .endm
+-.macro pixman_composite_over_n_8_0565_process_pixblock_tail_head
++.macro pixman_composite_over_8888_8_0565_process_pixblock_tail_head
+     vld1.16     {d4, d5}, [DST_R, :128]!
+     vshrn.u16   d6,  q2,  #8
+     fetch_mask_pixblock
+@@ -880,6 +880,23 @@ generate_composite_function \
+     vmull.u8    q10, d3,  d30
+ .endm
++generate_composite_function \
++    pixman_composite_over_8888_8_0565_asm_neon, 32, 8, 16, \
++    FLAG_DST_READWRITE | FLAG_DEINTERLEAVE_32BPP, \
++    8, /* number of pixels, processed in a single block */ \
++    5, /* prefetch distance */ \
++    default_init_need_all_regs, \
++    default_cleanup_need_all_regs, \
++    pixman_composite_over_8888_8_0565_process_pixblock_head, \
++    pixman_composite_over_8888_8_0565_process_pixblock_tail, \
++    pixman_composite_over_8888_8_0565_process_pixblock_tail_head, \
++    28, /* dst_w_basereg */ \
++    4,  /* dst_r_basereg */ \
++    8,  /* src_basereg   */ \
++    24  /* mask_basereg  */
++
++/******************************************************************************/
++
+ /*
+  * This function needs a special initialization of solid mask.
+  * Solid source pixel data is fetched from stack at ARGS_STACK_OFFSET
+@@ -911,9 +928,9 @@ generate_composite_function \
+     5, /* prefetch distance */ \
+     pixman_composite_over_n_8_0565_init, \
+     pixman_composite_over_n_8_0565_cleanup, \
+-    pixman_composite_over_n_8_0565_process_pixblock_head, \
+-    pixman_composite_over_n_8_0565_process_pixblock_tail, \
+-    pixman_composite_over_n_8_0565_process_pixblock_tail_head
++    pixman_composite_over_8888_8_0565_process_pixblock_head, \
++    pixman_composite_over_8888_8_0565_process_pixblock_tail, \
++    pixman_composite_over_8888_8_0565_process_pixblock_tail_head
+ /******************************************************************************/
+@@ -935,36 +952,8 @@ generate_composite_function \
+     5, /* prefetch distance */ \
+     pixman_composite_over_8888_n_0565_init, \
+     pixman_composite_over_8888_n_0565_cleanup, \
+-    pixman_composite_over_n_8_0565_process_pixblock_head, \
+-    pixman_composite_over_n_8_0565_process_pixblock_tail, \
+-    pixman_composite_over_n_8_0565_process_pixblock_tail_head, \
+-    28, /* dst_w_basereg */ \
+-    4,  /* dst_r_basereg */ \
+-    8,  /* src_basereg   */ \
+-    24  /* mask_basereg  */
+-
+-/******************************************************************************/
+-
+-/* TODO: expand macros and do better instructions scheduling */
+-.macro pixman_composite_over_8888_8_0565_process_pixblock_tail_head
+-    vld1.16     {d4, d5}, [DST_R, :128]!
+-    pixman_composite_over_n_8_0565_process_pixblock_tail
+-    fetch_src_pixblock
+-    cache_preload 8, 8
+-    fetch_mask_pixblock
+-    pixman_composite_over_n_8_0565_process_pixblock_head
+-    vst1.16     {d28, d29}, [DST_W, :128]!
+-.endm
+-
+-generate_composite_function \
+-    pixman_composite_over_8888_8_0565_asm_neon, 32, 8, 16, \
+-    FLAG_DST_READWRITE | FLAG_DEINTERLEAVE_32BPP, \
+-    8, /* number of pixels, processed in a single block */ \
+-    5, /* prefetch distance */ \
+-    default_init_need_all_regs, \
+-    default_cleanup_need_all_regs, \
+-    pixman_composite_over_n_8_0565_process_pixblock_head, \
+-    pixman_composite_over_n_8_0565_process_pixblock_tail, \
++    pixman_composite_over_8888_8_0565_process_pixblock_head, \
++    pixman_composite_over_8888_8_0565_process_pixblock_tail, \
+     pixman_composite_over_8888_8_0565_process_pixblock_tail_head, \
+     28, /* dst_w_basereg */ \
+     4,  /* dst_r_basereg */ \
+-- 
+1.6.6.1
+
diff --git a/recipes/xorg-lib/pixman-0.21.2/0010-ARM-added-neon_composite_over_0565_n_0565-fast-path.patch b/recipes/xorg-lib/pixman-0.21.2/0010-ARM-added-neon_composite_over_0565_n_0565-fast-path.patch
new file mode 100644 (file)
index 0000000..376631a
--- /dev/null
@@ -0,0 +1,74 @@
+From 6d2f7f981b52b41f4321071c325babcf792bd666 Mon Sep 17 00:00:00 2001
+From: Siarhei Siamashka <siarhei.siamashka@nokia.com>
+Date: Sat, 27 Nov 2010 15:53:54 +0200
+Subject: [PATCH 10/24] ARM: added 'neon_composite_over_0565_n_0565' fast path
+
+---
+ pixman/pixman-arm-neon-asm.S |   28 ++++++++++++++++++++++++++++
+ pixman/pixman-arm-neon.c     |    4 ++++
+ 2 files changed, 32 insertions(+), 0 deletions(-)
+
+diff --git a/pixman/pixman-arm-neon-asm.S b/pixman/pixman-arm-neon-asm.S
+index 4175144..81c0a34 100644
+--- a/pixman/pixman-arm-neon-asm.S
++++ b/pixman/pixman-arm-neon-asm.S
+@@ -1994,6 +1994,34 @@ generate_composite_function \
+ /******************************************************************************/
++.macro pixman_composite_over_0565_n_0565_init
++    add         DUMMY, sp, #(ARGS_STACK_OFFSET + 8)
++    vpush       {d8-d15}
++    vld1.32     {d15[0]}, [DUMMY]
++    vdup.8      d15, d15[3]
++.endm
++
++.macro pixman_composite_over_0565_n_0565_cleanup
++    vpop        {d8-d15}
++.endm
++
++generate_composite_function \
++    pixman_composite_over_0565_n_0565_asm_neon, 16, 0, 16, \
++    FLAG_DST_READWRITE, \
++    8, /* number of pixels, processed in a single block */ \
++    5, /* prefetch distance */ \
++    pixman_composite_over_0565_n_0565_init, \
++    pixman_composite_over_0565_n_0565_cleanup, \
++    pixman_composite_over_0565_8_0565_process_pixblock_head, \
++    pixman_composite_over_0565_8_0565_process_pixblock_tail, \
++    pixman_composite_over_0565_8_0565_process_pixblock_tail_head, \
++    28, /* dst_w_basereg */ \
++    10, /* dst_r_basereg */ \
++    8,  /* src_basereg   */ \
++    15  /* mask_basereg  */
++
++/******************************************************************************/
++
+ .macro pixman_composite_add_0565_8_0565_process_pixblock_head
+     /* mask is in d15 */
+     convert_0565_to_x888 q4, d2, d1, d0
+diff --git a/pixman/pixman-arm-neon.c b/pixman/pixman-arm-neon.c
+index 8156bbb..b01c3e0 100644
+--- a/pixman/pixman-arm-neon.c
++++ b/pixman/pixman-arm-neon.c
+@@ -85,6 +85,8 @@ PIXMAN_ARM_BIND_FAST_PATH_SRC_N_DST (neon, over_8888_n_8888,
+                                      uint32_t, 1, uint32_t, 1)
+ PIXMAN_ARM_BIND_FAST_PATH_SRC_N_DST (neon, over_8888_n_0565,
+                                      uint32_t, 1, uint16_t, 1)
++PIXMAN_ARM_BIND_FAST_PATH_SRC_N_DST (neon, over_0565_n_0565,
++                                     uint16_t, 1, uint16_t, 1)
+ PIXMAN_ARM_BIND_FAST_PATH_SRC_MASK_DST (neon, add_8_8_8,
+                                         uint8_t, 1, uint8_t, 1, uint8_t, 1)
+@@ -257,6 +259,8 @@ static const pixman_fast_path_t arm_neon_fast_paths[] =
+     PIXMAN_STD_FAST_PATH (OVER, a8r8g8b8, solid,    x8r8g8b8, neon_composite_over_8888_n_8888),
+     PIXMAN_STD_FAST_PATH (OVER, a8r8g8b8, solid,    r5g6b5,   neon_composite_over_8888_n_0565),
+     PIXMAN_STD_FAST_PATH (OVER, a8b8g8r8, solid,    b5g6r5,   neon_composite_over_8888_n_0565),
++    PIXMAN_STD_FAST_PATH (OVER, r5g6b5,   solid,    r5g6b5,   neon_composite_over_0565_n_0565),
++    PIXMAN_STD_FAST_PATH (OVER, b5g6r5,   solid,    b5g6r5,   neon_composite_over_0565_n_0565),
+     PIXMAN_STD_FAST_PATH (OVER, a8r8g8b8, a8,       a8r8g8b8, neon_composite_over_8888_8_8888),
+     PIXMAN_STD_FAST_PATH (OVER, a8r8g8b8, a8,       x8r8g8b8, neon_composite_over_8888_8_8888),
+     PIXMAN_STD_FAST_PATH (OVER, a8b8g8r8, a8,       a8b8g8r8, neon_composite_over_8888_8_8888),
+-- 
+1.6.6.1
+
diff --git a/recipes/xorg-lib/pixman-0.21.2/0011-ARM-added-neon_composite_add_8888_8_8888-fast-path.patch b/recipes/xorg-lib/pixman-0.21.2/0011-ARM-added-neon_composite_add_8888_8_8888-fast-path.patch
new file mode 100644 (file)
index 0000000..19f429b
--- /dev/null
@@ -0,0 +1,63 @@
+From c3f48b6aa2f9354af02ffc8c938ec6753fdcbde3 Mon Sep 17 00:00:00 2001
+From: Siarhei Siamashka <siarhei.siamashka@nokia.com>
+Date: Sun, 28 Nov 2010 22:05:53 +0200
+Subject: [PATCH 11/24] ARM: added 'neon_composite_add_8888_8_8888' fast path
+
+---
+ pixman/pixman-arm-neon-asm.S |   17 +++++++++++++++++
+ pixman/pixman-arm-neon.c     |    4 ++++
+ 2 files changed, 21 insertions(+), 0 deletions(-)
+
+diff --git a/pixman/pixman-arm-neon-asm.S b/pixman/pixman-arm-neon-asm.S
+index 81c0a34..11ef166 100644
+--- a/pixman/pixman-arm-neon-asm.S
++++ b/pixman/pixman-arm-neon-asm.S
+@@ -1595,6 +1595,23 @@ generate_composite_function_single_scanline \
+ /******************************************************************************/
++generate_composite_function \
++    pixman_composite_add_8888_8_8888_asm_neon, 32, 8, 32, \
++    FLAG_DST_READWRITE | FLAG_DEINTERLEAVE_32BPP, \
++    8, /* number of pixels, processed in a single block */ \
++    5, /* prefetch distance */ \
++    default_init, \
++    default_cleanup, \
++    pixman_composite_add_8888_8888_8888_process_pixblock_head, \
++    pixman_composite_add_8888_8888_8888_process_pixblock_tail, \
++    pixman_composite_add_8888_8888_8888_process_pixblock_tail_head, \
++    28, /* dst_w_basereg */ \
++    4,  /* dst_r_basereg */ \
++    0,  /* src_basereg   */ \
++    27  /* mask_basereg  */
++
++/******************************************************************************/
++
+ .macro pixman_composite_out_reverse_8888_n_8888_process_pixblock_head
+     /* expecting source data in {d0, d1, d2, d3} */
+     /* destination data in {d4, d5, d6, d7} */
+diff --git a/pixman/pixman-arm-neon.c b/pixman/pixman-arm-neon.c
+index b01c3e0..eaf9787 100644
+--- a/pixman/pixman-arm-neon.c
++++ b/pixman/pixman-arm-neon.c
+@@ -92,6 +92,8 @@ PIXMAN_ARM_BIND_FAST_PATH_SRC_MASK_DST (neon, add_8_8_8,
+                                         uint8_t, 1, uint8_t, 1, uint8_t, 1)
+ PIXMAN_ARM_BIND_FAST_PATH_SRC_MASK_DST (neon, add_0565_8_0565,
+                                         uint16_t, 1, uint8_t, 1, uint16_t, 1)
++PIXMAN_ARM_BIND_FAST_PATH_SRC_MASK_DST (neon, add_8888_8_8888,
++                                        uint32_t, 1, uint8_t, 1, uint32_t, 1)
+ PIXMAN_ARM_BIND_FAST_PATH_SRC_MASK_DST (neon, add_8888_8888_8888,
+                                         uint32_t, 1, uint32_t, 1, uint32_t, 1)
+ PIXMAN_ARM_BIND_FAST_PATH_SRC_MASK_DST (neon, over_8888_8_8888,
+@@ -282,6 +284,8 @@ static const pixman_fast_path_t arm_neon_fast_paths[] =
+     PIXMAN_STD_FAST_PATH (ADD,  a8,       a8,       a8,       neon_composite_add_8_8_8),
+     PIXMAN_STD_FAST_PATH (ADD,  r5g6b5,   a8,       r5g6b5,   neon_composite_add_0565_8_0565),
+     PIXMAN_STD_FAST_PATH (ADD,  b5g6r5,   a8,       b5g6r5,   neon_composite_add_0565_8_0565),
++    PIXMAN_STD_FAST_PATH (ADD,  a8r8g8b8, a8,       a8r8g8b8, neon_composite_add_8888_8_8888),
++    PIXMAN_STD_FAST_PATH (ADD,  a8b8g8r8, a8,       a8b8g8r8, neon_composite_add_8888_8_8888),
+     PIXMAN_STD_FAST_PATH (ADD,  a8r8g8b8, a8r8g8b8, a8r8g8b8, neon_composite_add_8888_8888_8888),
+     PIXMAN_STD_FAST_PATH (ADD,  a8,       null,     a8,       neon_composite_add_8_8),
+     PIXMAN_STD_FAST_PATH (ADD,  a8r8g8b8, null,     a8r8g8b8, neon_composite_add_8888_8888),
+-- 
+1.6.6.1
+
diff --git a/recipes/xorg-lib/pixman-0.21.2/0012-ARM-better-NEON-instructions-scheduling-for-add_8888.patch b/recipes/xorg-lib/pixman-0.21.2/0012-ARM-better-NEON-instructions-scheduling-for-add_8888.patch
new file mode 100644 (file)
index 0000000..28dd8b6
--- /dev/null
@@ -0,0 +1,105 @@
+From 1fba7790367d7b726d05a33bbbcebe10b9280a31 Mon Sep 17 00:00:00 2001
+From: Siarhei Siamashka <siarhei.siamashka@nokia.com>
+Date: Mon, 29 Nov 2010 02:10:22 +0200
+Subject: [PATCH 12/24] ARM: better NEON instructions scheduling for add_8888_8888_8888
+
+Provides a minor performance improvement by using pipelining and hiding
+instruction latencies. Also do not clobber d0-d3 registers (source
+image pixels) while doing calculations in order to allow the use of
+the same macro for add_n_8_8888 fast path later.
+
+Benchmark from ARM Cortex-A8 @500MHz:
+
+== before ==
+
+  add_8888_8888_8888 = L1:  95.94  L2:  42.27  M: 25.60 (121.09%)
+                       HT:  14.54  VT:  13.13  R: 12.77  RT:  4.49 (48Kops/s)
+     add_8888_8_8888 = L1: 104.51  L2:  57.81  M: 36.06 (106.62%)
+                       HT:  19.24  VT:  16.45  R: 14.71  RT:  4.80 (51Kops/s)
+
+== after ==
+
+  add_8888_8888_8888 = L1: 106.66  L2:  47.82  M: 27.32 (129.30%)
+                       HT:  15.44  VT:  13.96  R: 12.86  RT:  4.48 (48Kops/s)
+     add_8888_8_8888 = L1: 107.72  L2:  61.02  M: 38.26 (113.16%)
+                       HT:  19.48  VT:  16.72  R: 14.82  RT:  4.80 (51Kops/s)
+---
+ pixman/pixman-arm-neon-asm.S |   52 +++++++++++++++++++++++++++--------------
+ 1 files changed, 34 insertions(+), 18 deletions(-)
+
+diff --git a/pixman/pixman-arm-neon-asm.S b/pixman/pixman-arm-neon-asm.S
+index 11ef166..829ef84 100644
+--- a/pixman/pixman-arm-neon-asm.S
++++ b/pixman/pixman-arm-neon-asm.S
+@@ -1542,34 +1542,50 @@ generate_composite_function \
+     /* expecting source data in {d0, d1, d2, d3} */
+     /* destination data in {d4, d5, d6, d7} */
+     /* mask in {d24, d25, d26, d27} */
+-    vmull.u8    q8, d27, d0
+-    vmull.u8    q9, d27, d1
++    vmull.u8    q8,  d27, d0
++    vmull.u8    q9,  d27, d1
+     vmull.u8    q10, d27, d2
+     vmull.u8    q11, d27, d3
+-    vrshr.u16   q0, q8, #8
+-    vrshr.u16   q1, q9, #8
+-    vrshr.u16   q12, q10, #8
+-    vrshr.u16   q13, q11, #8
+-    vraddhn.u16 d0, q0, q8
+-    vraddhn.u16 d1, q1, q9
+-    vraddhn.u16 d2, q12, q10
+-    vraddhn.u16 d3, q13, q11
+-    vqadd.u8    q14, q0, q2
+-    vqadd.u8    q15, q1, q3
++    /* 1 cycle bubble */
++    vrsra.u16   q8,  q8,  #8
++    vrsra.u16   q9,  q9,  #8
++    vrsra.u16   q10, q10, #8
++    vrsra.u16   q11, q11, #8
+ .endm
+ .macro pixman_composite_add_8888_8888_8888_process_pixblock_tail
++    /* 2 cycle bubble */
++    vrshrn.u16  d28, q8,  #8
++    vrshrn.u16  d29, q9,  #8
++    vrshrn.u16  d30, q10, #8
++    vrshrn.u16  d31, q11, #8
++    vqadd.u8    q14, q2,  q14
++    /* 1 cycle bubble */
++    vqadd.u8    q15, q3,  q15
+ .endm
+-/* TODO: expand macros and do better instructions scheduling */
+ .macro pixman_composite_add_8888_8888_8888_process_pixblock_tail_head
+-    pixman_composite_add_8888_8888_8888_process_pixblock_tail
+-    vst4.8      {d28, d29, d30, d31}, [DST_W, :128]!
+-    vld4.8      {d4, d5, d6, d7}, [DST_R, :128]!
+-    fetch_mask_pixblock
+     fetch_src_pixblock
++        vrshrn.u16  d28, q8,  #8
++    fetch_mask_pixblock
++        vrshrn.u16  d29, q9,  #8
++    vmull.u8    q8,  d27, d0
++        vrshrn.u16  d30, q10, #8
++    vmull.u8    q9,  d27, d1
++        vrshrn.u16  d31, q11, #8
++    vmull.u8    q10, d27, d2
++        vqadd.u8    q14, q2,  q14
++    vmull.u8    q11, d27, d3
++        vqadd.u8    q15, q3,  q15
++    vrsra.u16   q8,  q8,  #8
++    vld4.8      {d4, d5, d6, d7}, [DST_R, :128]!
++    vrsra.u16   q9,  q9,  #8
++        vst4.8      {d28, d29, d30, d31}, [DST_W, :128]!
++    vrsra.u16   q10, q10, #8
++
+     cache_preload 8, 8
+-    pixman_composite_add_8888_8888_8888_process_pixblock_head
++
++    vrsra.u16   q11, q11, #8
+ .endm
+ generate_composite_function \
+-- 
+1.6.6.1
+
diff --git a/recipes/xorg-lib/pixman-0.21.2/0013-ARM-added-neon_composite_add_n_8_8888-fast-path.patch b/recipes/xorg-lib/pixman-0.21.2/0013-ARM-added-neon_composite_add_n_8_8888-fast-path.patch
new file mode 100644 (file)
index 0000000..a1da09f
--- /dev/null
@@ -0,0 +1,75 @@
+From b066b520dfaf0a9f4d1bc9a73c789091e9ce7cc8 Mon Sep 17 00:00:00 2001
+From: Siarhei Siamashka <siarhei.siamashka@nokia.com>
+Date: Mon, 29 Nov 2010 02:38:52 +0200
+Subject: [PATCH 13/24] ARM: added 'neon_composite_add_n_8_8888' fast path
+
+---
+ pixman/pixman-arm-neon-asm.S |   29 +++++++++++++++++++++++++++++
+ pixman/pixman-arm-neon.c     |    4 ++++
+ 2 files changed, 33 insertions(+), 0 deletions(-)
+
+diff --git a/pixman/pixman-arm-neon-asm.S b/pixman/pixman-arm-neon-asm.S
+index 829ef84..dd6f2c5 100644
+--- a/pixman/pixman-arm-neon-asm.S
++++ b/pixman/pixman-arm-neon-asm.S
+@@ -1628,6 +1628,35 @@ generate_composite_function \
+ /******************************************************************************/
++.macro pixman_composite_add_n_8_8888_init
++    add         DUMMY, sp, #ARGS_STACK_OFFSET
++    vld1.32     {d3[0]}, [DUMMY]
++    vdup.8      d0, d3[0]
++    vdup.8      d1, d3[1]
++    vdup.8      d2, d3[2]
++    vdup.8      d3, d3[3]
++.endm
++
++.macro pixman_composite_add_n_8_8888_cleanup
++.endm
++
++generate_composite_function \
++    pixman_composite_add_n_8_8888_asm_neon, 0, 8, 32, \
++    FLAG_DST_READWRITE | FLAG_DEINTERLEAVE_32BPP, \
++    8, /* number of pixels, processed in a single block */ \
++    5, /* prefetch distance */ \
++    pixman_composite_add_n_8_8888_init, \
++    pixman_composite_add_n_8_8888_cleanup, \
++    pixman_composite_add_8888_8888_8888_process_pixblock_head, \
++    pixman_composite_add_8888_8888_8888_process_pixblock_tail, \
++    pixman_composite_add_8888_8888_8888_process_pixblock_tail_head, \
++    28, /* dst_w_basereg */ \
++    4,  /* dst_r_basereg */ \
++    0,  /* src_basereg   */ \
++    27  /* mask_basereg  */
++
++/******************************************************************************/
++
+ .macro pixman_composite_out_reverse_8888_n_8888_process_pixblock_head
+     /* expecting source data in {d0, d1, d2, d3} */
+     /* destination data in {d4, d5, d6, d7} */
+diff --git a/pixman/pixman-arm-neon.c b/pixman/pixman-arm-neon.c
+index eaf9787..5ad58bd 100644
+--- a/pixman/pixman-arm-neon.c
++++ b/pixman/pixman-arm-neon.c
+@@ -80,6 +80,8 @@ PIXMAN_ARM_BIND_FAST_PATH_N_MASK_DST (neon, over_n_8_8,
+                                       uint8_t, 1, uint8_t, 1)
+ PIXMAN_ARM_BIND_FAST_PATH_N_MASK_DST (neon, add_n_8_8,
+                                       uint8_t, 1, uint8_t, 1)
++PIXMAN_ARM_BIND_FAST_PATH_N_MASK_DST (neon, add_n_8_8888,
++                                      uint8_t, 1, uint32_t, 1)
+ PIXMAN_ARM_BIND_FAST_PATH_SRC_N_DST (neon, over_8888_n_8888,
+                                      uint32_t, 1, uint32_t, 1)
+@@ -281,6 +283,8 @@ static const pixman_fast_path_t arm_neon_fast_paths[] =
+     PIXMAN_STD_FAST_PATH (OVER, x8r8g8b8, null,     a8r8g8b8, neon_composite_src_x888_8888),
+     PIXMAN_STD_FAST_PATH (OVER, x8b8g8r8, null,     a8b8g8r8, neon_composite_src_x888_8888),
+     PIXMAN_STD_FAST_PATH (ADD,  solid,    a8,       a8,       neon_composite_add_n_8_8),
++    PIXMAN_STD_FAST_PATH (ADD,  solid,    a8,       a8r8g8b8, neon_composite_add_n_8_8888),
++    PIXMAN_STD_FAST_PATH (ADD,  solid,    a8,       a8b8g8r8, neon_composite_add_n_8_8888),
+     PIXMAN_STD_FAST_PATH (ADD,  a8,       a8,       a8,       neon_composite_add_8_8_8),
+     PIXMAN_STD_FAST_PATH (ADD,  r5g6b5,   a8,       r5g6b5,   neon_composite_add_0565_8_0565),
+     PIXMAN_STD_FAST_PATH (ADD,  b5g6r5,   a8,       b5g6r5,   neon_composite_add_0565_8_0565),
+-- 
+1.6.6.1
+
diff --git a/recipes/xorg-lib/pixman-0.21.2/0014-ARM-added-neon_composite_add_8888_n_8888-fast-path.patch b/recipes/xorg-lib/pixman-0.21.2/0014-ARM-added-neon_composite_add_8888_n_8888-fast-path.patch
new file mode 100644 (file)
index 0000000..0caa29d
--- /dev/null
@@ -0,0 +1,72 @@
+From f6843e3797eea7e4aed7614b1086f5cefc06c0f9 Mon Sep 17 00:00:00 2001
+From: Siarhei Siamashka <siarhei.siamashka@nokia.com>
+Date: Mon, 29 Nov 2010 03:31:32 +0200
+Subject: [PATCH 14/24] ARM: added 'neon_composite_add_8888_n_8888' fast path
+
+---
+ pixman/pixman-arm-neon-asm.S |   26 ++++++++++++++++++++++++++
+ pixman/pixman-arm-neon.c     |    4 ++++
+ 2 files changed, 30 insertions(+), 0 deletions(-)
+
+diff --git a/pixman/pixman-arm-neon-asm.S b/pixman/pixman-arm-neon-asm.S
+index dd6f2c5..2c0fd37 100644
+--- a/pixman/pixman-arm-neon-asm.S
++++ b/pixman/pixman-arm-neon-asm.S
+@@ -1657,6 +1657,32 @@ generate_composite_function \
+ /******************************************************************************/
++.macro pixman_composite_add_8888_n_8888_init
++    add         DUMMY, sp, #(ARGS_STACK_OFFSET + 8)
++    vld1.32     {d27[0]}, [DUMMY]
++    vdup.8      d27, d27[3]
++.endm
++
++.macro pixman_composite_add_8888_n_8888_cleanup
++.endm
++
++generate_composite_function \
++    pixman_composite_add_8888_n_8888_asm_neon, 32, 0, 32, \
++    FLAG_DST_READWRITE | FLAG_DEINTERLEAVE_32BPP, \
++    8, /* number of pixels, processed in a single block */ \
++    5, /* prefetch distance */ \
++    pixman_composite_add_8888_n_8888_init, \
++    pixman_composite_add_8888_n_8888_cleanup, \
++    pixman_composite_add_8888_8888_8888_process_pixblock_head, \
++    pixman_composite_add_8888_8888_8888_process_pixblock_tail, \
++    pixman_composite_add_8888_8888_8888_process_pixblock_tail_head, \
++    28, /* dst_w_basereg */ \
++    4,  /* dst_r_basereg */ \
++    0,  /* src_basereg   */ \
++    27  /* mask_basereg  */
++
++/******************************************************************************/
++
+ .macro pixman_composite_out_reverse_8888_n_8888_process_pixblock_head
+     /* expecting source data in {d0, d1, d2, d3} */
+     /* destination data in {d4, d5, d6, d7} */
+diff --git a/pixman/pixman-arm-neon.c b/pixman/pixman-arm-neon.c
+index 5ad58bd..f0dc111 100644
+--- a/pixman/pixman-arm-neon.c
++++ b/pixman/pixman-arm-neon.c
+@@ -89,6 +89,8 @@ PIXMAN_ARM_BIND_FAST_PATH_SRC_N_DST (neon, over_8888_n_0565,
+                                      uint32_t, 1, uint16_t, 1)
+ PIXMAN_ARM_BIND_FAST_PATH_SRC_N_DST (neon, over_0565_n_0565,
+                                      uint16_t, 1, uint16_t, 1)
++PIXMAN_ARM_BIND_FAST_PATH_SRC_N_DST (neon, add_8888_n_8888,
++                                     uint32_t, 1, uint32_t, 1)
+ PIXMAN_ARM_BIND_FAST_PATH_SRC_MASK_DST (neon, add_8_8_8,
+                                         uint8_t, 1, uint8_t, 1, uint8_t, 1)
+@@ -291,6 +293,8 @@ static const pixman_fast_path_t arm_neon_fast_paths[] =
+     PIXMAN_STD_FAST_PATH (ADD,  a8r8g8b8, a8,       a8r8g8b8, neon_composite_add_8888_8_8888),
+     PIXMAN_STD_FAST_PATH (ADD,  a8b8g8r8, a8,       a8b8g8r8, neon_composite_add_8888_8_8888),
+     PIXMAN_STD_FAST_PATH (ADD,  a8r8g8b8, a8r8g8b8, a8r8g8b8, neon_composite_add_8888_8888_8888),
++    PIXMAN_STD_FAST_PATH (ADD,  a8r8g8b8, solid,    a8r8g8b8, neon_composite_add_8888_n_8888),
++    PIXMAN_STD_FAST_PATH (ADD,  a8b8g8r8, solid,    a8b8g8r8, neon_composite_add_8888_n_8888),
+     PIXMAN_STD_FAST_PATH (ADD,  a8,       null,     a8,       neon_composite_add_8_8),
+     PIXMAN_STD_FAST_PATH (ADD,  a8r8g8b8, null,     a8r8g8b8, neon_composite_add_8888_8888),
+     PIXMAN_STD_FAST_PATH (ADD,  a8b8g8r8, null,     a8b8g8r8, neon_composite_add_8888_8888),
+-- 
+1.6.6.1
+
diff --git a/recipes/xorg-lib/pixman-0.21.2/0015-ARM-added-flags-parameter-to-some-asm-fast-path-wrap.patch b/recipes/xorg-lib/pixman-0.21.2/0015-ARM-added-flags-parameter-to-some-asm-fast-path-wrap.patch
new file mode 100644 (file)
index 0000000..5f24481
--- /dev/null
@@ -0,0 +1,153 @@
+From af7a69d90ea2b43a4e850870727723d719f09a1c Mon Sep 17 00:00:00 2001
+From: Siarhei Siamashka <siarhei.siamashka@nokia.com>
+Date: Mon, 29 Nov 2010 09:00:46 +0200
+Subject: [PATCH 15/24] ARM: added flags parameter to some asm fast path wrapper macros
+
+Not all types of operations can be skipped when having transparent
+solid source or transparent solid mask. Add an extra flags parameter
+for providing this information to the wrappers.
+---
+ pixman/pixman-arm-common.h |   15 +++++++++------
+ pixman/pixman-arm-neon.c   |   26 +++++++++++++-------------
+ pixman/pixman-arm-simd.c   |    4 ++--
+ 3 files changed, 24 insertions(+), 21 deletions(-)
+
+diff --git a/pixman/pixman-arm-common.h b/pixman/pixman-arm-common.h
+index 2cff6c8..66f448d 100644
+--- a/pixman/pixman-arm-common.h
++++ b/pixman/pixman-arm-common.h
+@@ -47,6 +47,9 @@
+  * or mask), the corresponding stride argument is unused.
+  */
++#define SKIP_ZERO_SRC  1
++#define SKIP_ZERO_MASK 2
++
+ #define PIXMAN_ARM_BIND_FAST_PATH_SRC_DST(cputype, name,                \
+                                           src_type, src_cnt,            \
+                                           dst_type, dst_cnt)            \
+@@ -87,7 +90,7 @@ cputype##_composite_##name (pixman_implementation_t *imp,               \
+                                              src_line, src_stride);     \
+ }
+-#define PIXMAN_ARM_BIND_FAST_PATH_N_DST(cputype, name,                  \
++#define PIXMAN_ARM_BIND_FAST_PATH_N_DST(flags, cputype, name,           \
+                                         dst_type, dst_cnt)              \
+ void                                                                    \
+ pixman_composite_##name##_asm_##cputype (int32_t    w,                  \
+@@ -117,7 +120,7 @@ cputype##_composite_##name (pixman_implementation_t *imp,               \
+                                                                         \
+     src = _pixman_image_get_solid (src_image, dst_image->bits.format);  \
+                                                                         \
+-    if (src == 0)                                                       \
++    if ((flags & SKIP_ZERO_SRC) && src == 0)                            \
+       return;                                                         \
+                                                                         \
+     PIXMAN_IMAGE_GET_LINE (dst_image, dest_x, dest_y, dst_type,         \
+@@ -128,7 +131,7 @@ cputype##_composite_##name (pixman_implementation_t *imp,               \
+                                              src);                      \
+ }
+-#define PIXMAN_ARM_BIND_FAST_PATH_N_MASK_DST(cputype, name,             \
++#define PIXMAN_ARM_BIND_FAST_PATH_N_MASK_DST(flags, cputype, name,      \
+                                              mask_type, mask_cnt,       \
+                                              dst_type, dst_cnt)         \
+ void                                                                    \
+@@ -163,7 +166,7 @@ cputype##_composite_##name (pixman_implementation_t *imp,               \
+                                                                         \
+     src = _pixman_image_get_solid (src_image, dst_image->bits.format);  \
+                                                                         \
+-    if (src == 0)                                                       \
++    if ((flags & SKIP_ZERO_SRC) && src == 0)                            \
+       return;                                                         \
+                                                                         \
+     PIXMAN_IMAGE_GET_LINE (dst_image, dest_x, dest_y, dst_type,         \
+@@ -177,7 +180,7 @@ cputype##_composite_##name (pixman_implementation_t *imp,               \
+                                              mask_line, mask_stride);   \
+ }
+-#define PIXMAN_ARM_BIND_FAST_PATH_SRC_N_DST(cputype, name,              \
++#define PIXMAN_ARM_BIND_FAST_PATH_SRC_N_DST(flags, cputype, name,       \
+                                             src_type, src_cnt,          \
+                                             dst_type, dst_cnt)          \
+ void                                                                    \
+@@ -211,7 +214,7 @@ cputype##_composite_##name (pixman_implementation_t *imp,               \
+                                                                         \
+     mask = _pixman_image_get_solid (mask_image, dst_image->bits.format);\
+                                                                         \
+-    if (mask == 0)                                                      \
++    if ((flags & SKIP_ZERO_MASK) && mask == 0)                          \
+       return;                                                         \
+                                                                         \
+     PIXMAN_IMAGE_GET_LINE (dst_image, dest_x, dest_y, dst_type,         \
+diff --git a/pixman/pixman-arm-neon.c b/pixman/pixman-arm-neon.c
+index f0dc111..1a3741c 100644
+--- a/pixman/pixman-arm-neon.c
++++ b/pixman/pixman-arm-neon.c
+@@ -63,33 +63,33 @@ PIXMAN_ARM_BIND_FAST_PATH_SRC_DST (neon, over_8888_8888,
+ PIXMAN_ARM_BIND_FAST_PATH_SRC_DST (neon, out_reverse_8_0565,
+                                    uint8_t, 1, uint16_t, 1)
+-PIXMAN_ARM_BIND_FAST_PATH_N_DST (neon, over_n_0565,
++PIXMAN_ARM_BIND_FAST_PATH_N_DST (SKIP_ZERO_SRC, neon, over_n_0565,
+                                  uint16_t, 1)
+-PIXMAN_ARM_BIND_FAST_PATH_N_DST (neon, over_n_8888,
++PIXMAN_ARM_BIND_FAST_PATH_N_DST (SKIP_ZERO_SRC, neon, over_n_8888,
+                                  uint32_t, 1)
+-PIXMAN_ARM_BIND_FAST_PATH_N_DST (neon, over_reverse_n_8888,
++PIXMAN_ARM_BIND_FAST_PATH_N_DST (SKIP_ZERO_SRC, neon, over_reverse_n_8888,
+                                  uint32_t, 1)
+-PIXMAN_ARM_BIND_FAST_PATH_N_MASK_DST (neon, over_n_8_0565,
++PIXMAN_ARM_BIND_FAST_PATH_N_MASK_DST (SKIP_ZERO_SRC, neon, over_n_8_0565,
+                                       uint8_t, 1, uint16_t, 1)
+-PIXMAN_ARM_BIND_FAST_PATH_N_MASK_DST (neon, over_n_8_8888,
++PIXMAN_ARM_BIND_FAST_PATH_N_MASK_DST (SKIP_ZERO_SRC, neon, over_n_8_8888,
+                                       uint8_t, 1, uint32_t, 1)
+-PIXMAN_ARM_BIND_FAST_PATH_N_MASK_DST (neon, over_n_8888_8888_ca,
++PIXMAN_ARM_BIND_FAST_PATH_N_MASK_DST (SKIP_ZERO_SRC, neon, over_n_8888_8888_ca,
+                                       uint32_t, 1, uint32_t, 1)
+-PIXMAN_ARM_BIND_FAST_PATH_N_MASK_DST (neon, over_n_8_8,
++PIXMAN_ARM_BIND_FAST_PATH_N_MASK_DST (SKIP_ZERO_SRC, neon, over_n_8_8,
+                                       uint8_t, 1, uint8_t, 1)
+-PIXMAN_ARM_BIND_FAST_PATH_N_MASK_DST (neon, add_n_8_8,
++PIXMAN_ARM_BIND_FAST_PATH_N_MASK_DST (SKIP_ZERO_SRC, neon, add_n_8_8,
+                                       uint8_t, 1, uint8_t, 1)
+-PIXMAN_ARM_BIND_FAST_PATH_N_MASK_DST (neon, add_n_8_8888,
++PIXMAN_ARM_BIND_FAST_PATH_N_MASK_DST (SKIP_ZERO_SRC, neon, add_n_8_8888,
+                                       uint8_t, 1, uint32_t, 1)
+-PIXMAN_ARM_BIND_FAST_PATH_SRC_N_DST (neon, over_8888_n_8888,
++PIXMAN_ARM_BIND_FAST_PATH_SRC_N_DST (SKIP_ZERO_MASK, neon, over_8888_n_8888,
+                                      uint32_t, 1, uint32_t, 1)
+-PIXMAN_ARM_BIND_FAST_PATH_SRC_N_DST (neon, over_8888_n_0565,
++PIXMAN_ARM_BIND_FAST_PATH_SRC_N_DST (SKIP_ZERO_MASK, neon, over_8888_n_0565,
+                                      uint32_t, 1, uint16_t, 1)
+-PIXMAN_ARM_BIND_FAST_PATH_SRC_N_DST (neon, over_0565_n_0565,
++PIXMAN_ARM_BIND_FAST_PATH_SRC_N_DST (SKIP_ZERO_MASK, neon, over_0565_n_0565,
+                                      uint16_t, 1, uint16_t, 1)
+-PIXMAN_ARM_BIND_FAST_PATH_SRC_N_DST (neon, add_8888_n_8888,
++PIXMAN_ARM_BIND_FAST_PATH_SRC_N_DST (SKIP_ZERO_MASK, neon, add_8888_n_8888,
+                                      uint32_t, 1, uint32_t, 1)
+ PIXMAN_ARM_BIND_FAST_PATH_SRC_MASK_DST (neon, add_8_8_8,
+diff --git a/pixman/pixman-arm-simd.c b/pixman/pixman-arm-simd.c
+index 3b05007..dc2f471 100644
+--- a/pixman/pixman-arm-simd.c
++++ b/pixman/pixman-arm-simd.c
+@@ -381,10 +381,10 @@ PIXMAN_ARM_BIND_FAST_PATH_SRC_DST (armv6, add_8_8,
+ PIXMAN_ARM_BIND_FAST_PATH_SRC_DST (armv6, over_8888_8888,
+                                    uint32_t, 1, uint32_t, 1)
+-PIXMAN_ARM_BIND_FAST_PATH_SRC_N_DST (armv6, over_8888_n_8888,
++PIXMAN_ARM_BIND_FAST_PATH_SRC_N_DST (SKIP_ZERO_MASK, armv6, over_8888_n_8888,
+                                      uint32_t, 1, uint32_t, 1)
+-PIXMAN_ARM_BIND_FAST_PATH_N_MASK_DST (armv6, over_n_8_8888,
++PIXMAN_ARM_BIND_FAST_PATH_N_MASK_DST (SKIP_ZERO_SRC, armv6, over_n_8_8888,
+                                       uint8_t, 1, uint32_t, 1)
+ PIXMAN_ARM_BIND_SCALED_NEAREST_SRC_DST (armv6, 0565_0565, SRC,
+-- 
+1.6.6.1
+
diff --git a/recipes/xorg-lib/pixman-0.21.2/0016-ARM-added-neon_composite_in_n_8-fast-path.patch b/recipes/xorg-lib/pixman-0.21.2/0016-ARM-added-neon_composite_in_n_8-fast-path.patch
new file mode 100644 (file)
index 0000000..8a22f54
--- /dev/null
@@ -0,0 +1,97 @@
+From 733f68912f4a44c24ad3973049a7e1d98f4c6ea8 Mon Sep 17 00:00:00 2001
+From: Siarhei Siamashka <siarhei.siamashka@nokia.com>
+Date: Mon, 29 Nov 2010 09:11:29 +0200
+Subject: [PATCH 16/24] ARM: added 'neon_composite_in_n_8' fast path
+
+---
+ pixman/pixman-arm-neon-asm.S |   52 ++++++++++++++++++++++++++++++++++++++++++
+ pixman/pixman-arm-neon.c     |    3 ++
+ 2 files changed, 55 insertions(+), 0 deletions(-)
+
+diff --git a/pixman/pixman-arm-neon-asm.S b/pixman/pixman-arm-neon-asm.S
+index 2c0fd37..cf014fa 100644
+--- a/pixman/pixman-arm-neon-asm.S
++++ b/pixman/pixman-arm-neon-asm.S
+@@ -1427,6 +1427,58 @@ generate_composite_function \
+ /******************************************************************************/
++.macro pixman_composite_in_n_8_process_pixblock_head
++    /* expecting source data in {d0, d1, d2, d3} */
++    /* and destination data in {d4, d5, d6, d7} */
++    vmull.u8    q8,  d4,  d3
++    vmull.u8    q9,  d5,  d3
++    vmull.u8    q10, d6,  d3
++    vmull.u8    q11, d7,  d3
++.endm
++
++.macro pixman_composite_in_n_8_process_pixblock_tail
++    vrshr.u16   q14, q8,  #8
++    vrshr.u16   q15, q9,  #8
++    vrshr.u16   q12, q10, #8
++    vrshr.u16   q13, q11, #8
++    vraddhn.u16 d28, q8,  q14
++    vraddhn.u16 d29, q9,  q15
++    vraddhn.u16 d30, q10, q12
++    vraddhn.u16 d31, q11, q13
++.endm
++
++.macro pixman_composite_in_n_8_process_pixblock_tail_head
++    pixman_composite_in_n_8_process_pixblock_tail
++    vld1.8      {d4, d5, d6, d7}, [DST_R, :128]!
++    cache_preload 32, 32
++    pixman_composite_in_n_8_process_pixblock_head
++    vst1.8      {d28, d29, d30, d31}, [DST_W, :128]!
++.endm
++
++.macro pixman_composite_in_n_8_init
++    add         DUMMY, sp, #ARGS_STACK_OFFSET
++    vld1.32     {d3[0]}, [DUMMY]
++    vdup.8      d3, d3[3]
++.endm
++
++.macro pixman_composite_in_n_8_cleanup
++.endm
++
++generate_composite_function \
++    pixman_composite_in_n_8_asm_neon, 0, 0, 8, \
++    FLAG_DST_READWRITE, \
++    32, /* number of pixels, processed in a single block */ \
++    5, /* prefetch distance */ \
++    pixman_composite_in_n_8_init, \
++    pixman_composite_in_n_8_cleanup, \
++    pixman_composite_in_n_8_process_pixblock_head, \
++    pixman_composite_in_n_8_process_pixblock_tail, \
++    pixman_composite_in_n_8_process_pixblock_tail_head, \
++    28, /* dst_w_basereg */ \
++    4,  /* dst_r_basereg */ \
++    0,  /* src_basereg   */ \
++    24  /* mask_basereg  */
++
+ .macro pixman_composite_add_n_8_8_process_pixblock_head
+     /* expecting source data in {d8, d9, d10, d11} */
+     /* d8 - blue, d9 - green, d10 - red, d11 - alpha */
+diff --git a/pixman/pixman-arm-neon.c b/pixman/pixman-arm-neon.c
+index 1a3741c..e3eca2b 100644
+--- a/pixman/pixman-arm-neon.c
++++ b/pixman/pixman-arm-neon.c
+@@ -69,6 +69,8 @@ PIXMAN_ARM_BIND_FAST_PATH_N_DST (SKIP_ZERO_SRC, neon, over_n_8888,
+                                  uint32_t, 1)
+ PIXMAN_ARM_BIND_FAST_PATH_N_DST (SKIP_ZERO_SRC, neon, over_reverse_n_8888,
+                                  uint32_t, 1)
++PIXMAN_ARM_BIND_FAST_PATH_N_DST (0, neon, in_n_8,
++                                 uint8_t, 1)
+ PIXMAN_ARM_BIND_FAST_PATH_N_MASK_DST (SKIP_ZERO_SRC, neon, over_n_8_0565,
+                                       uint8_t, 1, uint16_t, 1)
+@@ -298,6 +300,7 @@ static const pixman_fast_path_t arm_neon_fast_paths[] =
+     PIXMAN_STD_FAST_PATH (ADD,  a8,       null,     a8,       neon_composite_add_8_8),
+     PIXMAN_STD_FAST_PATH (ADD,  a8r8g8b8, null,     a8r8g8b8, neon_composite_add_8888_8888),
+     PIXMAN_STD_FAST_PATH (ADD,  a8b8g8r8, null,     a8b8g8r8, neon_composite_add_8888_8888),
++    PIXMAN_STD_FAST_PATH (IN,   solid,    null,     a8,       neon_composite_in_n_8),
+     PIXMAN_STD_FAST_PATH (OVER_REVERSE, solid, null, a8r8g8b8, neon_composite_over_reverse_n_8888),
+     PIXMAN_STD_FAST_PATH (OVER_REVERSE, solid, null, a8b8g8r8, neon_composite_over_reverse_n_8888),
+     PIXMAN_STD_FAST_PATH (OUT_REVERSE,  a8,    null, r5g6b5,   neon_composite_out_reverse_8_0565),
+-- 
+1.6.6.1
+
@@ -1,7 +1,7 @@
-From fff598814365037c8ffdd97afe10a14bb09558fc Mon Sep 17 00:00:00 2001
+From 6593d86679fde724e49efa96b16ca22d9521b288 Mon Sep 17 00:00:00 2001
 From: Siarhei Siamashka <siarhei.siamashka@nokia.com>
 Date: Thu, 10 Dec 2009 00:51:50 +0200
-Subject: [PATCH 1/8] add _pixman_bits_override_accessors
+Subject: [PATCH 17/24] add _pixman_bits_override_accessors
 
 * from patch ARM: HACK: added NEON optimizations for fetch/store r5g6b5 scanline
 * used in
@@ -71,5 +71,5 @@ index 383748a..969dfab 100644
                                          int             x,
                                          int             y,
 -- 
-1.7.3.2
+1.6.6.1
 
@@ -1,7 +1,7 @@
-From 6b162fb9d4ede5faa25f24188964f31d7667e74e Mon Sep 17 00:00:00 2001
+From 8e8b2809b505486001dc213becab0d50bfd96c1b Mon Sep 17 00:00:00 2001
 From: Siarhei Siamashka <siarhei.siamashka@nokia.com>
 Date: Tue, 16 Mar 2010 16:55:28 +0100
-Subject: [PATCH 1/7] Generic C implementation of pixman_blt with overlapping support
+Subject: [PATCH 18/24] Generic C implementation of pixman_blt with overlapping support
 
 Uses memcpy/memmove functions to copy pixels, can handle the
 case when both source and destination areas are in the same
@@ -110,5 +110,5 @@ index 969dfab..352bceb 100644
 +
  #endif /* PIXMAN_PRIVATE_H */
 -- 
-1.7.3.2
+1.6.6.1
 
@@ -1,7 +1,7 @@
-From 2d6cc769d233c0b1a391b501e84f5c3b0f1af4f8 Mon Sep 17 00:00:00 2001
+From f5a54f7d5eb1169bc79f0e445e2998e98080ef13 Mon Sep 17 00:00:00 2001
 From: Siarhei Siamashka <siarhei.siamashka@nokia.com>
 Date: Thu, 22 Oct 2009 05:45:47 +0300
-Subject: [PATCH 2/7] Support of overlapping src/dst for pixman_blt_mmx
+Subject: [PATCH 19/24] Support of overlapping src/dst for pixman_blt_mmx
 
 ---
  pixman/pixman-mmx.c |   55 +++++++++++++++++++++++++++++---------------------
@@ -87,5 +87,5 @@ index 34637a4..f9dd473 100644
        while (w >= 2 && ((unsigned long)d & 3))
        {
 -- 
-1.7.3.2
+1.6.6.1
 
@@ -1,7 +1,7 @@
-From 532b8f45cee61ea2509a7f263dd30f40f3de29ba Mon Sep 17 00:00:00 2001
+From c8755294fa9ea396f7113370230b17c424a93be1 Mon Sep 17 00:00:00 2001
 From: Siarhei Siamashka <siarhei.siamashka@nokia.com>
 Date: Thu, 22 Oct 2009 05:45:54 +0300
-Subject: [PATCH 3/7] Support of overlapping src/dst for pixman_blt_sse2
+Subject: [PATCH 20/24] Support of overlapping src/dst for pixman_blt_sse2
 
 ---
  pixman/pixman-sse2.c |   55 +++++++++++++++++++++++++++++--------------------
@@ -87,5 +87,5 @@ index 5907de0..25015ae 100644
        while (w >= 2 && ((unsigned long)d & 3))
        {
 -- 
-1.7.3.2
+1.6.6.1
 
@@ -1,17 +1,17 @@
-From 4e101b976fa5fc72e44553a15516b804ffda0394 Mon Sep 17 00:00:00 2001
+From 86c8198598ef6d639e656c04644015795cc249aa Mon Sep 17 00:00:00 2001
 From: Siarhei Siamashka <siarhei.siamashka@nokia.com>
 Date: Wed, 18 Nov 2009 06:08:48 +0200
-Subject: [PATCH 4/7] Support of overlapping src/dst for pixman_blt_neon
+Subject: [PATCH 21/24] Support of overlapping src/dst for pixman_blt_neon
 
 ---
  pixman/pixman-arm-neon.c |   62 +++++++++++++++++++++++++++++++++++++--------
  1 files changed, 51 insertions(+), 11 deletions(-)
 
 diff --git a/pixman/pixman-arm-neon.c b/pixman/pixman-arm-neon.c
-index 2f82069..6a6ed37 100644
+index e3eca2b..74316a8 100644
 --- a/pixman/pixman-arm-neon.c
 +++ b/pixman/pixman-arm-neon.c
-@@ -185,26 +185,66 @@ pixman_blt_neon (uint32_t *src_bits,
+@@ -199,26 +199,66 @@ pixman_blt_neon (uint32_t *src_bits,
                   int       width,
                   int       height)
  {
@@ -90,5 +90,5 @@ index 2f82069..6a6ed37 100644
      default:
        return FALSE;
 -- 
-1.7.3.2
+1.6.6.1
 
@@ -1,7 +1,7 @@
-From 8d7a77b6780af1b96db32026fb8d79c5603f0fba Mon Sep 17 00:00:00 2001
+From 60d972afbae8613d700d3a6b3cb107429d7e11c6 Mon Sep 17 00:00:00 2001
 From: Siarhei Siamashka <siarhei.siamashka@nokia.com>
 Date: Thu, 10 Dec 2009 00:51:50 +0200
-Subject: [PATCH 5/7] ARM: added NEON optimizations for fetch/store r5g6b5 scanline
+Subject: [PATCH 22/24] ARM: added NEON optimizations for fetch/store r5g6b5 scanline
 
 ---
  pixman/pixman-arm-neon-asm.S |   20 ++++++++++++++++++++
@@ -9,7 +9,7 @@ Subject: [PATCH 5/7] ARM: added NEON optimizations for fetch/store r5g6b5 scanli
  2 files changed, 60 insertions(+), 0 deletions(-)
 
 diff --git a/pixman/pixman-arm-neon-asm.S b/pixman/pixman-arm-neon-asm.S
-index 91ec27d..b838f92 100644
+index cf014fa..25f7bf0 100644
 --- a/pixman/pixman-arm-neon-asm.S
 +++ b/pixman/pixman-arm-neon-asm.S
 @@ -459,6 +459,16 @@ generate_composite_function \
@@ -47,10 +47,10 @@ index 91ec27d..b838f92 100644
  
  .macro pixman_composite_add_8_8_process_pixblock_head
 diff --git a/pixman/pixman-arm-neon.c b/pixman/pixman-arm-neon.c
-index 6a6ed37..4b2bbea 100644
+index 74316a8..f773e92 100644
 --- a/pixman/pixman-arm-neon.c
 +++ b/pixman/pixman-arm-neon.c
-@@ -422,6 +422,42 @@ BIND_COMBINE_U (over)
+@@ -448,6 +448,42 @@ BIND_COMBINE_U (over)
  BIND_COMBINE_U (add)
  BIND_COMBINE_U (out_reverse)
  
@@ -93,7 +93,7 @@ index 6a6ed37..4b2bbea 100644
  pixman_implementation_t *
  _pixman_implementation_create_arm_neon (void)
  {
-@@ -437,6 +473,10 @@ _pixman_implementation_create_arm_neon (void)
+@@ -463,6 +499,10 @@ _pixman_implementation_create_arm_neon (void)
      imp->combine_32[PIXMAN_OP_ADD] = neon_combine_add_u;
      imp->combine_32[PIXMAN_OP_OUT_REVERSE] = neon_combine_out_reverse_u;
  
@@ -105,5 +105,5 @@ index 6a6ed37..4b2bbea 100644
      imp->fill = arm_neon_fill;
  
 -- 
-1.7.3.2
+1.6.6.1
 
@@ -1,7 +1,7 @@
-From b689ddce66ce6391b6478d870f00fe21bbce944c Mon Sep 17 00:00:00 2001
+From cc99d8d6fcbabd7f9f3ed99e65c78a2fb71792fa Mon Sep 17 00:00:00 2001
 From: Siarhei Siamashka <siarhei.siamashka@nokia.com>
 Date: Thu, 23 Sep 2010 21:10:56 +0300
-Subject: [PATCH 6/7] ARM: added NEON optimizations for fetch/store a8 scanline
+Subject: [PATCH 23/24] ARM: added NEON optimizations for fetch/store a8 scanline
 
 ---
  pixman/pixman-arm-neon-asm.S |   64 ++++++++++++++++++++++++++++++++++++++++++
@@ -9,7 +9,7 @@ Subject: [PATCH 6/7] ARM: added NEON optimizations for fetch/store a8 scanline
  2 files changed, 106 insertions(+), 0 deletions(-)
 
 diff --git a/pixman/pixman-arm-neon-asm.S b/pixman/pixman-arm-neon-asm.S
-index b838f92..8e43a3b 100644
+index 25f7bf0..439b06b 100644
 --- a/pixman/pixman-arm-neon-asm.S
 +++ b/pixman/pixman-arm-neon-asm.S
 @@ -418,6 +418,70 @@ generate_composite_function \
@@ -84,10 +84,10 @@ index b838f92..8e43a3b 100644
      vshll.u8    q8, d1, #8
      vshll.u8    q14, d2, #8
 diff --git a/pixman/pixman-arm-neon.c b/pixman/pixman-arm-neon.c
-index 4b2bbea..1c68d32 100644
+index f773e92..55219b3 100644
 --- a/pixman/pixman-arm-neon.c
 +++ b/pixman/pixman-arm-neon.c
-@@ -458,6 +458,45 @@ neon_store_scanline_r5g6b5 (bits_image_t *  image,
+@@ -484,6 +484,45 @@ neon_store_scanline_r5g6b5 (bits_image_t *  image,
      pixman_store_scanline_r5g6b5_asm_neon (width, pixel, values);
  }
  
@@ -133,7 +133,7 @@ index 4b2bbea..1c68d32 100644
  pixman_implementation_t *
  _pixman_implementation_create_arm_neon (void)
  {
-@@ -476,6 +515,9 @@ _pixman_implementation_create_arm_neon (void)
+@@ -502,6 +541,9 @@ _pixman_implementation_create_arm_neon (void)
      _pixman_bits_override_accessors (PIXMAN_r5g6b5,
                                       neon_fetch_scanline_r5g6b5,
                                       neon_store_scanline_r5g6b5);
@@ -144,5 +144,5 @@ index 4b2bbea..1c68d32 100644
      imp->blt = arm_neon_blt;
      imp->fill = arm_neon_fill;
 -- 
-1.7.3.2
+1.6.6.1
 
@@ -1,7 +1,7 @@
-From 912d7b4f79cda5dd828f5db7608314057a39338e Mon Sep 17 00:00:00 2001
+From cf3b8fdc53144ff62c4054996559d3a1a4d62b75 Mon Sep 17 00:00:00 2001
 From: Siarhei Siamashka <siarhei.siamashka@nokia.com>
 Date: Fri, 24 Sep 2010 18:22:44 +0300
-Subject: [PATCH 7/7] ARM: added NEON optimizations for fetching x8r8g8b8 scanline
+Subject: [PATCH 24/24] ARM: added NEON optimizations for fetching x8r8g8b8 scanline
 
 ---
  pixman/pixman-arm-neon-asm.S |   14 ++++++++++++++
@@ -9,10 +9,10 @@ Subject: [PATCH 7/7] ARM: added NEON optimizations for fetching x8r8g8b8 scanlin
  2 files changed, 35 insertions(+), 0 deletions(-)
 
 diff --git a/pixman/pixman-arm-neon-asm.S b/pixman/pixman-arm-neon-asm.S
-index 8e43a3b..5ebee5a 100644
+index 439b06b..3e0dcfe 100644
 --- a/pixman/pixman-arm-neon-asm.S
 +++ b/pixman/pixman-arm-neon-asm.S
-@@ -1206,6 +1206,20 @@ generate_composite_function \
+@@ -1257,6 +1257,20 @@ generate_composite_function \
      0, /* src_basereg   */ \
      0  /* mask_basereg  */
  
@@ -34,10 +34,10 @@ index 8e43a3b..5ebee5a 100644
  
  .macro pixman_composite_over_n_8_8888_process_pixblock_head
 diff --git a/pixman/pixman-arm-neon.c b/pixman/pixman-arm-neon.c
-index 1c68d32..0bcfc54 100644
+index 55219b3..8cef414 100644
 --- a/pixman/pixman-arm-neon.c
 +++ b/pixman/pixman-arm-neon.c
-@@ -496,6 +496,24 @@ neon_store_scanline_a8 (bits_image_t *  image,
+@@ -522,6 +522,24 @@ neon_store_scanline_a8 (bits_image_t *  image,
      pixman_store_scanline_a8_asm_neon (width, pixel, values);
  }
  
@@ -62,7 +62,7 @@ index 1c68d32..0bcfc54 100644
  
  pixman_implementation_t *
  _pixman_implementation_create_arm_neon (void)
-@@ -518,6 +536,9 @@ _pixman_implementation_create_arm_neon (void)
+@@ -544,6 +562,9 @@ _pixman_implementation_create_arm_neon (void)
      _pixman_bits_override_accessors (PIXMAN_a8,
                                       neon_fetch_scanline_a8,
                                       neon_store_scanline_a8);
@@ -73,5 +73,5 @@ index 1c68d32..0bcfc54 100644
      imp->blt = arm_neon_blt;
      imp->fill = arm_neon_fill;
 -- 
-1.7.3.2
+1.6.6.1
 
index 7e361b6..19394d6 100644 (file)
@@ -3,17 +3,32 @@ require pixman.inc
 SRC_URI[archive.md5sum] = "9e09fd6e58cbf9717140891e0b7d4a7a"
 SRC_URI[archive.sha256sum] = "295f51416caf307ff7caf1153ee9b1d86b9f7f02a7876d12db6538d80451c5de"
 
-PR = "${INC_PR}.0"
+PR = "${INC_PR}.1"
 
 SRC_URI += "\
-           file://0000-Add-pixman_bits_override_accessors.patch \
-           file://0001-Generic-C-implementation-of-pixman_blt-with-overlapp.patch \
-           file://0002-Support-of-overlapping-src-dst-for-pixman_blt_mmx.patch \
-           file://0003-Support-of-overlapping-src-dst-for-pixman_blt_sse2.patch \
-           file://0004-Support-of-overlapping-src-dst-for-pixman_blt_neon.patch \
-           file://0005-ARM-added-NEON-optimizations-for-fetch-store-r5g6b5-.patch \
-           file://0006-ARM-added-NEON-optimizations-for-fetch-store-a8-scan.patch \
-           file://0007-ARM-added-NEON-optimizations-for-fetching-x8r8g8b8-s.patch \
+           file://0002-Fix-argument-quoting-for-AC_INIT.patch \
+           file://0003-Sun-s-copyrights-belong-to-Oracle-now.patch \
+           file://0004-C-fast-path-for-a1-fill-operation.patch \
+           file://0005-ARM-added-neon_composite_over_n_8_8-fast-path.patch \
+           file://0006-ARM-introduced-fetch_mask_pixblock-macro-to-simplify.patch \
+           file://0007-ARM-better-NEON-instructions-scheduling-for-over_n_8.patch \
+           file://0008-ARM-added-neon_composite_over_8888_n_0565-fast-path.patch \
+           file://0009-ARM-reuse-common-NEON-code-for-over_-n_8-8888_n-8888.patch \
+           file://0010-ARM-added-neon_composite_over_0565_n_0565-fast-path.patch \
+           file://0011-ARM-added-neon_composite_add_8888_8_8888-fast-path.patch \
+           file://0012-ARM-better-NEON-instructions-scheduling-for-add_8888.patch \
+           file://0013-ARM-added-neon_composite_add_n_8_8888-fast-path.patch \
+           file://0014-ARM-added-neon_composite_add_8888_n_8888-fast-path.patch \
+           file://0015-ARM-added-flags-parameter-to-some-asm-fast-path-wrap.patch \
+           file://0016-ARM-added-neon_composite_in_n_8-fast-path.patch \
+           file://0017-add-_pixman_bits_override_accessors.patch \
+           file://0018-Generic-C-implementation-of-pixman_blt-with-overlapp.patch \
+           file://0019-Support-of-overlapping-src-dst-for-pixman_blt_mmx.patch \
+           file://0020-Support-of-overlapping-src-dst-for-pixman_blt_sse2.patch \
+           file://0021-Support-of-overlapping-src-dst-for-pixman_blt_neon.patch \
+           file://0022-ARM-added-NEON-optimizations-for-fetch-store-r5g6b5-.patch \
+           file://0023-ARM-added-NEON-optimizations-for-fetch-store-a8-scan.patch \
+           file://0024-ARM-added-NEON-optimizations-for-fetching-x8r8g8b8-s.patch \
 "
 
 NEON = " --disable-arm-neon "