pixman 0.17.8: add and make default for angstrom
author Koen Kooi <koen@openembedded.org>
Thu, 25 Feb 2010 09:17:18 +0000 (10:17 +0100)
committer Koen Kooi <koen@openembedded.org>
Thu, 25 Feb 2010 09:20:22 +0000 (10:20 +0100)
recipes/xorg-lib/pixman-0.17.8/0003-Generic-C-implementation-of-pixman_blt-with-overlapp.patch [new file with mode: 0644]
recipes/xorg-lib/pixman-0.17.8/0004-Support-of-overlapping-src-dst-for-pixman_blt_mmx.patch [new file with mode: 0644]
recipes/xorg-lib/pixman-0.17.8/0005-Support-of-overlapping-src-dst-for-pixman_blt_sse2.patch [new file with mode: 0644]
recipes/xorg-lib/pixman-0.17.8/0006-Support-of-overlapping-src-dst-for-pixman_blt_neon.patch [new file with mode: 0644]
recipes/xorg-lib/pixman-0.17.8/1-composite.patch [new file with mode: 0644]
recipes/xorg-lib/pixman-0.17.8/2-composite.patch [new file with mode: 0644]
recipes/xorg-lib/pixman-0.17.8/3-composite.patch [new file with mode: 0644]
recipes/xorg-lib/pixman_0.17.8.bb [new file with mode: 0644]
recipes/xorg-lib/pixman_git.bb

diff --git a/recipes/xorg-lib/pixman-0.17.8/0003-Generic-C-implementation-of-pixman_blt-with-overlapp.patch b/recipes/xorg-lib/pixman-0.17.8/0003-Generic-C-implementation-of-pixman_blt-with-overlapp.patch
new file mode 100644 (file)
index 0000000..25ce7ee
--- /dev/null
@@ -0,0 +1,114 @@
+From c29c9fa826b7112156fd6150b5f1564227935c05 Mon Sep 17 00:00:00 2001
+From: Siarhei Siamashka <siarhei.siamashka@nokia.com>
+Date: Thu, 22 Oct 2009 05:27:33 +0300
+Subject: [PATCH 3/6] Generic C implementation of pixman_blt with overlapping support
+
+Uses memcpy/memmove functions to copy pixels, can handle the
+case when both source and destination areas are in the same
+image (this is useful for scrolling).
+
+It is assumed that copying direction is only important when
+using the same image for both source and destination (and
+src_stride == dst_stride). Copying direction is undefined
+for the images with different source and destination stride
+which happen to be in the overlapped areas (but this is an
+unrealistic case anyway).
+---
+ pixman/pixman-general.c |   21 ++++++++++++++++++---
+ pixman/pixman-private.h |   43 +++++++++++++++++++++++++++++++++++++++++++
+ 2 files changed, 61 insertions(+), 3 deletions(-)
+
+diff --git a/pixman/pixman-general.c b/pixman/pixman-general.c
+index c96a3f9..d71a299 100644
+--- a/pixman/pixman-general.c
++++ b/pixman/pixman-general.c
+@@ -300,9 +300,24 @@ general_blt (pixman_implementation_t *imp,
+              int                      width,
+              int                      height)
+ {
+-    /* We can't blit unless we have sse2 or mmx */
+-
+-    return FALSE;
++    uint8_t *dst_bytes = (uint8_t *)dst_bits;
++    uint8_t *src_bytes = (uint8_t *)src_bits;
++    int bpp;
++
++    if (src_bpp != dst_bpp || src_bpp & 7)
++      return FALSE;
++
++    bpp = src_bpp >> 3;
++    width *= bpp;
++    src_stride *= 4;
++    dst_stride *= 4;
++    pixman_blt_helper (src_bytes + src_y * src_stride + src_x * bpp,
++                       dst_bytes + dst_y * dst_stride + dst_x * bpp,
++                       src_stride,
++                       dst_stride,
++                       width,
++                       height);
++    return TRUE;
+ }
+ static pixman_bool_t
+diff --git a/pixman/pixman-private.h b/pixman/pixman-private.h
+index 5000f91..8c5d4fd 100644
+--- a/pixman/pixman-private.h
++++ b/pixman/pixman-private.h
+@@ -10,6 +10,7 @@
+ #include "pixman.h"
+ #include <time.h>
++#include <string.h>
+ #include <assert.h>
+ #include "pixman-compiler.h"
+@@ -794,4 +795,46 @@ void pixman_timer_register (pixman_timer_t *timer);
+ #endif /* PIXMAN_TIMERS */
++/* a helper function, can blit 8-bit images with src/dst overlapping support */
++static inline void
++pixman_blt_helper (uint8_t *src_bytes,
++                   uint8_t *dst_bytes,
++                   int      src_stride,
++                   int      dst_stride,
++                   int      width,
++                   int      height)
++{
++    /*
++     * The second part of this check is not strictly needed, but it prevents
++     * unnecessary upside-down processing of areas which belong to different
++     * images. Upside-down processing can be slower with fixed-distance-ahead
++     * prefetch and perceived as having more tearing.
++     */
++    if (src_bytes < dst_bytes + width &&
++      src_bytes + src_stride * height > dst_bytes)
++    {
++      src_bytes += src_stride * height - src_stride;
++      dst_bytes += dst_stride * height - dst_stride;
++      dst_stride = -dst_stride;
++      src_stride = -src_stride;
++      /* Horizontal scrolling to the left needs memmove */
++      if (src_bytes + width > dst_bytes)
++      {
++          while (--height >= 0)
++          {
++              memmove (dst_bytes, src_bytes, width);
++              dst_bytes += dst_stride;
++              src_bytes += src_stride;
++          }
++          return;
++      }
++    }
++    while (--height >= 0)
++    {
++      memcpy (dst_bytes, src_bytes, width);
++      dst_bytes += dst_stride;
++      src_bytes += src_stride;
++    }
++}
++
+ #endif /* PIXMAN_PRIVATE_H */
+-- 
+1.6.2.4
+
diff --git a/recipes/xorg-lib/pixman-0.17.8/0004-Support-of-overlapping-src-dst-for-pixman_blt_mmx.patch b/recipes/xorg-lib/pixman-0.17.8/0004-Support-of-overlapping-src-dst-for-pixman_blt_mmx.patch
new file mode 100644 (file)
index 0000000..74c7b45
--- /dev/null
@@ -0,0 +1,91 @@
+From 7ca32542c957ff308a6ca7e3715e6552a65ae395 Mon Sep 17 00:00:00 2001
+From: Siarhei Siamashka <siarhei.siamashka@nokia.com>
+Date: Thu, 22 Oct 2009 05:45:47 +0300
+Subject: [PATCH 4/6] Support of overlapping src/dst for pixman_blt_mmx
+
+---
+ pixman/pixman-mmx.c |   55 +++++++++++++++++++++++++++++---------------------
+ 1 files changed, 32 insertions(+), 23 deletions(-)
+
+diff --git a/pixman/pixman-mmx.c b/pixman/pixman-mmx.c
+index 819e3a0..dcccadb 100644
+--- a/pixman/pixman-mmx.c
++++ b/pixman/pixman-mmx.c
+@@ -3002,34 +3002,43 @@ pixman_blt_mmx (uint32_t *src_bits,
+ {
+     uint8_t *   src_bytes;
+     uint8_t *   dst_bytes;
+-    int byte_width;
++    int         bpp;
+-    if (src_bpp != dst_bpp)
++    if (src_bpp != dst_bpp || src_bpp & 7)
+       return FALSE;
+-    if (src_bpp == 16)
+-    {
+-      src_stride = src_stride * (int) sizeof (uint32_t) / 2;
+-      dst_stride = dst_stride * (int) sizeof (uint32_t) / 2;
+-      src_bytes = (uint8_t *)(((uint16_t *)src_bits) + src_stride * (src_y) + (src_x));
+-      dst_bytes = (uint8_t *)(((uint16_t *)dst_bits) + dst_stride * (dst_y) + (dst_x));
+-      byte_width = 2 * width;
+-      src_stride *= 2;
+-      dst_stride *= 2;
+-    }
+-    else if (src_bpp == 32)
++    bpp = src_bpp >> 3;
++    width *= bpp;
++    src_stride *= 4;
++    dst_stride *= 4;
++    src_bytes = (uint8_t *)src_bits + src_y * src_stride + src_x * bpp;
++    dst_bytes = (uint8_t *)dst_bits + dst_y * dst_stride + dst_x * bpp;
++
++    if (src_bpp != 16 && src_bpp != 32)
+     {
+-      src_stride = src_stride * (int) sizeof (uint32_t) / 4;
+-      dst_stride = dst_stride * (int) sizeof (uint32_t) / 4;
+-      src_bytes = (uint8_t *)(((uint32_t *)src_bits) + src_stride * (src_y) + (src_x));
+-      dst_bytes = (uint8_t *)(((uint32_t *)dst_bits) + dst_stride * (dst_y) + (dst_x));
+-      byte_width = 4 * width;
+-      src_stride *= 4;
+-      dst_stride *= 4;
++      pixman_blt_helper (src_bytes, dst_bytes, src_stride, dst_stride,
++                         width, height);
++      return TRUE;
+     }
+-    else
++
++    if (src_bytes < dst_bytes && src_bytes + src_stride * height > dst_bytes)
+     {
+-      return FALSE;
++      src_bytes += src_stride * height - src_stride;
++      dst_bytes += dst_stride * height - dst_stride;
++      dst_stride = -dst_stride;
++      src_stride = -src_stride;
++
++      if (src_bytes + width > dst_bytes)
++      {
++          /* TODO: reverse scanline copy using MMX */
++          while (--height >= 0)
++          {
++              memmove (dst_bytes, src_bytes, width);
++              dst_bytes += dst_stride;
++              src_bytes += src_stride;
++          }
++          return TRUE;
++      }
+     }
+     while (height--)
+@@ -3039,7 +3048,7 @@ pixman_blt_mmx (uint32_t *src_bits,
+       uint8_t *d = dst_bytes;
+       src_bytes += src_stride;
+       dst_bytes += dst_stride;
+-      w = byte_width;
++      w = width;
+       while (w >= 2 && ((unsigned long)d & 3))
+       {
+-- 
+1.6.2.4
+
diff --git a/recipes/xorg-lib/pixman-0.17.8/0005-Support-of-overlapping-src-dst-for-pixman_blt_sse2.patch b/recipes/xorg-lib/pixman-0.17.8/0005-Support-of-overlapping-src-dst-for-pixman_blt_sse2.patch
new file mode 100644 (file)
index 0000000..3704fbf
--- /dev/null
@@ -0,0 +1,91 @@
+From edc80b41c6480b7c80ec5f7c835c92b2debb3774 Mon Sep 17 00:00:00 2001
+From: Siarhei Siamashka <siarhei.siamashka@nokia.com>
+Date: Thu, 22 Oct 2009 05:45:54 +0300
+Subject: [PATCH 5/6] Support of overlapping src/dst for pixman_blt_sse2
+
+---
+ pixman/pixman-sse2.c |   55 +++++++++++++++++++++++++++++--------------------
+ 1 files changed, 32 insertions(+), 23 deletions(-)
+
+diff --git a/pixman/pixman-sse2.c b/pixman/pixman-sse2.c
+index 78b0ad1..b84636b 100644
+--- a/pixman/pixman-sse2.c
++++ b/pixman/pixman-sse2.c
+@@ -5300,34 +5300,43 @@ pixman_blt_sse2 (uint32_t *src_bits,
+ {
+     uint8_t *   src_bytes;
+     uint8_t *   dst_bytes;
+-    int byte_width;
++    int         bpp;
+-    if (src_bpp != dst_bpp)
++    if (src_bpp != dst_bpp || src_bpp & 7)
+       return FALSE;
+-    if (src_bpp == 16)
+-    {
+-      src_stride = src_stride * (int) sizeof (uint32_t) / 2;
+-      dst_stride = dst_stride * (int) sizeof (uint32_t) / 2;
+-      src_bytes =(uint8_t *)(((uint16_t *)src_bits) + src_stride * (src_y) + (src_x));
+-      dst_bytes = (uint8_t *)(((uint16_t *)dst_bits) + dst_stride * (dst_y) + (dst_x));
+-      byte_width = 2 * width;
+-      src_stride *= 2;
+-      dst_stride *= 2;
+-    }
+-    else if (src_bpp == 32)
++    bpp = src_bpp >> 3;
++    width *= bpp;
++    src_stride *= 4;
++    dst_stride *= 4;
++    src_bytes = (uint8_t *)src_bits + src_y * src_stride + src_x * bpp;
++    dst_bytes = (uint8_t *)dst_bits + dst_y * dst_stride + dst_x * bpp;
++
++    if (src_bpp != 16 && src_bpp != 32)
+     {
+-      src_stride = src_stride * (int) sizeof (uint32_t) / 4;
+-      dst_stride = dst_stride * (int) sizeof (uint32_t) / 4;
+-      src_bytes = (uint8_t *)(((uint32_t *)src_bits) + src_stride * (src_y) + (src_x));
+-      dst_bytes = (uint8_t *)(((uint32_t *)dst_bits) + dst_stride * (dst_y) + (dst_x));
+-      byte_width = 4 * width;
+-      src_stride *= 4;
+-      dst_stride *= 4;
++      pixman_blt_helper (src_bytes, dst_bytes, src_stride, dst_stride,
++                         width, height);
++      return TRUE;
+     }
+-    else
++
++    if (src_bytes < dst_bytes && src_bytes + src_stride * height > dst_bytes)
+     {
+-      return FALSE;
++      src_bytes += src_stride * height - src_stride;
++      dst_bytes += dst_stride * height - dst_stride;
++      dst_stride = -dst_stride;
++      src_stride = -src_stride;
++
++      if (src_bytes + width > dst_bytes)
++      {
++          /* TODO: reverse scanline copy using SSE2 */
++          while (--height >= 0)
++          {
++              memmove (dst_bytes, src_bytes, width);
++              dst_bytes += dst_stride;
++              src_bytes += src_stride;
++          }
++          return TRUE;
++      }
+     }
+     cache_prefetch ((__m128i*)src_bytes);
+@@ -5340,7 +5349,7 @@ pixman_blt_sse2 (uint32_t *src_bits,
+       uint8_t *d = dst_bytes;
+       src_bytes += src_stride;
+       dst_bytes += dst_stride;
+-      w = byte_width;
++      w = width;
+       cache_prefetch_next ((__m128i*)s);
+       cache_prefetch_next ((__m128i*)d);
+-- 
+1.6.2.4
+
diff --git a/recipes/xorg-lib/pixman-0.17.8/0006-Support-of-overlapping-src-dst-for-pixman_blt_neon.patch b/recipes/xorg-lib/pixman-0.17.8/0006-Support-of-overlapping-src-dst-for-pixman_blt_neon.patch
new file mode 100644 (file)
index 0000000..7c22483
--- /dev/null
@@ -0,0 +1,94 @@
+From 86870ff530b5e435034bd80207e5758466d96cff Mon Sep 17 00:00:00 2001
+From: Siarhei Siamashka <siarhei.siamashka@nokia.com>
+Date: Wed, 18 Nov 2009 06:08:48 +0200
+Subject: [PATCH 6/6] Support of overlapping src/dst for pixman_blt_neon
+
+---
+ pixman/pixman-arm-neon.c |   63 ++++++++++++++++++++++++++++++++++++++-------
+ 1 files changed, 53 insertions(+), 10 deletions(-)
+
+diff --git a/pixman/pixman-arm-neon.c b/pixman/pixman-arm-neon.c
+index 495fda4..c632ff5 100644
+--- a/pixman/pixman-arm-neon.c
++++ b/pixman/pixman-arm-neon.c
+@@ -357,26 +357,66 @@
+                  int       width,
+                  int       height)
+ {
+-    if (src_bpp != dst_bpp)
++    uint8_t *   src_bytes;
++    uint8_t *   dst_bytes;
++    int         bpp;
++
++    if (src_bpp != dst_bpp || src_bpp & 7)
+       return FALSE;
++    bpp = src_bpp >> 3;
++    width *= bpp;
++    src_stride *= 4;
++    dst_stride *= 4;
++    src_bytes = (uint8_t *)src_bits + src_y * src_stride + src_x * bpp;
++    dst_bytes = (uint8_t *)dst_bits + dst_y * dst_stride + dst_x * bpp;
++
++    if (src_bpp != 16 && src_bpp != 32)
++    {
++      pixman_blt_helper (src_bytes, dst_bytes, src_stride, dst_stride,
++                         width, height);
++      return TRUE;
++    }
++
++    if (src_bytes < dst_bytes && src_bytes + src_stride * height > dst_bytes)
++    {
++      src_bytes += src_stride * height - src_stride;
++      dst_bytes += dst_stride * height - dst_stride;
++      dst_stride = -dst_stride;
++      src_stride = -src_stride;
++
++      if (src_bytes + width > dst_bytes)
++      {
++          /* TODO: reverse scanline copy using NEON */
++          while (--height >= 0)
++          {
++              memmove (dst_bytes, src_bytes, width);
++              dst_bytes += dst_stride;
++              src_bytes += src_stride;
++          }
++          return TRUE;
++      }
++    }
++
+     switch (src_bpp)
+     {
+     case 16:
+       pixman_composite_src_0565_0565_asm_neon (
+-              width, height,
+-              (uint16_t *)(((char *) dst_bits) +
+-              dst_y * dst_stride * 4 + dst_x * 2), dst_stride * 2,
+-              (uint16_t *)(((char *) src_bits) +
+-              src_y * src_stride * 4 + src_x * 2), src_stride * 2);
++              width >> 1,
++              height,
++              (uint16_t *) dst_bytes,
++              dst_stride >> 1,
++              (uint16_t *) src_bytes,
++              src_stride >> 1);
+       return TRUE;
+     case 32:
+       pixman_composite_src_8888_8888_asm_neon (
+-              width, height,
+-              (uint32_t *)(((char *) dst_bits) +
+-              dst_y * dst_stride * 4 + dst_x * 4), dst_stride,
+-              (uint32_t *)(((char *) src_bits) +
+-              src_y * src_stride * 4 + src_x * 4), src_stride);
++              width >> 2,
++              height,
++              (uint32_t *) dst_bytes,
++              dst_stride >> 2,
++              (uint32_t *) src_bytes,
++              src_stride >> 2);
+       return TRUE;
+     default:
+       return FALSE;
+-- 
+1.6.2.4
+
diff --git a/recipes/xorg-lib/pixman-0.17.8/1-composite.patch b/recipes/xorg-lib/pixman-0.17.8/1-composite.patch
new file mode 100644 (file)
index 0000000..761a2b9
--- /dev/null
@@ -0,0 +1,161 @@
+From 138d38f68bb2f955ca209f7412002a983a32a2fd Mon Sep 17 00:00:00 2001
+From: Siarhei Siamashka <siarhei.siamashka@nokia.com>
+Date: Tue, 23 Feb 2010 23:44:00 +0000
+Subject: ARM: added 'neon_composite_over_n_8888_8888_ca' fast path
+
+This fast path function improves performance of 'firefox-talos-gfx'
+cairo-perf trace.
+
+Benchmark from ARM Cortex-A8 @720MHz
+
+before:
+
+[ # ]  backend                         test   min(s) median(s) stddev. count
+[  0]    image            firefox-talos-gfx  139.969  141.176   0.35%    6/6
+
+after:
+
+[ # ]  backend                         test   min(s) median(s) stddev. count
+[  0]    image            firefox-talos-gfx  111.810  112.196   0.23%    6/6
+---
+diff --git a/pixman/pixman-arm-neon-asm.S b/pixman/pixman-arm-neon-asm.S
+index 2986884..2db4da8 100644
+--- a/pixman/pixman-arm-neon-asm.S
++++ b/pixman/pixman-arm-neon-asm.S
+@@ -1026,6 +1026,113 @@ generate_composite_function \
+ /******************************************************************************/
++.macro pixman_composite_over_n_8888_8888_ca_process_pixblock_head
++    /*
++     * 'combine_mask_ca' replacement
++     *
++     * input:  solid src (n) in {d8,  d9,  d10, d11}
++     *         dest in          {d4,  d5,  d6,  d7 }
++     *         mask in          {d24, d25, d26, d27}
++     * output: updated src in   {d0,  d1,  d2,  d3 }
++     *         updated mask in  {d24, d25, d26, d27}
++     */
++    vmull.u8    q0,  d24, d8
++    vmull.u8    q1,  d25, d9
++    vmull.u8    q6,  d26, d10
++    vmull.u8    q7,  d27, d11
++    vmull.u8    q9,  d11, d24
++    vmull.u8    q12, d11, d25
++    vmull.u8    q13, d11, d26
++    vrshr.u16   q10, q0,  #8
++    vrshr.u16   q11, q1,  #8
++    vrshr.u16   q8,  q6,  #8
++    vraddhn.u16 d0,  q0,  q10
++    vraddhn.u16 d1,  q1,  q11
++    vraddhn.u16 d2,  q6,  q8
++    vrshr.u16   q10, q7,  #8
++    vrshr.u16   q11, q9,  #8
++    vrshr.u16   q8,  q12, #8
++    vraddhn.u16 d3,  q7,  q10
++    vrshr.u16   q10, q13, #8
++    vraddhn.u16 d25, q12, q8
++    vrshr.u16   q8,  q7,  #8
++    vraddhn.u16 d24, q9,  q11
++    vraddhn.u16 d26, q13, q10
++    vraddhn.u16 d27, q7,  q8
++    /*
++     * 'combine_over_ca' replacement
++     *
++     * output: updated dest in {d28, d29, d30, d31}
++     */
++    vmvn.8      d24, d24
++    vmvn.8      d25, d25
++    vmull.u8    q8,  d24, d4
++    vmull.u8    q9,  d25, d5
++    vmvn.8      d26, d26
++    vmvn.8      d27, d27
++    vmull.u8    q10, d26, d6
++    vmull.u8    q11, d27, d7
++.endm
++
++.macro pixman_composite_over_n_8888_8888_ca_process_pixblock_tail
++    /* ... continue 'combine_over_ca' replacement */
++    vrshr.u16   q14, q8,  #8
++    vrshr.u16   q15, q9,  #8
++    vrshr.u16   q6,  q10, #8
++    vrshr.u16   q7,  q11, #8
++    vraddhn.u16 d28, q14, q8
++    vraddhn.u16 d29, q15, q9
++    vraddhn.u16 d30, q6,  q10
++    vraddhn.u16 d31, q7,  q11
++    vqadd.u8    q14, q0,  q14
++    vqadd.u8    q15, q1,  q15
++.endm
++
++.macro pixman_composite_over_n_8888_8888_ca_process_pixblock_tail_head
++        vrshr.u16   q14, q8, #8
++        vrshr.u16   q15, q9, #8
++    vld4.8      {d4, d5, d6, d7}, [DST_R, :128]!
++        vrshr.u16   q6, q10, #8
++        vrshr.u16   q7, q11, #8
++        vraddhn.u16 d28, q14, q8
++        vraddhn.u16 d29, q15, q9
++        vraddhn.u16 d30, q6, q10
++        vraddhn.u16 d31, q7, q11
++    vld4.8      {d24, d25, d26, d27}, [MASK]!
++        vqadd.u8    q14, q0, q14
++        vqadd.u8    q15, q1, q15
++    cache_preload 8, 8
++    pixman_composite_over_n_8888_8888_ca_process_pixblock_head
++    vst4.8      {d28, d29, d30, d31}, [DST_W, :128]!
++.endm
++
++.macro pixman_composite_over_n_8888_8888_ca_init
++    add         DUMMY, sp, #ARGS_STACK_OFFSET
++    vpush       {d8-d15}
++    vld1.32     {d11[0]}, [DUMMY]
++    vdup.8      d8, d11[0]
++    vdup.8      d9, d11[1]
++    vdup.8      d10, d11[2]
++    vdup.8      d11, d11[3]
++.endm
++
++.macro pixman_composite_over_n_8888_8888_ca_cleanup
++    vpop        {d8-d15}
++.endm
++
++generate_composite_function \
++    pixman_composite_over_n_8888_8888_ca_asm_neon, 0, 32, 32, \
++    FLAG_DST_READWRITE | FLAG_DEINTERLEAVE_32BPP, \
++    8, /* number of pixels, processed in a single block */ \
++    5, /* prefetch distance */ \
++    pixman_composite_over_n_8888_8888_ca_init, \
++    pixman_composite_over_n_8888_8888_ca_cleanup, \
++    pixman_composite_over_n_8888_8888_ca_process_pixblock_head, \
++    pixman_composite_over_n_8888_8888_ca_process_pixblock_tail, \
++    pixman_composite_over_n_8888_8888_ca_process_pixblock_tail_head
++
++/******************************************************************************/
++
+ .macro pixman_composite_add_n_8_8_process_pixblock_head
+     /* expecting source data in {d8, d9, d10, d11} */
+     /* d8 - blue, d9 - green, d10 - red, d11 - alpha */
+diff --git a/pixman/pixman-arm-neon.c b/pixman/pixman-arm-neon.c
+index 557301e..00b5c35 100644
+--- a/pixman/pixman-arm-neon.c
++++ b/pixman/pixman-arm-neon.c
+@@ -269,6 +269,7 @@ BIND_SRC_NULL_DST(over_8888_8888, uint32_t, 1, uint32_t, 1)
+ BIND_N_MASK_DST(over_n_8_0565, uint8_t, 1, uint16_t, 1)
+ BIND_N_MASK_DST(over_n_8_8888, uint8_t, 1, uint32_t, 1)
++BIND_N_MASK_DST(over_n_8888_8888_ca, uint32_t, 1, uint32_t, 1)
+ BIND_N_MASK_DST(add_n_8_8, uint8_t, 1, uint8_t, 1)
+ BIND_SRC_N_DST(over_8888_n_8888, uint32_t, 1, uint32_t, 1)
+@@ -412,6 +413,8 @@ static const pixman_fast_path_t arm_neon_fast_paths[] =
+     PIXMAN_STD_FAST_PATH (OVER, solid,    null,     r5g6b5,   neon_composite_over_n_0565),
+     PIXMAN_STD_FAST_PATH (OVER, solid,    null,     a8r8g8b8, neon_composite_over_n_8888),
+     PIXMAN_STD_FAST_PATH (OVER, solid,    null,     x8r8g8b8, neon_composite_over_n_8888),
++    PIXMAN_STD_FAST_PATH_CA (OVER, solid, a8r8g8b8, a8r8g8b8, neon_composite_over_n_8888_8888_ca),
++    PIXMAN_STD_FAST_PATH_CA (OVER, solid, a8r8g8b8, x8r8g8b8, neon_composite_over_n_8888_8888_ca),
+     PIXMAN_STD_FAST_PATH (OVER, a8r8g8b8, solid,    a8r8g8b8, neon_composite_over_8888_n_8888),
+     PIXMAN_STD_FAST_PATH (OVER, a8r8g8b8, solid,    x8r8g8b8, neon_composite_over_8888_n_8888),
+     PIXMAN_STD_FAST_PATH (OVER, a8r8g8b8, a8,       a8r8g8b8, neon_composite_over_8888_8_8888),
+--
+cgit v0.8.3-6-g21f6
diff --git a/recipes/xorg-lib/pixman-0.17.8/2-composite.patch b/recipes/xorg-lib/pixman-0.17.8/2-composite.patch
new file mode 100644 (file)
index 0000000..96c87e2
--- /dev/null
@@ -0,0 +1,100 @@
+From fa7f7b05fbb08b23678cf0d8928f1511e5a20ecc Mon Sep 17 00:00:00 2001
+From: Siarhei Siamashka <siarhei.siamashka@nokia.com>
+Date: Wed, 24 Feb 2010 00:26:57 +0000
+Subject: ARM: added 'neon_composite_src_x888_8888' fast path
+
+This fast path function improves performance of 'gnome-system-monitor'
+cairo-perf trace.
+
+Benchmark from ARM Cortex-A8 @720MHz
+
+before:
+
+[ # ]  backend                         test   min(s) median(s) stddev. count
+[  0]    image         gnome-system-monitor   68.838   68.899   0.05%    5/6
+
+after:
+
+[ # ]  backend                         test   min(s) median(s) stddev. count
+[  0]    image         gnome-system-monitor   53.336   53.384   0.09%    6/6
+---
+diff --git a/pixman/pixman-arm-neon-asm.S b/pixman/pixman-arm-neon-asm.S
+index 2db4da8..42ac1cb 100644
+--- a/pixman/pixman-arm-neon-asm.S
++++ b/pixman/pixman-arm-neon-asm.S
+@@ -947,6 +947,44 @@ generate_composite_function \
+ /******************************************************************************/
++.macro pixman_composite_src_x888_8888_process_pixblock_head
++    vorr     q0, q0, q2
++    vorr     q1, q1, q2
++.endm
++
++.macro pixman_composite_src_x888_8888_process_pixblock_tail
++.endm
++
++.macro pixman_composite_src_x888_8888_process_pixblock_tail_head
++    vst1.32 {d0, d1, d2, d3}, [DST_W, :128]!
++    vld1.32 {d0, d1, d2, d3}, [SRC]!
++    vorr     q0, q0, q2
++    vorr     q1, q1, q2
++    cache_preload 8, 8
++.endm
++
++.macro pixman_composite_src_x888_8888_init
++    vmov.u8  q2, #0xFF
++    vshl.u32 q2, q2, #24
++.endm
++
++generate_composite_function \
++    pixman_composite_src_x888_8888_asm_neon, 32, 0, 32, \
++    FLAG_DST_WRITEONLY, \
++    8, /* number of pixels, processed in a single block */ \
++    10, /* prefetch distance */ \
++    pixman_composite_src_x888_8888_init, \
++    default_cleanup, \
++    pixman_composite_src_x888_8888_process_pixblock_head, \
++    pixman_composite_src_x888_8888_process_pixblock_tail, \
++    pixman_composite_src_x888_8888_process_pixblock_tail_head, \
++    0, /* dst_w_basereg */ \
++    0, /* dst_r_basereg */ \
++    0, /* src_basereg   */ \
++    0  /* mask_basereg  */
++
++/******************************************************************************/
++
+ .macro pixman_composite_over_n_8_8888_process_pixblock_head
+     /* expecting deinterleaved source data in {d8, d9, d10, d11} */
+     /* d8 - blue, d9 - green, d10 - red, d11 - alpha */
+diff --git a/pixman/pixman-arm-neon.c b/pixman/pixman-arm-neon.c
+index 00b5c35..12d92a2 100644
+--- a/pixman/pixman-arm-neon.c
++++ b/pixman/pixman-arm-neon.c
+@@ -251,6 +251,7 @@ neon_composite_##name (pixman_implementation_t *imp,                    \
+ BIND_SRC_NULL_DST(src_8888_8888, uint32_t, 1, uint32_t, 1)
++BIND_SRC_NULL_DST(src_x888_8888, uint32_t, 1, uint32_t, 1)
+ BIND_SRC_NULL_DST(src_0565_0565, uint16_t, 1, uint16_t, 1)
+ BIND_SRC_NULL_DST(src_0888_0888, uint8_t, 3, uint8_t, 3)
+ BIND_SRC_NULL_DST(src_8888_0565, uint32_t, 1, uint16_t, 1)
+@@ -400,6 +401,8 @@ static const pixman_fast_path_t arm_neon_fast_paths[] =
+     PIXMAN_STD_FAST_PATH (SRC,  x8r8g8b8, null,     x8r8g8b8, neon_composite_src_8888_8888),
+     PIXMAN_STD_FAST_PATH (SRC,  a8b8g8r8, null,     x8b8g8r8, neon_composite_src_8888_8888),
+     PIXMAN_STD_FAST_PATH (SRC,  x8b8g8r8, null,     x8b8g8r8, neon_composite_src_8888_8888),
++    PIXMAN_STD_FAST_PATH (SRC,  a8r8g8b8, null,     a8r8g8b8, neon_composite_src_8888_8888),
++    PIXMAN_STD_FAST_PATH (SRC,  x8r8g8b8, null,     a8r8g8b8, neon_composite_src_x888_8888),
+     PIXMAN_STD_FAST_PATH (SRC,  r8g8b8,   null,     r8g8b8,   neon_composite_src_0888_0888),
+     PIXMAN_STD_FAST_PATH (SRC,  b8g8r8,   null,     x8r8g8b8, neon_composite_src_0888_8888_rev),
+     PIXMAN_STD_FAST_PATH (SRC,  b8g8r8,   null,     r5g6b5,   neon_composite_src_0888_0565_rev),
+@@ -428,6 +431,7 @@ static const pixman_fast_path_t arm_neon_fast_paths[] =
+     PIXMAN_STD_FAST_PATH (OVER, a8r8g8b8, null,     x8r8g8b8, neon_composite_over_8888_8888),
+     PIXMAN_STD_FAST_PATH (OVER, a8b8g8r8, null,     a8b8g8r8, neon_composite_over_8888_8888),
+     PIXMAN_STD_FAST_PATH (OVER, a8b8g8r8, null,     x8b8g8r8, neon_composite_over_8888_8888),
++    PIXMAN_STD_FAST_PATH (OVER, x8r8g8b8, null,     a8r8g8b8, neon_composite_src_x888_8888),
+     PIXMAN_STD_FAST_PATH (ADD,  solid,    a8,       a8,       neon_composite_add_n_8_8),
+     PIXMAN_STD_FAST_PATH (ADD,  a8,       a8,       a8,       neon_composite_add_8_8_8),
+     PIXMAN_STD_FAST_PATH (ADD,  a8r8g8b8, a8r8g8b8, a8r8g8b8, neon_composite_add_8888_8888_8888),
+--
+cgit v0.8.3-6-g21f6
diff --git a/recipes/xorg-lib/pixman-0.17.8/3-composite.patch b/recipes/xorg-lib/pixman-0.17.8/3-composite.patch
new file mode 100644 (file)
index 0000000..81f3ff1
--- /dev/null
@@ -0,0 +1,105 @@
+From 80b75405277bacc0df0ef7d91f1a2eabefb97901 Mon Sep 17 00:00:00 2001
+From: Siarhei Siamashka <siarhei.siamashka@nokia.com>
+Date: Wed, 24 Feb 2010 02:14:45 +0000
+Subject: ARM: added 'neon_composite_over_reverse_n_8888' fast path
+
+This fast path function improves performance of 'poppler' cairo-perf trace.
+
+Benchmark from ARM Cortex-A8 @720MHz
+
+[ # ]  backend                         test   min(s) median(s) stddev. count
+[  0]    image                      poppler   38.986   39.158   0.23%    6/6
+
+after:
+
+[ # ]  backend                         test   min(s) median(s) stddev. count
+[  0]    image                      poppler   24.981   25.136   0.28%    6/6
+---
+diff --git a/pixman/pixman-arm-neon-asm.S b/pixman/pixman-arm-neon-asm.S
+index 42ac1cb..272da27 100644
+--- a/pixman/pixman-arm-neon-asm.S
++++ b/pixman/pixman-arm-neon-asm.S
+@@ -680,6 +680,61 @@ generate_composite_function \
+ /******************************************************************************/
++.macro pixman_composite_over_reverse_n_8888_process_pixblock_tail_head
++        vrshr.u16   q14, q8, #8
++                                    PF add PF_X, PF_X, #8
++                                    PF tst PF_CTL, #0xF
++        vrshr.u16   q15, q9, #8
++        vrshr.u16   q12, q10, #8
++        vrshr.u16   q13, q11, #8
++                                    PF addne PF_X, PF_X, #8
++                                    PF subne PF_CTL, PF_CTL, #1
++        vraddhn.u16 d28, q14, q8
++        vraddhn.u16 d29, q15, q9
++                                    PF cmp PF_X, ORIG_W
++        vraddhn.u16 d30, q12, q10
++        vraddhn.u16 d31, q13, q11
++        vqadd.u8    q14, q0, q14
++        vqadd.u8    q15, q1, q15
++    vld4.8      {d0, d1, d2, d3}, [DST_R, :128]!
++    vmvn.8      d22, d3
++                                    PF pld, [PF_DST, PF_X, lsl #dst_bpp_shift]
++        vst4.8      {d28, d29, d30, d31}, [DST_W, :128]!
++                                    PF subge PF_X, PF_X, ORIG_W
++    vmull.u8    q8, d22, d4
++                                    PF subges PF_CTL, PF_CTL, #0x10
++    vmull.u8    q9, d22, d5
++    vmull.u8    q10, d22, d6
++                                    PF ldrgeb DUMMY, [PF_DST, DST_STRIDE, lsl #dst_bpp_shift]!
++    vmull.u8    q11, d22, d7
++.endm
++
++.macro pixman_composite_over_reverse_n_8888_init
++    add         DUMMY, sp, #ARGS_STACK_OFFSET
++    vld1.32     {d7[0]}, [DUMMY]
++    vdup.8      d4, d7[0]
++    vdup.8      d5, d7[1]
++    vdup.8      d6, d7[2]
++    vdup.8      d7, d7[3]
++.endm
++
++generate_composite_function \
++    pixman_composite_over_reverse_n_8888_asm_neon, 0, 0, 32, \
++    FLAG_DST_READWRITE | FLAG_DEINTERLEAVE_32BPP, \
++    8, /* number of pixels, processed in a single block */ \
++    5, /* prefetch distance */ \
++    pixman_composite_over_reverse_n_8888_init, \
++    default_cleanup, \
++    pixman_composite_over_8888_8888_process_pixblock_head, \
++    pixman_composite_over_8888_8888_process_pixblock_tail, \
++    pixman_composite_over_reverse_n_8888_process_pixblock_tail_head, \
++    28, /* dst_w_basereg */ \
++    0,  /* dst_r_basereg */ \
++    4,  /* src_basereg   */ \
++    24  /* mask_basereg  */
++
++/******************************************************************************/
++
+ .macro pixman_composite_over_n_8_0565_process_pixblock_head
+     /* in */
+     vmull.u8    q0, d24, d8
+diff --git a/pixman/pixman-arm-neon.c b/pixman/pixman-arm-neon.c
+index 12d92a2..417ce5a 100644
+--- a/pixman/pixman-arm-neon.c
++++ b/pixman/pixman-arm-neon.c
+@@ -264,6 +264,7 @@ BIND_SRC_NULL_DST(add_8888_8888, uint32_t, 1, uint32_t, 1)
+ BIND_N_NULL_DST(over_n_0565, uint16_t, 1)
+ BIND_N_NULL_DST(over_n_8888, uint32_t, 1)
++BIND_N_NULL_DST(over_reverse_n_8888, uint32_t, 1)
+ BIND_SRC_NULL_DST(over_8888_0565, uint32_t, 1, uint16_t, 1)
+ BIND_SRC_NULL_DST(over_8888_8888, uint32_t, 1, uint32_t, 1)
+@@ -438,6 +439,7 @@ static const pixman_fast_path_t arm_neon_fast_paths[] =
+     PIXMAN_STD_FAST_PATH (ADD,  a8,       null,     a8,       neon_composite_add_8000_8000),
+     PIXMAN_STD_FAST_PATH (ADD,  a8r8g8b8, null,     a8r8g8b8, neon_composite_add_8888_8888),
+     PIXMAN_STD_FAST_PATH (ADD,  a8b8g8r8, null,     a8b8g8r8, neon_composite_add_8888_8888),
++    PIXMAN_STD_FAST_PATH (OVER_REVERSE, solid, null, a8r8g8b8, neon_composite_over_reverse_n_8888),
+     { PIXMAN_OP_NONE },
+ };
+--
+cgit v0.8.3-6-g21f6
diff --git a/recipes/xorg-lib/pixman_0.17.8.bb b/recipes/xorg-lib/pixman_0.17.8.bb
new file mode 100644 (file)
index 0000000..db07089
--- /dev/null
@@ -0,0 +1,33 @@
+SECTION = "libs"
+PRIORITY = "optional"
+DESCRIPTION = "Low-level pixel manipulation library."
+LICENSE = "X11"
+
+DEFAULT_PREFERENCE = "-1"
+DEFAULT_PREFERENCE_angstrom = "2"
+
+BBCLASSEXTEND="native"
+
+SRC_URI = "http://cairographics.org/releases/pixman-${PV}.tar.gz;name=archive \
+           file://0003-Generic-C-implementation-of-pixman_blt-with-overlapp.patch;patch=1 \
+           file://0004-Support-of-overlapping-src-dst-for-pixman_blt_mmx.patch;patch=1 \
+           file://0005-Support-of-overlapping-src-dst-for-pixman_blt_sse2.patch;patch=1 \
+           file://0006-Support-of-overlapping-src-dst-for-pixman_blt_neon.patch;patch=1 \
+           file://1-composite.patch;patch=1 \
+           file://2-composite.patch;patch=1 \
+           file://3-composite.patch;patch=1 \
+          "
+
+SRC_URI[archive.md5sum] = "a7deb2ff6b286b676d67aa6ae91317ae"
+SRC_URI[archive.sha256sum] = "ea24e9003455a0881bd43bf7e4169f2b34c90c8521405103e3490553876a81b4"
+
+
+inherit autotools_stage
+
+NEON = " --disable-arm-neon "
+NEON_armv7a = ""
+
+EXTRA_OECONF = "${NEON} --disable-gtk"
+
+AUTOTOOLS_STAGE_PKGCONFIG = "1"
+
diff --git a/recipes/xorg-lib/pixman_git.bb b/recipes/xorg-lib/pixman_git.bb
index 06547d3..199c2fb 100644 (file)
@@ -12,7 +12,6 @@ BBCLASSEXTEND="native"
 SRCREV = "c97b1e803fc214e9880eaeff98410c8fa37f9ddc"
 
 DEFAULT_PREFERENCE = "-1"
-DEFAULT_PREFERENCE_angstrom = "1"
 DEFAULT_PREFERENCE_shr = "1"
 
 SRC_URI = "git://anongit.freedesktop.org/pixman;protocol=git \