pixman: move from 0.19.4 to 0.19.6
author Martin Jansa <Martin.Jansa@gmail.com>
Mon, 25 Oct 2010 08:00:23 +0000 (10:00 +0200)
committer Martin Jansa <Martin.Jansa@gmail.com>
Tue, 26 Oct 2010 11:33:27 +0000 (13:33 +0200)
Signed-off-by: Martin Jansa <Martin.Jansa@gmail.com>
14 files changed:
recipes/xorg-lib/pixman-0.19.4/0002-Don-t-discriminate-PAD-and-REFLECT-repeat-in-standar.patch [deleted file]
recipes/xorg-lib/pixman-0.19.4/0007-ARM-added-neon_composite_add_0565_8_0565-fast-path.patch [deleted file]
recipes/xorg-lib/pixman-0.19.4/0008-ARM-added-neon_composite_out_reverse_0565_8_0565-fas.patch [deleted file]
recipes/xorg-lib/pixman-0.19.4/0009-ARM-added-neon_composite_out_reverse_8_0565-fast-pat.patch [deleted file]
recipes/xorg-lib/pixman-0.19.6/0001-Generic-C-implementation-of-pixman_blt-with-overlapp.patch [moved from recipes/xorg-lib/pixman-0.19.4/0003-Generic-C-implementation-of-pixman_blt-with-overlapp.patch with 93% similarity]
recipes/xorg-lib/pixman-0.19.6/0002-Support-of-overlapping-src-dst-for-pixman_blt_mmx.patch [moved from recipes/xorg-lib/pixman-0.19.4/0004-Support-of-overlapping-src-dst-for-pixman_blt_mmx.patch with 94% similarity]
recipes/xorg-lib/pixman-0.19.6/0003-Support-of-overlapping-src-dst-for-pixman_blt_sse2.patch [moved from recipes/xorg-lib/pixman-0.19.4/0005-Support-of-overlapping-src-dst-for-pixman_blt_sse2.patch with 85% similarity]
recipes/xorg-lib/pixman-0.19.6/0004-Support-of-overlapping-src-dst-for-pixman_blt_neon.patch [moved from recipes/xorg-lib/pixman-0.19.4/0006-Support-of-overlapping-src-dst-for-pixman_blt_neon.patch with 91% similarity]
recipes/xorg-lib/pixman-0.19.6/0005-ARM-added-NEON-optimizations-for-fetch-store-r5g6b5-.patch [moved from recipes/xorg-lib/pixman-0.19.4/0001-ARM-HACK-added-NEON-optimizations-for-fetch-store-r5.patch with 62% similarity]
recipes/xorg-lib/pixman-0.19.6/0006-ARM-added-NEON-optimizations-for-fetch-store-a8-scan.patch [new file with mode: 0644]
recipes/xorg-lib/pixman-0.19.6/0007-ARM-added-NEON-optimizations-for-fetching-x8r8g8b8-s.patch [new file with mode: 0644]
recipes/xorg-lib/pixman-0.19.6/0008-ARM-optimization-for-scaled-src_0565_0565-operation-.patch [new file with mode: 0644]
recipes/xorg-lib/pixman_0.19.4.bb [deleted file]
recipes/xorg-lib/pixman_0.19.6.bb [new file with mode: 0644]

diff --git a/recipes/xorg-lib/pixman-0.19.4/0002-Don-t-discriminate-PAD-and-REFLECT-repeat-in-standar.patch b/recipes/xorg-lib/pixman-0.19.4/0002-Don-t-discriminate-PAD-and-REFLECT-repeat-in-standar.patch
deleted file mode 100644 (file)
index 178dad9..0000000
+++ /dev/null
@@ -1,59 +0,0 @@
-From e3bfd272cf813b8419757a3b59128b3568e5f800 Mon Sep 17 00:00:00 2001
-From: Siarhei Siamashka <siarhei.siamashka@nokia.com>
-Date: Mon, 20 Sep 2010 19:07:33 +0300
-Subject: [PATCH 2/9] Don't discriminate PAD and REFLECT repeat in standard fast paths
-
-Without this fix, setting PAD repeat on a source image prevents
-the use of any nonscaled standard fast paths, affecting performance
-a lot. But as long as no pixels outside the source image boundaries
-are touched by the compositing operation, all the repeat types
-behave the same and can take the same fast paths.
-
-This is important because setting PAD repeat instead of NONE is
-more hardware acceleration friendly (for the drivers implementing
-RENDER extension) and does not inhibit OVER->SRC operator
-optimization in pixman.
----
- pixman/pixman-image.c   |    6 ++----
- pixman/pixman-private.h |    2 --
- 2 files changed, 2 insertions(+), 6 deletions(-)
-
-diff --git a/pixman/pixman-image.c b/pixman/pixman-image.c
-index 8397f6a..14a2219 100644
---- a/pixman/pixman-image.c
-+++ b/pixman/pixman-image.c
-@@ -363,16 +363,14 @@ compute_image_info (pixman_image_t *image)
-       flags |=
-           FAST_PATH_NO_PAD_REPEAT             |
-           FAST_PATH_NO_NONE_REPEAT            |
--          FAST_PATH_NO_NORMAL_REPEAT          |
--          FAST_PATH_COVERS_CLIP;
-+          FAST_PATH_NO_NORMAL_REPEAT;
-       break;
-     case PIXMAN_REPEAT_PAD:
-       flags |=
-           FAST_PATH_NO_REFLECT_REPEAT         |
-           FAST_PATH_NO_NONE_REPEAT            |
--          FAST_PATH_NO_NORMAL_REPEAT          |
--          FAST_PATH_COVERS_CLIP;
-+          FAST_PATH_NO_NORMAL_REPEAT;
-       break;
-     default:
-diff --git a/pixman/pixman-private.h b/pixman/pixman-private.h
-index 564f8f0..440ae7a 100644
---- a/pixman/pixman-private.h
-+++ b/pixman/pixman-private.h
-@@ -602,8 +602,6 @@ _pixman_choose_implementation (void);
-     (FAST_PATH_ID_TRANSFORM           |                               \
-      FAST_PATH_NO_ALPHA_MAP           |                               \
-      FAST_PATH_NO_CONVOLUTION_FILTER  |                               \
--     FAST_PATH_NO_PAD_REPEAT          |                               \
--     FAST_PATH_NO_REFLECT_REPEAT      |                               \
-      FAST_PATH_NO_ACCESSORS           |                               \
-      FAST_PATH_NARROW_FORMAT          |                               \
-      FAST_PATH_COVERS_CLIP)
--- 
-1.6.6.1
-
diff --git a/recipes/xorg-lib/pixman-0.19.4/0007-ARM-added-neon_composite_add_0565_8_0565-fast-path.patch b/recipes/xorg-lib/pixman-0.19.4/0007-ARM-added-neon_composite_add_0565_8_0565-fast-path.patch
deleted file mode 100644 (file)
index d62f12d..0000000
+++ /dev/null
@@ -1,96 +0,0 @@
-From c3f1715c4698e90396d02f7b5acc314d99780941 Mon Sep 17 00:00:00 2001
-From: Siarhei Siamashka <siarhei.siamashka@nokia.com>
-Date: Thu, 23 Sep 2010 22:28:55 +0300
-Subject: [PATCH 7/9] ARM: added 'neon_composite_add_0565_8_0565' fast path
-
-TODO: That's an initial variant, needs performance tuning
----
- pixman/pixman-arm-neon-asm.S |   52 ++++++++++++++++++++++++++++++++++++++++++
- pixman/pixman-arm-neon.c     |    4 +++
- 2 files changed, 56 insertions(+), 0 deletions(-)
-
-diff --git a/pixman/pixman-arm-neon-asm.S b/pixman/pixman-arm-neon-asm.S
-index e1a697e..23ddae9 100644
---- a/pixman/pixman-arm-neon-asm.S
-+++ b/pixman/pixman-arm-neon-asm.S
-@@ -1890,3 +1890,55 @@ generate_composite_function \
-     10,  /* dst_r_basereg */ \
-     8,  /* src_basereg   */ \
-     15  /* mask_basereg  */
-+
-+/******************************************************************************/
-+
-+.macro pixman_composite_add_0565_8_0565_process_pixblock_head
-+    /* mask is in d15 */
-+    convert_0565_to_x888 q4, d2, d1, d0
-+    convert_0565_to_x888 q5, d6, d5, d4
-+    /* source pixel data is in      {d0, d1, d2, XX} */
-+    /* destination pixel data is in {d4, d5, d6, XX} */
-+    vmull.u8    q6,  d15, d2
-+    vmull.u8    q5,  d15, d1
-+    vmull.u8    q4,  d15, d0
-+    vrshr.u16   q12, q6,  #8
-+    vrshr.u16   q11, q5,  #8
-+    vrshr.u16   q10, q4,  #8
-+    vraddhn.u16 d2,  q6,  q12
-+    vraddhn.u16 d1,  q5,  q11
-+    vraddhn.u16 d0,  q4,  q10
-+.endm
-+
-+.macro pixman_composite_add_0565_8_0565_process_pixblock_tail
-+    vqadd.u8    q0,  q0,  q2
-+    vqadd.u8    q1,  q1,  q3
-+    /* 32bpp result is in {d0, d1, d2, XX} */
-+    convert_8888_to_0565 d2, d1, d0, q14, q15, q3
-+.endm
-+
-+/* TODO: expand macros and do better instructions scheduling */
-+.macro pixman_composite_add_0565_8_0565_process_pixblock_tail_head
-+    vld1.8     {d15}, [MASK]!
-+    pixman_composite_add_0565_8_0565_process_pixblock_tail
-+    vld1.16    {d8, d9}, [SRC]!
-+    vld1.16    {d10, d11}, [DST_R, :128]!
-+    cache_preload 8, 8
-+    pixman_composite_add_0565_8_0565_process_pixblock_head
-+    vst1.16    {d28, d29}, [DST_W, :128]!
-+.endm
-+
-+generate_composite_function \
-+    pixman_composite_add_0565_8_0565_asm_neon, 16, 8, 16, \
-+    FLAG_DST_READWRITE, \
-+    8, /* number of pixels, processed in a single block */ \
-+    5, /* prefetch distance */ \
-+    default_init_need_all_regs, \
-+    default_cleanup_need_all_regs, \
-+    pixman_composite_add_0565_8_0565_process_pixblock_head, \
-+    pixman_composite_add_0565_8_0565_process_pixblock_tail, \
-+    pixman_composite_add_0565_8_0565_process_pixblock_tail_head, \
-+    28, /* dst_w_basereg */ \
-+    10, /* dst_r_basereg */ \
-+    8,  /* src_basereg   */ \
-+    15  /* mask_basereg  */
-diff --git a/pixman/pixman-arm-neon.c b/pixman/pixman-arm-neon.c
-index db1c2df..a8be7e4 100644
---- a/pixman/pixman-arm-neon.c
-+++ b/pixman/pixman-arm-neon.c
-@@ -82,6 +82,8 @@ PIXMAN_ARM_BIND_FAST_PATH_SRC_N_DST (neon, over_8888_n_8888,
- PIXMAN_ARM_BIND_FAST_PATH_SRC_MASK_DST (neon, add_8_8_8,
-                                         uint8_t, 1, uint8_t, 1, uint8_t, 1)
-+PIXMAN_ARM_BIND_FAST_PATH_SRC_MASK_DST (neon, add_0565_8_0565,
-+                                        uint16_t, 1, uint8_t, 1, uint16_t, 1)
- PIXMAN_ARM_BIND_FAST_PATH_SRC_MASK_DST (neon, add_8888_8888_8888,
-                                         uint32_t, 1, uint32_t, 1, uint32_t, 1)
- PIXMAN_ARM_BIND_FAST_PATH_SRC_MASK_DST (neon, over_8888_8_8888,
-@@ -296,6 +298,8 @@ static const pixman_fast_path_t arm_neon_fast_paths[] =
-     PIXMAN_STD_FAST_PATH (OVER, x8b8g8r8, null,     a8b8g8r8, neon_composite_src_x888_8888),
-     PIXMAN_STD_FAST_PATH (ADD,  solid,    a8,       a8,       neon_composite_add_n_8_8),
-     PIXMAN_STD_FAST_PATH (ADD,  a8,       a8,       a8,       neon_composite_add_8_8_8),
-+    PIXMAN_STD_FAST_PATH (ADD,  r5g6b5,   a8,       r5g6b5,   neon_composite_add_0565_8_0565),
-+    PIXMAN_STD_FAST_PATH (ADD,  b5g6r5,   a8,       b5g6r5,   neon_composite_add_0565_8_0565),
-     PIXMAN_STD_FAST_PATH (ADD,  a8r8g8b8, a8r8g8b8, a8r8g8b8, neon_composite_add_8888_8888_8888),
-     PIXMAN_STD_FAST_PATH (ADD,  a8,       null,     a8,       neon_composite_add_8000_8000),
-     PIXMAN_STD_FAST_PATH (ADD,  a8r8g8b8, null,     a8r8g8b8, neon_composite_add_8888_8888),
--- 
-1.6.6.1
-
diff --git a/recipes/xorg-lib/pixman-0.19.4/0008-ARM-added-neon_composite_out_reverse_0565_8_0565-fas.patch b/recipes/xorg-lib/pixman-0.19.4/0008-ARM-added-neon_composite_out_reverse_0565_8_0565-fas.patch
deleted file mode 100644 (file)
index e720a6a..0000000
+++ /dev/null
@@ -1,110 +0,0 @@
-From 89cbe6eb5df2a1c85ba996caea6479e2434d51a5 Mon Sep 17 00:00:00 2001
-From: Siarhei Siamashka <siarhei.siamashka@nokia.com>
-Date: Thu, 23 Sep 2010 23:09:46 +0300
-Subject: [PATCH 8/9] ARM: added 'neon_composite_out_reverse_0565_8_0565' fast path
-
-TODO: That's an initial variant, needs performance tuning
----
- pixman/pixman-arm-neon-asm.S |   66 ++++++++++++++++++++++++++++++++++++++++++
- pixman/pixman-arm-neon.c     |    4 ++
- 2 files changed, 70 insertions(+), 0 deletions(-)
-
-diff --git a/pixman/pixman-arm-neon-asm.S b/pixman/pixman-arm-neon-asm.S
-index 23ddae9..7a599f0 100644
---- a/pixman/pixman-arm-neon-asm.S
-+++ b/pixman/pixman-arm-neon-asm.S
-@@ -1942,3 +1942,69 @@ generate_composite_function \
-     10, /* dst_r_basereg */ \
-     8,  /* src_basereg   */ \
-     15  /* mask_basereg  */
-+
-+/******************************************************************************/
-+
-+.macro pixman_composite_out_reverse_0565_8_0565_process_pixblock_head
-+    /* mask is in d15 */
-+    convert_0565_to_8888 q4, d3, d2, d1, d0
-+    convert_0565_to_x888 q5, d6, d5, d4
-+    /* source pixel data is in      {d0, d1, d2, d3} */
-+    /* destination pixel data is in {d4, d5, d6, xx} */
-+
-+    /* 'in' */
-+    vmull.u8    q8, d15, d3
-+    vmull.u8    q6, d15, d2
-+    vmull.u8    q5, d15, d1
-+    vmull.u8    q4, d15, d0
-+    vrshr.u16   q13, q8, #8
-+    vrshr.u16   q12, q6, #8
-+    vrshr.u16   q11, q5, #8
-+    vrshr.u16   q10, q4, #8
-+    vraddhn.u16 d3, q8, q13
-+    vraddhn.u16 d2, q6, q12
-+    vraddhn.u16 d1, q5, q11
-+    vraddhn.u16 d0, q4, q10
-+    vmvn.8      d24, d3  /* get inverted alpha */
-+    /* now do alpha blending */
-+    vmull.u8    q8, d24, d4
-+    vmull.u8    q9, d24, d5
-+    vmull.u8    q10, d24, d6
-+.endm
-+
-+.macro pixman_composite_out_reverse_0565_8_0565_process_pixblock_tail
-+    vrshr.u16   q14, q8, #8
-+    vrshr.u16   q15, q9, #8
-+    vrshr.u16   q12, q10, #8
-+    vraddhn.u16 d0, q14, q8
-+    vraddhn.u16 d1, q15, q9
-+    vraddhn.u16 d2, q12, q10
-+    /* 32bpp result is in {d0, d1, d2, XX} */
-+    convert_8888_to_0565 d2, d1, d0, q14, q15, q3
-+.endm
-+
-+/* TODO: expand macros and do better instructions scheduling */
-+.macro pixman_composite_out_reverse_0565_8_0565_process_pixblock_tail_head
-+    vld1.8     {d15}, [MASK]!
-+    pixman_composite_out_reverse_0565_8_0565_process_pixblock_tail
-+    vld1.16    {d8, d9}, [SRC]!
-+    vld1.16    {d10, d11}, [DST_R, :128]!
-+    cache_preload 8, 8
-+    pixman_composite_out_reverse_0565_8_0565_process_pixblock_head
-+    vst1.16    {d28, d29}, [DST_W, :128]!
-+.endm
-+
-+generate_composite_function \
-+    pixman_composite_out_reverse_0565_8_0565_asm_neon, 16, 8, 16, \
-+    FLAG_DST_READWRITE, \
-+    8, /* number of pixels, processed in a single block */ \
-+    5, /* prefetch distance */ \
-+    default_init_need_all_regs, \
-+    default_cleanup_need_all_regs, \
-+    pixman_composite_out_reverse_0565_8_0565_process_pixblock_head, \
-+    pixman_composite_out_reverse_0565_8_0565_process_pixblock_tail, \
-+    pixman_composite_out_reverse_0565_8_0565_process_pixblock_tail_head, \
-+    28, /* dst_w_basereg */ \
-+    10, /* dst_r_basereg */ \
-+    8,  /* src_basereg   */ \
-+    15  /* mask_basereg  */
-diff --git a/pixman/pixman-arm-neon.c b/pixman/pixman-arm-neon.c
-index a8be7e4..da1fdeb 100644
---- a/pixman/pixman-arm-neon.c
-+++ b/pixman/pixman-arm-neon.c
-@@ -94,6 +94,8 @@ PIXMAN_ARM_BIND_FAST_PATH_SRC_MASK_DST (neon, over_8888_8_0565,
-                                         uint32_t, 1, uint8_t, 1, uint16_t, 1)
- PIXMAN_ARM_BIND_FAST_PATH_SRC_MASK_DST (neon, over_0565_8_0565,
-                                         uint16_t, 1, uint8_t, 1, uint16_t, 1)
-+PIXMAN_ARM_BIND_FAST_PATH_SRC_MASK_DST (neon, out_reverse_0565_8_0565,
-+                                        uint16_t, 1, uint8_t, 1, uint16_t, 1)
- void
- pixman_composite_src_n_8_asm_neon (int32_t   w,
-@@ -306,6 +308,8 @@ static const pixman_fast_path_t arm_neon_fast_paths[] =
-     PIXMAN_STD_FAST_PATH (ADD,  a8b8g8r8, null,     a8b8g8r8, neon_composite_add_8888_8888),
-     PIXMAN_STD_FAST_PATH (OVER_REVERSE, solid, null, a8r8g8b8, neon_composite_over_reverse_n_8888),
-     PIXMAN_STD_FAST_PATH (OVER_REVERSE, solid, null, a8b8g8r8, neon_composite_over_reverse_n_8888),
-+    PIXMAN_STD_FAST_PATH (OUT_REVERSE, r5g6b5,   a8,       r5g6b5,   neon_composite_out_reverse_0565_8_0565),
-+    PIXMAN_STD_FAST_PATH (OUT_REVERSE, b5g6r5,   a8,       b5g6r5,   neon_composite_out_reverse_0565_8_0565),
-     { PIXMAN_OP_NONE },
- };
--- 
-1.6.6.1
-
diff --git a/recipes/xorg-lib/pixman-0.19.4/0009-ARM-added-neon_composite_out_reverse_8_0565-fast-pat.patch b/recipes/xorg-lib/pixman-0.19.4/0009-ARM-added-neon_composite_out_reverse_8_0565-fast-pat.patch
deleted file mode 100644 (file)
index 1c7f6ec..0000000
+++ /dev/null
@@ -1,94 +0,0 @@
-From 130211983628678ccee23535765994aa0b9d5122 Mon Sep 17 00:00:00 2001
-From: Siarhei Siamashka <siarhei.siamashka@nokia.com>
-Date: Thu, 23 Sep 2010 23:41:50 +0300
-Subject: [PATCH 9/9] ARM: added 'neon_composite_out_reverse_8_0565' fast path
-
-TODO: That's an initial variant, needs performance tuning
----
- pixman/pixman-arm-neon-asm.S |   50 ++++++++++++++++++++++++++++++++++++++++++
- pixman/pixman-arm-neon.c     |    4 +++
- 2 files changed, 54 insertions(+), 0 deletions(-)
-
-diff --git a/pixman/pixman-arm-neon-asm.S b/pixman/pixman-arm-neon-asm.S
-index 7a599f0..2234b1b 100644
---- a/pixman/pixman-arm-neon-asm.S
-+++ b/pixman/pixman-arm-neon-asm.S
-@@ -2008,3 +2008,53 @@ generate_composite_function \
-     10, /* dst_r_basereg */ \
-     8,  /* src_basereg   */ \
-     15  /* mask_basereg  */
-+
-+/******************************************************************************/
-+
-+.macro pixman_composite_out_reverse_8_0565_process_pixblock_head
-+    /* mask is in d15 */
-+    convert_0565_to_x888 q5, d6, d5, d4
-+    /* destination pixel data is in {d4, d5, d6, xx} */
-+    vmvn.8      d24, d15 /* get inverted alpha */
-+    /* now do alpha blending */
-+    vmull.u8    q8, d24, d4
-+    vmull.u8    q9, d24, d5
-+    vmull.u8    q10, d24, d6
-+.endm
-+
-+.macro pixman_composite_out_reverse_8_0565_process_pixblock_tail
-+    vrshr.u16   q14, q8, #8
-+    vrshr.u16   q15, q9, #8
-+    vrshr.u16   q12, q10, #8
-+    vraddhn.u16 d0, q14, q8
-+    vraddhn.u16 d1, q15, q9
-+    vraddhn.u16 d2, q12, q10
-+    /* 32bpp result is in {d0, d1, d2, XX} */
-+    convert_8888_to_0565 d2, d1, d0, q14, q15, q3
-+.endm
-+
-+/* TODO: expand macros and do better instructions scheduling */
-+.macro pixman_composite_out_reverse_8_0565_process_pixblock_tail_head
-+    vld1.8     {d15}, [SRC]!
-+    pixman_composite_out_reverse_8_0565_process_pixblock_tail
-+    vld1.16    {d10, d11}, [DST_R, :128]!
-+    cache_preload 8, 8
-+    pixman_composite_out_reverse_8_0565_process_pixblock_head
-+    vst1.16    {d28, d29}, [DST_W, :128]!
-+.endm
-+
-+generate_composite_function \
-+    pixman_composite_out_reverse_8_0565_asm_neon, 8, 0, 16, \
-+    FLAG_DST_READWRITE, \
-+    8, /* number of pixels, processed in a single block */ \
-+    5, /* prefetch distance */ \
-+    default_init_need_all_regs, \
-+    default_cleanup_need_all_regs, \
-+    pixman_composite_out_reverse_8_0565_process_pixblock_head, \
-+    pixman_composite_out_reverse_8_0565_process_pixblock_tail, \
-+    pixman_composite_out_reverse_8_0565_process_pixblock_tail_head, \
-+    28, /* dst_w_basereg */ \
-+    10, /* dst_r_basereg */ \
-+    15, /* src_basereg   */ \
-+    0   /* mask_basereg  */
-+
-diff --git a/pixman/pixman-arm-neon.c b/pixman/pixman-arm-neon.c
-index da1fdeb..2e37a4a 100644
---- a/pixman/pixman-arm-neon.c
-+++ b/pixman/pixman-arm-neon.c
-@@ -60,6 +60,8 @@ PIXMAN_ARM_BIND_FAST_PATH_SRC_DST (neon, over_8888_0565,
-                                    uint32_t, 1, uint16_t, 1)
- PIXMAN_ARM_BIND_FAST_PATH_SRC_DST (neon, over_8888_8888,
-                                    uint32_t, 1, uint32_t, 1)
-+PIXMAN_ARM_BIND_FAST_PATH_SRC_DST (neon, out_reverse_8_0565,
-+                                   uint8_t, 1, uint16_t, 1)
- PIXMAN_ARM_BIND_FAST_PATH_N_DST (neon, over_n_0565,
-                                  uint16_t, 1)
-@@ -310,6 +312,8 @@ static const pixman_fast_path_t arm_neon_fast_paths[] =
-     PIXMAN_STD_FAST_PATH (OVER_REVERSE, solid, null, a8b8g8r8, neon_composite_over_reverse_n_8888),
-     PIXMAN_STD_FAST_PATH (OUT_REVERSE, r5g6b5,   a8,       r5g6b5,   neon_composite_out_reverse_0565_8_0565),
-     PIXMAN_STD_FAST_PATH (OUT_REVERSE, b5g6r5,   a8,       b5g6r5,   neon_composite_out_reverse_0565_8_0565),
-+    PIXMAN_STD_FAST_PATH (OUT_REVERSE, a8,       null,     r5g6b5,   neon_composite_out_reverse_8_0565),
-+    PIXMAN_STD_FAST_PATH (OUT_REVERSE, a8,       null,     b5g6r5,   neon_composite_out_reverse_8_0565),
-     { PIXMAN_OP_NONE },
- };
--- 
-1.6.6.1
-
@@ -1,7 +1,7 @@
-From c5099dabb417cab343185d6e22ae4925e53a756f Mon Sep 17 00:00:00 2001
+From 97b2bb933455f222b392b5c60a8bde82d7d6329f Mon Sep 17 00:00:00 2001
 From: Siarhei Siamashka <siarhei.siamashka@nokia.com>
 Date: Tue, 16 Mar 2010 16:55:28 +0100
-Subject: [PATCH 3/9] Generic C implementation of pixman_blt with overlapping support
+Subject: [PATCH 1/8] Generic C implementation of pixman_blt with overlapping support
 
 Uses memcpy/memmove functions to copy pixels, can handle the
 case when both source and destination areas are in the same
@@ -51,7 +51,7 @@ index 4d234a0..c4d2c14 100644
  
  static pixman_bool_t
 diff --git a/pixman/pixman-private.h b/pixman/pixman-private.h
-index 440ae7a..aac2067 100644
+index c43172b..f980454 100644
 --- a/pixman/pixman-private.h
 +++ b/pixman/pixman-private.h
 @@ -10,6 +10,7 @@
@@ -62,7 +62,7 @@ index 440ae7a..aac2067 100644
  #include <assert.h>
  #include <stdio.h>
  #include <string.h>
-@@ -883,4 +884,46 @@ void pixman_timer_register (pixman_timer_t *timer);
+@@ -873,4 +874,46 @@ void pixman_timer_register (pixman_timer_t *timer);
  
  #endif /* PIXMAN_TIMERS */
  
@@ -1,14 +1,14 @@
-From f8c3deb1f7a26992fe217d1748a1fa5c832bbbd2 Mon Sep 17 00:00:00 2001
+From 47b31f936641da07431093ede340465625bfcb3d Mon Sep 17 00:00:00 2001
 From: Siarhei Siamashka <siarhei.siamashka@nokia.com>
 Date: Thu, 22 Oct 2009 05:45:47 +0300
-Subject: [PATCH 4/9] Support of overlapping src/dst for pixman_blt_mmx
+Subject: [PATCH 2/8] Support of overlapping src/dst for pixman_blt_mmx
 
 ---
  pixman/pixman-mmx.c |   55 +++++++++++++++++++++++++++++---------------------
  1 files changed, 32 insertions(+), 23 deletions(-)
 
 diff --git a/pixman/pixman-mmx.c b/pixman/pixman-mmx.c
-index b284cd3..5b6afaa 100644
+index e936c4c..2413197 100644
 --- a/pixman/pixman-mmx.c
 +++ b/pixman/pixman-mmx.c
 @@ -2996,34 +2996,43 @@ pixman_blt_mmx (uint32_t *src_bits,
@@ -1,17 +1,17 @@
-From 79fe7f347fe396aa2c917a1928fc18ab9321336c Mon Sep 17 00:00:00 2001
+From 13be027637602fffda3b3cb6e171d8d6a67b3b4b Mon Sep 17 00:00:00 2001
 From: Siarhei Siamashka <siarhei.siamashka@nokia.com>
 Date: Thu, 22 Oct 2009 05:45:54 +0300
-Subject: [PATCH 5/9] Support of overlapping src/dst for pixman_blt_sse2
+Subject: [PATCH 3/8] Support of overlapping src/dst for pixman_blt_sse2
 
 ---
  pixman/pixman-sse2.c |   55 +++++++++++++++++++++++++++++--------------------
  1 files changed, 32 insertions(+), 23 deletions(-)
 
 diff --git a/pixman/pixman-sse2.c b/pixman/pixman-sse2.c
-index 33d71ee..dcd6dcd 100644
+index 5907de0..25015ae 100644
 --- a/pixman/pixman-sse2.c
 +++ b/pixman/pixman-sse2.c
-@@ -5614,34 +5614,43 @@ pixman_blt_sse2 (uint32_t *src_bits,
+@@ -5027,34 +5027,43 @@ pixman_blt_sse2 (uint32_t *src_bits,
  {
      uint8_t *   src_bytes;
      uint8_t *   dst_bytes;
@@ -76,16 +76,16 @@ index 33d71ee..dcd6dcd 100644
 +      }
      }
  
-     cache_prefetch ((__m128i*)src_bytes);
-@@ -5654,7 +5663,7 @@ pixman_blt_sse2 (uint32_t *src_bits,
+     while (height--)
+@@ -5064,7 +5073,7 @@ pixman_blt_sse2 (uint32_t *src_bits,
        uint8_t *d = dst_bytes;
        src_bytes += src_stride;
        dst_bytes += dst_stride;
 -      w = byte_width;
 +      w = width;
  
-       cache_prefetch_next ((__m128i*)s);
-       cache_prefetch_next ((__m128i*)d);
+       while (w >= 2 && ((unsigned long)d & 3))
+       {
 -- 
 1.6.6.1
 
@@ -1,17 +1,17 @@
-From ea0f7b1ae605bb57ca23e88b38b9c19390596723 Mon Sep 17 00:00:00 2001
+From a913cc05a1a1c5a813cf06d248334edede9caab7 Mon Sep 17 00:00:00 2001
 From: Siarhei Siamashka <siarhei.siamashka@nokia.com>
 Date: Wed, 18 Nov 2009 06:08:48 +0200
-Subject: [PATCH 6/9] Support of overlapping src/dst for pixman_blt_neon
+Subject: [PATCH 4/8] Support of overlapping src/dst for pixman_blt_neon
 
 ---
  pixman/pixman-arm-neon.c |   62 +++++++++++++++++++++++++++++++++++++--------
  1 files changed, 51 insertions(+), 11 deletions(-)
 
 diff --git a/pixman/pixman-arm-neon.c b/pixman/pixman-arm-neon.c
-index e0d2001..db1c2df 100644
+index be5d403..cbfd7cf 100644
 --- a/pixman/pixman-arm-neon.c
 +++ b/pixman/pixman-arm-neon.c
-@@ -172,26 +172,66 @@ pixman_blt_neon (uint32_t *src_bits,
+@@ -176,26 +176,66 @@ pixman_blt_neon (uint32_t *src_bits,
                   int       width,
                   int       height)
  {
@@ -1,61 +1,18 @@
-From 38aabb3be87ea68e37f34256c778d07f62680ec6 Mon Sep 17 00:00:00 2001
+From f75e9d1868e21dd75ff3a2ca3561546d23877ddb Mon Sep 17 00:00:00 2001
 From: Siarhei Siamashka <siarhei.siamashka@nokia.com>
 Date: Thu, 10 Dec 2009 00:51:50 +0200
-Subject: [PATCH 1/9] ARM: HACK: added NEON optimizations for fetch/store r5g6b5 scanline
+Subject: [PATCH 5/8] ARM: added NEON optimizations for fetch/store r5g6b5 scanline
 
 ---
- pixman/pixman-access.c       |   23 ++++++++++++++++++++++-
  pixman/pixman-arm-neon-asm.S |   20 ++++++++++++++++++++
  pixman/pixman-arm-neon.c     |   40 ++++++++++++++++++++++++++++++++++++++++
- pixman/pixman-private.h      |    5 +++++
- 4 files changed, 87 insertions(+), 1 deletions(-)
+ 2 files changed, 60 insertions(+), 0 deletions(-)
 
-diff --git a/pixman/pixman-access.c b/pixman/pixman-access.c
-index f1ce0ba..b33da29 100644
---- a/pixman/pixman-access.c
-+++ b/pixman/pixman-access.c
-@@ -2836,7 +2836,7 @@ typedef struct
-           store_scanline_ ## format, store_scanline_generic_64        \
-     }
--static const format_info_t accessors[] =
-+static format_info_t accessors[] =
- {
- /* 32 bpp formats */
-     FORMAT_INFO (a8r8g8b8),
-@@ -2978,6 +2978,27 @@ _pixman_bits_image_setup_accessors (bits_image_t *image)
-       setup_accessors (image);
- }
-+void
-+_pixman_bits_override_accessors (pixman_format_code_t format,
-+                                 fetch_scanline_t     fetch_func,
-+                                 store_scanline_t     store_func)
-+{
-+    format_info_t *info = accessors;
-+
-+    while (info->format != PIXMAN_null)
-+    {
-+      if (info->format == format)
-+      {
-+          if (fetch_func)
-+              info->fetch_scanline_32 = fetch_func;
-+          if (store_func)
-+              info->store_scanline_32 = store_func;
-+          return;
-+      }
-+      info++;
-+    }
-+}
-+
- #else
- void
 diff --git a/pixman/pixman-arm-neon-asm.S b/pixman/pixman-arm-neon-asm.S
-index 9f6568f..e1a697e 100644
+index e4db5cd..c79ba81 100644
 --- a/pixman/pixman-arm-neon-asm.S
 +++ b/pixman/pixman-arm-neon-asm.S
-@@ -458,6 +458,16 @@ generate_composite_function \
+@@ -459,6 +459,16 @@ generate_composite_function \
      pixman_composite_src_8888_0565_process_pixblock_tail, \
      pixman_composite_src_8888_0565_process_pixblock_tail_head
  
@@ -72,7 +29,7 @@ index 9f6568f..e1a697e 100644
  /******************************************************************************/
  
  .macro pixman_composite_src_0565_8888_process_pixblock_head
-@@ -493,6 +503,16 @@ generate_composite_function \
+@@ -494,6 +504,16 @@ generate_composite_function \
      pixman_composite_src_0565_8888_process_pixblock_tail, \
      pixman_composite_src_0565_8888_process_pixblock_tail_head
  
@@ -88,12 +45,12 @@ index 9f6568f..e1a697e 100644
 +
  /******************************************************************************/
  
- .macro pixman_composite_add_8000_8000_process_pixblock_head
+ .macro pixman_composite_add_8_8_process_pixblock_head
 diff --git a/pixman/pixman-arm-neon.c b/pixman/pixman-arm-neon.c
-index ece6054..e0d2001 100644
+index cbfd7cf..f88c8f8 100644
 --- a/pixman/pixman-arm-neon.c
 +++ b/pixman/pixman-arm-neon.c
-@@ -344,6 +344,42 @@ BIND_COMBINE_U (over)
+@@ -392,6 +392,42 @@ BIND_COMBINE_U (over)
  BIND_COMBINE_U (add)
  BIND_COMBINE_U (out_reverse)
  
@@ -136,7 +93,7 @@ index ece6054..e0d2001 100644
  pixman_implementation_t *
  _pixman_implementation_create_arm_neon (void)
  {
-@@ -355,6 +391,10 @@ _pixman_implementation_create_arm_neon (void)
+@@ -407,6 +443,10 @@ _pixman_implementation_create_arm_neon (void)
      imp->combine_32[PIXMAN_OP_ADD] = neon_combine_add_u;
      imp->combine_32[PIXMAN_OP_OUT_REVERSE] = neon_combine_out_reverse_u;
  
@@ -147,22 +104,6 @@ index ece6054..e0d2001 100644
      imp->blt = arm_neon_blt;
      imp->fill = arm_neon_fill;
  
-diff --git a/pixman/pixman-private.h b/pixman/pixman-private.h
-index d85868f..564f8f0 100644
---- a/pixman/pixman-private.h
-+++ b/pixman/pixman-private.h
-@@ -206,6 +206,11 @@ void
- _pixman_bits_image_setup_accessors (bits_image_t *image);
- void
-+_pixman_bits_override_accessors (pixman_format_code_t format,
-+                                 fetch_scanline_t     fetch_func,
-+                                 store_scanline_t     store_func);
-+
-+void
- _pixman_image_get_scanline_generic_64  (pixman_image_t *image,
-                                         int             x,
-                                         int             y,
 -- 
 1.6.6.1
 
diff --git a/recipes/xorg-lib/pixman-0.19.6/0006-ARM-added-NEON-optimizations-for-fetch-store-a8-scan.patch b/recipes/xorg-lib/pixman-0.19.6/0006-ARM-added-NEON-optimizations-for-fetch-store-a8-scan.patch
new file mode 100644 (file)
index 0000000..d050646
--- /dev/null
@@ -0,0 +1,148 @@
+From a1cd695c5e22f0f4a2b7272fab675a3cc510bacb Mon Sep 17 00:00:00 2001
+From: Siarhei Siamashka <siarhei.siamashka@nokia.com>
+Date: Thu, 23 Sep 2010 21:10:56 +0300
+Subject: [PATCH 6/8] ARM: added NEON optimizations for fetch/store a8 scanline
+
+---
+ pixman/pixman-arm-neon-asm.S |   64 ++++++++++++++++++++++++++++++++++++++++++
+ pixman/pixman-arm-neon.c     |   42 +++++++++++++++++++++++++++
+ 2 files changed, 106 insertions(+), 0 deletions(-)
+
+diff --git a/pixman/pixman-arm-neon-asm.S b/pixman/pixman-arm-neon-asm.S
+index c79ba81..ca0825c 100644
+--- a/pixman/pixman-arm-neon-asm.S
++++ b/pixman/pixman-arm-neon-asm.S
+@@ -418,6 +418,70 @@ generate_composite_function \
+ /******************************************************************************/
++.macro pixman_composite_src_8_8888_process_pixblock_head
++    /* This is tricky part: we can't set these values just once in 'init' macro
++     * because leading/trailing pixels handling part uses VZIP.8 instructions,
++     * and they operate on values in-place and destroy original registers
++     * content. Think about it like VST4.8 instruction corrupting NEON
++     * registers after write in 'tail_head' macro. Except that 'tail_head'
++     * macro itself actually does not need these extra VMOVs because it uses
++     * real VST4.8 instruction.
++     */
++    vmov.u8     q0, #0
++    vmov.u8     d2, #0
++.endm
++
++.macro pixman_composite_src_8_8888_process_pixblock_tail
++.endm
++
++.macro pixman_composite_src_8_8888_process_pixblock_tail_head
++    vst4.8      {d0, d1, d2, d3}, [DST_W, :128]!
++    vld1.8      {d3}, [SRC]!
++.endm
++
++generate_composite_function_single_scanline \
++    pixman_fetch_scanline_a8_asm_neon, 8, 0, 32, \
++    FLAG_DST_WRITEONLY | FLAG_DEINTERLEAVE_32BPP, \
++    8, /* number of pixels, processed in a single block */ \
++    default_init, \
++    default_cleanup, \
++    pixman_composite_src_8_8888_process_pixblock_head, \
++    pixman_composite_src_8_8888_process_pixblock_tail, \
++    pixman_composite_src_8_8888_process_pixblock_tail_head, \
++    0,  /* dst_w_basereg */ \
++    0,  /* dst_r_basereg */ \
++    3,  /* src_basereg   */ \
++    0   /* mask_basereg  */
++
++/******************************************************************************/
++
++.macro pixman_composite_src_8888_8_process_pixblock_head
++.endm
++
++.macro pixman_composite_src_8888_8_process_pixblock_tail
++.endm
++
++.macro pixman_composite_src_8888_8_process_pixblock_tail_head
++    vst1.8      {d3}, [DST_W, :64]!
++    vld4.8      {d0, d1, d2, d3}, [SRC]!
++.endm
++
++generate_composite_function_single_scanline \
++    pixman_store_scanline_a8_asm_neon, 32, 0, 8, \
++    FLAG_DST_WRITEONLY | FLAG_DEINTERLEAVE_32BPP, \
++    8, /* number of pixels, processed in a single block */ \
++    default_init, \
++    default_cleanup, \
++    pixman_composite_src_8888_8_process_pixblock_head, \
++    pixman_composite_src_8888_8_process_pixblock_tail, \
++    pixman_composite_src_8888_8_process_pixblock_tail_head, \
++    3,  /* dst_w_basereg */ \
++    0,  /* dst_r_basereg */ \
++    0,  /* src_basereg   */ \
++    0   /* mask_basereg  */
++
++/******************************************************************************/
++
+ .macro pixman_composite_src_8888_0565_process_pixblock_head
+     vshll.u8    q8, d1, #8
+     vshll.u8    q14, d2, #8
+diff --git a/pixman/pixman-arm-neon.c b/pixman/pixman-arm-neon.c
+index f88c8f8..43091d2 100644
+--- a/pixman/pixman-arm-neon.c
++++ b/pixman/pixman-arm-neon.c
+@@ -428,6 +428,45 @@ neon_store_scanline_r5g6b5 (bits_image_t *  image,
+     pixman_store_scanline_r5g6b5_asm_neon (width, pixel, values);
+ }
++void
++pixman_fetch_scanline_a8_asm_neon (int             width,
++                                   uint32_t       *buffer,
++                                   const uint8_t  *pixel);
++
++
++void
++pixman_store_scanline_a8_asm_neon (int             width,
++                                   uint8_t        *pixel,
++                                   const uint32_t *values);
++
++static void
++neon_fetch_scanline_a8 (pixman_image_t *image,
++                        int             x,
++                        int             y,
++                        int             width,
++                        uint32_t *      buffer,
++                        const uint32_t *mask)
++{
++    const uint32_t *bits = image->bits.bits + y * image->bits.rowstride;
++    const uint8_t *pixel = (const uint8_t *) bits + x;
++
++    pixman_fetch_scanline_a8_asm_neon (width, buffer, pixel);
++}
++
++static void
++neon_store_scanline_a8 (bits_image_t *  image,
++                        int             x,
++                        int             y,
++                        int             width,
++                        const uint32_t *values)
++{
++    uint32_t *bits = image->bits + image->rowstride * y;
++    uint8_t *pixel = (uint8_t *) bits + x;
++
++    pixman_store_scanline_a8_asm_neon (width, pixel, values);
++}
++
++
+ pixman_implementation_t *
+ _pixman_implementation_create_arm_neon (void)
+ {
+@@ -446,6 +485,9 @@ _pixman_implementation_create_arm_neon (void)
+     _pixman_bits_override_accessors (PIXMAN_r5g6b5,
+                                      neon_fetch_scanline_r5g6b5,
+                                      neon_store_scanline_r5g6b5);
++    _pixman_bits_override_accessors (PIXMAN_a8,
++                                     neon_fetch_scanline_a8,
++                                     neon_store_scanline_a8);
+     imp->blt = arm_neon_blt;
+     imp->fill = arm_neon_fill;
+-- 
+1.6.6.1
+
diff --git a/recipes/xorg-lib/pixman-0.19.6/0007-ARM-added-NEON-optimizations-for-fetching-x8r8g8b8-s.patch b/recipes/xorg-lib/pixman-0.19.6/0007-ARM-added-NEON-optimizations-for-fetching-x8r8g8b8-s.patch
new file mode 100644 (file)
index 0000000..7f28f47
--- /dev/null
@@ -0,0 +1,77 @@
+From d6ae7da60cc797900b5eff0786536c4a11ab0f50 Mon Sep 17 00:00:00 2001
+From: Siarhei Siamashka <siarhei.siamashka@nokia.com>
+Date: Fri, 24 Sep 2010 18:22:44 +0300
+Subject: [PATCH 7/8] ARM: added NEON optimizations for fetching x8r8g8b8 scanline
+
+---
+ pixman/pixman-arm-neon-asm.S |   14 ++++++++++++++
+ pixman/pixman-arm-neon.c     |   21 +++++++++++++++++++++
+ 2 files changed, 35 insertions(+), 0 deletions(-)
+
+diff --git a/pixman/pixman-arm-neon-asm.S b/pixman/pixman-arm-neon-asm.S
+index ca0825c..ffd0b83 100644
+--- a/pixman/pixman-arm-neon-asm.S
++++ b/pixman/pixman-arm-neon-asm.S
+@@ -1206,6 +1206,20 @@ generate_composite_function \
+     0, /* src_basereg   */ \
+     0  /* mask_basereg  */
++generate_composite_function_single_scanline /* reuses the src_x888_8888 init/pixblock macros; presumably emits the function named on the next line - confirm */ \
++    pixman_fetch_scanline_x888_asm_neon, 32, 0, 32, \
++    FLAG_DST_WRITEONLY, \
++    8, /* number of pixels, processed in a single block */ \
++    pixman_composite_src_x888_8888_init, \
++    default_cleanup, \
++    pixman_composite_src_x888_8888_process_pixblock_head, \
++    pixman_composite_src_x888_8888_process_pixblock_tail, \
++    pixman_composite_src_x888_8888_process_pixblock_tail_head, \
++    0,  /* dst_w_basereg */ \
++    0,  /* dst_r_basereg */ \
++    0,  /* src_basereg   */ \
++    0   /* mask_basereg  */
++
+ /******************************************************************************/
+ .macro pixman_composite_over_n_8_8888_process_pixblock_head
+diff --git a/pixman/pixman-arm-neon.c b/pixman/pixman-arm-neon.c
+index 43091d2..f84b5e6 100644
+--- a/pixman/pixman-arm-neon.c
++++ b/pixman/pixman-arm-neon.c
+@@ -466,6 +466,24 @@ neon_store_scanline_a8 (bits_image_t *  image,
+     pixman_store_scanline_a8_asm_neon (width, pixel, values);
+ }
++void /* name passed to generate_composite_function_single_scanline in pixman-arm-neon-asm.S */
++pixman_fetch_scanline_x888_asm_neon (int             width,
++                                     uint32_t       *buffer,
++                                     const uint32_t *pixel);
++/* x8r8g8b8 scanline fetcher: locate pixel (x, y) and pass the row to the NEON routine. */
++static void
++neon_fetch_scanline_x888 (pixman_image_t *image,
++                          int             x,
++                          int             y,
++                          int             width,
++                          uint32_t *      buffer,
++                          const uint32_t *mask)
++{
++    const uint32_t *bits = image->bits.bits + y * image->bits.rowstride; /* row start; rowstride is applied in uint32_t units */
++    const uint32_t *pixel = (const uint32_t *) bits + x; /* x indexes 32-bit pixels; the cast is a no-op, bits already has this type */
++    /* 'mask' is not used here. */
++    pixman_fetch_scanline_x888_asm_neon (width, buffer, pixel);
++}
+ pixman_implementation_t *
+ _pixman_implementation_create_arm_neon (void)
+@@ -488,6 +506,9 @@ _pixman_implementation_create_arm_neon (void)
+     _pixman_bits_override_accessors (PIXMAN_a8,
+                                      neon_fetch_scanline_a8,
+                                      neon_store_scanline_a8);
++    _pixman_bits_override_accessors (PIXMAN_x8r8g8b8,
++                                     neon_fetch_scanline_x888,
++                                     NULL);
+     imp->blt = arm_neon_blt;
+     imp->fill = arm_neon_fill;
+-- 
+1.6.6.1
+
diff --git a/recipes/xorg-lib/pixman-0.19.6/0008-ARM-optimization-for-scaled-src_0565_0565-operation-.patch b/recipes/xorg-lib/pixman-0.19.6/0008-ARM-optimization-for-scaled-src_0565_0565-operation-.patch
new file mode 100644 (file)
index 0000000..6efdb62
--- /dev/null
@@ -0,0 +1,172 @@
+From e1191ad6563a1fb02a45982b1c4d7fed3c655e97 Mon Sep 17 00:00:00 2001
+From: Siarhei Siamashka <siarhei.siamashka@nokia.com>
+Date: Mon, 4 Oct 2010 01:56:59 +0300
+Subject: [PATCH 8/8] ARM optimization for scaled src_0565_0565 operation with nearest filter
+
+The code actually uses only armv4t instructions.
+
+Benchmark from ARM11:
+
+    == before ==
+    op=1, src_fmt=10020565, dst_fmt=10020565, speed=34.86 MPix/s
+
+    == after ==
+    op=1, src_fmt=10020565, dst_fmt=10020565, speed=36.62 MPix/s
+
+Benchmark from ARM Cortex-A8:
+
+    == before ==
+    op=1, src_fmt=10020565, dst_fmt=10020565, speed=89.55 MPix/s
+
+    == after ==
+    op=1, src_fmt=10020565, dst_fmt=10020565, speed=94.91 MPix/s
+---
+ pixman/pixman-arm-simd-asm.S |   66 ++++++++++++++++++++++++++++++++++++++++++
+ pixman/pixman-arm-simd.c     |   37 +++++++++++++++++++++++
+ 2 files changed, 103 insertions(+), 0 deletions(-)
+
+diff --git a/pixman/pixman-arm-simd-asm.S b/pixman/pixman-arm-simd-asm.S
+index a3d2d40..b6f69db 100644
+--- a/pixman/pixman-arm-simd-asm.S
++++ b/pixman/pixman-arm-simd-asm.S
+@@ -1,5 +1,6 @@
+ /*
+  * Copyright © 2008 Mozilla Corporation
++ * Copyright © 2010 Nokia Corporation
+  *
+  * Permission to use, copy, modify, distribute, and sell this software and its
+  * documentation for any purpose is hereby granted without fee, provided that
+@@ -328,3 +329,68 @@ pixman_asm_function pixman_composite_over_n_8_8888_asm_armv6
+       pop     {r4, r5, r6, r7, r8, r9, r10, r11}
+       bx      lr
+ .endfunc
++
++/*
++ * Note: This function is actually primarily optimized for ARM Cortex-A8
++ * pipeline. In order to get good performance on ARM9/ARM11 cores (which
++ * don't have efficient write combining), it needs to be changed to use
++ * 16-byte aligned writes using STM instruction.
++ */
++pixman_asm_function pixman_scaled_nearest_scanline_565_565_SRC_asm_armv6
++      DST     .req    r0 /* r0-r3 carry the first four C arguments: dst, src, w, vx */
++      SRC     .req    r1
++      W       .req    r2
++      VX      .req    r3
++      UNIT_X  .req    r12
++      TMP1    .req    r4
++      TMP2    .req    r5
++      MASK    .req    r6
++      ldr     UNIT_X, [sp] /* fifth argument (unit_x); read before the push below moves sp */
++      push    {r4, r5, r6, r7} /* r7 is saved although nothing here writes it - presumably to keep sp 8-byte aligned */
++      mvn     MASK, #1 /* MASK = ~1: clears bit 0 so computed offsets are even (halfword) byte offsets */
++
++      /* define helper macro */
++      .macro  scale_2_pixels
++              ldrh    TMP1, [SRC, TMP1] /* TMP1 enters as a byte offset and is overwritten with the pixel */
++              and     TMP2, MASK, VX, lsr #15 /* offset of the next pixel: (VX >> 15) & ~1 */
++              add     VX, VX, UNIT_X
++              strh    TMP1, [DST], #2
++
++              ldrh    TMP2, [SRC, TMP2]
++              and     TMP1, MASK, VX, lsr #15 /* leaves TMP1 primed for whatever load comes next */
++              add     VX, VX, UNIT_X
++              strh    TMP2, [DST], #2
++      .endm
++
++      /* now do the scaling */
++      and     TMP1, MASK, VX, lsr #15 /* prime TMP1 with the first source offset */
++      add     VX, VX, UNIT_X
++      subs    W, #4 /* from here on W is biased by -4 */
++      blt     2f /* fewer than 4 pixels: skip the main loop */
++1: /* main loop, process 4 pixels per iteration */
++      scale_2_pixels
++      scale_2_pixels
++      subs    W, W, #4
++      bge     1b
++2:
++      tst     W, #2 /* the bias is a multiple of 4, so bits 1:0 of W still equal the leftover pixel count */
++      beq     2f
++      scale_2_pixels
++2:
++      tst     W, #1 /* one odd pixel left? */
++      ldrneh  TMP1, [SRC, TMP1]
++      strneh  TMP1, [DST], #2
++      /* cleanup helper macro */
++      .purgem scale_2_pixels
++      .unreq  DST
++      .unreq  SRC
++      .unreq  W
++      .unreq  VX
++      .unreq  UNIT_X
++      .unreq  TMP1
++      .unreq  TMP2
++      .unreq  MASK
++      /* return */
++      pop     {r4, r5, r6, r7}
++      bx      lr
++.endfunc
+diff --git a/pixman/pixman-arm-simd.c b/pixman/pixman-arm-simd.c
+index d466a31..f6f464c 100644
+--- a/pixman/pixman-arm-simd.c
++++ b/pixman/pixman-arm-simd.c
+@@ -29,6 +29,7 @@
+ #include "pixman-private.h"
+ #include "pixman-arm-common.h"
++#include "pixman-fast-path.h"
+ #if 0 /* This code was moved to 'pixman-arm-simd-asm.S' */
+@@ -375,6 +376,35 @@ pixman_composite_over_n_8_8888_asm_armv6 (int32_t   width,
+ #endif
++void /* implemented in pixman-arm-simd-asm.S (pixman_asm_function of the same name) */
++pixman_scaled_nearest_scanline_565_565_SRC_asm_armv6 (uint16_t *      dst,
++                                                    uint16_t *      src,
++                                                    int32_t         w,
++                                                    pixman_fixed_t  vx,
++                                                    pixman_fixed_t  unit_x);
++/* Adapter handed to FAST_NEAREST_MAINLOOP below: forwards to the asm routine, dropping max_vx. */
++static force_inline void
++scaled_nearest_scanline_armv6_565_565_SRC (uint16_t *      dst,
++                                         uint16_t *      src,
++                                         int32_t         w,
++                                         pixman_fixed_t  vx,
++                                         pixman_fixed_t  unit_x,
++                                         pixman_fixed_t  max_vx) /* accepted but not forwarded */
++{
++    pixman_scaled_nearest_scanline_565_565_SRC_asm_armv6 (dst, src, w,
++                                                        vx, unit_x);
++}
++/* One main-loop instantiation per variant (COVER, NONE, PAD), all driven by the same scanline adapter. */
++FAST_NEAREST_MAINLOOP (armv6_565_565_cover_SRC,
++                     scaled_nearest_scanline_armv6_565_565_SRC,
++                     uint16_t, uint16_t, COVER);
++FAST_NEAREST_MAINLOOP (armv6_565_565_none_SRC,
++                     scaled_nearest_scanline_armv6_565_565_SRC,
++                     uint16_t, uint16_t, NONE);
++FAST_NEAREST_MAINLOOP (armv6_565_565_pad_SRC,
++                     scaled_nearest_scanline_armv6_565_565_SRC,
++                     uint16_t, uint16_t, PAD);
++
+ PIXMAN_ARM_BIND_FAST_PATH_SRC_DST (armv6, add_8_8,
+                                    uint8_t, 1, uint8_t, 1)
+ PIXMAN_ARM_BIND_FAST_PATH_SRC_DST (armv6, over_8888_8888,
+@@ -404,6 +434,13 @@ static const pixman_fast_path_t arm_simd_fast_paths[] =
+     PIXMAN_STD_FAST_PATH (OVER, solid, a8, a8b8g8r8, armv6_composite_over_n_8_8888),
+     PIXMAN_STD_FAST_PATH (OVER, solid, a8, x8b8g8r8, armv6_composite_over_n_8_8888),
++    SIMPLE_NEAREST_FAST_PATH_COVER (SRC, r5g6b5, r5g6b5, armv6_565_565),
++    SIMPLE_NEAREST_FAST_PATH_COVER (SRC, b5g6r5, b5g6r5, armv6_565_565),
++    SIMPLE_NEAREST_FAST_PATH_NONE (SRC, r5g6b5, r5g6b5, armv6_565_565),
++    SIMPLE_NEAREST_FAST_PATH_NONE (SRC, b5g6r5, b5g6r5, armv6_565_565),
++    SIMPLE_NEAREST_FAST_PATH_PAD (SRC, r5g6b5, r5g6b5, armv6_565_565),
++    SIMPLE_NEAREST_FAST_PATH_PAD (SRC, b5g6r5, b5g6r5, armv6_565_565),
++
+     { PIXMAN_OP_NONE },
+ };
+-- 
+1.6.6.1
+
diff --git a/recipes/xorg-lib/pixman_0.19.4.bb b/recipes/xorg-lib/pixman_0.19.4.bb
deleted file mode 100644 (file)
index b02a7a3..0000000
+++ /dev/null
@@ -1,23 +0,0 @@
-require pixman.inc
-
-SRC_URI[archive.md5sum] = "100a2d23f1d5683fdaa5d7ca71a0182b"
-SRC_URI[archive.sha256sum] = "04e613f87fec13e5d6e8540587af1112e9ab19f9d550751e848a2d65deb26fd6"
-
-PR = "${INC_PR}.1"
-
-SRC_URI += "\
-           file://0001-ARM-HACK-added-NEON-optimizations-for-fetch-store-r5.patch \
-           file://0002-Don-t-discriminate-PAD-and-REFLECT-repeat-in-standar.patch \
-           file://0003-Generic-C-implementation-of-pixman_blt-with-overlapp.patch \
-           file://0004-Support-of-overlapping-src-dst-for-pixman_blt_mmx.patch \
-           file://0005-Support-of-overlapping-src-dst-for-pixman_blt_sse2.patch \
-           file://0006-Support-of-overlapping-src-dst-for-pixman_blt_neon.patch \
-           file://0007-ARM-added-neon_composite_add_0565_8_0565-fast-path.patch \
-           file://0008-ARM-added-neon_composite_out_reverse_0565_8_0565-fas.patch \
-           file://0009-ARM-added-neon_composite_out_reverse_8_0565-fast-pat.patch \
-"
-
-NEON = " --disable-arm-neon "
-NEON_armv7a = " "
-
-EXTRA_OECONF = "${NEON} --disable-gtk"
diff --git a/recipes/xorg-lib/pixman_0.19.6.bb b/recipes/xorg-lib/pixman_0.19.6.bb
new file mode 100644 (file)
index 0000000..984fde2
--- /dev/null
@@ -0,0 +1,22 @@
+require pixman.inc
+
+SRC_URI[archive.md5sum] = "3f31cf670880199979d71a3234308cc9"
+SRC_URI[archive.sha256sum] = "1bc9f0b00de69e3aeab3525012506608ea3d913eb452d0134c729c1d7abab1b5"
+
+PR = "${INC_PR}.0"
+# Local patches 0001-0008 from the pixman-0.19.6/ directory, appended to the SRC_URI inherited from pixman.inc.
+SRC_URI += "\
+           file://0001-Generic-C-implementation-of-pixman_blt-with-overlapp.patch \
+           file://0002-Support-of-overlapping-src-dst-for-pixman_blt_mmx.patch \
+           file://0003-Support-of-overlapping-src-dst-for-pixman_blt_sse2.patch \
+           file://0004-Support-of-overlapping-src-dst-for-pixman_blt_neon.patch \
+           file://0005-ARM-added-NEON-optimizations-for-fetch-store-r5g6b5-.patch \
+           file://0006-ARM-added-NEON-optimizations-for-fetch-store-a8-scan.patch \
+           file://0007-ARM-added-NEON-optimizations-for-fetching-x8r8g8b8-s.patch \
+           file://0008-ARM-optimization-for-scaled-src_0565_0565-operation-.patch \
+"
+# --disable-arm-neon is the default; NEON_armv7a supplies a blank value instead (presumably applied as the armv7a override - confirm).
+NEON = " --disable-arm-neon "
+NEON_armv7a = " "
+
+EXTRA_OECONF = "${NEON} --disable-gtk"