pixman git: add some ARMv6 optimizations
authorKoen Kooi <koen@openembedded.org>
Wed, 10 Mar 2010 11:12:47 +0000 (12:12 +0100)
committerKoen Kooi <koen@openembedded.org>
Thu, 11 Mar 2010 10:22:37 +0000 (11:22 +0100)
recipes/xorg-lib/pixman/over-n-8-0565.patch [new file with mode: 0644]
recipes/xorg-lib/pixman/src-8888-0565.patch [new file with mode: 0644]
recipes/xorg-lib/pixman_git.bb

diff --git a/recipes/xorg-lib/pixman/over-n-8-0565.patch b/recipes/xorg-lib/pixman/over-n-8-0565.patch
new file mode 100644 (file)
index 0000000..3911068
--- /dev/null
@@ -0,0 +1,231 @@
+From de2221a32d0b6628116565563f7b4ccd0a44e8b6 Mon Sep 17 00:00:00 2001
+From: Siarhei Siamashka <siarhei.siamashka@nokia.com>
+Date: Thu, 04 Mar 2010 23:20:25 +0000
+Subject: ARM: added 'armv6_composite_over_n_8_0565' fast path
+
+Provides ~3x performance improvement when working with
+data in L1 cache and memory. This fast path is important
+for fonts rendering when using 16bpp desktop.
+
+Microbenchmark from N800 (ARM11 @ 400MHz), measured in MPix/s:
+
+before:
+
+ over_n_8_0565 = L1:   2.99 M:  2.86
+
+after:
+
+ over_n_8_0565 = L1:   9.07 M:  8.05
+---
+diff --git a/pixman/pixman-arm-simd.c b/pixman/pixman-arm-simd.c
+index 09a2888..c375c01 100644
+--- a/pixman/pixman-arm-simd.c
++++ b/pixman/pixman-arm-simd.c
+@@ -419,6 +419,193 @@ arm_composite_over_n_8_8888 (pixman_implementation_t * impl,
+     }
+ }
++#if defined(__ARM_EABI__) && defined(__linux__)
++/*
++ * ARMv6 assembly optimized version of 'composite_over_n_8_0565'. It is
++ * a bare metal 'naked' function which uses all the available CPU registers
++ * and is compatible with ARM EABI. It might (or might not) break when used
++ * with a different ABI, anyway it is better to be safe than sorry.
++ */
++static void __attribute__((naked)) armv6_composite_over_n_8_0565_asm (
++    uint16_t *dst, uint8_t *mask, uint32_t src, int w,
++    int dst_stride_delta, int mask_stride_delta, int h)
++{
++    asm volatile (
++        ".macro composite_internal_armv6_asm opaque_flag\n"
++            /* save all registers (8 words) to stack */
++            "stmdb   sp!, {r4-r11, ip, lr}\n"
++            /* some register aliases for better readability */
++            "DST     .req  r0\n"
++            "MASK    .req  r1\n"
++            "S       .req  r2\n"
++            "W       .req  r3\n"
++            "A       .req  r8\n"
++            "D       .req  r10\n"
++            "C0000FF .req  r11\n"
++            "C00001F .req  r9\n"
++            "C800080 .req  ip\n"
++            "CE000E0 .req  lr\n"
++            /* precalculate some stuff and put it on stack */
++            "mov     r6, #0xF8\n"
++            "mov     r7, #0xFC\n"
++
++            "str     W, [sp, #-8]!\n"
++
++            ".if \\opaque_flag\n"
++                /* precalculate and save it to stack for later use:
++                 * ((src >> 3) & 0x001F) |
++                 * ((src >> 5) & 0x07E0) |
++                 * ((src >> 8) & 0xF800)
++                 */
++                "mov     A, #0x1F\n"
++                "and     D, A, S, lsr #3\n"
++                "and     r4, S, #0xF80000\n"
++                "and     r5, S, #0xFC00\n"
++                "orr     D, r4, lsr #8\n"
++                "orr     D, r5, lsr #5\n"
++                "str     D, [sp, #4]\n"
++            ".endif\n"
++
++            "ldr     D, [sp, #(8 + 10*4 + 8)]\n" /* h */
++            "ldr     A, =0xFF00FF\n"
++            "ldr     C800080, =0x800080\n"
++            "ldr     CE000E0, =0xE000E0\n"
++            "ldr     C0000FF, =0xFF\n"
++            "ldr     C00001F, =0x1F\n"
++            "and     r4, A, S\n"           /* r4 = src & 0x00FF00FF */
++            "and     r5, A, S, lsr #8\n"   /* r5 = (src >> 8) & 0x00FF00FF */
++            "stmdb   sp!, {r4, r5, r6, r7}\n"
++        "0:\n"
++            "subs    D, D, #1\n"
++            "blt     6f\n"
++        "1:\n"
++            "subs    W, W, #1\n"
++            "blt     5f\n"
++        "2:\n"
++            "ldrb    A, [MASK], #1\n"
++            "ldmia   sp, {r4, r5, r6, r7}\n" /* load constants from stack */
++            "add     DST, DST, #2\n"
++            "cmp     A, #0\n"
++            "beq     1b\n"
++
++            ".if \\opaque_flag\n"
++                "cmp     A, #0xFF\n"
++                "bne     3f\n"
++                "ldr     D, [sp, #(4*4 + 4)]\n" /* load precalculated value */
++                "subs    W, #1\n"
++                "strh    D, [DST, #-2]\n"
++                "bge     2b\n"
++            ".endif\n"
++
++        "3:\n"
++            "ldrh    D, [DST, #-2]\n"
++            "mla     r4, A, r4, C800080\n"
++            "mla     r5, A, r5, C800080\n"
++            "and     r6, r6, D, lsl #3\n" /* & 0xF8 */
++            "and     r7, r7, D, lsr #3\n" /* & 0xFC */
++            "and     D, D, #0xF800\n"
++            "bic     S, r4, #0xFF0000\n"
++            "bic     A, r5, #0xFF0000\n"
++            "add     r4, r4, S, lsr #8\n"
++            "add     r5, r5, A, lsr #8\n"
++
++            "and     S, r7, #0xC0\n"
++            "orr     r6, r6, D, lsl #8\n"
++            "and     D, r6, CE000E0\n"
++            "eor     A, C0000FF, r5, lsr #24\n"
++            "orr     r6, D, lsr #5\n"
++            "orr     r7, S, lsr #6\n"
++
++            "mla     r6, A, r6, C800080\n"
++            "mla     r7, A, r7, C800080\n"
++            "subs    W, #1\n"
++            "bic     D, r6, #0xFF0000\n"
++            "bic     A, r7, #0xFF0000\n"
++            "add     r6, r6, D, lsr #8\n"
++            "uqadd8  r4, r4, r6\n"
++            "add     r7, r7, A, lsr #8\n"
++            "uqadd8  r5, r5, r7\n"
++            "and     D, C00001F, r4, lsr #11\n"
++            "and     r4, r4, #0xF8000000\n"
++            "and     r5, r5, #0xFC00\n"
++            "orr     D, r4, lsr #16\n"
++            "orr     D, r5, lsr #5\n"
++            "strh    D, [DST, #-2]\n"
++            "bge     2b\n"
++        "5:\n"
++            "ldr     r6, [sp, #(4*4 + 8 + 10*4 + 8)]\n" /* h */
++            "ldr     r4, [sp, #(4*4 + 8 + 10*4 + 4)]\n" /* mask stride */
++            "ldr     r5, [sp, #(4*4 + 8 + 10*4 + 0)]\n" /* dst stride */
++            "ldr     W, [sp, #(4*4)]\n"
++            "subs    r6, r6, #1\n" /* h */
++            "str     r6, [sp, #(4*4 + 8 + 10*4 + 8)]\n" /* h */
++            "add     MASK, MASK, r4\n"
++            "add     DST, DST, r5, lsl #1\n"
++            "bgt     1b\n"
++        "6:\n"
++            "add     sp, sp, #(4*4 + 8)\n"
++            /* restore all registers and return */
++            "ldmia   sp!, {r4-r11, ip, pc}\n"
++            ".unreq DST\n"
++            ".unreq MASK\n"
++            ".unreq S\n"
++            ".unreq W\n"
++            ".unreq A\n"
++            ".unreq D\n"
++            ".unreq C0000FF\n"
++            ".unreq C00001F\n"
++            ".unreq C800080\n"
++            ".unreq CE000E0\n"
++        ".endm\n"
++
++        "mov     ip, r2, lsr #24\n"
++        "cmp     ip, #0xFF\n"
++        "beq     9f\n"
++        "composite_internal_armv6_asm 0\n"
++    "9:\n"
++        "composite_internal_armv6_asm 1\n"
++        ".ltorg\n"
++        ".purgem composite_internal_armv6_asm\n"
++    );
++}
++
++static void
++armv6_composite_over_n_8_0565 (pixman_implementation_t * impl,
++                             pixman_op_t               op,
++                             pixman_image_t *          src_image,
++                             pixman_image_t *          mask_image,
++                             pixman_image_t *          dst_image,
++                             int32_t                   src_x,
++                             int32_t                   src_y,
++                             int32_t                   mask_x,
++                             int32_t                   mask_y,
++                             int32_t                   dest_x,
++                             int32_t                   dest_y,
++                             int32_t                   width,
++                             int32_t                   height)
++{
++    uint32_t src;
++    uint16_t *dst;
++    uint8_t  *mask;
++    int dst_stride, mask_stride;
++
++    src = _pixman_image_get_solid (src_image, dst_image->bits.format);
++
++    /* bail out if fully transparent */
++    if (src == 0)
++      return;
++
++    PIXMAN_IMAGE_GET_LINE (dst_image, dest_x, dest_y, uint16_t,
++                         dst_stride, dst, 1);
++    PIXMAN_IMAGE_GET_LINE (mask_image, mask_x, mask_y, uint8_t,
++                         mask_stride, mask, 1);
++
++    armv6_composite_over_n_8_0565_asm (dst, mask, src, width,
++      dst_stride - width, mask_stride - width, height);
++}
++
++#endif
++
+ static const pixman_fast_path_t arm_simd_fast_paths[] =
+ {
+     PIXMAN_STD_FAST_PATH (OVER, a8r8g8b8, null, a8r8g8b8, arm_composite_over_8888_8888),
+@@ -434,7 +621,10 @@ static const pixman_fast_path_t arm_simd_fast_paths[] =
+     PIXMAN_STD_FAST_PATH (OVER, solid, a8, x8r8g8b8, arm_composite_over_n_8_8888),
+     PIXMAN_STD_FAST_PATH (OVER, solid, a8, a8b8g8r8, arm_composite_over_n_8_8888),
+     PIXMAN_STD_FAST_PATH (OVER, solid, a8, x8b8g8r8, arm_composite_over_n_8_8888),
+-
++#if defined(__ARM_EABI__) && defined(__linux__)
++    PIXMAN_STD_FAST_PATH (OVER, solid, a8, r5g6b5, armv6_composite_over_n_8_0565),
++    PIXMAN_STD_FAST_PATH (OVER, solid, a8, b5g6r5, armv6_composite_over_n_8_0565),
++#endif
+     { PIXMAN_OP_NONE },
+ };
+--
+cgit v0.8.3-6-g21f6
diff --git a/recipes/xorg-lib/pixman/src-8888-0565.patch b/recipes/xorg-lib/pixman/src-8888-0565.patch
new file mode 100644 (file)
index 0000000..c544225
--- /dev/null
@@ -0,0 +1,324 @@
+From 6494f9ae8820078d0e6109bf8f294156f7a5da4c Mon Sep 17 00:00:00 2001
+From: Siarhei Siamashka <siarhei.siamashka@nokia.com>
+Date: Fri, 05 Mar 2010 00:40:34 +0000
+Subject: ARM: added 'armv6_composite_src_8888_0565' fast path
+
+Provides ~3x performance improvement when working with
+data in L1 cache, and ~80% performace improvement when working
+with memory. This fast path is important for 32bpp -> 16bpp
+color format conversion and is commonly used with 16bpp desktop.
+
+Microbenchmark from N800 (ARM11 @ 400MHz), measured in MPix/s:
+
+before:
+
+ src_8888_0565 = L1:  21.54 M: 15.62
+
+after (armv4):
+
+ src_8888_0565 = L1:  45.26 M: 23.29
+
+after (armv6):
+
+ src_8888_0565 = L1:  60.62 M: 28.37
+---
+diff --git a/pixman/pixman-arm-simd.c b/pixman/pixman-arm-simd.c
+index c375c01..69243c1 100644
+--- a/pixman/pixman-arm-simd.c
++++ b/pixman/pixman-arm-simd.c
+@@ -604,6 +604,282 @@ armv6_composite_over_n_8_0565 (pixman_implementation_t * impl,
+       dst_stride - width, mask_stride - width, height);
+ }
++static inline void
++armv4_composite_src_8888_0565_asm (
++    uint16_t *dst, uint32_t *src, int w, int dst_stride,
++    int src_stride, int h)
++{
++    uint32_t a, x, y, c1F001F = 0x1F001F, cFFFF = 0xFFFF;
++    int backup_w = w;
++    while (h--)
++    {
++        w = backup_w;
++        if (w > 0 && (uintptr_t)dst & 2)
++        {
++            x = *src++;
++
++            a = (x >> 3) & c1F001F;
++            x &= 0xFC00;
++            a |= a >> 5;
++            a |= x >> 5;
++
++            *dst++ = a;
++            w--;
++        }
++
++        asm volatile(
++            "subs  %[w], %[w], #2\n"
++            "blt   2f\n"
++        "1:\n"
++            "ldr   %[x], [%[src]], #4\n"
++            "ldr   %[y], [%[src]], #4\n"
++            "subs  %[w], %[w], #2\n"
++
++            "and   %[a], %[c1F001F], %[x], lsr #3\n"
++            "and   %[x], %[x], #0xFC00\n\n"
++            "orr   %[a], %[a], %[a], lsr #5\n"
++            "orr   %[x], %[a], %[x], lsr #5\n"
++
++            "and   %[a], %[c1F001F], %[y], lsr #3\n"
++            "and   %[y], %[y], #0xFC00\n\n"
++            "orr   %[a], %[a], %[a], lsr #5\n"
++            "orr   %[y], %[a], %[y], lsr #5\n"
++            /*
++             * Writing single 32-bit value is much faster than two
++             * separate 16-bit values for older CPUs without (efficient)
++             * write combining, even though it costs an extra instruction.
++             */
++            "and   %[x], %[x], %[cFFFF]\n"
++            "orr   %[x], %[x], %[y], lsl #16\n"
++            "str   %[x], [%[dst]], #4\n"
++            "bge   1b\n"
++        "2:\n"
++        : [c1F001F] "+&r" (c1F001F), [cFFFF] "+&r" (cFFFF),
++          [src] "+&r" (src), [dst] "+&r" (dst), [a] "=&r" (a),
++          [x] "=&r" (x), [y] "=&r" (y), [w] "+&r" (w)
++        );
++
++        if (w & 1)
++        {
++            x = *src++;
++
++            a = (x >> 3) & c1F001F;
++            x = x & 0xFC00;
++            a |= a >> 5;
++            a |= x >> 5;
++
++            *dst++ = a;
++        }
++
++        src += src_stride - backup_w;
++        dst += dst_stride - backup_w;
++    }
++}
++
++/*
++ * Conversion x8r8g8b8 -> r5g6b5
++ *
++ * Note: 'w' must be >= 7 here
++ */
++static void __attribute__((naked))
++armv6_composite_src_8888_0565_asm (
++    uint16_t *dst, uint32_t *src, int w, int dst_stride,
++    int src_stride, int h)
++{
++    asm volatile(
++        /* define supplementary macros */
++        ".macro cvt8888to565 PIX\n"
++            "and   A, C1F001F, \\PIX, lsr #3\n"
++            "and   \\PIX, \\PIX, #0xFC00\n\n"
++            "orr   A, A, A, lsr #5\n"
++            "orr   \\PIX, A, \\PIX, lsr #5\n"
++        ".endm\n"
++
++        ".macro combine_pixels_pair PIX1, PIX2\n"
++            /* Note: assume little endian byte order */
++            "pkhbt \\PIX1, \\PIX1, \\PIX2, lsl #16\n"
++        ".endm\n"
++
++        /* function entry, save all registers (10 words) to stack */
++        "stmdb   sp!, {r4-r11, ip, lr}\n"
++
++        /* define some aliases */
++        "DST     .req  r0\n"
++        "SRC     .req  r1\n"
++        "W       .req  r2\n"
++        "H       .req  r3\n"
++
++        "TMP1    .req  r4\n"
++        "TMP2    .req  r5\n"
++        "TMP3    .req  r6\n"
++        "TMP4    .req  r7\n"
++        "TMP5    .req  r8\n"
++        "TMP6    .req  r9\n"
++        "TMP7    .req  r10\n"
++        "TMP8    .req  r11\n"
++
++        "C1F001F .req  ip\n"
++        "A       .req  lr\n"
++
++        "ldr     TMP1, [sp, #(10*4+0)]\n" /* load src_stride */
++        "ldr     C1F001F, =0x1F001F\n"
++        "sub     r3, r3, W\n"
++        "str     r3, [sp, #(10*4+0)]\n" /* store (dst_stride-w) */
++        "ldr     r3, [sp, #(10*4+4)]\n" /* load h */
++        "sub     TMP1, TMP1, W\n"
++        "str     TMP1, [sp, #(10*4+4)]\n" /* store (src_stride-w) */
++
++        "str     W, [sp, #(8*4)]\n" /* saved ip = W */
++
++    "0:\n"
++        "subs    H, H, #1\n"
++        "blt     6f\n"
++    "1:\n"
++        /* align DST at 4 byte boundary */
++        "tst     DST, #2\n"
++        "beq     2f\n"
++        "ldr     TMP1, [SRC], #4\n"
++        "sub     W, W, #1\n"
++        "cvt8888to565 TMP1\n"
++        "strh    TMP1, [DST], #2\n"
++    "2:"
++        /* align DST at 8 byte boundary */
++        "tst     DST, #4\n"
++        "beq     2f\n"
++        "ldmia   SRC!, {TMP1, TMP2}\n"
++        "sub     W, W, #2\n"
++        "cvt8888to565 TMP1\n"
++        "cvt8888to565 TMP2\n"
++        "combine_pixels_pair TMP1, TMP2\n"
++        "str     TMP1, [DST], #4\n"
++    "2:"
++        /* align DST at 16 byte boundary */
++        "tst     DST, #8\n"
++        "beq     2f\n"
++        "ldmia   SRC!, {TMP1, TMP2, TMP3, TMP4}\n"
++        "sub     W, W, #4\n"
++        "cvt8888to565 TMP1\n"
++        "cvt8888to565 TMP2\n"
++        "cvt8888to565 TMP3\n"
++        "cvt8888to565 TMP4\n"
++        "combine_pixels_pair TMP1, TMP2\n"
++        "combine_pixels_pair TMP3, TMP4\n"
++        "stmia DST!, {TMP1, TMP3}\n"
++    "2:"
++        /* inner loop, process 8 pixels per iteration */
++        "subs    W, W, #8\n"
++        "blt     4f\n"
++    "3:\n"
++        "ldmia   SRC!, {TMP1, TMP2, TMP3, TMP4, TMP5, TMP6, TMP7, TMP8}\n"
++        "subs    W, W, #8\n"
++        "cvt8888to565 TMP1\n"
++        "cvt8888to565 TMP2\n"
++        "cvt8888to565 TMP3\n"
++        "cvt8888to565 TMP4\n"
++        "cvt8888to565 TMP5\n"
++        "cvt8888to565 TMP6\n"
++        "cvt8888to565 TMP7\n"
++        "cvt8888to565 TMP8\n"
++        "combine_pixels_pair TMP1, TMP2\n"
++        "combine_pixels_pair TMP3, TMP4\n"
++        "combine_pixels_pair TMP5, TMP6\n"
++        "combine_pixels_pair TMP7, TMP8\n"
++        "stmia   DST!, {TMP1, TMP3, TMP5, TMP7}\n"
++        "bge     3b\n"
++    "4:\n"
++
++        /* process the remaining pixels */
++        "tst     W, #4\n"
++        "beq     4f\n"
++        "ldmia   SRC!, {TMP1, TMP2, TMP3, TMP4}\n"
++        "cvt8888to565 TMP1\n"
++        "cvt8888to565 TMP2\n"
++        "cvt8888to565 TMP3\n"
++        "cvt8888to565 TMP4\n"
++        "combine_pixels_pair TMP1, TMP2\n"
++        "combine_pixels_pair TMP3, TMP4\n"
++        "stmia   DST!, {TMP1, TMP3}\n"
++    "4:\n"
++        "tst     W, #2\n"
++        "beq     4f\n"
++        "ldmia   SRC!, {TMP1, TMP2}\n"
++        "cvt8888to565 TMP1\n"
++        "cvt8888to565 TMP2\n"
++        "combine_pixels_pair TMP1, TMP2\n"
++        "str     TMP1, [DST], #4\n"
++    "4:\n"
++        "tst     W, #1\n"
++        "beq     4f\n"
++        "ldr     TMP1, [SRC], #4\n"
++        "cvt8888to565 TMP1\n"
++        "strh    TMP1, [DST], #2\n"
++    "4:\n"
++        "ldr     TMP1, [sp, #(10*4+0)]\n" /* (dst_stride-w) */
++        "ldr     TMP2, [sp, #(10*4+4)]\n" /* (src_stride-w) */
++        "ldr     W, [sp, #(8*4)]\n"
++        "subs    H, H, #1\n"
++        "add     DST, DST, TMP1, lsl #1\n"
++        "add     SRC, SRC, TMP2, lsl #2\n"
++        "bge     1b\n"
++    "6:\n"
++        /* restore all registers and return */
++        "ldmia   sp!, {r4-r11, ip, pc}\n"
++        ".ltorg\n"
++
++        ".unreq   DST\n"
++        ".unreq   SRC\n"
++        ".unreq   W\n"
++        ".unreq   H\n"
++
++        ".unreq   TMP1\n"
++        ".unreq   TMP2\n"
++        ".unreq   TMP3\n"
++        ".unreq   TMP4\n"
++        ".unreq   TMP5\n"
++        ".unreq   TMP6\n"
++        ".unreq   TMP7\n"
++        ".unreq   TMP8\n"
++
++        ".unreq   C1F001F\n"
++        ".unreq   A\n"
++
++        ".purgem  cvt8888to565\n"
++        ".purgem  combine_pixels_pair\n"
++    );
++}
++
++static void
++armv6_composite_src_8888_0565 (pixman_implementation_t * impl,
++                             pixman_op_t               op,
++                             pixman_image_t *          src_image,
++                             pixman_image_t *          mask_image,
++                             pixman_image_t *          dst_image,
++                             int32_t                   src_x,
++                             int32_t                   src_y,
++                             int32_t                   mask_x,
++                             int32_t                   mask_y,
++                             int32_t                   dest_x,
++                             int32_t                   dest_y,
++                             int32_t                   width,
++                             int32_t                   height)
++{
++    uint32_t *src;
++    uint16_t *dst;
++    int src_stride, dst_stride;
++
++    PIXMAN_IMAGE_GET_LINE (dst_image, dest_x, dest_y, uint16_t,
++                         dst_stride, dst, 1);
++    PIXMAN_IMAGE_GET_LINE (src_image, src_x, src_y, uint32_t,
++                         src_stride, src, 1);
++
++    if (width < 7)
++      armv4_composite_src_8888_0565_asm (dst, src, width,
++                                         dst_stride, src_stride, height);
++    else
++      armv6_composite_src_8888_0565_asm (dst, src, width,
++                                         dst_stride, src_stride, height);
++}
++
+ #endif
+ static const pixman_fast_path_t arm_simd_fast_paths[] =
+@@ -624,6 +900,10 @@ static const pixman_fast_path_t arm_simd_fast_paths[] =
+ #if defined(__ARM_EABI__) && defined(__linux__)
+     PIXMAN_STD_FAST_PATH (OVER, solid, a8, r5g6b5, armv6_composite_over_n_8_0565),
+     PIXMAN_STD_FAST_PATH (OVER, solid, a8, b5g6r5, armv6_composite_over_n_8_0565),
++    PIXMAN_STD_FAST_PATH (SRC, a8r8g8b8, null, r5g6b5, armv6_composite_src_8888_0565),
++    PIXMAN_STD_FAST_PATH (SRC, x8r8g8b8, null, r5g6b5, armv6_composite_src_8888_0565),
++    PIXMAN_STD_FAST_PATH (SRC, a8b8g8r8, null, b5g6r5, armv6_composite_src_8888_0565),
++    PIXMAN_STD_FAST_PATH (SRC, x8b8g8r8, null, b5g6r5, armv6_composite_src_8888_0565),
+ #endif
+     { PIXMAN_OP_NONE },
+ };
+--
+cgit v0.8.3-6-g21f6
index 3fdb698..b15fc35 100644 (file)
@@ -4,15 +4,15 @@ DESCRIPTION = "Low-level pixel manipulation library."
 LICENSE = "X11"
 
 PV = "0.17.8"
-PR = "r6"
+PR = "r8"
 PR_append = "+gitr${SRCREV}"
 
 BBCLASSEXTEND="native"
 
 SRCREV = "7a15fa25ddb88ebb4ac00772854271f102f06e81"
-
 DEFAULT_PREFERENCE = "-1"
-
 SRC_URI = "git://anongit.freedesktop.org/~sandmann/pixman;protocol=git;branch=flags \
            file://0003-Generic-C-implementation-of-pixman_blt-with-overlapp.patch;patch=1 \
            file://0004-Support-of-overlapping-src-dst-for-pixman_blt_mmx.patch;patch=1 \
@@ -21,6 +21,8 @@ SRC_URI = "git://anongit.freedesktop.org/~sandmann/pixman;protocol=git;branch=fl
            file://1-composite.patch;patch=1 \
            file://2-composite.patch;patch=1 \
            file://3-composite.patch;patch=1 \
+           file://over-n-8-0565.patch;patch=1 \
+           file://src-8888-0565.patch;patch=1 \
 "
 
 S = "${WORKDIR}/git"
@@ -28,7 +30,7 @@ S = "${WORKDIR}/git"
 inherit autotools_stage
 
 NEON = " --disable-arm-neon "
-NEON_armv7a = ""
+NEON_armv7a = " "
 
 EXTRA_OECONF = "${NEON} --disable-gtk"