omap3-pandora-kernel2: update
[openpandora.oe.git] / recipes / openal / openal-soft / neon_write_converter.patch
1 diff -ur openal-soft-1.15.1.orig/Alc/ALu.c openal-soft-1.15.1/Alc/ALu.c
2 --- openal-soft-1.15.1.orig/Alc/ALu.c   2014-07-02 03:36:17.874323362 +0300
3 +++ openal-soft-1.15.1/Alc/ALu.c        2014-07-03 02:41:18.000116397 +0300
4 @@ -956,11 +956,78 @@
5      return SamplesToDo*numchans*sizeof(T);                                    \
6  }
7  
8 +static int Write_ALshort(ALCdevice *device, ALshort *RESTRICT buffer, ALuint SamplesToDo)
9 +{ /* Converts device->DryBuffer floats to interleaved ALshort in buffer; returns SamplesToDo * numchans * sizeof(ALshort). */
10 +    ALfloat (*RESTRICT DryBuffer)[BUFFERSIZE] = device->DryBuffer;
11 +    ALuint numchans = ChannelsFromDevFmt(device->FmtChans);
12 +    const ALuint *offsets = device->ChannelOffsets;
13 +    ALuint i, j;
14 +
15 +#ifdef __ARM_NEON__
16 +    if (SamplesToDo > 0 && numchans == 2 && offsets[0] == 0 && offsets[1] == 1) /* the asm tail always stores one frame, so it must not run for 0 samples */
17 +    {
18 +        ALfloat *cl = DryBuffer[0];
19 +        ALfloat *cr = DryBuffer[1];
20 +        ALuint samples = SamplesToDo;
21 +        asm volatile (
22 +             "movw       r3, #0x0000fe00\n"
23 +             "movt       r3, #0x46ff\n"
24 +             "vdup.32    d4, r3 @ 32767.0\n"
25 +            "0:\n" /* main loop: converts 4 frames per iteration */
26 +             "vld1.32    {q0}, [%1, :128]!\n" /* :128 requires 16-byte aligned source; NOTE(review): confirm DryBuffer rows are aligned */
27 +             "vld1.32    {q1}, [%2, :128]!\n"
28 +             "subs       %3, #4\n" /* flags are consumed by the blt/bgt below */
29 +             "vmul.f32   q0, d4[0]\n"
30 +             "vmul.f32   q1, d4[0]\n"
31 +             "vcvt.s32.f32 q0, q0\n"
32 +             "vcvt.s32.f32 q1, q1\n"
33 +             "pld        [%1, #64*2]\n"
34 +             "pld        [%2, #64*2]\n"
35 +             "vqmovn.s32 d0, q0\n" /* saturating narrow: d0 = L0..L3 */
36 +             "vqmovn.s32 d1, q1\n" /* saturating narrow: d1 = R0..R3 */
37 +             "blt        1f\n" /* fewer than 4 frames were left: partial store */
38 +             "vst2.16    {d0,d1}, [%0]!\n" /* interleaving store of 4 full frames */
39 +             "bgt        0b\n"
40 +             "nop\n"
41 +             "b          2f\n" /* eq 4 - all done */
42 +            "1:\n"
43 +             "vzip.16    d0, d1\n" /* interleave L/R: d0 = L0 R0 L1 R1, d1 = L2 R2 L3 R3 (zipping a register with itself is UNKNOWN) */
44 +             "add        %3, #4\n" /* restore the remaining frame count (1..3) */
45 +             "vst1.32    {d0[0]}, [%0]!\n" /* frame 0 */
46 +             "cmp        %3, #1\n"
47 +             "ble        2f\n"
48 +             "vst1.32    {d0[1]}, [%0]!\n" /* frame 1 */
49 +             "cmp        %3, #2\n"
50 +             "ble        2f\n"
51 +             "vst1.32    {d1[0]}, [%0]!\n" /* frame 2 */
52 +            "2:\n"
53 +            : "=&r"(buffer), "=&r"(cl), "=&r"(cr), "=&r"(samples)
54 +            : "0"(buffer), "1"(cl), "2"(cr), "3"(samples)
55 +            : "r3", "q0", "q1", "d4", "cc", "memory"
56 +        );
57 +        return SamplesToDo * numchans * sizeof(ALshort);
58 +    }
59 +#endif
60 +
61 +    for(j = 0;j < MaxChannels;j++) /* generic path: one strided pass per mapped channel */
62 +    {
63 +        ALshort *RESTRICT out;
64 +
65 +        if(offsets[j] == INVALID_OFFSET) /* channel j has no slot in the output frame */
66 +            continue;
67 +
68 +        out = buffer + offsets[j];
69 +        for(i = 0;i < SamplesToDo;i++)
70 +            out[i * numchans] = aluF2S(DryBuffer[j][i]);
71 +    }
72 +    return SamplesToDo * numchans * sizeof(ALshort);
73 +}
74 +
75  DECL_TEMPLATE(ALfloat, aluF2F)
76  DECL_TEMPLATE(ALuint, aluF2UI)
77  DECL_TEMPLATE(ALint, aluF2I)
78  DECL_TEMPLATE(ALushort, aluF2US)
79 -DECL_TEMPLATE(ALshort, aluF2S)
80 +//DECL_TEMPLATE(ALshort, aluF2S) -- disabled; NOTE(review): presumably superseded by the hand-written Write_ALshort above, confirm
81  DECL_TEMPLATE(ALubyte, aluF2UB)
82  DECL_TEMPLATE(ALbyte, aluF2B)
83