2 * Copyright (C) 2010-2014 Freescale Semiconductor, Inc. All Rights Reserved.
4 * This program is free software; you can redistribute it and/or modify
5 * it under the terms of the GNU General Public License as published by
6 * the Free Software Foundation; either version 2 of the License, or
7 * (at your option) any later version.
9 * This program is distributed in the hope that it will be useful,
10 * but WITHOUT ANY WARRANTY; without even the implied warranty of
11 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
12 * GNU General Public License for more details.
14 * You should have received a copy of the GNU General Public License along
15 * with this program; if not, write to the Free Software Foundation, Inc.,
16 * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
21 .global hdmi_dma_copy_16_neon_lut
22 .global hdmi_dma_copy_16_neon_fast
23 .global hdmi_dma_copy_24_neon_lut
24 .global hdmi_dma_copy_24_neon_fast
28 * hdmi_dma_copy_16_neon_lut
29 * Convert pcm sample to iec sample. Pcm sample is 16 bits.
30 * Frame index is between 0 and 47 inclusive. Channel count can be 1, 2, 4 or 8.
31 * Frame count should be a multiple of 4, and sample count a multiple of 8.
34 * void hdmi_dma_copy_16_neon_lut(unsigned short *src, unsigned int *dst,
35 * int samples, unsigned char *lookup_table);
39 * src Source PCM16 samples
40 * dst Dest buffer to store pcm with header
41 * samples Contains sample count (=frame_count * channel_count)
42 * lookup_table Preconstructed header table. Channels interleaved.
45 hdmi_dma_copy_16_neon_lut:
/*
 * Per-pass work visible in this routine: read eight 16-bit samples, derive
 * one parity-style bit per sample, XOR it into a header byte, and write
 * eight 32-bit words.
 *
 * Register use visible here:
 *   r0      - read pointer, post-incremented by 16 bytes per pass
 *   r1      - write pointer, post-incremented by 32 bytes per pass
 *   r12     - scratch, set to 1
 *   q0      - the eight samples just loaded
 *   d2..d5  - working registers for the per-sample bit and the header
 *   d6      - read as the bit mask
 * Presumably r0/r1 are src/dst from the prototype (AAPCS argument order)
 * - confirm.
 *
 * NOTE(review): the embedded line numbers in this listing have gaps.
 * Nothing visible here writes d5 or d6, merges q2 into q0/q1 before the
 * store, sets the flags tested by the bne, or returns to the caller.
 * Confirm all of these against the complete file.
 */
46 mov r12, #1 /* r12 = 1, seed for "vector(1)"; the broadcast into a NEON register is not visible here */
49 hdmi_dma_copy_16_neon_lut_start:
51 /* get 8 samples to q0 */
52 vld1.16 {d0, d1}, [r0]! /* load 16 bytes from [r0], then r0 += 16. TODO: aligned */
54 /* pld [r1, #(64*4)] */
57 vcnt.8 q1, q0 /* q1 (d2:d3) = number of set bits in each byte of q0 */
/* add adjacent byte counts: each byte of d2 = set bits in one 16-bit sample */
58 vpadd.i8 d2, d2, d3 /* only care about the LSB in every element */
/* keeps just the least significant bit if every byte of d6 is 1 - TODO confirm where d6 is set */
59 vand d2, d2, d6 /* clear other bits while keeping the least bit */
60 vshl.u8 d2, d2, #3 /* bit p: d2 = d2 << 3 (kept bit moves to bit 3 of each byte) */
62 /* get packet header */
/* d5 presumably holds 8 header bytes from lookup_table; its load is not visible here - confirm */
64 veor d4, d5, d2 /* d4 = d5 ^ d2 (original comment: xor bit c) */
66 /* store: (d4 << 16 | q0) << 8 */
67 vmovl.u8 q2, d4 /* zero-extend the 8 bytes of d4 to 8 halfwords in q2 (d4:d5) */
/* NOTE(review): the steps that combine q2 with q0/q1 as described above are not visible here */
71 vst1.32 {d0, d1, d2, d3}, [r1]! /* write q0 and q1 (32 bytes) to [r1], then r1 += 32 */
73 /* decrease sample count */
/* NOTE(review): no flag-setting instruction is visible before this branch - confirm */
75 bne hdmi_dma_copy_16_neon_lut_start /* loop while the Z flag is clear */
80 * hdmi_dma_copy_16_neon_fast
81 * Convert pcm sample to iec sample. Pcm sample is 16 bits.
82 * Frame index is between 48 and 191 inclusive.
83 * Channel count can be 1, 2, 4 or 8.
84 * Frame count should be a multiple of 4, and sample count a multiple of 8.
87 * void hdmi_dma_copy_16_neon_fast(unsigned short *src,
88 * unsigned int *dst, int samples);
92 * src Source PCM16 samples
93 * dst Dest buffer to store pcm with header
94 * samples Contains sample count (=frame_count * channel_count)
97 hdmi_dma_copy_16_neon_fast:
/*
 * Per-pass work visible in this routine: read eight 16-bit samples, derive
 * a header byte per sample from bit counts only (no table is read), and
 * write eight 32-bit words.
 *
 * Register use visible here:
 *   r0      - read pointer, post-incremented by 16 bytes per pass
 *   r1      - write pointer, post-incremented by 32 bytes per pass
 *   r12     - scratch, set to 1
 *   q0      - the eight samples just loaded
 *   d2..d5  - working registers for the per-sample bit
 *   d6      - read as the bit mask
 * Presumably r0/r1 are src/dst from the prototype (AAPCS argument order)
 * - confirm.
 *
 * NOTE(review): the embedded line numbers in this listing have gaps.
 * Nothing visible here writes d6, folds the per-byte counts into
 * per-sample counts, merges q2 into q0/q1 before the store, sets the flags
 * tested by the bne, or returns to the caller. Confirm against the
 * complete file.
 */
98 mov r12, #1 /* r12 = 1, seed for "vector(1)"; the broadcast into a NEON register is not visible here */
101 hdmi_dma_copy_16_neon_fast_start:
102 /* get 8 samples to q0 */
103 vld1.16 {d0, d1}, [r0]! /* load 16 bytes from [r0], then r0 += 16. TODO: aligned */
105 /* pld [r1, #(64*4)] */
108 vcnt.8 q1, q0 /* q1 (d2:d3) = number of set bits in each byte of q0 */
/* NOTE(review): as shown, d2 still holds per-byte counts here; a pairwise add step is presumably missing - confirm */
/* keeps just the least significant bit if every byte of d6 is 1 - TODO confirm where d6 is set */
110 vand d2, d2, d6 /* clear other bits while keeping the LSB */
111 /* finally we construct packet header */
112 vshl.u8 d4, d2, #3 /* bit p: d4 = d2 << 3 (kept bit moves to bit 3 of each byte) */
114 /* get packet header: always 0 */
116 /* store: (d4 << 16 | q0) << 8 */
117 vmovl.u8 q2, d4 /* zero-extend the 8 bytes of d4 to 8 halfwords in q2 (d4:d5) */
/* NOTE(review): the steps that combine q2 with q0/q1 as described above are not visible here */
121 vst1.32 {d0, d1, d2, d3}, [r1]! /* write q0 and q1 (32 bytes) to [r1], then r1 += 32 */
123 /* decrease sample count */
/* NOTE(review): no flag-setting instruction is visible before this branch - confirm */
125 bne hdmi_dma_copy_16_neon_fast_start /* loop while the Z flag is clear */
132 * hdmi_dma_copy_24_neon_lut
133 * Convert pcm sample to iec sample. Pcm sample is 24 bits.
134 * Frame index is between 0 and 47 inclusive. Channel count can be 1, 2, 4 or 8.
135 * Frame count should be a multiple of 4, and sample count a multiple of 8.
138 * void hdmi_dma_copy_24_neon_lut(unsigned int *src, unsigned int *dst,
139 * int samples, unsigned char *lookup_table);
143 * src Source PCM24 samples
144 * dst Dest buffer to store pcm with header
145 * samples Contains sample count (=frame_count * channel_count)
146 * lookup_table Preconstructed header table. Channels interleaved.
149 hdmi_dma_copy_24_neon_lut:
/*
 * Per-pass work visible in this routine: read eight 32-bit words, derive
 * one parity-style bit per word, XOR it into a header byte read through
 * r3, and write eight 32-bit words.
 *
 * Register use visible here:
 *   r0      - read pointer, post-incremented by 32 bytes per pass
 *   r1      - write pointer, post-incremented by 32 bytes per pass
 *   r3      - header byte pointer, post-incremented by 8 bytes per pass
 *   r12     - scratch, set to 1
 *   q0, q1  - the eight words just loaded
 *   d4..d7  - working registers for the per-sample bit and the header
 *   d8      - read as the bit mask
 * Presumably r0/r1/r3 are src/dst/lookup_table from the prototype (AAPCS
 * argument order) - confirm.
 *
 * NOTE(review): d8 is callee-saved under the AAPCS; no save/restore of it
 * is visible in this listing - confirm.
 * NOTE(review): the embedded line numbers in this listing have gaps.
 * Nothing visible here writes d8, computes the bit counts for q1 (d6 is
 * read without a visible write), merges the header into q0/q1 before the
 * store, sets the flags tested by the bne, or returns to the caller.
 * Confirm against the complete file.
 */
152 mov r12, #1 /* r12 = 1, seed for "vector(1)"; the broadcast into a NEON register is not visible here */
155 hdmi_dma_copy_24_neon_lut_start:
157 /* get 8 samples to q0 and q1 */
158 vld1.32 {d0, d1, d2, d3}, [r0]! /* load 32 bytes from [r0], then r0 += 32. TODO: aligned */
160 /* pld [r1, #(64*4)] */
163 vcnt.8 q2, q0 /* q2 (d4:d5) = number of set bits in each byte of q0 */
/* add adjacent byte counts: d4 = 8 partial sums, two per 32-bit word of q0 */
164 vpadd.i8 d4, d4, d5 /* only care about the LSB in every element */
/* low 4 bytes: full counts for the four words of q0; high 4 bytes: pairwise sums of d6 */
/* d6 presumably holds the matching partial sums for q1 - its computation is not visible here */
167 vpadd.i8 d4, d4, d6 /* d4: contains xor result and other dirty bits */
/* keeps just the least significant bit if every byte of d8 is 1 - TODO confirm where d8 is set */
168 vand d4, d4, d8 /* clear other bits while keeping the least bit */
169 vshl.u8 d4, d4, #3 /* bit p: d4 = d4 << 3 (kept bit moves to bit 3 of each byte) */
171 /* get packet header */
172 vld1.8 {d5}, [r3]!/* d5: original header; load 8 bytes from [r3], then r3 += 8 */
173 veor d5, d5, d4 /* fix bit p: d5 = d5 ^ d4 */
175 /* store: (d5 << 24 | q0) */
176 vmovl.u8 q3, d5 /* zero-extend the 8 bytes of d5 to 8 halfwords in q3 (d6:d7) */
177 vmovl.u16 q2, d6 /* zero-extend the 4 halfwords of d6 to 4 words in q2 (d4:d5) */
/* NOTE(review): widening of d7 and the shift/merge into q0/q1 described above are not visible here */
183 vst1.32 {d0, d1, d2, d3}, [r1]! /* write q0 and q1 (32 bytes) to [r1], then r1 += 32 */
185 /* decrease sample count */
/* NOTE(review): no flag-setting instruction is visible before this branch - confirm */
187 bne hdmi_dma_copy_24_neon_lut_start /* loop while the Z flag is clear */
193 * hdmi_dma_copy_24_neon_fast
194 * Convert pcm sample to iec sample. Pcm sample is 24 bits.
195 * Frame index is between 48 and 191 inclusive.
196 * Channel count can be 1, 2, 4 or 8.
197 * Frame count should be a multiple of 4, and sample count a multiple of 8.
200 * void hdmi_dma_copy_24_neon_fast(unsigned int *src,
201 * unsigned int *dst, int samples);
205 * src Source PCM24 samples
206 * dst Dest buffer to store pcm with header
207 * samples Contains sample count (=frame_count * channel_count)
210 hdmi_dma_copy_24_neon_fast:
/*
 * Per-pass work visible in this routine: read eight 32-bit words, derive a
 * header byte per word from bit counts only (no table is read), and write
 * eight 32-bit words.
 *
 * Register use visible here:
 *   r0      - read pointer, post-incremented by 32 bytes per pass
 *   r1      - write pointer, post-incremented by 32 bytes per pass
 *   r12     - scratch, set to 1
 *   q0, q1  - the eight words just loaded
 *   d4..d7  - working registers for the per-sample bit
 *   d8      - read as the bit mask
 * Presumably r0/r1 are src/dst from the prototype (AAPCS argument order)
 * - confirm.
 *
 * NOTE(review): d8 is callee-saved under the AAPCS; no save/restore of it
 * is visible in this listing - confirm.
 * NOTE(review): the embedded line numbers in this listing have gaps.
 * Nothing visible here writes d8, computes the bit counts for q1 (d6 is
 * read without a visible write), merges the header into q0/q1 before the
 * store, sets the flags tested by the bne, or returns to the caller.
 * Confirm against the complete file.
 */
213 mov r12, #1 /* r12 = 1, seed for "vector(1)"; the broadcast into a NEON register is not visible here */
216 hdmi_dma_copy_24_neon_fast_start:
217 /* get 8 samples to q0 and q1 */
218 vld1.32 {d0, d1, d2, d3}, [r0]! /* load 32 bytes from [r0], then r0 += 32. TODO: aligned */
220 /* pld [r1, #(64*4)] */
223 vcnt.8 q2, q0 /* q2 (d4:d5) = number of set bits in each byte of q0 */
/* add adjacent byte counts: d4 = 8 partial sums, two per 32-bit word of q0 */
224 vpadd.i8 d4, d4, d5 /* only care about the LSB in every element */
/* low 4 bytes: full counts for the four words of q0; high 4 bytes: pairwise sums of d6 */
/* d6 presumably holds the matching partial sums for q1 - its computation is not visible here */
227 vpadd.i8 d4, d4, d6 /* d4: contains xor result and other dirty bits */
/* keeps just the least significant bit if every byte of d8 is 1 - TODO confirm where d8 is set */
228 vand d4, d4, d8 /* clear other bits while keeping the least bit */
229 vshl.u8 d4, d4, #3 /* bit p: d4 = d4 << 3 (kept bit moves to bit 3 of each byte) */
231 /* store: (d4 << 24 | q0) */
232 vmovl.u8 q3, d4 /* zero-extend the 8 bytes of d4 to 8 halfwords in q3 (d6:d7) */
233 vmovl.u16 q2, d6 /* zero-extend the 4 halfwords of d6 to 4 words in q2 (d4:d5) */
/* NOTE(review): widening of d7 and the shift/merge into q0/q1 described above are not visible here */
239 vst1.32 {d0, d1, d2, d3}, [r1]! /* write q0 and q1 (32 bytes) to [r1], then r1 += 32 */
241 /* decrease sample count */
/* NOTE(review): no flag-setting instruction is visible before this branch - confirm */
243 bne hdmi_dma_copy_24_neon_fast_start /* loop while the Z flag is clear */