From: Sandor Yu Date: Fri, 27 Apr 2012 09:18:25 +0000 (+0800) Subject: ENGR00181188 Added miss file hdmi_cpm.S for patch ENGR00181130 X-Git-Tag: v3.0.35-fsl~1105 X-Git-Url: https://git.karo-electronics.de/?a=commitdiff_plain;h=e53967e3dfe56d70011c683c932287afe07834d7;p=karo-tx-linux.git ENGR00181188 Added miss file hdmi_cpm.S for patch ENGR00181130 Added miss file hdmi_cpm.S for patch ENGR00181130 Signed-off-by: Sandor Yu --- diff --git a/sound/soc/imx/hdmi_pcm.S b/sound/soc/imx/hdmi_pcm.S new file mode 100644 index 000000000000..6e97e1a41170 --- /dev/null +++ b/sound/soc/imx/hdmi_pcm.S @@ -0,0 +1,238 @@ +/* +* Copyright (C) 2010-2012 Freescale Semiconductor, Inc. All Rights Reserved. +* +* This program is free software; you can redistribute it and/or modify +* it under the terms of the GNU General Public License as published by +* the Free Software Foundation; either version 2 of the License, or +* (at your option) any later version. + +* This program is distributed in the hope that it will be useful, +* but WITHOUT ANY WARRANTY; without even the implied warranty of +* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +* GNU General Public License for more details. + +* You should have received a copy of the GNU General Public License along +* with this program; if not, write to the Free Software Foundation, Inc., +* 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. +*/ + +.section .text + +.global hdmi_dma_copy_16_neon_lut +.global hdmi_dma_copy_16_neon_fast +.global hdmi_dma_copy_24_neon_lut +.global hdmi_dma_copy_24_neon_fast + + +/* hdmi_dma_copy_16_neon_lut + * Convert pcm sample to iec sample. Pcm sample is 16 bits. + * Frame index is between 0 and 47 inclusively. Channel count can be 1, 2, 4 or 8. + * Frame count should be multipliable by 4. Sample count should be multipliable by 8. + * + * C Prototype + * void hdmi_dma_copy_16_neon_lut(unsigned short *src, unsigned int *dst, + * int samples, unsigned char *lookup_table); + * Return value + * None + * Parameters + * src Source PCM16 samples + * dst Dest buffer to store pcm with header + * samples Contains sample count (=frame_count * channel_count) + * lookup_table Preconstructed header table. Channels interleaved. + */ + +hdmi_dma_copy_16_neon_lut: + mov r12, #1 /* construct vector(1) */ + vdup.8 d6, r12 + +hdmi_dma_copy_16_neon_lut_start: + + /* get 8 samples to q0 */ + vld1.16 {d0, d1}, [r0]! /* TODO: aligned */ + +/* pld [r1, #(64*4)] */ + + /* xor every bit */ + vcnt.8 q1, q0 /* count of 1s */ + vpadd.i8 d2, d2, d3 /* only care about the least bit in every element */ + vand d2, d2, d6 /* clear other bits while keep the least bit */ + vshl.u8 d2, d2, #3 /* bit p: d2 = d2 << 3 */ + + /* get packet header */ + vld1.8 {d5}, [r3]! + veor d4, d5, d2 /* xor bit c */ + + /* store: (d4 << 16 | q0) << 8 */ + vmovl.u8 q2, d4 /* expand from char to short */ + vzip.16 q0, q2 + vshl.u32 q0, q0, #8 + vshl.u32 q1, q2, #8 + vst1.32 {d0, d1, d2, d3}, [r1]! + + /* decrease sample count */ + subs r2, r2, #8 + bne hdmi_dma_copy_16_neon_lut_start + + mov pc, lr + +/* hdmi_dma_copy_16_neon_fast + * Convert pcm sample to iec sample. Pcm sample is 16 bits. + * Frame index is between 48 and 191 inclusively. Channel count can be 1, 2, 4 or 8. + * Frame count should be multipliable by 4. Sample count should be multipliable by 8. + * + * C Prototype + * void hdmi_dma_copy_16_neon_fast(unsigned short *src, unsigned int *dst, int samples); + * Return value + * None + * Parameters + * src Source PCM16 samples + * dst Dest buffer to store pcm with header + * samples Contains sample count (=frame_count * channel_count) + */ + +hdmi_dma_copy_16_neon_fast: + mov r12, #1 /* construct vector(1) */ + vdup.8 d6, r12 + +hdmi_dma_copy_16_neon_fast_start: + /* get 8 samples to q0 */ + vld1.16 {d0, d1}, [r0]! /* TODO: aligned */ + +/* pld [r1, #(64*4)] */ + + /* xor every bit */ + vcnt.8 q1, q0 /* count of 1s */ + vpadd.i8 d2, d2, d3 + vand d2, d2, d6 /* clear other bits while keep the least bit */ + vshl.u8 d4, d2, #3 /* bit p: d2 = d2 << 3 */ /* finally we construct packet header */ + + /* get packet header: always 0 */ + + /* store: (d4 << 16 | q0) << 8 */ + vmovl.u8 q2, d4 /* expand from char to short */ + vzip.16 q0, q2 + vshl.u32 q0, q0, #8 + vshl.u32 q1, q2, #8 + vst1.32 {d0, d1, d2, d3}, [r1]! + + /* decrease sample count */ + subs r2, r2, #8 + bne hdmi_dma_copy_16_neon_fast_start + + mov pc, lr + + + +/* hdmi_dma_copy_24_neon_lut + * Convert pcm sample to iec sample. Pcm sample is 24 bits. + * Frame index is between 0 and 47 inclusively. Channel count can be 1, 2, 4 or 8. + * Frame count should be multipliable by 4. Sample count should be multipliable by 8. + * + * C Prototype + * void hdmi_dma_copy_24_neon_lut(unsigned int *src, unsigned int *dst, + * int samples, unsigned char *lookup_table); + * Return value + * None + * Parameters + * src Source PCM24 samples + * dst Dest buffer to store pcm with header + * samples Contains sample count (=frame_count * channel_count) + * lookup_table Preconstructed header table. Channels interleaved. + */ + +hdmi_dma_copy_24_neon_lut: + vpush {d8} + + mov r12, #1 /* construct vector(1) */ + vdup.8 d8, r12 + +hdmi_dma_copy_24_neon_lut_start: + + /* get 8 samples to q0 and q1 */ + vld1.32 {d0, d1, d2, d3}, [r0]! /* TODO: aligned */ + +/* pld [r1, #(64*4)] */ + + /* xor every bit */ + vcnt.8 q2, q0 /* count of 1s */ + vpadd.i8 d4, d4, d5 /* only care about the least bit in every element */ + vcnt.8 q3, q1 + vpadd.i8 d6, d6, d7 + vpadd.i8 d4, d4, d6 /* d4: contains xor result and other dirty bits */ + vand d4, d4, d8 /* clear other bits while keep the least bit */ + vshl.u8 d4, d4, #3 /* bit p: d4 = d4 << 3 */ + + /* get packet header */ + vld1.8 {d5}, [r3]! /* d5: original header */ + veor d5, d5, d4 /* fix bit p */ + + /* store: (d5 << 24 | q0) */ + vmovl.u8 q3, d5 /* expand from char to short */ + vmovl.u16 q2, d6 /* expand from short to int */ + vmovl.u16 q3, d7 + vshl.u32 q2, q2, #24 + vshl.u32 q3, q3, #24 + vorr q0, q0, q2 + vorr q1, q1, q3 + vst1.32 {d0, d1, d2, d3}, [r1]! + + /* decrease sample count */ + subs r2, r2, #8 + bne hdmi_dma_copy_24_neon_lut_start + + vpop {d8} + mov pc, lr + +/* hdmi_dma_copy_24_neon_fast + * Convert pcm sample to iec sample. Pcm sample is 24 bits. + * Frame index is between 48 and 191 inclusively. Channel count can be 1, 2, 4 or 8. + * Frame count should be multipliable by 4. Sample count should be multipliable by 8. + * + * C Prototype + * void hdmi_dma_copy_24_neon_fast(unsigned int *src, unsigned int *dst, int samples); + * Return value + * None + * Parameters + * src Source PCM24 samples + * dst Dest buffer to store pcm with header + * samples Contains sample count (=frame_count * channel_count) + */ + +hdmi_dma_copy_24_neon_fast: + vpush {d8} + + mov r12, #1 /* construct vector(1) */ + vdup.8 d8, r12 + +hdmi_dma_copy_24_neon_fast_start: + /* get 8 samples to q0 and q1 */ + vld1.32 {d0, d1, d2, d3}, [r0]! /* TODO: aligned */ + +/* pld [r1, #(64*4)] */ + + /* xor every bit */ + vcnt.8 q2, q0 /* count of 1s */ + vpadd.i8 d4, d4, d5 /* only care about the least bit in every element */ + vcnt.8 q3, q1 + vpadd.i8 d6, d6, d7 + vpadd.i8 d4, d4, d6 /* d4: contains xor result and other dirty bits */ + vand d4, d4, d8 /* clear other bits while keep the least bit */ + vshl.u8 d4, d4, #3 /* bit p: d4 = d4 << 3 */ + + /* store: (d4 << 24 | q0) */ + vmovl.u8 q3, d4 /* expand from char to short */ + vmovl.u16 q2, d6 /* expand from short to int */ + vmovl.u16 q3, d7 + vshl.u32 q2, q2, #24 + vshl.u32 q3, q3, #24 + vorr q0, q0, q2 + vorr q1, q1, q3 + vst1.32 {d0, d1, d2, d3}, [r1]! + + /* decrease sample count */ + subs r2, r2, #8 + bne hdmi_dma_copy_24_neon_fast_start + + vpop {d8} + mov pc, lr +