From ca15c8ecd588dda4377d18d6d27bc1e87b4177cb Mon Sep 17 00:00:00 2001 From: Vineet Gupta Date: Fri, 18 Jan 2013 15:12:17 +0530 Subject: [PATCH] ARC: Checksum/byteorder/swab routines TBD: do_csum still needs to be written in asm Signed-off-by: Vineet Gupta Acked-by: Arnd Bergmann --- arch/arc/include/asm/byteorder.h | 18 ++++++ arch/arc/include/asm/checksum.h | 101 +++++++++++++++++++++++++++++++ arch/arc/include/asm/swab.h | 98 ++++++++++++++++++++++++++++++ 3 files changed, 217 insertions(+) create mode 100644 arch/arc/include/asm/byteorder.h create mode 100644 arch/arc/include/asm/checksum.h create mode 100644 arch/arc/include/asm/swab.h diff --git a/arch/arc/include/asm/byteorder.h b/arch/arc/include/asm/byteorder.h new file mode 100644 index 000000000000..9da71d415c38 --- /dev/null +++ b/arch/arc/include/asm/byteorder.h @@ -0,0 +1,18 @@ +/* + * Copyright (C) 2004, 2007-2010, 2011-2012 Synopsys, Inc. (www.synopsys.com) + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + */ + +#ifndef __ASM_ARC_BYTEORDER_H +#define __ASM_ARC_BYTEORDER_H + +#ifdef CONFIG_CPU_BIG_ENDIAN +#include +#else +#include +#endif + +#endif /* ASM_ARC_BYTEORDER_H */ diff --git a/arch/arc/include/asm/checksum.h b/arch/arc/include/asm/checksum.h new file mode 100644 index 000000000000..10957298b7a3 --- /dev/null +++ b/arch/arc/include/asm/checksum.h @@ -0,0 +1,101 @@ +/* + * Copyright (C) 2004, 2007-2010, 2011-2012 Synopsys, Inc. (www.synopsys.com) + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + * + * Joern Rennecke : Jan 2012 + * -Insn Scheduling improvements to csum core routines. + * = csum_fold( ) largely derived from ARM version. + * = ip_fast_cum( ) to have module scheduling + * -gcc 4.4.x broke networking. Alias analysis needed to be primed. + * worked around by adding memory clobber to ip_fast_csum( ) + * + * vineetg: May 2010 + * -Rewrote ip_fast_cscum( ) and csum_fold( ) with fast inline asm + */ + +#ifndef _ASM_ARC_CHECKSUM_H +#define _ASM_ARC_CHECKSUM_H + +/* + * Fold a partial checksum + * + * The 2 swords comprising the 32bit sum are added, any carry to 16th bit + * added back and final sword result inverted. + */ +static inline __sum16 csum_fold(__wsum s) +{ + unsigned r = s << 16 | s >> 16; /* ror */ + s = ~s; + s -= r; + return s >> 16; +} + +/* + * This is a version of ip_compute_csum() optimized for IP headers, + * which always checksum on 4 octet boundaries. + */ +static inline __sum16 +ip_fast_csum(const void *iph, unsigned int ihl) +{ + const void *ptr = iph; + unsigned int tmp, tmp2, sum; + + __asm__( + " ld.ab %0, [%3, 4] \n" + " ld.ab %2, [%3, 4] \n" + " sub %1, %4, 2 \n" + " lsr.f lp_count, %1, 1 \n" + " bcc 0f \n" + " add.f %0, %0, %2 \n" + " ld.ab %2, [%3, 4] \n" + "0: lp 1f \n" + " ld.ab %1, [%3, 4] \n" + " adc.f %0, %0, %2 \n" + " ld.ab %2, [%3, 4] \n" + " adc.f %0, %0, %1 \n" + "1: adc.f %0, %0, %2 \n" + " add.cs %0,%0,1 \n" + : "=&r"(sum), "=r"(tmp), "=&r"(tmp2), "+&r" (ptr) + : "r"(ihl) + : "cc", "lp_count", "memory"); + + return csum_fold(sum); +} + +/* + * TCP pseudo Header is 12 bytes: + * SA [4], DA [4], zeroes [1], Proto[1], TCP Seg(hdr+data) Len [2] + */ +static inline __wsum +csum_tcpudp_nofold(__be32 saddr, __be32 daddr, unsigned short len, + unsigned short proto, __wsum sum) +{ + __asm__ __volatile__( + " add.f %0, %0, %1 \n" + " adc.f %0, %0, %2 \n" + " adc.f %0, %0, %3 \n" + " adc.f %0, %0, %4 \n" + " adc %0, %0, 0 \n" + : "+&r"(sum) + : "r"(saddr), "r"(daddr), +#ifdef CONFIG_CPU_BIG_ENDIAN + "r"(len), +#else + "r"(len << 8), +#endif + "r"(htons(proto)) + : "cc"); + + return sum; +} + +#define csum_fold csum_fold +#define ip_fast_csum ip_fast_csum +#define csum_tcpudp_nofold csum_tcpudp_nofold + +#include + +#endif /* _ASM_ARC_CHECKSUM_H */ diff --git a/arch/arc/include/asm/swab.h b/arch/arc/include/asm/swab.h new file mode 100644 index 000000000000..095599a73195 --- /dev/null +++ b/arch/arc/include/asm/swab.h @@ -0,0 +1,98 @@ +/* + * Copyright (C) 2004, 2007-2010, 2011-2012 Synopsys, Inc. (www.synopsys.com) + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + * + * vineetg: May 2011 + * -Support single cycle endian-swap insn in ARC700 4.10 + * + * vineetg: June 2009 + * -Better htonl implementation (5 instead of 9 ALU instructions) + * -Hardware assisted single cycle bswap (Use Case of ARC custom instrn) + */ + +#ifndef __ASM_ARC_SWAB_H +#define __ASM_ARC_SWAB_H + +#include + +/* Native single cycle endian swap insn */ +#ifdef CONFIG_ARC_HAS_SWAPE + +#define __arch_swab32(x) \ +({ \ + unsigned int tmp = x; \ + __asm__( \ + " swape %0, %1 \n" \ + : "=r" (tmp) \ + : "r" (tmp)); \ + tmp; \ +}) + +#else + +/* Several ways of Endian-Swap Emulation for ARC + * 0: kernel generic + * 1: ARC optimised "C" + * 2: ARC Custom instruction + */ +#define ARC_BSWAP_TYPE 1 + +#if (ARC_BSWAP_TYPE == 1) /******* Software only ********/ + +/* The kernel default implementation of htonl is + * return x<<24 | x>>24 | + * (x & (__u32)0x0000ff00UL)<<8 | (x & (__u32)0x00ff0000UL)>>8; + * + * This generates 9 instructions on ARC (excluding the ld/st) + * + * 8051fd8c: ld r3,[r7,20] ; Mem op : Get the value to be swapped + * 8051fd98: asl r5,r3,24 ; get 3rd Byte + * 8051fd9c: lsr r2,r3,24 ; get 0th Byte + * 8051fda0: and r4,r3,0xff00 + * 8051fda8: asl r4,r4,8 ; get 1st Byte + * 8051fdac: and r3,r3,0x00ff0000 + * 8051fdb4: or r2,r2,r5 ; combine 0th and 3rd Bytes + * 8051fdb8: lsr r3,r3,8 ; 2nd Byte at correct place in Dst Reg + * 8051fdbc: or r2,r2,r4 ; combine 0,3 Bytes with 1st Byte + * 8051fdc0: or r2,r2,r3 ; combine 0,3,1 Bytes with 2nd Byte + * 8051fdc4: st r2,[r1,20] ; Mem op : save result back to mem + * + * Joern suggested a better "C" algorithm which is great since + * (1) It is portable to any architecure + * (2) At the same time it takes advantage of ARC ISA (rotate intrns) + */ + +#define __arch_swab32(x) \ +({ unsigned long __in = (x), __tmp; \ + __tmp = __in << 8 | __in >> 24; /* ror tmp,in,24 */ \ + __in = __in << 24 | __in >> 8; /* ror in,in,8 */ \ + __tmp ^= __in; \ + __tmp &= 0xff00ff; \ + __tmp ^ __in; \ +}) + +#elif (ARC_BSWAP_TYPE == 2) /* Custom single cycle bwap instruction */ + +#define __arch_swab32(x) \ +({ \ + unsigned int tmp = x; \ + __asm__( \ + " .extInstruction bswap, 7, 0x00, SUFFIX_NONE, SYNTAX_2OP \n"\ + " bswap %0, %1 \n"\ + : "=r" (tmp) \ + : "r" (tmp)); \ + tmp; \ +}) + +#endif /* ARC_BSWAP_TYPE=zzz */ + +#endif /* CONFIG_ARC_HAS_SWAPE */ + +#if !defined(__STRICT_ANSI__) || defined(__KERNEL__) +#define __SWAB_64_THRU_32__ +#endif + +#endif -- 2.39.5