From: Ma, Ling Date: Mon, 23 Aug 2010 21:11:12 +0000 (-0700) Subject: x86, mem: Don't implement forward memmove() as memcpy() X-Git-Tag: v2.6.37-rc1~204^2~2 X-Git-Url: https://git.karo-electronics.de/?a=commitdiff_plain;h=fdf4289679fd41d76553ce224750e9737cd80eea;p=karo-tx-linux.git x86, mem: Don't implement forward memmove() as memcpy() memmove() allows the source and destination regions to overlap, but memcpy() has no such requirement. Therefore, explicitly implement memmove() in both the forward and backward directions, to give us the ability to optimize memcpy(). Signed-off-by: Ma Ling LKML-Reference: Signed-off-by: H. Peter Anvin --- diff --git a/arch/x86/lib/memcpy_32.c b/arch/x86/lib/memcpy_32.c index 5415a9d06f53..be424dfcf365 100644 --- a/arch/x86/lib/memcpy_32.c +++ b/arch/x86/lib/memcpy_32.c @@ -25,19 +25,35 @@ void *memmove(void *dest, const void *src, size_t n) int d0, d1, d2; if (dest < src) { - memcpy(dest, src, n); + if ((dest + n) < src) + return memcpy(dest, src, n); + else + __asm__ __volatile__( + "rep\n\t" + "movsb\n\t" + : "=&c" (d0), "=&S" (d1), "=&D" (d2) + :"0" (n), + "1" (src), + "2" (dest) + :"memory"); + } else { - __asm__ __volatile__( - "std\n\t" - "rep\n\t" - "movsb\n\t" - "cld" - : "=&c" (d0), "=&S" (d1), "=&D" (d2) - :"0" (n), - "1" (n-1+src), - "2" (n-1+dest) - :"memory"); + + if((src + count) < dest) + return memcpy(dest, src, count); + else + __asm__ __volatile__( + "std\n\t" + "rep\n\t" + "movsb\n\t" + "cld" + : "=&c" (d0), "=&S" (d1), "=&D" (d2) + :"0" (n), + "1" (n-1+src), + "2" (n-1+dest) + :"memory"); } + return dest; } EXPORT_SYMBOL(memmove); diff --git a/arch/x86/lib/memmove_64.c b/arch/x86/lib/memmove_64.c index 0a33909bf122..ecacc4b3d9e5 100644 --- a/arch/x86/lib/memmove_64.c +++ b/arch/x86/lib/memmove_64.c @@ -8,13 +8,49 @@ #undef memmove void *memmove(void *dest, const void *src, size_t count) { + unsigned long d0, d1, d2, d3; if (dest < src) { - return memcpy(dest, src, count); + if ((dest + count) < src) + 
return memcpy(dest, src, count); + else + __asm__ __volatile__( + "movq %0, %3\n\t" + "shr $3, %0\n\t" + "andq $7, %3\n\t" + "rep\n\t" + "movsq\n\t" + "movq %3, %0\n\t" + "rep\n\t" + "movsb" + : "=&c" (d0), "=&S" (d1), "=&D" (d2), "=r" (d3) + :"0" (count), + "1" (src), + "2" (dest) + :"memory"); } else { - char *p = dest + count; - const char *s = src + count; - while (count--) - *--p = *--s; + if((src + count) < dest) + return memcpy(dest, src, count); + else + __asm__ __volatile__( + "movq %0, %3\n\t" + "lea -8(%1, %0), %1\n\t" + "lea -8(%2, %0), %2\n\t" + "shr $3, %0\n\t" + "andq $7, %3\n\t" + "std\n\t" + "rep\n\t" + "movsq\n\t" + "lea 7(%1), %1\n\t" + "lea 7(%2), %2\n\t" + "movq %3, %0\n\t" + "rep\n\t" + "movsb\n\t" + "cld" + : "=&c" (d0), "=&S" (d1), "=&D" (d2), "=r" (d3) + :"0" (count), + "1" (src), + "2" (dest) + :"memory"); } return dest; }