git.karo-electronics.de Git - linux-beck.git/commitdiff
Merge branch 'next' of git://git.monstr.eu/linux-2.6-microblaze
author    Linus Torvalds <torvalds@linux-foundation.org>
          Mon, 31 Oct 2011 23:13:44 +0000 (16:13 -0700)
committer Linus Torvalds <torvalds@linux-foundation.org>
          Mon, 31 Oct 2011 23:13:44 +0000 (16:13 -0700)
* 'next' of git://git.monstr.eu/linux-2.6-microblaze:
  microblaze: Remove __ARCH_WANT_INTERRUPTS_ON_CTXSW usage
  microblaze: Use delay slot in __strnlen_user, __strncpy_user
  microblaze: Remove NET_IP_ALIGN from system.h
  microblaze: Add __ucmpdi2() helper function
  microblaze: Raise SIGFPE/FPE_INTDIV for div by zero
  microblaze: Switch ELF_ARCH code to 189
  microblaze: Added DMA sync operations
  microblaze: Moved __dma_sync() to dma-mapping.h
  microblaze: Add PVR for Microblaze v8.20.a
  microblaze: Fix access_ok macro
  microblaze: Add loop unrolling for PAGE in copy_tofrom_user
  microblaze: Simplify logic for unaligned byte copying
  microblaze: Change label names - copy_tofrom_user
  microblaze: Separate fixup section definition
  microblaze: Change label name in copy_tofrom_user
  microblaze: Clear top bit from cnt32_to_63

13 files changed:
arch/microblaze/include/asm/dma-mapping.h
arch/microblaze/include/asm/elf.h
arch/microblaze/include/asm/system.h
arch/microblaze/include/asm/uaccess.h
arch/microblaze/kernel/cpu/cpuinfo.c
arch/microblaze/kernel/dma.c
arch/microblaze/kernel/exceptions.c
arch/microblaze/kernel/process.c
arch/microblaze/kernel/ptrace.c
arch/microblaze/kernel/timer.c
arch/microblaze/lib/Makefile
arch/microblaze/lib/uaccess_old.S
arch/microblaze/lib/ucmpdi2.c [new file with mode: 0644]

diff --git a/arch/microblaze/include/asm/dma-mapping.h b/arch/microblaze/include/asm/dma-mapping.h
index a569514cf19f49b73962432c98c0da9880e3c216..3a3e5b8868544718c7b9b9c0af796f2aed4a6625 100644
 #include <linux/dma-attrs.h>
 #include <asm/io.h>
 #include <asm-generic/dma-coherent.h>
+#include <asm/cacheflush.h>
 
 #define DMA_ERROR_CODE         (~(dma_addr_t)0x0)
 
 #define __dma_alloc_coherent(dev, gfp, size, handle)   NULL
 #define __dma_free_coherent(size, addr)                ((void)0)
-#define __dma_sync(addr, size, rw)             ((void)0)
 
 static inline unsigned long device_to_mask(struct device *dev)
 {
@@ -95,6 +95,22 @@ static inline int dma_set_mask(struct device *dev, u64 dma_mask)
 
 #include <asm-generic/dma-mapping-common.h>
 
+static inline void __dma_sync(unsigned long paddr,
+                             size_t size, enum dma_data_direction direction)
+{
+       switch (direction) {
+       case DMA_TO_DEVICE:
+       case DMA_BIDIRECTIONAL:
+               flush_dcache_range(paddr, paddr + size);
+               break;
+       case DMA_FROM_DEVICE:
+               invalidate_dcache_range(paddr, paddr + size);
+               break;
+       default:
+               BUG();
+       }
+}
+
 static inline int dma_mapping_error(struct device *dev, dma_addr_t dma_addr)
 {
        struct dma_map_ops *ops = get_dma_ops(dev);
@@ -135,7 +151,7 @@ static inline void dma_cache_sync(struct device *dev, void *vaddr, size_t size,
                enum dma_data_direction direction)
 {
        BUG_ON(direction == DMA_NONE);
-       __dma_sync(vaddr, size, (int)direction);
+       __dma_sync(virt_to_phys(vaddr), size, (int)direction);
 }
 
 #endif /* _ASM_MICROBLAZE_DMA_MAPPING_H */
diff --git a/arch/microblaze/include/asm/elf.h b/arch/microblaze/include/asm/elf.h
index 098dfdde4b061e067c56ad2a5ce0016a98fa8ff1..834849f59ae8a5474e80d2e13cd4a3f0a9e2976f 100644
  * I've snaffled the value from the microblaze binutils source code
  * /binutils/microblaze/include/elf/microblaze.h
  */
-#define EM_XILINX_MICROBLAZE   0xbaab
-#define ELF_ARCH               EM_XILINX_MICROBLAZE
+#define EM_MICROBLAZE          189
+#define EM_MICROBLAZE_OLD      0xbaab
+#define ELF_ARCH               EM_MICROBLAZE
 
 /*
  * This is used to ensure we don't load something for the wrong architecture.
  */
-#define elf_check_arch(x)      ((x)->e_machine == EM_XILINX_MICROBLAZE)
+#define elf_check_arch(x)      ((x)->e_machine == EM_MICROBLAZE \
+                                || (x)->e_machine == EM_MICROBLAZE_OLD)
 
 /*
  * These are used to set parameters in the core dumps.
diff --git a/arch/microblaze/include/asm/system.h b/arch/microblaze/include/asm/system.h
index e6a2284571dcfc46646582d4a5327e1797887011..5a433cbaafb3296f1c9df2239e9b4ed513b95418 100644
@@ -17,8 +17,6 @@
 #include <asm-generic/cmpxchg.h>
 #include <asm-generic/cmpxchg-local.h>
 
-#define __ARCH_WANT_INTERRUPTS_ON_CTXSW
-
 struct task_struct;
 struct thread_info;
 
@@ -96,11 +94,4 @@ extern struct dentry *of_debugfs_root;
 
 #define arch_align_stack(x) (x)
 
-/*
- * MicroBlaze doesn't handle unaligned accesses in hardware.
- *
- * Based on this we force the IP header alignment in network drivers.
- */
-#define NET_IP_ALIGN   2
-
 #endif /* _ASM_MICROBLAZE_SYSTEM_H */
diff --git a/arch/microblaze/include/asm/uaccess.h b/arch/microblaze/include/asm/uaccess.h
index 5bb95a11880d27f3deb10d86ab7c837a3aad7b78..072b0077abf95a4ae9494c47900913cfd79ccd71 100644
@@ -95,7 +95,7 @@ static inline int ___range_ok(unsigned long addr, unsigned long size)
  *  - "addr", "addr + size" and "size" are all below the limit
  */
 #define access_ok(type, addr, size) \
-       (get_fs().seg > (((unsigned long)(addr)) | \
+       (get_fs().seg >= (((unsigned long)(addr)) | \
                (size) | ((unsigned long)(addr) + (size))))
 
 /* || printk("access_ok failed for %s at 0x%08lx (size %d), seg 0x%08x\n",
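
[Editorial note, not part of the commit] The operator change from ">" to ">=" only matters at the boundary: with ">", an access whose (addr | size | addr+size) lands exactly on the segment limit is rejected even though no byte lies beyond it. A minimal user-space sketch of the arithmetic, with hypothetical values chosen so the OR equals the limit:

/* editorial sketch -- hypothetical values, not taken from the commit */
#include <stdio.h>

int main(void)
{
	unsigned long seg  = 0xffffffffUL;  /* e.g. a KERNEL_DS-style "whole address space" limit */
	unsigned long addr = 0xfffffffcUL;  /* hypothetical buffer near the top of that space     */
	unsigned long size = 3;             /* addr | size | (addr + size) == seg                 */
	unsigned long m    = addr | size | (addr + size);

	printf("old check (> ): %s\n", seg >  m ? "ok" : "rejected"); /* rejected */
	printf("new check (>=): %s\n", seg >= m ? "ok" : "rejected"); /* ok       */
	return 0;
}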
diff --git a/arch/microblaze/kernel/cpu/cpuinfo.c b/arch/microblaze/kernel/cpu/cpuinfo.c
index 44394d80a6836c4118254eab4ce24d7b9c2b91ab..54194b28574af687f32734a0c0a53452bcc5caba 100644
@@ -34,6 +34,7 @@ const struct cpu_ver_key cpu_ver_lookup[] = {
        {"8.00.a", 0x12},
        {"8.00.b", 0x13},
        {"8.10.a", 0x14},
+       {"8.20.a", 0x15},
        {NULL, 0},
 };
 
diff --git a/arch/microblaze/kernel/dma.c b/arch/microblaze/kernel/dma.c
index 393e6b2db688545aac0a1784f25ad51d73d89567..dc6416d265d616e1ac65382b5174c81c1b22d723 100644
@@ -11,7 +11,6 @@
 #include <linux/gfp.h>
 #include <linux/dma-debug.h>
 #include <asm/bug.h>
-#include <asm/cacheflush.h>
 
 /*
  * Generic direct DMA implementation
  * can set archdata.dma_data to an unsigned long holding the offset. By
  * default the offset is PCI_DRAM_OFFSET.
  */
-static inline void __dma_sync_page(unsigned long paddr, unsigned long offset,
-                               size_t size, enum dma_data_direction direction)
-{
-       switch (direction) {
-       case DMA_TO_DEVICE:
-       case DMA_BIDIRECTIONAL:
-               flush_dcache_range(paddr + offset, paddr + offset + size);
-               break;
-       case DMA_FROM_DEVICE:
-               invalidate_dcache_range(paddr + offset, paddr + offset + size);
-               break;
-       default:
-               BUG();
-       }
-}
 
 static unsigned long get_dma_direct_offset(struct device *dev)
 {
@@ -91,7 +75,7 @@ static int dma_direct_map_sg(struct device *dev, struct scatterlist *sgl,
        /* FIXME this part of code is untested */
        for_each_sg(sgl, sg, nents, i) {
                sg->dma_address = sg_phys(sg) + get_dma_direct_offset(dev);
-               __dma_sync_page(page_to_phys(sg_page(sg)), sg->offset,
+               __dma_sync(page_to_phys(sg_page(sg)) + sg->offset,
                                                        sg->length, direction);
        }
 
@@ -116,7 +100,7 @@ static inline dma_addr_t dma_direct_map_page(struct device *dev,
                                             enum dma_data_direction direction,
                                             struct dma_attrs *attrs)
 {
-       __dma_sync_page(page_to_phys(page), offset, size, direction);
+       __dma_sync(page_to_phys(page) + offset, size, direction);
        return page_to_phys(page) + offset + get_dma_direct_offset(dev);
 }
 
@@ -131,7 +115,63 @@ static inline void dma_direct_unmap_page(struct device *dev,
  * phys_to_virt is here because in __dma_sync_page is __virt_to_phys and
  * dma_address is physical address
  */
-       __dma_sync_page(dma_address, 0 , size, direction);
+       __dma_sync(dma_address, size, direction);
+}
+
+static inline void
+dma_direct_sync_single_for_cpu(struct device *dev,
+                              dma_addr_t dma_handle, size_t size,
+                              enum dma_data_direction direction)
+{
+       /*
+        * It's pointless to flush the cache as the memory segment
+        * is given to the CPU
+        */
+
+       if (direction == DMA_FROM_DEVICE)
+               __dma_sync(dma_handle, size, direction);
+}
+
+static inline void
+dma_direct_sync_single_for_device(struct device *dev,
+                                 dma_addr_t dma_handle, size_t size,
+                                 enum dma_data_direction direction)
+{
+       /*
+        * It's pointless to invalidate the cache if the device isn't
+        * supposed to write to the relevant region
+        */
+
+       if (direction == DMA_TO_DEVICE)
+               __dma_sync(dma_handle, size, direction);
+}
+
+static inline void
+dma_direct_sync_sg_for_cpu(struct device *dev,
+                          struct scatterlist *sgl, int nents,
+                          enum dma_data_direction direction)
+{
+       struct scatterlist *sg;
+       int i;
+
+       /* FIXME this part of code is untested */
+       if (direction == DMA_FROM_DEVICE)
+               for_each_sg(sgl, sg, nents, i)
+                       __dma_sync(sg->dma_address, sg->length, direction);
+}
+
+static inline void
+dma_direct_sync_sg_for_device(struct device *dev,
+                             struct scatterlist *sgl, int nents,
+                             enum dma_data_direction direction)
+{
+       struct scatterlist *sg;
+       int i;
+
+       /* FIXME this part of code is untested */
+       if (direction == DMA_TO_DEVICE)
+               for_each_sg(sgl, sg, nents, i)
+                       __dma_sync(sg->dma_address, sg->length, direction);
 }
 
 struct dma_map_ops dma_direct_ops = {
@@ -142,6 +182,10 @@ struct dma_map_ops dma_direct_ops = {
        .dma_supported  = dma_direct_dma_supported,
        .map_page       = dma_direct_map_page,
        .unmap_page     = dma_direct_unmap_page,
+       .sync_single_for_cpu            = dma_direct_sync_single_for_cpu,
+       .sync_single_for_device         = dma_direct_sync_single_for_device,
+       .sync_sg_for_cpu                = dma_direct_sync_sg_for_cpu,
+       .sync_sg_for_device             = dma_direct_sync_sg_for_device,
 };
 EXPORT_SYMBOL(dma_direct_ops);
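
[Editorial note, not part of the commit] With the four sync callbacks wired into dma_direct_ops, the generic streaming-DMA API now reaches the new __dma_sync() on MicroBlaze. A hypothetical driver fragment (device, buffer and function names are illustrative only) showing which calls end up flushing or invalidating the data cache:

/* editorial sketch -- hypothetical driver code, not part of this commit */
#include <linux/dma-mapping.h>

static void example_receive(struct device *dev, void *buf, size_t len)
{
	dma_addr_t handle;

	/* DMA_FROM_DEVICE: __dma_sync() invalidates the cached range */
	handle = dma_map_single(dev, buf, len, DMA_FROM_DEVICE);
	if (dma_mapping_error(dev, handle))
		return;

	/* ... start the transfer and wait for it to complete ... */

	/* give the buffer back to the CPU: sync_single_for_cpu invalidates
	 * again so the CPU does not read stale cache lines */
	dma_sync_single_for_cpu(dev, handle, len, DMA_FROM_DEVICE);

	dma_unmap_single(dev, handle, len, DMA_FROM_DEVICE);
}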
 
diff --git a/arch/microblaze/kernel/exceptions.c b/arch/microblaze/kernel/exceptions.c
index 66fad23012216e86cb2f91bdfff053f0e080f208..6348dc82f4289f48b36843ad4e94b81ff0e7e36e 100644
@@ -119,7 +119,7 @@ asmlinkage void full_exception(struct pt_regs *regs, unsigned int type,
        case MICROBLAZE_DIV_ZERO_EXCEPTION:
                if (user_mode(regs)) {
                        pr_debug("Divide by zero exception in user mode\n");
-                       _exception(SIGILL, regs, FPE_INTDIV, addr);
+                       _exception(SIGFPE, regs, FPE_INTDIV, addr);
                        return;
                }
                printk(KERN_WARNING "Divide by zero exception " \
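
[Editorial note, not part of the commit] The user-visible effect of this one-liner is that an integer divide by zero now raises SIGFPE with si_code FPE_INTDIV instead of SIGILL. A hypothetical user-space test (program and values are illustrative only):

/* editorial sketch -- hypothetical test program, not part of this commit */
#include <signal.h>
#include <stdio.h>
#include <stdlib.h>

static void on_fpe(int sig, siginfo_t *info, void *ctx)
{
	/* after the fix this handler runs and reports FPE_INTDIV;
	 * before it, the process died with SIGILL instead */
	printf("sig=%d si_code=%d (FPE_INTDIV=%d)\n", sig, info->si_code, FPE_INTDIV);
	exit(0);
}

int main(int argc, char **argv)
{
	struct sigaction sa = { .sa_sigaction = on_fpe, .sa_flags = SA_SIGINFO };
	volatile int zero = 0;

	sigaction(SIGFPE, &sa, NULL);
	return argc / zero;	/* triggers the hardware divide-by-zero exception */
}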
diff --git a/arch/microblaze/kernel/process.c b/arch/microblaze/kernel/process.c
index dbb812421d8a15629dd051cd931aea24c625c58f..95cc295976a73279878ddd103aeb6777c263c9a6 100644
@@ -179,6 +179,7 @@ int copy_thread(unsigned long clone_flags, unsigned long usp,
 
        ti->cpu_context.msr = (childregs->msr|MSR_VM);
        ti->cpu_context.msr &= ~MSR_UMS; /* switch_to to kernel mode */
+       ti->cpu_context.msr &= ~MSR_IE;
 #endif
        ti->cpu_context.r15 = (unsigned long)ret_from_fork - 8;
 
diff --git a/arch/microblaze/kernel/ptrace.c b/arch/microblaze/kernel/ptrace.c
index 6a8e0cc5c57d40dbfc165c5df15c86b9cece0277..043cb58f9c443e72843fa44a9ebd399442a965b5 100644
@@ -148,7 +148,7 @@ asmlinkage long do_syscall_trace_enter(struct pt_regs *regs)
                ret = -1L;
 
        if (unlikely(current->audit_context))
-               audit_syscall_entry(EM_XILINX_MICROBLAZE, regs->r12,
+               audit_syscall_entry(EM_MICROBLAZE, regs->r12,
                                    regs->r5, regs->r6,
                                    regs->r7, regs->r8);
 
diff --git a/arch/microblaze/kernel/timer.c b/arch/microblaze/kernel/timer.c
index e5550ce4e0eb783f5e6a4f29e7526c1cf3cbc6b0..af74b1113aabb32fb3849a952109ffba07eacdcf 100644
@@ -308,7 +308,8 @@ unsigned long long notrace sched_clock(void)
 {
        if (timer_initialized) {
                struct clocksource *cs = &clocksource_microblaze;
-               cycle_t cyc = cnt32_to_63(cs->read(NULL));
+
+               cycle_t cyc = cnt32_to_63(cs->read(NULL)) & LLONG_MAX;
                return clocksource_cyc2ns(cyc, cs->mult, cs->shift);
        }
        return 0;
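
[Editorial note, not part of the commit] The masking reflects that the top bit of the value returned by cnt32_to_63() is not part of the extended counter (hence the commit title "Clear top bit from cnt32_to_63"); LLONG_MAX is 0x7fffffffffffffff, so ANDing with it drops bit 63 before the value reaches clocksource_cyc2ns(). A trivial illustration with a hypothetical counter value:

/* editorial sketch -- hypothetical value, not taken from the commit */
#include <stdio.h>
#include <limits.h>

int main(void)
{
	unsigned long long raw = 0x8000000123456789ULL;  /* bit 63 set by the helper   */
	unsigned long long cyc = raw & LLONG_MAX;         /* 0x0000000123456789 remains */

	printf("raw=%#llx cyc=%#llx\n", raw, cyc);
	return 0;
}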
diff --git a/arch/microblaze/lib/Makefile b/arch/microblaze/lib/Makefile
index 10c320aa908b2325963264271c3673175feb931f..c13067b243c3925e486bef404a1ac80aba5ebc6a 100644
@@ -25,5 +25,6 @@ lib-y += lshrdi3.o
 lib-y += modsi3.o
 lib-y += muldi3.o
 lib-y += mulsi3.o
+lib-y += ucmpdi2.o
 lib-y += udivsi3.o
 lib-y += umodsi3.o
diff --git a/arch/microblaze/lib/uaccess_old.S b/arch/microblaze/lib/uaccess_old.S
index 5810cec54a7a36e356d5c48b4ad29923ee0829e4..f037266cdaf3e358676c0428e83393c16dd410cb 100644
@@ -10,6 +10,7 @@
 
 #include <linux/errno.h>
 #include <linux/linkage.h>
+#include <asm/page.h>
 
 /*
  * int __strncpy_user(char *to, char *from, int len);
@@ -33,8 +34,8 @@ __strncpy_user:
         * r3 - temp count
         * r4 - temp val
         */
+       beqid   r7,3f
        addik   r3,r7,0         /* temp_count = len */
-       beqi    r3,3f
 1:
        lbu     r4,r6,r0
        sb      r4,r5,r0
@@ -76,8 +77,8 @@ __strncpy_user:
 .type  __strnlen_user, @function
 .align 4;
 __strnlen_user:
+       beqid   r6,3f
        addik   r3,r6,0
-       beqi    r3,3f
 1:
        lbu     r4,r5,r0
        beqid   r4,2f           /* break on NUL */
@@ -102,6 +103,49 @@ __strnlen_user:
        .section        __ex_table,"a"
        .word   1b,4b
 
+/* Loop unrolling for __copy_tofrom_user */
+#define COPY(offset)   \
+1:     lwi     r4 , r6, 0x0000 + offset;       \
+2:     lwi     r19, r6, 0x0004 + offset;       \
+3:     lwi     r20, r6, 0x0008 + offset;       \
+4:     lwi     r21, r6, 0x000C + offset;       \
+5:     lwi     r22, r6, 0x0010 + offset;       \
+6:     lwi     r23, r6, 0x0014 + offset;       \
+7:     lwi     r24, r6, 0x0018 + offset;       \
+8:     lwi     r25, r6, 0x001C + offset;       \
+9:     swi     r4 , r5, 0x0000 + offset;       \
+10:    swi     r19, r5, 0x0004 + offset;       \
+11:    swi     r20, r5, 0x0008 + offset;       \
+12:    swi     r21, r5, 0x000C + offset;       \
+13:    swi     r22, r5, 0x0010 + offset;       \
+14:    swi     r23, r5, 0x0014 + offset;       \
+15:    swi     r24, r5, 0x0018 + offset;       \
+16:    swi     r25, r5, 0x001C + offset;       \
+       .section __ex_table,"a";                \
+       .word   1b, 0f;                         \
+       .word   2b, 0f;                         \
+       .word   3b, 0f;                         \
+       .word   4b, 0f;                         \
+       .word   5b, 0f;                         \
+       .word   6b, 0f;                         \
+       .word   7b, 0f;                         \
+       .word   8b, 0f;                         \
+       .word   9b, 0f;                         \
+       .word   10b, 0f;                        \
+       .word   11b, 0f;                        \
+       .word   12b, 0f;                        \
+       .word   13b, 0f;                        \
+       .word   14b, 0f;                        \
+       .word   15b, 0f;                        \
+       .word   16b, 0f;                        \
+       .text
+
+#define COPY_80(offset)        \
+       COPY(0x00 + offset);\
+       COPY(0x20 + offset);\
+       COPY(0x40 + offset);\
+       COPY(0x60 + offset);
+
 /*
  * int __copy_tofrom_user(char *to, char *from, int len)
  * Return:
@@ -119,34 +163,79 @@ __copy_tofrom_user:
         * r7, r3 - count
         * r4 - tempval
         */
-       beqid   r7, 3f /* zero size is not likely */
-       andi    r3, r7, 0x3 /* filter add count */
-       bneid   r3, 4f /* if is odd value then byte copying */
+       beqid   r7, 0f /* zero size is not likely */
        or      r3, r5, r6 /* find if is any to/from unaligned */
-       andi    r3, r3, 0x3 /* mask unaligned */
-       bneid   r3, 1f /* it is unaligned -> then jump */
+       or      r3, r3, r7 /* find if count is unaligned */
+       andi    r3, r3, 0x3 /* mask last 3 bits */
+       bneid   r3, bu1 /* if r3 is not zero then byte copying */
+       or      r3, r0, r0
+
+       rsubi   r3, r7, PAGE_SIZE /* detect PAGE_SIZE */
+       beqid   r3, page;
        or      r3, r0, r0
 
-/* at least one 4 byte copy */
-5:     lw      r4, r6, r3
-6:     sw      r4, r5, r3
+w1:    lw      r4, r6, r3 /* at least one 4 byte copy */
+w2:    sw      r4, r5, r3
        addik   r7, r7, -4
-       bneid   r7, 5b
+       bneid   r7, w1
        addik   r3, r3, 4
        addik   r3, r7, 0
        rtsd    r15, 8
        nop
-4:     or      r3, r0, r0
-1:     lbu     r4,r6,r3
-2:     sb      r4,r5,r3
+
+       .section        __ex_table,"a"
+       .word   w1, 0f;
+       .word   w2, 0f;
+       .text
+
+.align 4 /* Alignment is important to keep icache happy */
+page:  /* Create room on stack and save registers for storing values */
+       addik   r1, r1, -32
+       swi     r19, r1, 4
+       swi     r20, r1, 8
+       swi     r21, r1, 12
+       swi     r22, r1, 16
+       swi     r23, r1, 20
+       swi     r24, r1, 24
+       swi     r25, r1, 28
+loop:  /* r4, r19, r20, r21, r22, r23, r24, r25 are used for storing values */
+       /* Loop unrolling to get performance boost */
+       COPY_80(0x000);
+       COPY_80(0x080);
+       COPY_80(0x100);
+       COPY_80(0x180);
+       /* copy loop */
+       addik   r6, r6, 0x200
+       addik   r7, r7, -0x200
+       bneid   r7, loop
+       addik   r5, r5, 0x200
+       /* Restore register content */
+       lwi     r19, r1, 4
+       lwi     r20, r1, 8
+       lwi     r21, r1, 12
+       lwi     r22, r1, 16
+       lwi     r23, r1, 20
+       lwi     r24, r1, 24
+       lwi     r25, r1, 28
+       addik   r1, r1, 32
+       /* return back */
+       addik   r3, r7, 0
+       rtsd    r15, 8
+       nop
+
+.align 4 /* Alignment is important to keep icache happy */
+bu1:   lbu     r4,r6,r3
+bu2:   sb      r4,r5,r3
        addik   r7,r7,-1
-       bneid   r7,1b
+       bneid   r7,bu1
        addik   r3,r3,1         /* delay slot */
-3:
+0:
        addik   r3,r7,0
        rtsd    r15,8
        nop
        .size   __copy_tofrom_user, . - __copy_tofrom_user
 
        .section        __ex_table,"a"
-       .word   1b,3b,2b,3b,5b,3b,6b,3b
+       .word   bu1, 0b;
+       .word   bu2, 0b;
+       .text
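
[Editorial note, not part of the commit] The new fast path only runs when source, destination and count are word-aligned and the count equals PAGE_SIZE; it then copies 0x200 bytes per outer iteration, each iteration expanded by COPY_80()/COPY() into runs of eight word loads followed by eight word stores so that r4 and r19-r25 stay busy. An approximate C rendering of that structure (the exception fixups, which make a faulting copy return the number of bytes left, are not modelled here):

/* editorial sketch -- approximate C equivalent; the real code is the assembly above */
#include <linux/types.h>

static void copy_page_unrolled(u32 *to, const u32 *from, size_t len /* == PAGE_SIZE */)
{
	while (len) {
		size_t i;

		/* one 0x200-byte outer iteration; the assembly does this
		 * eight words at a time via the COPY() macro */
		for (i = 0; i < 0x200 / sizeof(u32); i += 8) {
			to[i + 0] = from[i + 0];
			to[i + 1] = from[i + 1];
			to[i + 2] = from[i + 2];
			to[i + 3] = from[i + 3];
			to[i + 4] = from[i + 4];
			to[i + 5] = from[i + 5];
			to[i + 6] = from[i + 6];
			to[i + 7] = from[i + 7];
		}
		to   += 0x200 / sizeof(u32);
		from += 0x200 / sizeof(u32);
		len  -= 0x200;
	}
}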
diff --git a/arch/microblaze/lib/ucmpdi2.c b/arch/microblaze/lib/ucmpdi2.c
new file mode 100644
index 0000000..63ca105
--- /dev/null
+++ b/arch/microblaze/lib/ucmpdi2.c
@@ -0,0 +1,20 @@
+#include <linux/module.h>
+
+#include "libgcc.h"
+
+word_type __ucmpdi2(unsigned long long a, unsigned long long b)
+{
+       const DWunion au = {.ll = a};
+       const DWunion bu = {.ll = b};
+
+       if ((unsigned int) au.s.high < (unsigned int) bu.s.high)
+               return 0;
+       else if ((unsigned int) au.s.high > (unsigned int) bu.s.high)
+               return 2;
+       if ((unsigned int) au.s.low < (unsigned int) bu.s.low)
+               return 0;
+       else if ((unsigned int) au.s.low > (unsigned int) bu.s.low)
+               return 2;
+       return 1;
+}
+EXPORT_SYMBOL(__ucmpdi2);
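
[Editorial note, not part of the commit] __ucmpdi2() follows the libgcc convention for 64-bit unsigned comparison helpers: it returns 0 when a < b, 1 when a == b and 2 when a > b, and gcc emits calls to it on 32-bit targets when it does not open-code the comparison, which is why the kernel provides and exports its own copy. A sketch of the contract with hypothetical test values:

/* editorial sketch -- reference behaviour of the libgcc return convention */
#include <stdio.h>

static int ucmpdi2_ref(unsigned long long a, unsigned long long b)
{
	if (a < b)
		return 0;
	if (a > b)
		return 2;
	return 1;
}

int main(void)
{
	printf("%d %d %d\n",
	       ucmpdi2_ref(1ULL, 2ULL),          /* 0: a < b             */
	       ucmpdi2_ref(5ULL, 5ULL),          /* 1: a == b            */
	       ucmpdi2_ref(1ULL << 32, 1ULL));   /* 2: high word decides */
	return 0;
}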